Browse Source

Get ready to pull April data

Petra Lamborn 5 years ago
parent
commit
59c8bce9b4
3 changed files with 74 additions and 24 deletions
  1. 3
    0
      .gitignore
  2. 52
    18
      py/clustering.py
  3. 19
    6
      py/util.py

+ 3
- 0
.gitignore View File

@@ -111,3 +111,6 @@ tags
111 111
 # postgresql
112 112
 
113 113
 database.ini
114
+
115
+# pickle
116
+*.pkl

+ 52
- 18
py/clustering.py View File

@@ -1,4 +1,4 @@
1
-from util import getQuery
1
+from util import getQuery, pickleQuery
2 2
 import pandas as p
3 3
 import matplotlib.pyplot as plt
4 4
 import seaborn as sns
@@ -12,23 +12,57 @@ import seaborn as sns
12 12
 # 
13 13
 # qparams = ['%%1117', '20/04/2017']
14 14
 
15
+#query = """
16
+#SELECT read_date, period, AVG(kwh_tot) AS average
17
+#FROM public.coup_tall_april
18
+#GROUP BY read_date, period
19
+#ORDER BY read_date, period;
20
+#"""
21
+#
22
+#qparams = []
23
+#
24
+#df = getQuery(query, qparams)
25
+#
26
+#print(df.info())
27
+#
28
+#sns.set()
29
+#
30
+##sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
+#sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
32
+#
33
+#plt.show()
34
+
15 35
 query = """
16
-SELECT read_date, period, AVG(kwh_tot) AS average
17
-FROM public.coup_tall_april
18
-GROUP BY read_date, period
19
-ORDER BY read_date, period;
36
+SELECT comb.icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
37
+FROM
38
+(
39
+    SELECT read_time, icp_id
40
+    FROM
41
+    (
42
+        SELECT read_time 
43
+        FROM GENERATE_SERIES('2017-04-01 00:30:00'::timestamp, '2017-05-01 00:00:00'::timestamp, 
44
+            '30 minutes'::interval) read_time
45
+    ) AS tsdata CROSS JOIN
46
+    (
47
+        SELECT *
48
+        FROM
49
+        (
50
+            SELECT icp_id, COUNT(DISTINCT read_date) AS data_days 
51
+            FROM coup_prd.coupdatamaster
52
+            WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
53
+                AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
54
+                AND content_code = 'UN'
55
+            GROUP BY icp_id
56
+        ) AS cir 
57
+        WHERE data_days >= 360
58
+    ) AS qual_icp
59
+) AS comb
60
+LEFT JOIN
61
+(
62
+    SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
63
+    FROM public.coup_tall_april
64
+) AS tall_timestamp 
65
+ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
20 66
 """
21 67
 
22
-qparams = []
23
-
24
-df = getQuery(query, qparams)
25
-
26
-print(df.info())
27
-
28
-sns.set()
29
-
30
-#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
-sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
32
-
33
-plt.show()
34
-
68
+pickleQuery(query, "April.pkl")

+ 19
- 6
py/util.py View File

@@ -2,13 +2,15 @@ import psycopg2 as pg
2 2
 from configparser import ConfigParser
3 3
 import pandas.io.sql as psql
4 4
 
5
+
5 6
 def config(filename='database.ini', section='postgresql'):
6
-    """Config parser from http://www.postgresqltutorial.com/postgresql-python/connect/"""
7
+    """Config parser from
8
+    http://www.postgresqltutorial.com/postgresql-python/connect/"""
7 9
     # create a parser
8 10
     parser = ConfigParser()
9 11
     # read config file
10 12
     parser.read(filename)
11
- 
13
+
12 14
     # get section, default to postgresql
13 15
     db = {}
14 16
     if parser.has_section(section):
@@ -16,11 +18,13 @@ def config(filename='database.ini', section='postgresql'):
16 18
         for param in params:
17 19
             db[param[0]] = param[1]
18 20
     else:
19
-        raise Exception('Section {0} not found in the {1} file'.format(section, filename))
20
- 
21
+        raise Exception(
22
+            'Section {0} not found in the {1} file'.format(section, filename))
23
+
21 24
     return db
22 25
 
23
-def getQuery(query, qparams = []):
26
+
27
+def getQuery(query, qparams=[]):
24 28
     """
25 29
     Get single query
26 30
     """
@@ -35,10 +39,12 @@ def getQuery(query, qparams = []):
35 39
         cur = conn.cursor()
36 40
 
37 41
         # Get table
38
-        dataframe = psql.read_sql(query, conn, params = qparams)
42
+        dataframe = psql.read_sql(query, conn, params=qparams)
39 43
 
40 44
         cur.close()
41 45
 
46
+        print("Table recieved")
47
+
42 48
         return dataframe
43 49
 
44 50
     except (Exception, pg.DatabaseError) as error:
@@ -50,6 +56,13 @@ def getQuery(query, qparams = []):
50 56
             conn.close()
51 57
             print('Database connection closed')
52 58
 
59
+
60
+def pickleQuery(query, path, qparams=[]):
61
+    dq = getQuery(query, qparams)
62
+    dq.to_pickle(path)
63
+    print("Table pickled")
64
+
65
+
53 66
 if __name__ == "__main__":
54 67
     dv = getQuery('SELECT version()').version[0]
55 68
     print('PostgreSQL database version:')