Browse Source

Get ready to pull April data

Petra Lamborn 5 years ago
parent
commit
59c8bce9b4
3 changed files with 74 additions and 24 deletions
  1. 3
    0
      .gitignore
  2. 52
    18
      py/clustering.py
  3. 19
    6
      py/util.py

+ 3
- 0
.gitignore View File

111
 # postgresql
111
 # postgresql
112
 
112
 
113
 database.ini
113
 database.ini
114
+
115
+# pickle
116
+*.pkl

+ 52
- 18
py/clustering.py View File

1
-from util import getQuery
1
+from util import getQuery, pickleQuery
2
 import pandas as p
2
 import pandas as p
3
 import matplotlib.pyplot as plt
3
 import matplotlib.pyplot as plt
4
 import seaborn as sns
4
 import seaborn as sns
12
 # 
12
 # 
13
 # qparams = ['%%1117', '20/04/2017']
13
 # qparams = ['%%1117', '20/04/2017']
14
 
14
 
15
+#query = """
16
+#SELECT read_date, period, AVG(kwh_tot) AS average
17
+#FROM public.coup_tall_april
18
+#GROUP BY read_date, period
19
+#ORDER BY read_date, period;
20
+#"""
21
+#
22
+#qparams = []
23
+#
24
+#df = getQuery(query, qparams)
25
+#
26
+#print(df.info())
27
+#
28
+#sns.set()
29
+#
30
+##sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
+#sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
32
+#
33
+#plt.show()
34
+
15
 query = """
35
 query = """
16
-SELECT read_date, period, AVG(kwh_tot) AS average
17
-FROM public.coup_tall_april
18
-GROUP BY read_date, period
19
-ORDER BY read_date, period;
36
+SELECT comb.icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
37
+FROM
38
+(
39
+    SELECT read_time, icp_id
40
+    FROM
41
+    (
42
+        SELECT read_time 
43
+        FROM GENERATE_SERIES('2017-04-01 00:30:00'::timestamp, '2017-05-01 00:00:00'::timestamp, 
44
+            '30 minutes'::interval) read_time
45
+    ) AS tsdata CROSS JOIN
46
+    (
47
+        SELECT *
48
+        FROM
49
+        (
50
+            SELECT icp_id, COUNT(DISTINCT read_date) AS data_days 
51
+            FROM coup_prd.coupdatamaster
52
+            WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
53
+                AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
54
+                AND content_code = 'UN'
55
+            GROUP BY icp_id
56
+        ) AS cir 
57
+        WHERE data_days >= 360
58
+    ) AS qual_icp
59
+) AS comb
60
+LEFT JOIN
61
+(
62
+    SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
63
+    FROM public.coup_tall_april
64
+) AS tall_timestamp 
65
+ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
20
 """
66
 """
21
 
67
 
22
-qparams = []
23
-
24
-df = getQuery(query, qparams)
25
-
26
-print(df.info())
27
-
28
-sns.set()
29
-
30
-#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
-sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
32
-
33
-plt.show()
34
-
68
+pickleQuery(query, "April.pkl")

+ 19
- 6
py/util.py View File

2
 from configparser import ConfigParser
2
 from configparser import ConfigParser
3
 import pandas.io.sql as psql
3
 import pandas.io.sql as psql
4
 
4
 
5
+
5
 def config(filename='database.ini', section='postgresql'):
6
 def config(filename='database.ini', section='postgresql'):
6
-    """Config parser from http://www.postgresqltutorial.com/postgresql-python/connect/"""
7
+    """Config parser from
8
+    http://www.postgresqltutorial.com/postgresql-python/connect/"""
7
     # create a parser
9
     # create a parser
8
     parser = ConfigParser()
10
     parser = ConfigParser()
9
     # read config file
11
     # read config file
10
     parser.read(filename)
12
     parser.read(filename)
11
- 
13
+
12
     # get section, default to postgresql
14
     # get section, default to postgresql
13
     db = {}
15
     db = {}
14
     if parser.has_section(section):
16
     if parser.has_section(section):
16
         for param in params:
18
         for param in params:
17
             db[param[0]] = param[1]
19
             db[param[0]] = param[1]
18
     else:
20
     else:
19
-        raise Exception('Section {0} not found in the {1} file'.format(section, filename))
20
- 
21
+        raise Exception(
22
+            'Section {0} not found in the {1} file'.format(section, filename))
23
+
21
     return db
24
     return db
22
 
25
 
23
-def getQuery(query, qparams = []):
26
+
27
+def getQuery(query, qparams=[]):
24
     """
28
     """
25
     Get single query
29
     Get single query
26
     """
30
     """
35
         cur = conn.cursor()
39
         cur = conn.cursor()
36
 
40
 
37
         # Get table
41
         # Get table
38
-        dataframe = psql.read_sql(query, conn, params = qparams)
42
+        dataframe = psql.read_sql(query, conn, params=qparams)
39
 
43
 
40
         cur.close()
44
         cur.close()
41
 
45
 
46
+        print("Table recieved")
47
+
42
         return dataframe
48
         return dataframe
43
 
49
 
44
     except (Exception, pg.DatabaseError) as error:
50
     except (Exception, pg.DatabaseError) as error:
50
             conn.close()
56
             conn.close()
51
             print('Database connection closed')
57
             print('Database connection closed')
52
 
58
 
59
+
60
+def pickleQuery(query, path, qparams=[]):
61
+    dq = getQuery(query, qparams)
62
+    dq.to_pickle(path)
63
+    print("Table pickled")
64
+
65
+
53
 if __name__ == "__main__":
66
 if __name__ == "__main__":
54
     dv = getQuery('SELECT version()').version[0]
67
     dv = getQuery('SELECT version()').version[0]
55
     print('PostgreSQL database version:')
68
     print('PostgreSQL database version:')