Browse Source

Add July and January "tall" table queries; switch clustering and download scripts to January data

Petra Lamborn 5 years ago
parent
commit
4292e8f941
3 changed files with 73 additions and 37 deletions
  1. 33
    33
      py/clustering.py
  2. 4
    4
      py/downkwh.py
  3. 36
    0
      sql/queries.pgsql

+ 33
- 33
py/clustering.py View File

@@ -34,7 +34,7 @@ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
34 34
 # 
35 35
 # plt.show()
36 36
 
37
-df = p.read_pickle('../data/April17s.pkl')
37
+df = p.read_pickle('../data/jan19s.pkl')
38 38
 dforig = df
39 39
 
40 40
 print(df.info())
@@ -53,36 +53,36 @@ lobj = linkage(lmat, method = 'ward')
53 53
 print(lobj)
54 54
 print(cophenet(lobj, lmat))
55 55
 
56
-plt.figure(figsize = (25, 10))
57
-plt.title('ICP Clustering Dendrogram')
58
-plt.xlabel('ICP ID/(Number of ICPs)')
59
-plt.ylabel('distance')
60
-dendrogram(
61
-    lobj,
62
-    labels = cmat.index.values,
63
-    leaf_rotation=90,
64
-    leaf_font_size=8,
65
-    #show_leaf_counts = True,
66
-    #truncate_mode = 'lastp',
67
-    #p = 50,
68
-    #show_contracted = True,
69
-    color_threshold = 2.1
70
-)
71
-plt.show()
72
-
73
-#clusts = fcluster(lobj, 5, criterion='maxclust')
74
-#print(clusts)
75
-#print(cmat.index.values)
76
-#clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : [chr(x + ord('A') - 1) for x in clusts]})
77
-#print(clustdf)
78
-#mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
79
-#print(mdf)
80
-#print(mdf.info())
81
-#print(mdf.cluster.describe())
82
-#
83
-#mdf.to_csv('~/windows/Documents/clusters-ward.csv')
84
-#
85
-#sns.set()
86
-#
87
-#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'cluster', data = mdf)
56
+#plt.figure(figsize = (25, 10))
57
+#plt.title('ICP Clustering Dendrogram')
58
+#plt.xlabel('ICP ID/(Number of ICPs)')
59
+#plt.ylabel('distance')
60
+#dendrogram(
61
+#    lobj,
62
+#    labels = cmat.index.values,
63
+#    leaf_rotation=90,
64
+#    leaf_font_size=8,
65
+#    #show_leaf_counts = True,
66
+#    #truncate_mode = 'lastp',
67
+#    #p = 50,
68
+#    #show_contracted = True,
69
+#    color_threshold = 1.9
70
+#)
88 71
 #plt.show()
72
+
73
+clusts = fcluster(lobj, 6, criterion='maxclust')
74
+print(clusts)
75
+print(cmat.index.values)
76
+clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : [chr(x + ord('A') - 1) for x in clusts]})
77
+print(clustdf)
78
+mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
79
+print(mdf)
80
+print(mdf.info())
81
+print(mdf.cluster.describe())
82
+
83
+mdf.to_csv('~/windows/Documents/clusters-ward.csv')
84
+
85
+sns.set()
86
+
87
+sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'cluster', data = mdf)
88
+plt.show()

+ 4
- 4
py/downkwh.py View File

@@ -11,7 +11,7 @@ FROM
11 11
     FROM
12 12
     (
13 13
         SELECT read_time 
14
-        FROM GENERATE_SERIES('2017-04-01 00:30:00'::timestamp, '2017-05-01 00:00:00'::timestamp, 
14
+        FROM GENERATE_SERIES('2017-01-01 00:30:00'::timestamp, '2017-02-01 00:00:00'::timestamp, 
15 15
             '30 minutes'::interval) read_time
16 16
     ) AS tsdata CROSS JOIN
17 17
     (
@@ -23,7 +23,7 @@ FROM
23 23
             WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
24 24
                 AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
25 25
                 AND content_code = 'UN'
26
-                AND icp_id LIKE '%%17'
26
+                AND icp_id LIKE '%%19'
27 27
             GROUP BY icp_id
28 28
         ) AS cir 
29 29
         WHERE data_days >= 360
@@ -32,9 +32,9 @@ FROM
32 32
 LEFT JOIN
33 33
 (
34 34
     SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
35
-    FROM public.coup_tall_april
35
+    FROM public.coup_tall_jan
36 36
 ) AS tall_timestamp 
37 37
 ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
38 38
 """
39 39
 
40
-pickleQuery(query, "../data/April17s.pkl")
40
+pickleQuery(query, "../data/jan19s.pkl")

+ 36
- 0
sql/queries.pgsql View File

@@ -262,3 +262,39 @@ LEFT JOIN
262 262
 ) AS tall_timestamp 
263 263
 ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id
264 264
 GROUP BY comb.icp_id;
265
+
266
+-- Move to July
267
+
268
+-- Create "tall" table for July 2017 (modified from the query above)
269
+CREATE TABLE public.coup_tall_july AS
270
+SELECT  a.icp_id
271
+     , a.read_date
272
+     , c.period
273
+     , sum(c.read_kwh) as kwh_tot
274
+     , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
275
+     , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
276
+FROM    coup_prd.coupdatamaster a,
277
+	unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
278
+WHERE   a.read_date >= to_date('01/07/2017','dd/mm/yyyy')
279
+ and   a.read_date <  to_date('01/08/2017','dd/mm/yyyy')
280
+ and   a.content_code  ~ ('UN|CN|EG')
281
+GROUP BY 1, 2, 3
282
+ORDER BY 1, 2, 3;
283
+
284
+-- Move to January
285
+
286
+-- Create "tall" table for January 2017 (modified from the query above)
287
+CREATE TABLE public.coup_tall_jan AS
288
+SELECT  a.icp_id
289
+     , a.read_date
290
+     , c.period
291
+     , sum(c.read_kwh) as kwh_tot
292
+     , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
293
+     , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
294
+FROM    coup_prd.coupdatamaster a,
295
+	unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
296
+WHERE   a.read_date >= to_date('01/01/2017','dd/mm/yyyy')
297
+ and   a.read_date <  to_date('01/02/2017','dd/mm/yyyy')
298
+ and   a.content_code  ~ ('UN|CN|EG')
299
+GROUP BY 1, 2, 3
300
+ORDER BY 1, 2, 3;