Browse Source

Data back to April 2016

Petra Lamborn 5 years ago
parent
commit
a5715417bc
4 changed files with 35 additions and 30 deletions
  1. py/clustering.py (+27 -27)
  2. py/downkwh.py (+2 -2)
  3. py/util.py (+2 -1)
  4. sql/queries.pgsql (+4 -0)

+ 27
- 27
py/clustering.py View File

@@ -36,7 +36,7 @@ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
36 36
 # plt.show()
37 37
 
38 38
 numclusts = 9
39
-df = p.read_pickle('../data/2017-sample.pkl')
39
+df = p.read_pickle('../data/2016-17-sample.pkl')
40 40
 dforig = df
41 41
 
42 42
 print(df.info())
@@ -81,36 +81,36 @@ print(mdagg.info())
81 81
 print(mdagg.describe())
82 82
 # mdf.to_csv('~/windows/Documents/clusters-ward.csv')
83 83
 print("Saving")
84
-mdf.to_pickle('../data/9-clusters.pkl')
85
-mdagg.to_pickle('../data/9-clusters.agg.pkl')
84
+mdf.to_pickle('../data/9-clusters-1617.pkl')
85
+mdagg.to_pickle('../data/9-clusters-1617.agg.pkl')
86 86
 print("saved")
87 87
 
88 88
 # Algorithm via 
89 89
 # <https://stackoverflow.com/questions/38153829/custom-cluster-colors-of-scipy-dendrogram-in-python-link-color-func>
90
-# ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
91
-# link_cols = {}
92
-# for i, i12 in enumerate(lobj[:,:2].astype(int)):
93
-#   c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
94
-#     for x in i12)
95
-#   link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
96
-# 
97
-# plt.figure(figsize = (25, 10))
98
-# plt.title('ICP Clustering Dendrogram')
99
-# plt.xlabel('ICP ID/(Number of ICPs)')
100
-# plt.ylabel('distance')
101
-# dendrogram(
102
-#     lobj,
103
-#     labels = cmat.index.values,
104
-#     leaf_rotation=90,
105
-#     leaf_font_size=8,
106
-#     # show_leaf_counts = True,
107
-#     # truncate_mode = 'lastp',
108
-#     # p = 50,
109
-#     # show_contracted = True,
110
-#     link_color_func = lambda x: link_cols[x],
111
-#     color_threshold = None
112
-# )
113
-# plt.show()
90
+ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
91
+link_cols = {}
92
+for i, i12 in enumerate(lobj[:,:2].astype(int)):
93
+  c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
94
+    for x in i12)
95
+  link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
96
+
97
+plt.figure(figsize = (25, 10))
98
+plt.title('ICP Clustering Dendrogram')
99
+plt.xlabel('ICP ID/(Number of ICPs)')
100
+plt.ylabel('distance')
101
+dendrogram(
102
+    lobj,
103
+    labels = cmat.index.values,
104
+    leaf_rotation=90,
105
+    leaf_font_size=8,
106
+    # show_leaf_counts = True,
107
+    # truncate_mode = 'lastp',
108
+    # p = 50,
109
+    # show_contracted = True,
110
+    link_color_func = lambda x: link_cols[x],
111
+    color_threshold = None
112
+)
113
+plt.show()
114 114
 
115 115
 # sns.set()
116 116
 # 

+ 2
- 2
py/downkwh.py View File

@@ -39,9 +39,9 @@ import seaborn as sns
39 39
 # 
40 40
 # pickleQuery(query, "../data/jan19s.pkl")
41 41
 
42
-kwhdata = getkwh('2017-01-01', '2018-01-01', '2017-01-01 00:30:00', '2018-01-01 00:00:00', '%%1')
42
+kwhdata = getkwh('2016-04-01', '2018-01-01', '2016-04-01 00:30:00', '2018-01-01 00:00:00', '%%1')
43 43
 
44 44
 print(kwhdata.info())
45 45
 
46 46
 print("Pickling")
47
-kwhdata.to_pickle("../data/2017-sample.pkl")
47
+kwhdata.to_pickle("../data/2016-17-sample.pkl")

+ 2
- 1
py/util.py View File

@@ -3,6 +3,7 @@ from configparser import ConfigParser
3 3
 import pandas.io.sql as psql
4 4
 import datetime as dt
5 5
 import numpy as np
6
+from pprint import pprint
6 7
 
7 8
 
8 9
 def config(filename='database.ini', section='postgresql'):
@@ -110,7 +111,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
110 111
         # 'subset': subset
111 112
     }
112 113
     print("Getting data with parameters:")
113
-    print(pdict)
114
+    pprint(pdict)
114 115
     qdf = getQuery(query, pdict)
115 116
     print("Optimising")
116 117
     qdf['icp_id'] = qdf['icp_id'].astype(np.int32)

+ 4
- 0
sql/queries.pgsql View File

@@ -364,3 +364,7 @@ CREATE TABLE public.icp_sample AS
364 364
 SELECT * FROM public.best_icp
365 365
 ORDER BY random()
366 366
 LIMIT 1000;
367
+
368
+-- range of date values
369
+SELECT MIN(read_date) AS mindate, MAX(read_date) AS maxdate
370
+FROM coup_prd.coupdatamaster;