Browse Source

Data back to April 2016

Petra Lamborn 5 years ago
parent
commit
a5715417bc
4 changed files with 35 additions and 30 deletions
  1. py/clustering.py (+27, -27)
  2. py/downkwh.py (+2, -2)
  3. py/util.py (+2, -1)
  4. sql/queries.pgsql (+4, -0)

+ 27
- 27
py/clustering.py View File

36
 # plt.show()
36
 # plt.show()
37
 
37
 
38
 numclusts = 9
38
 numclusts = 9
39
-df = p.read_pickle('../data/2017-sample.pkl')
39
+df = p.read_pickle('../data/2016-17-sample.pkl')
40
 dforig = df
40
 dforig = df
41
 
41
 
42
 print(df.info())
42
 print(df.info())
81
 print(mdagg.describe())
81
 print(mdagg.describe())
82
 # mdf.to_csv('~/windows/Documents/clusters-ward.csv')
82
 # mdf.to_csv('~/windows/Documents/clusters-ward.csv')
83
 print("Saving")
83
 print("Saving")
84
-mdf.to_pickle('../data/9-clusters.pkl')
85
-mdagg.to_pickle('../data/9-clusters.agg.pkl')
84
+mdf.to_pickle('../data/9-clusters-1617.pkl')
85
+mdagg.to_pickle('../data/9-clusters-1617.agg.pkl')
86
 print("saved")
86
 print("saved")
87
 
87
 
88
 # Algorithm via 
88
 # Algorithm via 
89
 # <https://stackoverflow.com/questions/38153829/custom-cluster-colors-of-scipy-dendrogram-in-python-link-color-func>
89
 # <https://stackoverflow.com/questions/38153829/custom-cluster-colors-of-scipy-dendrogram-in-python-link-color-func>
90
-# ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
91
-# link_cols = {}
92
-# for i, i12 in enumerate(lobj[:,:2].astype(int)):
93
-#   c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
94
-#     for x in i12)
95
-#   link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
96
-# 
97
-# plt.figure(figsize = (25, 10))
98
-# plt.title('ICP Clustering Dendrogram')
99
-# plt.xlabel('ICP ID/(Number of ICPs)')
100
-# plt.ylabel('distance')
101
-# dendrogram(
102
-#     lobj,
103
-#     labels = cmat.index.values,
104
-#     leaf_rotation=90,
105
-#     leaf_font_size=8,
106
-#     # show_leaf_counts = True,
107
-#     # truncate_mode = 'lastp',
108
-#     # p = 50,
109
-#     # show_contracted = True,
110
-#     link_color_func = lambda x: link_cols[x],
111
-#     color_threshold = None
112
-# )
113
-# plt.show()
90
+ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
91
+link_cols = {}
92
+for i, i12 in enumerate(lobj[:,:2].astype(int)):
93
+  c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
94
+    for x in i12)
95
+  link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
96
+
97
+plt.figure(figsize = (25, 10))
98
+plt.title('ICP Clustering Dendrogram')
99
+plt.xlabel('ICP ID/(Number of ICPs)')
100
+plt.ylabel('distance')
101
+dendrogram(
102
+    lobj,
103
+    labels = cmat.index.values,
104
+    leaf_rotation=90,
105
+    leaf_font_size=8,
106
+    # show_leaf_counts = True,
107
+    # truncate_mode = 'lastp',
108
+    # p = 50,
109
+    # show_contracted = True,
110
+    link_color_func = lambda x: link_cols[x],
111
+    color_threshold = None
112
+)
113
+plt.show()
114
 
114
 
115
 # sns.set()
115
 # sns.set()
116
 # 
116
 # 

+ 2
- 2
py/downkwh.py View File

39
 # 
39
 # 
40
 # pickleQuery(query, "../data/jan19s.pkl")
40
 # pickleQuery(query, "../data/jan19s.pkl")
41
 
41
 
42
-kwhdata = getkwh('2017-01-01', '2018-01-01', '2017-01-01 00:30:00', '2018-01-01 00:00:00', '%%1')
42
+kwhdata = getkwh('2016-04-01', '2018-01-01', '2016-04-01 00:30:00', '2018-01-01 00:00:00', '%%1')
43
 
43
 
44
 print(kwhdata.info())
44
 print(kwhdata.info())
45
 
45
 
46
 print("Pickling")
46
 print("Pickling")
47
-kwhdata.to_pickle("../data/2017-sample.pkl")
47
+kwhdata.to_pickle("../data/2016-17-sample.pkl")

+ 2
- 1
py/util.py View File

3
 import pandas.io.sql as psql
3
 import pandas.io.sql as psql
4
 import datetime as dt
4
 import datetime as dt
5
 import numpy as np
5
 import numpy as np
6
+from pprint import pprint
6
 
7
 
7
 
8
 
8
 def config(filename='database.ini', section='postgresql'):
9
 def config(filename='database.ini', section='postgresql'):
110
         # 'subset': subset
111
         # 'subset': subset
111
     }
112
     }
112
     print("Getting data with parameters:")
113
     print("Getting data with parameters:")
113
-    print(pdict)
114
+    pprint(pdict)
114
     qdf = getQuery(query, pdict)
115
     qdf = getQuery(query, pdict)
115
     print("Optimising")
116
     print("Optimising")
116
     qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
117
     qdf['icp_id'] = qdf['icp_id'].astype(np.int32)

+ 4
- 0
sql/queries.pgsql View File

364
 SELECT * FROM public.best_icp
364
 SELECT * FROM public.best_icp
365
 ORDER BY random()
365
 ORDER BY random()
366
 LIMIT 1000;
366
 LIMIT 1000;
367
+
368
+-- range of date values
369
+SELECT MIN(read_date) AS mindate, MAX(read_date) AS maxdate
370
+FROM coup_prd.coupdatamaster;