|
@@ -1,4 +1,5 @@
|
1
|
1
|
from util import getQuery, pickleQuery
|
|
2
|
+import numpy as np
|
2
|
3
|
import pandas as p
|
3
|
4
|
import matplotlib.pyplot as plt
|
4
|
5
|
import seaborn as sns
|
|
@@ -34,7 +35,8 @@ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
|
34
|
35
|
#
|
35
|
36
|
# plt.show()
|
36
|
37
|
|
37
|
|
-df = p.read_pickle('../data/jan19s.pkl')
|
|
38
|
+# Number of flat clusters requested from fcluster; also sizes the colour palette.
+numclusts = 7
|
|
39
|
+# NOTE(review): read_pickle unpickles arbitrary objects - only load files
+# from a trusted source.
+df = p.read_pickle('../data/2017-20s.pkl')
|
38
|
40
|
dforig = df
|
39
|
41
|
|
40
|
42
|
print(df.info())
|
|
@@ -53,36 +55,62 @@ lobj = linkage(lmat, method = 'ward')
|
53
|
55
|
print(lobj)
|
54
|
56
|
print(cophenet(lobj, lmat))
|
55
|
57
|
|
56
|
|
-#plt.figure(figsize = (25, 10))
|
57
|
|
-#plt.title('ICP Clustering Dendrogram')
|
58
|
|
-#plt.xlabel('ICP ID/(Number of ICPs)')
|
59
|
|
-#plt.ylabel('distance')
|
60
|
|
-#dendrogram(
|
61
|
|
-# lobj,
|
62
|
|
-# labels = cmat.index.values,
|
63
|
|
-# leaf_rotation=90,
|
64
|
|
-# leaf_font_size=8,
|
65
|
|
-# #show_leaf_counts = True,
|
66
|
|
-# #truncate_mode = 'lastp',
|
67
|
|
-# #p = 50,
|
68
|
|
-# #show_contracted = True,
|
69
|
|
-# color_threshold = 1.9
|
70
|
|
-#)
|
71
|
|
-#plt.show()
|
72
|
58
|
|
73
|
|
-clusts = fcluster(lobj, 6, criterion='maxclust')
|
|
59
|
+
|
|
60
|
+# Cluster labels run from 1 to numclusts inclusive.
+clabs = list(range(1, numclusts + 1))
|
|
61
|
+# One hex colour per cluster label, taken from seaborn's "colorblind" palette.
+cpal = dict(zip(
+    clabs,
+    sns.color_palette("colorblind", numclusts).as_hex(),
+))
|
|
62
|
+
|
|
63
|
+# Cut the hierarchy into at most numclusts flat clusters.
+clusts = fcluster(lobj, numclusts, criterion='maxclust')
|
74
|
64
|
print(clusts)
|
75
|
65
|
print(cmat.index.values)
|
76
|
|
-clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : [chr(x + ord('A') - 1) for x in clusts]})
|
|
66
|
+clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : clusts})
|
77
|
67
|
print(clustdf)
|
78
|
68
|
mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
|
79
|
69
|
print(mdf)
|
80
|
70
|
print(mdf.info())
|
|
71
|
+qlow = lambda x: x.quantile(0.250)
|
|
72
|
+qhigh = lambda x: x.quantile(0.750)
|
81
|
73
|
print(mdf.cluster.describe())
|
|
74
|
+# Summarise kwh_tot per (read_time, cluster): median, mean and the lower and
+# upper quartiles. qlow/qhigh hard-code the 0.25 and 0.75 quantiles, so the
+# "CI_*" columns span the interquartile range. No extra keyword is passed to
+# agg(): none of the aggregators accepts one, and depending on the pandas
+# version a stray keyword is either ignored or forwarded and raises TypeError.
+mdagg = mdf.groupby(['read_time', 'cluster']).agg({
|
|
75
|
+ 'kwh_tot': ['median', 'mean', ('CI_low', qlow), ('CI_high', qhigh)]
|
|
76
|
+})
|
|
77
|
+# Flatten the (column, statistic) MultiIndex into names such as kwh_tot_mean;
+# iterating the MultiIndex already yields the tuples to join.
+mdagg.columns = ['_'.join(x) for x in mdagg.columns]
|
|
78
|
+mdagg = mdagg.reset_index()
|
|
79
|
+print(mdagg)
|
|
80
|
+print(mdagg.info())
|
|
81
|
+print(mdagg.describe())
|
|
82
|
+# mdf.to_csv('~/windows/Documents/clusters-ward.csv')
|
82
|
83
|
|
83
|
|
-mdf.to_csv('~/windows/Documents/clusters-ward.csv')
|
|
84
|
+# Link-colouring algorithm adapted from this Stack Overflow answer:
|
|
85
|
+# <https://stackoverflow.com/questions/38153829/custom-cluster-colors-of-scipy-dendrogram-in-python-link-color-func>
|
|
86
|
+# Map every icp_id to the hex colour of the cluster it was assigned to.
+ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
|
|
87
|
+# link_cols: merged-node id -> colour used for the link created by that merge.
+link_cols = {}
|
|
88
|
+# The first two columns of each lobj row are the ids of the two nodes joined
+# by merge i.
+for i, i12 in enumerate(lobj[:,:2].astype(int)):
|
|
89
|
+ # Ids above len(lobj) refer to earlier merges (already in link_cols);
+ # the rest are leaves, coloured via the icp_id at that row of clustdf.
+ c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
|
|
90
|
+ for x in i12)
|
|
91
|
+ # Merge i is stored under id i + 1 + len(lobj). Keep the shared colour,
+ # or use black when the two children have different colours.
+ link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
|
84
|
92
|
|
85
|
|
-sns.set()
|
|
93
|
+plt.figure(figsize = (25, 10))
|
|
94
|
+plt.title('ICP Clustering Dendrogram')
|
|
95
|
+plt.xlabel('ICP ID/(Number of ICPs)')
|
|
96
|
+plt.ylabel('distance')
|
|
97
|
+# Draw the dendrogram for lobj, labelling leaves with the values of cmat's index.
+dendrogram(
|
|
98
|
+ lobj,
|
|
99
|
+ labels = cmat.index.values,
|
|
100
|
+ leaf_rotation=90,
|
|
101
|
+ leaf_font_size=8,
|
|
102
|
+ # show_leaf_counts = True,
|
|
103
|
+ # truncate_mode = 'lastp',
|
|
104
|
+ # p = 50,
|
|
105
|
+ # show_contracted = True,
|
|
106
|
+ # Colour each link by looking its node id up in the precomputed link_cols.
+ link_color_func = lambda x: link_cols[x],
|
|
107
|
+ color_threshold = None
|
|
108
|
+)
|
|
109
|
+plt.show()
|
86
|
110
|
|
87
|
|
-sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'cluster', data = mdf)
|
|
111
|
+sns.set()
|
|
112
|
+# Mean kwh_tot over read_time, one line per cluster, using the cluster palette.
+ax = sns.lineplot(
+    x = 'read_time',
+    y = 'kwh_tot_mean',
+    hue = 'cluster',
+    data = mdagg,
+    palette = cpal,
+)
|
|
113
|
+# Shade the band between the CI_low and CI_high columns for each cluster.
+for c in clabs:
|
|
114
|
+    fds = mdagg[mdagg.cluster == c]
|
|
115
|
+    ax.fill_between(
+        fds.read_time.dt.to_pydatetime(),
+        fds.kwh_tot_CI_low,
+        fds.kwh_tot_CI_high,
+        alpha = 0.1,
+        color = cpal[c],
+    )
|
88
|
116
|
plt.show()
|