|
@@ -34,7 +34,7 @@ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
|
34
|
34
|
#
|
35
|
35
|
# plt.show()
|
36
|
36
|
|
37
|
|
-df = p.read_pickle('../data/April17s.pkl')
|
|
37
|
+df = p.read_pickle('../data/jan19s.pkl')
|
38
|
38
|
dforig = df
|
39
|
39
|
|
40
|
40
|
print(df.info())
|
|
@@ -53,36 +53,36 @@ lobj = linkage(lmat, method = 'ward')
|
53
|
53
|
print(lobj)
|
54
|
54
|
print(cophenet(lobj, lmat))
|
55
|
55
|
|
56
|
|
-plt.figure(figsize = (25, 10))
|
57
|
|
-plt.title('ICP Clustering Dendrogram')
|
58
|
|
-plt.xlabel('ICP ID/(Number of ICPs)')
|
59
|
|
-plt.ylabel('distance')
|
60
|
|
-dendrogram(
|
61
|
|
- lobj,
|
62
|
|
- labels = cmat.index.values,
|
63
|
|
- leaf_rotation=90,
|
64
|
|
- leaf_font_size=8,
|
65
|
|
- #show_leaf_counts = True,
|
66
|
|
- #truncate_mode = 'lastp',
|
67
|
|
- #p = 50,
|
68
|
|
- #show_contracted = True,
|
69
|
|
- color_threshold = 2.1
|
70
|
|
-)
|
71
|
|
-plt.show()
|
72
|
|
-
|
73
|
|
-#clusts = fcluster(lobj, 5, criterion='maxclust')
|
74
|
|
-#print(clusts)
|
75
|
|
-#print(cmat.index.values)
|
76
|
|
-#clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : [chr(x + ord('A') - 1) for x in clusts]})
|
77
|
|
-#print(clustdf)
|
78
|
|
-#mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
|
79
|
|
-#print(mdf)
|
80
|
|
-#print(mdf.info())
|
81
|
|
-#print(mdf.cluster.describe())
|
82
|
|
-#
|
83
|
|
-#mdf.to_csv('~/windows/Documents/clusters-ward.csv')
|
84
|
|
-#
|
85
|
|
-#sns.set()
|
86
|
|
-#
|
87
|
|
-#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'cluster', data = mdf)
|
|
56
|
+#plt.figure(figsize = (25, 10))
|
|
57
|
+#plt.title('ICP Clustering Dendrogram')
|
|
58
|
+#plt.xlabel('ICP ID/(Number of ICPs)')
|
|
59
|
+#plt.ylabel('distance')
|
|
60
|
+#dendrogram(
|
|
61
|
+# lobj,
|
|
62
|
+# labels = cmat.index.values,
|
|
63
|
+# leaf_rotation=90,
|
|
64
|
+# leaf_font_size=8,
|
|
65
|
+# #show_leaf_counts = True,
|
|
66
|
+# #truncate_mode = 'lastp',
|
|
67
|
+# #p = 50,
|
|
68
|
+# #show_contracted = True,
|
|
69
|
+# color_threshold = 1.9
|
|
70
|
+#)
|
88
|
71
|
#plt.show()
|
|
72
|
+
|
|
73
|
+clusts = fcluster(lobj, 6, criterion='maxclust')
|
|
74
|
+print(clusts)
|
|
75
|
+print(cmat.index.values)
|
|
76
|
+clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : [chr(x + ord('A') - 1) for x in clusts]})
|
|
77
|
+print(clustdf)
|
|
78
|
+mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
|
|
79
|
+print(mdf)
|
|
80
|
+print(mdf.info())
|
|
81
|
+print(mdf.cluster.describe())
|
|
82
|
+
|
|
83
|
+mdf.to_csv('~/windows/Documents/clusters-ward.csv')
|
|
84
|
+
|
|
85
|
+sns.set()
|
|
86
|
+
|
|
87
|
+sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'cluster', data = mdf)
|
|
88
|
+plt.show()
|