|
@@ -0,0 +1,22 @@
|
|
1
|
# An algorithm for assigning a dataset to pre-existing clusters
import pandas as p

# Pre-existing aggregated clusters
clusfile = '../data/9-clusters.agg.pkl'

# A new dataset
ndsfile = '../data/2016-17-sample.pkl'

# NOTE: read_pickle unpickles whatever the file contains; only point the two
# paths above at trusted files.

# Reshape the long-format cluster table into wide form: one row per
# 'read_time', one column per 'cluster', each cell holding 'kwh_tot_mean'.
clusdf = p.read_pickle(clusfile)
clusdf = clusdf.pivot(index='read_time', columns='cluster', values='kwh_tot_mean')
# pivot() labels the column axis 'cluster'; clear that label. Assigning None
# is used instead of `del`, because Index.name is a property without a
# deleter in current pandas and `del clusdf.columns.name` raises
# AttributeError there.
clusdf.columns.name = None
# info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
clusdf.info()


# Reshape the new dataset the same way (rows: 'read_time', columns: 'icp_id',
# cells: 'kwh_tot'), then select the rows whose labels are the cluster
# table's index, in that order.
# NOTE(review): on recent pandas, .loc raises KeyError if any label of
# clusdf.index is absent from the new dataset -- confirm whether
# .reindex(clusdf.index) (NaN rows for missing labels) is wanted instead.
newdf = p.read_pickle(ndsfile).pivot(index='read_time',
                                     columns='icp_id',
                                     values='kwh_tot').loc[clusdf.index, :]
print(newdf)
newdf.info()
|