|
@@ -8,6 +8,12 @@ clusfile = '../data/9-clusters.agg.pkl'
|
8
|
8
|
# A new dataset
|
9
|
9
|
ndsfile = '../data/2016-17-sample.pkl'
|
10
|
10
|
|
|
11
|
+# Table of assigned clusters
|
|
12
|
+aclusfile = '../data/1617-asgn-table.pkl'
|
|
13
|
+
|
|
14
|
+# Aggregated dataset
|
|
15
|
+aggfile = '../data/1617-agg.pkl'
|
|
16
|
+
|
11
|
17
|
|
12
|
18
|
clusdf = p.read_pickle(clusfile)
|
13
|
19
|
clusdf = clusdf.pivot(index = 'read_time', columns = 'cluster', values = 'kwh_tot_mean')
|
|
@@ -44,14 +50,28 @@ for i in icps:
|
44
|
50
|
print('ICP {} has constant value; assigning to cluster -1'.format(i))
|
45
|
51
|
clusdict[i] = bestc
|
46
|
52
|
|
47
|
|
-# Need to make sure cluster is integer ZZZ
|
48
|
53
|
newclusdf = p.DataFrame.from_dict(clusdict, orient = 'index', columns = ['cluster'])
|
49
|
54
|
newclusdf.index.name = 'icp_id'
|
50
|
55
|
newclusdf = newclusdf.reset_index()
|
51
|
|
-# print(newclusdf)
|
|
56
|
+print(newclusdf)
|
|
57
|
+newclusdf.to_pickle(aclusfile)
|
|
58
|
+
|
52
|
59
|
|
53
|
60
|
newdf = p.melt(newdf.reset_index(), 'read_time', var_name = 'icp_id', value_name = 'kwh')
|
54
|
61
|
|
55
|
|
-######## ZZZ Something isn't working here
|
56
|
|
-anndf = newdf.set_index('icp_id').join(newclusdf)
|
|
62
|
+# DataFrame.info() writes its summary to stdout itself and returns None,
+# so it is called directly rather than wrapped in print().
+newdf.info()
|
|
63
|
+# info() prints on its own and returns None; print() would add a stray "None".
+newclusdf.info()
|
|
64
|
+
|
|
65
|
+# Attach each reading's cluster label. DataFrame.join matches on the other
+# frame's index, and newclusdf carries icp_id as a column after reset_index(),
+# so both sides are indexed on icp_id before joining.
+anndf = newdf.set_index('icp_id').join(newclusdf.set_index('icp_id')).reset_index()
|
57
|
66
|
print(anndf)
|
|
67
|
+
|
|
68
|
+# Lower-quartile helper (25th percentile); qhigh is the 75th-percentile twin.
+# NOTE(review): the aggregation labels these 'CI_low'/'CI_high', but they are
+# interquartile bounds, not a confidence interval - confirm the naming is intended.
+qlow = lambda x: x.quantile(0.250)
|
|
69
|
+qhigh = lambda x: x.quantile(0.750)
|
|
70
|
+newagg = anndf.groupby(['read_time', 'cluster']).agg({
|
|
71
|
+ 'kwh': ['median', 'mean', ('CI_low', qlow), ('CI_high', qhigh)]
|
|
72
|
+})
|
|
73
|
+# Flatten the two-level ('kwh', <stat>) column labels into single names such
+# as 'kwh_tot_mean'. Iterating the columns directly yields the label tuples,
+# so no ravel() call is needed (its return type has differed across pandas versions).
+newagg.columns = ['_tot_'.join(x) for x in newagg.columns]
|
|
74
|
+newagg = newagg.reset_index()
|
|
75
|
+
|
|
76
|
+print(newagg)
|
|
77
|
+newagg.to_pickle(aggfile)
|