5 years ago · 9defcc38aa
--- a/py/clusAssign.py
+++ b/py/clusAssign.py
@@ -1,5 +1,6 @@
 
				 # An algorithm for assigning a dataset to pre-existing clusters
			
 
				 import pandas as p
			
 
				+from pprint import pprint
			
 
				 
			
 
				 # Pre-existing aggregated clusters
			
 
				 clusfile = '../data/9-clusters.agg.pkl'
			
@@ -20,3 +21,37 @@ newdf = p.read_pickle(ndsfile).pivot(index = 'read_time',
 
				                                      values = 'kwh_tot').loc[clusdf.index, :]
			
 
				 # Show the pivoted dataset and then its structural summary.
				 print(newdf)
				 # DataFrame.info() writes its summary to stdout itself and returns None,
				 # so wrapping it in print() only appended a stray "None" line.
				 newdf.info()
			
 
				+
			
 
				# Candidate cluster labels (the columns of clusdf) and the ICPs to be
				# assigned (the columns of newdf).
				clusters = list(clusdf.columns)
				icps = list(newdf.columns)

				print(clusters)

				# Maps each ICP to the cluster column it correlates with best; -1 means
				# that no cluster was chosen for that ICP.
				clusdict = {}

				for icp in icps:
				    profile = newdf.loc[:, icp]
				    if profile.min() == profile.max():
				        # Flat series: skip the correlation search and record -1.
				        print('ICP {} has constant value; assigning to cluster -1'.format(icp))
				        clusdict[icp] = -1
				        continue
				    top_cluster, top_r = -1, -1
				    for label in clusters:
				        r = profile.corr(clusdf.loc[:, label], method = 'pearson')
				        # Strict '>' keeps the earliest cluster on ties, and a NaN
				        # correlation never replaces the current best.
				        if r > top_r:
				            top_cluster, top_r = label, r
				    print('Assigning ICP {} to cluster {} with correlation {}.'.format(icp, top_cluster, top_r))
				    clusdict[icp] = top_cluster
			
 
				+
			
 
				# Turn the clusdict mapping into a two-column table (icp_id, cluster)
				# sitting on a default integer index.
				# TODO: need to make sure the cluster column is integer.
				newclusdf = (
				    p.DataFrame.from_dict(clusdict, orient = 'index', columns = ['cluster'])
				    .rename_axis('icp_id')
				    .reset_index()
				)
			
 
				+
			
 
				# Reshape the wide table (one column per ICP) into long form: one row per
				# (read_time, icp_id) pair with the reading stored in 'kwh'.
				newdf = p.melt(newdf.reset_index(), 'read_time', var_name = 'icp_id', value_name = 'kwh')

				# Annotate every reading with the cluster of its ICP. DataFrame.join aligns
				# on the *index* of both frames, and newclusdf holds icp_id as an ordinary
				# column over a default integer index, so it must be indexed by icp_id
				# first. Joining it as-is matched nothing: 'cluster' came back all NaN and
				# a spurious NaN 'icp_id' column was added.
				anndf = newdf.set_index('icp_id').join(newclusdf.set_index('icp_id'))
				print(anndf)