Browse Source

A mostly-done cluster assigning algorithm

Petra Lamborn 5 years ago
parent
commit
9defcc38aa
1 changed files with 35 additions and 0 deletions
  1. 35
    0
      py/clusAssign.py

+ 35
- 0
py/clusAssign.py View File

@@ -1,5 +1,6 @@
1 1
 # An algorithm for assigning a dataset to pre-existing clusters
2 2
 import pandas as p
3
+from pprint import pprint
3 4
 
4 5
 # Pre-existing aggregated clusters
5 6
 clusfile = '../data/9-clusters.agg.pkl'
@@ -20,3 +21,37 @@ newdf = p.read_pickle(ndsfile).pivot(index = 'read_time',
20 21
                                      values = 'kwh_tot').loc[clusdf.index, :]
21 22
 print(newdf)
22 23
 print(newdf.info())
24
+
25
+clusdict = {}  # maps each ICP (column label of newdf) to its assigned cluster label
26
+
27
+clusters = list(clusdf.columns)  # candidate cluster labels: one column of clusdf per cluster
28
+icps = list(newdf.columns)  # series to classify: one column of newdf per ICP
29
+
30
+print(clusters)
31
+# Assign each ICP to the cluster whose column correlates best (Pearson) with the ICP's series
32
+for i in icps:
33
+    bestc = -1  # sentinel cluster label meaning "not assigned"
34
+    s = newdf.loc[:, i]
35
+    if (s.min() != s.max()):  # a constant series has zero variance, so its Pearson correlation is undefined
36
+        bestr = -1  # lowest possible Pearson r; any larger correlation replaces it
37
+        for c in clusters:
38
+            thisr = s.corr(clusdf.loc[:, c], method = 'pearson')
39
+            if thisr > bestr:  # strict '>' keeps the first cluster on ties; a NaN correlation never compares greater
40
+                bestr = thisr
41
+                bestc = c
42
+        print('Assigning ICP {} to cluster {} with correlation {}.'.format(i, bestc, bestr))
43
+    else:
44
+        print('ICP {} has constant value; assigning to cluster -1'.format(i))
45
+    clusdict[i] = bestc  # stays -1 when the series is constant or no correlation exceeded -1
46
+
47
+# TODO: make sure the cluster column ends up integer-typed (it mixes the int sentinel -1 with labels taken from clusdf.columns)
48
+newclusdf = p.DataFrame.from_dict(clusdict, orient = 'index', columns = ['cluster'])  # one row per ICP, keyed by the dict keys
49
+newclusdf.index.name = 'icp_id'
50
+newclusdf = newclusdf.reset_index()  # icp_id becomes an ordinary column; the index is now a default RangeIndex
51
+# print(newclusdf)
52
+# Reshape newdf from wide (one column per ICP) to long: one row per (read_time, icp_id) pair with its kwh value
53
+newdf = p.melt(newdf.reset_index(), 'read_time', var_name = 'icp_id', value_name = 'kwh')
54
+
55
+######## ZZZ Something isn't working here
56
+anndf = newdf.set_index('icp_id').join(newclusdf)
57
+print(anndf)