Browse Source

Fix a very important error; continue aggregation

Petra Lamborn 5 years ago
parent
commit
200dfde9ab
2 changed files with 8 additions and 6 deletions
  1. py/agg.py (7 additions, 5 deletions)
  2. py/clustering.py (1 addition, 1 deletion)

+ 7
- 5
py/agg.py View File

@@ -2,21 +2,23 @@
2 2
 import pandas as p
3 3
 
4 4
 def aggregator(widedf, clusdf):
5
-    clusters = clusdf.cluster.unique()
5
+    clusters = clusdf['cluster'].unique()
6 6
     clusters.sort()
7 7
     aggv = {}
8 8
     qlow  = lambda x: x.quantile(0.250)
9 9
     qhigh = lambda x: x.quantile(0.750)
10 10
     for c in clusters:
11
-        icps = clusters[clusters.cluster == c].icp_id.values
11
+        icps = clusdf[clusdf.cluster == c].icp_id.values
12 12
         subdf = widedf[icps]
13
-        agged = subdf.agg('median', 'mean', qlow, qhigh)
13
+        agged = subdf.agg(func = 'median', axis = 1)
14 14
         print(agged)
15 15
 
16 16
 
17 17
 def main():
18
-    wd = p.read_pickle()
19
-    cd = p.read_pickle()
18
+    wd = p.read_pickle("../data/2017-5k-wide.pkl")
19
+    cd = p.read_pickle("../data/5kclustable.pkl")
20
+    aggregator(wd, cd)
21
+
20 22
 
21 23
 
22 24
 if __name__ == "__main__":

+ 1
- 1
py/clustering.py View File

@@ -72,7 +72,7 @@ def dendro(lobj, clustdf, numclusts, icps, fname):
72 72
 def main():
73 73
     parser = ArgumentParser(description='Cluster from pre-existing distance correlation matrix in pickled dataframe')
74 74
     parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
75
-    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
75
+    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdclustable.pkl",  metavar="[PATH]", default = "../data/5kdclustable.pkl")
76 76
     parser.add_argument("--method", dest="method", help = "clustering method; default 'ward'", metavar = "[METHOD]", default = "ward")
77 77
     parser.add_argument("--clusters",  dest="numclusters",  help = "number of clusters; default: 9", metavar = "[NUM]", default = 9, type = int)
78 78
     parser.add_argument("-d", "--dendrogram", dest = "incdendro", help = "draw dendrogram", action ="store_true")