Pārlūkot izejas kodu

Fix a very important error; continue aggregation

Petra Lamborn 5 gadus atpakaļ
vecāks
revīzija
422e98dc26
2 mainīti faili ar 8 papildinājumiem un 6 dzēšanām
  1. 7
    5
      py/agg.py
  2. 1
    1
      py/clustering.py

+ 7
- 5
py/agg.py Parādīt failu

2
 import pandas as p
2
 import pandas as p
3
 
3
 
4
 def aggregator(widedf, clusdf):
4
 def aggregator(widedf, clusdf):
5
-    clusters = clusdf.cluster.unique()
5
+    clusters = clusdf['cluster'].unique()
6
     clusters.sort()
6
     clusters.sort()
7
     aggv = {}
7
     aggv = {}
8
     qlow  = lambda x: x.quantile(0.250)
8
     qlow  = lambda x: x.quantile(0.250)
9
     qhigh = lambda x: x.quantile(0.750)
9
     qhigh = lambda x: x.quantile(0.750)
10
     for c in clusters:
10
     for c in clusters:
11
-        icps = clusters[clusters.cluster == c].icp_id.values
11
+        icps = clusdf[clusdf.cluster == c].icp_id.values
12
         subdf = widedf[icps]
12
         subdf = widedf[icps]
13
-        agged = subdf.agg('median', 'mean', qlow, qhigh)
13
+        agged = subdf.agg(func = 'median', axis = 1)
14
         print(agged)
14
         print(agged)
15
 
15
 
16
 
16
 
17
 def main():
17
 def main():
18
-    wd = p.read_pickle()
19
-    cd = p.read_pickle()
18
+    wd = p.read_pickle("../data/2017-5k-wide.pkl")
19
+    cd = p.read_pickle("../data/5kclustable.pkl")
20
+    aggregator(wd, cd)
21
+
20
 
22
 
21
 
23
 
22
 if __name__ == "__main__":
24
 if __name__ == "__main__":

+ 1
- 1
py/clustering.py Parādīt failu

72
 def main():
72
 def main():
73
     parser = ArgumentParser(description='Cluster from pre-existing distance correlation matrix in pickled dataframe')
73
     parser = ArgumentParser(description='Cluster from pre-existing distance correlation matrix in pickled dataframe')
74
     parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
74
     parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
75
-    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
75
+    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdclustable.pkl",  metavar="[PATH]", default = "../data/5kdclustable.pkl")
76
     parser.add_argument("--method", dest="method", help = "clustering method; default 'ward'", metavar = "[METHOD]", default = "ward")
76
     parser.add_argument("--method", dest="method", help = "clustering method; default 'ward'", metavar = "[METHOD]", default = "ward")
77
     parser.add_argument("--clusters",  dest="numclusters",  help = "number of clusters; default: 9", metavar = "[NUM]", default = 9, type = int)
77
     parser.add_argument("--clusters",  dest="numclusters",  help = "number of clusters; default: 9", metavar = "[NUM]", default = 9, type = int)
78
     parser.add_argument("-d", "--dendrogram", dest = "incdendro", help = "draw dendrogram", action ="store_true")
78
     parser.add_argument("-d", "--dendrogram", dest = "incdendro", help = "draw dendrogram", action ="store_true")