Browse Source

Several fixes for python scripts

Petra Lamborn 5 years ago
parent
commit
dd218c8faf
6 changed files with 22 additions and 23 deletions
  1. img/test1kdendro.png (binary)
  2. py/agg.py (+3 -3)
  3. py/clustering.py (+7 -7)
  4. py/dcorr.py (+2 -3)
  5. py/downkwh.py (+6 -6)
  6. py/downweather.py (+4 -4)

BIN
img/test1kdendro.png View File


+ 3
- 3
py/agg.py View File

@@ -32,9 +32,9 @@ def aggregator(widedf, clusdf):
32 32
 
33 33
 def main():
34 34
     parser = ArgumentParser(description='Aggregate dataframe by specified clusters')
35
-    parser.add_argument("-i", "--input",  dest="input",      help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
36
-    parser.add_argument("-c", "--clusters", dest="clusfile", help = "cluster pickle path; default: ../data/5kclustable.pkl", metavar="[PATH]", default = "../data/5kclustable.pkl")
37
-    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/5k-ag.pkl", metavar="[PATH]", default = "../data/5k-ag.pkl")
35
+    parser.add_argument("-i", "--input",  dest="input",      help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="PATH", default = "../data/2017-5k-wide.pkl")
36
+    parser.add_argument("-c", "--clusters", dest="clusfile", help = "cluster pickle path; default: ../data/5kclustable.pkl", metavar="PATH", default = "../data/5kclustable.pkl")
37
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/5k-ag.pkl", metavar="PATH", default = "../data/5k-ag.pkl")
38 38
     parser.add_argument("-p", "--pivot", dest = "istall",    help = "input dataframe is in tall format and must be pivoted", action ="store_true")
39 39
     args = parser.parse_args()
40 40
     wd = p.read_pickle(args.input)

+ 7
- 7
py/clustering.py View File

@@ -19,8 +19,8 @@ def cluster(dcmat, method, nclusters):
19 19
     """
20 20
     lmat = squareform(dcmat)
21 21
     lobj = linkage(lmat, method = method)
22
-    clabs = [x + 1 for x in range(numclusts)]
23
-    clusts = fcluster(lobj, numclusts, criterion='maxclust')
22
+    clabs = [x + 1 for x in range(nclusters)]
23
+    clusts = fcluster(lobj, nclusters, criterion='maxclust')
24 24
     clustdf = p.DataFrame({'icp_id' : dcmat.index.values, 'cluster' : clusts})
25 25
     return lobj, clustdf
26 26
 
@@ -55,12 +55,12 @@ def dendro(lobj, clustdf, numclusts, icps, fname):
55 55
 
56 56
 def main():
57 57
     parser = ArgumentParser(description='Cluster from pre-existing distance correlation matrix in pickled dataframe')
58
-    parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
59
-    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdclustable.pkl",  metavar="[PATH]", default = "../data/5kdclustable.pkl")
60
-    parser.add_argument("--method", dest="method", help = "clustering method; default 'ward'", metavar = "[METHOD]", default = "ward")
61
-    parser.add_argument("--clusters",  dest="numclusters",  help = "number of clusters; default: 9", metavar = "[NUM]", default = 9, type = int)
58
+    parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/5kdcorrmatrix.pkl",  metavar="PATH", default = "../data/5kdcorrmatrix.pkl")
59
+    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdclustable.pkl",  metavar="PATH", default = "../data/5kdclustable.pkl")
60
+    parser.add_argument("--method", dest="method", help = "clustering method; default 'ward'", metavar = "METHOD", default = "ward")
61
+    parser.add_argument("--clusters",  dest="numclusters",  help = "number of clusters; default: 9", metavar = "NUM", default = 9, type = int)
62 62
     parser.add_argument("-d", "--dendrogram", dest = "incdendro", help = "draw dendrogram", action ="store_true")
63
-    parser.add_argument("-t", "--tree", dest="treepath", help="Filename for dendrogram (if -d), default: ../img/59-9-dendro.png", metavar="[PATH]", default = "../img/5k-9-dendro.png")
63
+    parser.add_argument("-t", "--tree", dest="treepath", help="Filename for dendrogram (if -d), default: ../img/59-9-dendro.png", metavar="PATH", default = "../img/5k-9-dendro.png")
64 64
     args = parser.parse_args()
65 65
 
66 66
     print("Clustering")

+ 2
- 3
py/dcorr.py View File

@@ -20,7 +20,6 @@ def tqcorr(df):
20 20
         dv = 1 - df[c1].corr(df[c2])
21 21
         cdf.loc[c1, c2] = dv
22 22
         cdf.loc[c2, c1] = dv
23
-    print(cdf.info())
24 23
     return cdf
25 24
 
26 25
 
@@ -37,8 +36,8 @@ def createCorr(source, output, piv):
37 36
 
38 37
 if __name__ == "__main__":
39 38
     parser = ArgumentParser(description='Create distance correlation matrix from pickled wideform pandas dataframe')
40
-    parser.add_argument("-i", "--input",  dest="input",   help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
41
-    parser.add_argument("-o", "--output", dest="output",  help = "output pickle path; default: ../data/5kdcorrmatrix.pkl", metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
39
+    parser.add_argument("-i", "--input",  dest="input",   help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="PATH", default = "../data/2017-5k-wide.pkl")
40
+    parser.add_argument("-o", "--output", dest="output",  help = "output pickle path; default: ../data/5kdcorrmatrix.pkl", metavar="PATH", default = "../data/5kdcorrmatrix.pkl")
42 41
     parser.add_argument("-p", "--pivot", dest = "istall", help = "input dataframe is in tall format and must be pivoted", action ="store_true")
43 42
     args = parser.parse_args()
44 43
     createCorr(args.input, args.output, args.istall)

+ 6
- 6
py/downkwh.py View File

@@ -112,12 +112,12 @@ def collateddownload(startd, endd, numdivis, icp_tab, pivot, verbose):
112 112
 
113 113
 
114 114
 def main():
115
-    parser = ArgumentParser(description='Download kwh data from dataframe')
116
-    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2017-5k-wide.pkl", metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
117
-    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2017-01-01", metavar="[DATE]", default = "2017-01-01", type = datevalid)
118
-    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2018-01-01", metavar="[DATE]", default = "2018-01-01", type = datevalid)
119
-    parser.add_argument("-t", "--table", dest = "table", help = "table for download (constrained to specific values in source); default: public.icp_sample", metavar="[TABLE]", default = "public.icp_sample", choices = tables)
120
-    parser.add_argument("-n", "--num-div",  dest="numdiv",  help = "number of segments to divide download into", metavar = "[NUM]", default = 12, type = int)
115
+    parser = ArgumentParser(description='Download kwh data from database')
116
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2017-5k-wide.pkl", metavar="PATH", default = "../data/2017-5k-wide.pkl")
117
+    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2017-01-01", metavar="DATE", default = "2017-01-01", type = datevalid)
118
+    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2018-01-01", metavar="DATE", default = "2018-01-01", type = datevalid)
119
+    parser.add_argument("-t", "--table", dest = "table", help = "table for download (constrained to specific values in source); default: public.icp_sample", metavar="TABLE", default = "public.icp_sample", choices = tables)
120
+    parser.add_argument("-n", "--num-div",  dest="numdiv",  help = "number of segments to divide download into", metavar = "NUM", default = 12, type = int)
121 121
     parser.add_argument("--no-pivot", dest = "pivot",    help = "output dataframe in tall (non-pivoted) format", action ="store_false")
122 122
     parser.add_argument("-v", "--verbose", dest = "verbose", action ="store_true")
123 123
     args = parser.parse_args()

+ 4
- 4
py/downweather.py View File

@@ -5,10 +5,10 @@ import pandas as p
5 5
 
6 6
 def main():
7 7
     parser = ArgumentParser(description='Download kwh data from dataframe')
8
-    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2016-18-weather.pkl", metavar="[PATH]", default = "../data/2016-18-weather.pkl")
9
-    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2016-04-01", metavar="[DATE]", default = "2016-04-01", type = datevalid)
10
-    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2019-01-01", metavar="[DATE]", default = "2019-01-01", type = datevalid)
11
-    parser.add_argument("--station", dest = "station", help = "weather station to get data from; default: 2006", metavar="[STATION]", default = "2006")
8
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2016-18-weather.pkl", metavar="PATH", default = "../data/2016-18-weather.pkl")
9
+    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2016-04-01", metavar="DATE", default = "2016-04-01", type = datevalid)
10
+    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2019-01-01", metavar="DATE", default = "2019-01-01", type = datevalid)
11
+    parser.add_argument("--station", dest = "station", help = "weather station to get data from; default: 2006", metavar="STATION", default = "2006")
12 12
     parser.add_argument("-v", "--verbose", dest = "verbose", action ="store_true")
13 13
     args = parser.parse_args()
14 14
     tempdata = gettemp(args.startdate.strftime("%Y-%m-%d"), args.enddate.strftime("%Y-%m-%d"), args.station, args.verbose)