|
@@ -3,10 +3,12 @@ from argparse import ArgumentParser
|
3
|
3
|
import pandas as p
|
4
|
4
|
from tqdm import tqdm
|
5
|
5
|
|
6
|
|
-def aggregator(widedf, clusdf):
|
|
6
|
+def aggregator(widedf, clusdf, drop_misc = False):
|
7
|
7
|
"""Aggregate a (wide-form) dataframe by the cluster mappings in a second dataframe
|
8
|
8
|
"""
|
9
|
|
- clusters = clusdf['cluster'].unique()
|
|
9
|
+ clusters = list(clusdf['cluster'].unique())
|
|
10
|
+ if drop_misc and -1 in clusters:
|
|
11
|
+ clusters.remove(-1)
|
10
|
12
|
clusters.sort()
|
11
|
13
|
dflis = []
|
12
|
14
|
qlow = lambda x: x.quantile(0.250)
|
|
@@ -35,6 +37,7 @@ def main():
|
35
|
37
|
parser.add_argument("-i", "--input", dest="input", help = "input pickle path", metavar="PATH", required = True)
|
36
|
38
|
parser.add_argument("-c", "--clusters", dest="clusfile", help = "cluster pickle path", metavar="PATH", required = True)
|
37
|
39
|
parser.add_argument("-o", "--output", dest="output", help = "output pickle path", metavar="PATH", required = True)
|
|
40
|
+ parser.add_argument("-d", "--drop-misc", dest="drop_misc", help = "drop 'misc' (-1) pseudocluster", action = "store_true")
|
38
|
41
|
parser.add_argument("-p", "--pivot", dest = "istall", help = "input dataframe is in tall format and must be pivoted", action ="store_true")
|
39
|
42
|
args = parser.parse_args()
|
40
|
43
|
wd = p.read_pickle(args.input)
|
|
@@ -42,7 +45,7 @@ def main():
|
42
|
45
|
if (args.istall):
|
43
|
46
|
wd = wd.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
|
44
|
47
|
|
45
|
|
- agged = aggregator(wd, cd)
|
|
48
|
+ agged = aggregator(wd, cd, args.drop_misc)
|
46
|
49
|
agged.to_pickle(args.output)
|
47
|
50
|
|
48
|
51
|
|