# Aggregate given clusters
from argparse import ArgumentParser

import pandas as p

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; aggregation must still work when
    # tqdm is not installed, so fall back to a transparent pass-through.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged (stand-in for ``tqdm.tqdm``)."""
        return iterable


def _aggregate_cluster(subdf, cluster):
    """Summarise the columns of one cluster row by row.

    Args:
        subdf: Slice of the wide dataframe holding only this cluster's
            columns.
        cluster: Cluster label, broadcast into the ``cluster`` column.

    Returns:
        DataFrame with the index of ``subdf`` moved into a leading column,
        followed by ``cluster`` and the four row-wise statistics.
    """
    return p.DataFrame(data={
        "cluster": cluster,
        "kwh_tot_median": subdf.median(axis=1),
        "kwh_tot_mean": subdf.mean(axis=1),
        # NOTE: despite the "CI" names these are the 25th / 75th
        # percentiles of the cluster's columns, not a confidence interval.
        "kwh_tot_CI_low": subdf.quantile(0.25, axis=1),
        "kwh_tot_CI_high": subdf.quantile(0.75, axis=1),
    }).reset_index()


def aggregator(widedf, clusdf, drop_misc = False):
    """Aggregate a (wide-form) dataframe by the cluster mappings in a
    second dataframe.

    Args:
        widedf: Wide dataframe with one column per ``icp_id``; every
            ``icp_id`` of a retained cluster must be a column of it.
        clusdf: Dataframe with ``cluster`` and ``icp_id`` columns mapping
            each ``icp_id`` to a cluster label.
        drop_misc: If true, skip the ``-1`` pseudocluster.

    Returns:
        Tall dataframe, clusters stacked in ascending label order, with the
        former index of ``widedf`` as the first column(s) followed by
        ``cluster``, ``kwh_tot_median``, ``kwh_tot_mean``,
        ``kwh_tot_CI_low`` and ``kwh_tot_CI_high``. When no cluster remains
        an empty dataframe with those same columns is returned.

    Raises:
        KeyError: If an ``icp_id`` of a retained cluster is not a column of
            ``widedf``.
    """
    labels = sorted(
        label for label in clusdf['cluster'].unique()
        if not (drop_misc and label == -1)
    )

    if not labels:
        # p.concat([]) raises ValueError; hand back an empty frame with the
        # same column layout the non-empty path produces instead.
        stat_columns = [
            "cluster",
            "kwh_tot_median",
            "kwh_tot_mean",
            "kwh_tot_CI_low",
            "kwh_tot_CI_high",
        ]
        return p.DataFrame(
            index=widedf.index[:0], columns=stat_columns
        ).reset_index()

    frames = []
    for label in tqdm(labels):
        icps = clusdf.loc[clusdf['cluster'] == label, 'icp_id'].unique()
        frames.append(_aggregate_cluster(widedf[icps], label))

    return p.concat(frames, axis=0, ignore_index=True)


def main():
    """Command-line entry point: load, optionally pivot, aggregate, save."""
    parser = ArgumentParser(
        description='Aggregate dataframe by specified clusters')
    parser.add_argument("-i", "--input", dest="input",
                        help = "input pickle path", metavar="PATH",
                        required = True)
    parser.add_argument("-c", "--clusters", dest="clusfile",
                        help = "cluster pickle path", metavar="PATH",
                        required = True)
    parser.add_argument("-o", "--output", dest="output",
                        help = "output pickle path", metavar="PATH",
                        required = True)
    parser.add_argument("-d", "--drop-misc", dest="drop_misc",
                        help = "drop 'misc' (-1) pseudocluster",
                        action = "store_true")
    parser.add_argument("-p", "--pivot", dest = "istall",
                        help = "input dataframe is in tall format and must be pivoted",
                        action ="store_true")
    args = parser.parse_args()

    # SECURITY: unpickling executes arbitrary code embedded in the file;
    # only point -i / -c at pickles from a trusted source.
    wd = p.read_pickle(args.input)
    cd = p.read_pickle(args.clusfile)

    if args.istall:
        # Tall input has one row per (read_time, icp_id); spread it so each
        # icp_id becomes a column, which is what aggregator() expects.
        wd = wd.pivot(index = 'read_time', columns = 'icp_id',
                      values = 'kwh_tot')

    agged = aggregator(wd, cd, args.drop_misc)
    agged.to_pickle(args.output)


if __name__ == "__main__":
    main()