"""Aggregate given clusters.

Command-line script that reads a wide-form dataframe and a cluster table
from pickle files, summarises every cluster per row and writes the result
to another pickle file.
"""
from argparse import ArgumentParser

import pandas as p

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is cosmetic; run without it
    def tqdm(iterable, *args, **kwargs):
        """Fallback used when tqdm is not installed: return *iterable* as is."""
        return iterable


def aggregator(widedf, clusdf):
    """Aggregate a (wide-form) dataframe by the cluster mappings in a
    second dataframe.

    Args:
        widedf: Wide-form dataframe; its columns are selected by the values
            found in ``clusdf['icp_id']``.
        clusdf: Dataframe with an ``icp_id`` column and a ``cluster`` column
            assigning each id to a cluster.

    Returns:
        A dataframe with one row per (cluster, row of ``widedf``), clusters
        in sorted order. The index of ``widedf`` becomes the first column,
        followed by ``cluster``, ``kwh_tot_median``, ``kwh_tot_mean``,
        ``kwh_tot_CI_low`` and ``kwh_tot_CI_high``. Despite their names the
        last two hold the 25% and 75% quantiles across the cluster's
        columns. An empty ``clusdf`` yields an empty dataframe with the same
        columns.

    Raises:
        KeyError: If an id listed in ``clusdf`` is not a column of
            ``widedf``.
    """
    columns = [
        "cluster",
        "kwh_tot_median",
        "kwh_tot_mean",
        "kwh_tot_CI_low",
        "kwh_tot_CI_high",
    ]

    clusters = clusdf["cluster"].unique()
    clusters.sort()

    frames = []
    for c in tqdm(clusters):
        icps = clusdf.loc[clusdf["cluster"] == c, "icp_id"].unique()
        subdf = widedf[icps]
        # Row-wise statistics across the cluster's columns; the vectorized
        # quantile replaces a per-row Python lambda with the same result.
        agged = p.DataFrame(data={
            "cluster": c,
            "kwh_tot_median": subdf.median(axis=1),
            "kwh_tot_mean": subdf.mean(axis=1),
            "kwh_tot_CI_low": subdf.quantile(0.250, axis=1),
            "kwh_tot_CI_high": subdf.quantile(0.750, axis=1),
        }).reset_index()
        frames.append(agged)

    if not frames:
        # concat() raises on an empty list; return an empty frame that
        # keeps the usual column layout instead.
        return p.DataFrame(index=widedf.index[:0], columns=columns).reset_index()

    return p.concat(frames, axis=0, ignore_index=True)


def main():
    """Parse the command line, aggregate the input pickle and save the result."""
    parser = ArgumentParser(description='Aggregate dataframe by specified clusters')
    parser.add_argument(
        "-i", "--input", dest="input",
        help="input pickle path; default: ../data/2017-5k-wide.pkl",
        metavar="PATH", default="../data/2017-5k-wide.pkl")
    parser.add_argument(
        "-c", "--clusters", dest="clusfile",
        help="cluster pickle path; default: ../data/5kclustable.pkl",
        metavar="PATH", default="../data/5kclustable.pkl")
    parser.add_argument(
        "-o", "--output", dest="output",
        help="output pickle path; default: ../data/5k-ag.pkl",
        metavar="PATH", default="../data/5k-ag.pkl")
    parser.add_argument(
        "-p", "--pivot", dest="istall",
        help="input dataframe is in tall format and must be pivoted",
        action="store_true")
    args = parser.parse_args()

    # NOTE: unpickling executes arbitrary code; only pass trusted files.
    wd = p.read_pickle(args.input)
    cd = p.read_pickle(args.clusfile)
    if args.istall:
        # Tall input: one row per (read_time, icp_id); spread ids to columns.
        wd = wd.pivot(index='read_time', columns='icp_id', values='kwh_tot')

    agged = aggregator(wd, cd)
    agged.to_pickle(args.output)


if __name__ == "__main__":
    main()