1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- # Aggregate given clusters
- from argparse import ArgumentParser
- import pandas as p
- from tqdm import tqdm
-
def aggregator(widedf, clusdf, *, q_low=0.25, q_high=0.75):
    """Aggregate a (wide-form) dataframe by the cluster mappings in a second dataframe.

    For every distinct value of ``clusdf['cluster']`` (visited in sorted
    order), the columns of ``widedf`` named by that cluster's ``icp_id``
    values are selected and summarised across columns, row by row.

    Args:
        widedf: Wide-form dataframe whose column labels are looked up from
            ``clusdf['icp_id']``.
        clusdf: Dataframe with a ``cluster`` column and an ``icp_id`` column.
        q_low: Quantile reported in ``kwh_tot_CI_low`` (default: lower
            quartile).
        q_high: Quantile reported in ``kwh_tot_CI_high`` (default: upper
            quartile).

    Returns:
        A dataframe with one row per (cluster, row of ``widedf``). The former
        index of ``widedf`` becomes the leading column(s), followed by
        ``cluster``, ``kwh_tot_median``, ``kwh_tot_mean``, ``kwh_tot_CI_low``
        and ``kwh_tot_CI_high``. Note the "CI" columns are plain quantiles of
        the cluster members, not a statistical confidence interval. When
        ``clusdf`` contains no clusters, an empty dataframe with the same
        columns is returned.

    Raises:
        KeyError: Propagated from pandas if an ``icp_id`` listed in ``clusdf``
            is not a column of ``widedf``.
    """
    clusters = clusdf['cluster'].unique()
    clusters.sort()

    if len(clusters) == 0:
        # p.concat() raises ValueError on an empty list, so build the empty
        # result explicitly, keeping the same column layout as the normal path.
        stat_columns = [
            "cluster",
            "kwh_tot_median",
            "kwh_tot_mean",
            "kwh_tot_CI_low",
            "kwh_tot_CI_high",
        ]
        return p.DataFrame(columns=stat_columns, index=widedf.index[:0]).reset_index()

    frames = []
    for c in tqdm(clusters):
        icps = clusdf.loc[clusdf['cluster'] == c, 'icp_id'].unique()
        subdf = widedf[icps]
        # Vectorized row-wise statistics; equivalent to applying the
        # corresponding Series method to every row, but without a Python-level
        # call per row.
        summary = p.DataFrame(data={
            "cluster": c,  # scalar is broadcast over the index of the series below
            "kwh_tot_median": subdf.median(axis=1),
            "kwh_tot_mean": subdf.mean(axis=1),
            "kwh_tot_CI_low": subdf.quantile(q_low, axis=1),
            "kwh_tot_CI_high": subdf.quantile(q_high, axis=1),
        }).reset_index()
        frames.append(summary)

    return p.concat(frames, axis=0, ignore_index=True)
-
-
def main():
    """Command-line entry point: load the pickles, aggregate, and write the result.

    Reads the input dataframe and the cluster dataframe from the pickle paths
    given on the command line, optionally pivots a tall-format input into wide
    form, runs ``aggregator`` on the pair, and pickles the result to the
    output path.
    """
    cli = ArgumentParser(description='Aggregate dataframe by specified clusters')
    cli.add_argument(
        "-i", "--input",
        dest="input",
        metavar="PATH",
        required=True,
        help="input pickle path",
    )
    cli.add_argument(
        "-c", "--clusters",
        dest="clusfile",
        metavar="PATH",
        required=True,
        help="cluster pickle path",
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        metavar="PATH",
        required=True,
        help="output pickle path",
    )
    cli.add_argument(
        "-p", "--pivot",
        dest="istall",
        action="store_true",
        help="input dataframe is in tall format and must be pivoted",
    )
    opts = cli.parse_args()

    # NOTE: unpickling can execute arbitrary code; only pass trusted files.
    loaded = p.read_pickle(opts.input)
    cluster_map = p.read_pickle(opts.clusfile)

    # Tall input has one row per reading; spread it so each icp_id is a column.
    wide = (
        loaded.pivot(index='read_time', columns='icp_id', values='kwh_tot')
        if opts.istall
        else loaded
    )

    result = aggregator(wide, cluster_map)
    result.to_pickle(opts.output)
-
-
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|