Repository for Petra's work at ampli Jan-Feb 2019

agg.py 2.2KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # Aggregrate given clusters
  2. from argparse import ArgumentParser
  3. import pandas as p
  4. from tqdm import tqdm
  5. def aggregator(widedf, clusdf, drop_misc = False):
  6. """Aggregate a (wide-form) dataframe by the cluster mappings in a second dataframe
  7. """
  8. clusters = list(clusdf['cluster'].unique())
  9. if drop_misc and -1 in clusters:
  10. clusters.remove(-1)
  11. clusters.sort()
  12. dflis = []
  13. qlow = lambda x: x.quantile(0.250)
  14. qhigh = lambda x: x.quantile(0.750)
  15. for c in tqdm(clusters):
  16. icps = clusdf[clusdf.cluster == c].icp_id.unique()
  17. subdf = widedf[icps]
  18. aggmed = subdf.agg(func = 'median', axis = 1)
  19. aggmen = subdf.agg(func = 'mean', axis = 1)
  20. aggupq = subdf.agg(func = qlow, axis = 1)
  21. aggdwq = subdf.agg(func = qhigh, axis = 1)
  22. agged = p.DataFrame(data = {
  23. "cluster": c,
  24. "kwh_tot_median": aggmed,
  25. "kwh_tot_mean": aggmen,
  26. "kwh_tot_CI_low": aggupq,
  27. "kwh_tot_CI_high": aggdwq,
  28. }).reset_index()
  29. dflis.append(agged)
  30. adf = p.concat(dflis, axis = 0, ignore_index = True)
  31. return adf
  32. def main():
  33. parser = ArgumentParser(description='Aggregate dataframe by specified clusters')
  34. parser.add_argument("-i", "--input", dest="input", help = "input pickle path", metavar="PATH", required = True)
  35. parser.add_argument("-c", "--clusters", dest="clusfile", help = "cluster pickle path", metavar="PATH", required = True)
  36. parser.add_argument("-o", "--output", dest="output", help = "output pickle path", metavar="PATH", required = True)
  37. parser.add_argument("-d", "--drop-misc", dest="drop_misc", help = "drop 'misc' (-1) pseudocluster", action = "store_true")
  38. parser.add_argument("-p", "--pivot", dest = "istall", help = "input dataframe is in tall format and must be pivoted", action ="store_true")
  39. args = parser.parse_args()
  40. wd = p.read_pickle(args.input)
  41. cd = p.read_pickle(args.clusfile)
  42. if (args.istall):
  43. wd = wd.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
  44. agged = aggregator(wd, cd, args.drop_misc)
  45. agged.to_pickle(args.output)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()