Repository for Petra's work at ampli Jan-Feb 2019

agg.py 598B

12345678910111213141516171819202122232425
  1. # Aggregate given clusters
  2. import pandas as p
  3. def aggregator(widedf, clusdf):
  4. clusters = clusdf['cluster'].unique()
  5. clusters.sort()
  6. aggv = {}
  7. qlow = lambda x: x.quantile(0.250)
  8. qhigh = lambda x: x.quantile(0.750)
  9. for c in clusters:
  10. icps = clusdf[clusdf.cluster == c].icp_id.values
  11. subdf = widedf[icps]
  12. agged = subdf.agg(func = 'median', axis = 1)
  13. print(agged)
  14. def main():
  15. wd = p.read_pickle("../data/2017-5k-wide.pkl")
  16. cd = p.read_pickle("../data/5kclustable.pkl")
  17. aggregator(wd, cd)
  18. if __name__ == "__main__":
  19. main()