12345678910111213141516171819202122232425 |
- # Aggregate given clusters
- import pandas as p
-
def aggregator(widedf, clusdf):
    """Print and return the row-wise median of each cluster's columns.

    Args:
        widedf: DataFrame whose column labels include the ``icp_id`` values
            listed in ``clusdf``.
        clusdf: DataFrame with a ``cluster`` column holding cluster labels
            and an ``icp_id`` column holding column labels of ``widedf``.

    Returns:
        dict mapping each cluster label (inserted in ascending order) to the
        row-wise median taken over that cluster's columns of ``widedf``.
        The dict is empty when ``clusdf`` has no rows.
    """
    medians = {}
    # sorted() yields the distinct labels in ascending order regardless of
    # the array type returned by unique().
    for c in sorted(clusdf['cluster'].unique()):
        # Column labels of widedf that belong to cluster c.
        icps = clusdf.loc[clusdf['cluster'] == c, 'icp_id'].values
        agged = widedf[icps].agg(func='median', axis=1)
        print(agged)
        medians[c] = agged
    return medians
-
-
def main():
    """Load the wide table and the cluster table from pickles and aggregate.

    Both paths are relative to the current working directory.
    """
    wide_table = p.read_pickle("../data/2017-5k-wide.pkl")
    cluster_table = p.read_pickle("../data/5kclustable.pkl")
    aggregator(wide_table, cluster_table)
-
-
-
# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|