Repository for Petra's work at ampli Jan-Feb 2019

clusAssign.py 615B

12345678910111213141516171819202122
  1. # An algorithm for assigning a dataset to pre-existing clusters
  2. import pandas as p
  3. # Pre-existing aggregated clusters
  4. clusfile = '../data/9-clusters.agg.pkl'
  5. # A new dataset
  6. ndsfile = '../data/2016-17-sample.pkl'
  7. clusdf = p.read_pickle(clusfile)
  8. clusdf = clusdf.pivot(index = 'read_time', columns = 'cluster', values = 'kwh_tot_mean')
  9. del clusdf.columns.name
  10. print(clusdf.info())
  11. newdf = p.read_pickle(ndsfile).pivot(index = 'read_time',
  12. columns = 'icp_id',
  13. values = 'kwh_tot').loc[clusdf.index, :]
  14. print(newdf)
  15. print(newdf.info())