Repository for Petra's work at ampli Jan-Feb 2019

collate.py 503B

1234567891011121314151617181920
  1. # Collate 12 dataframes into one (wide) combined dataframe
  2. import pandas as p
  3. import gc
  4. from tqdm import tqdm
  5. months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  6. coldf = p.read_pickle('../data/2017-{}-5k.pkl'.format(months[0]))
  7. for i in tqdm(range(1, 12)):
  8. tdf = p.read_pickle('../data/2017-{}-5k.pkl'.format(months[i]))
  9. coldf = p.concat([coldf, tdf])
  10. del tdf
  11. gc.collect()
  12. print(coldf.info())
  13. coldf.to_pickle('../data/2017-5k-wide.pkl')