Repository for Petra's work at ampli Jan-Feb 2019

dcorr.py 1.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. # Takes the correlation part out of clustering file
  2. from argparse import ArgumentParser
  3. import numpy as np
  4. import pandas as p
  5. from tqdm import tqdm
  6. from itertools import combinations
  7. from math import factorial as f
  8. def tqcorr(df):
  9. """Calculates a (distance) correlation matrix for wide-form dataframe, with progressbar
  10. """
  11. cols = df.columns
  12. ncols = len(cols)
  13. cdf = p.DataFrame(index = cols, columns = cols, dtype = np.float16)
  14. for c in tqdm(cols):
  15. cdf.loc[c, c] = 0
  16. comb = combinations(cols, 2)
  17. ncomb = f(ncols) // f(2) // f(ncols - 2)
  18. for c1, c2 in tqdm(comb, total = ncomb):
  19. dv = 1 - df[c1].corr(df[c2])
  20. cdf.loc[c1, c2] = dv
  21. cdf.loc[c2, c1] = dv
  22. print(cdf.info())
  23. return cdf
  24. def createCorr(source, output, piv):
  25. """Load a pkl in wide form from source, process, run tqcorr() and save response to output
  26. """
  27. df = p.read_pickle(source)
  28. if piv:
  29. df = df.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
  30. df = df[df.columns[df.max() != df.min()]]
  31. cmat = tqcorr(df)
  32. cmat.to_pickle(output)
  33. if __name__ == "__main__":
  34. parser = ArgumentParser(description='Create distance correlation matrix from pickled wideform pandas dataframe')
  35. parser.add_argument("-i", "--input", dest="input", help = "input pickle path; default: ../data/2017-5k-wide.pkl", metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
  36. parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdcorrmatrix.pkl", metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
  37. parser.add_argument("-p", "--pivot", dest = "istall", help = "input dataframe is in tall format and must be pivoted", action ="store_true")
  38. args = parser.parse_args()
  39. createCorr(args.input, args.output, args.istall)