# petra / ampli


			
# Aggregate given clusters
from argparse import ArgumentParser
import pandas as p
from tqdm import tqdm

def aggregator(widedf, clusdf):
    """Aggregate a (wide-form) dataframe by the cluster mappings in a second dataframe.

    For every distinct value of ``clusdf['cluster']`` (visited in sorted
    order) the columns of ``widedf`` named by that cluster's ``icp_id``
    values are selected and summarised across each row.

    Parameters
    ----------
    widedf : pandas.DataFrame
        Wide-form data whose column labels are ``icp_id`` values.
    clusdf : pandas.DataFrame
        Mapping table with a ``cluster`` column and an ``icp_id`` column.

    Returns
    -------
    pandas.DataFrame
        The per-cluster summaries stacked one after another with a fresh
        integer index.  Columns are the former index of ``widedf`` (as
        produced by ``reset_index``) followed by ``cluster``,
        ``kwh_tot_median``, ``kwh_tot_mean``, ``kwh_tot_CI_low`` and
        ``kwh_tot_CI_high``.  When ``clusdf`` contains no clusters an empty
        frame with those same columns is returned.

    Notes
    -----
    ``kwh_tot_CI_low`` / ``kwh_tot_CI_high`` hold the 25th and 75th
    percentiles of each row; the column names are kept unchanged so that
    existing consumers of the output keep working.
    Every ``icp_id`` listed in ``clusdf`` is expected to be a column of
    ``widedf`` -- presumably pandas raises ``KeyError`` otherwise; verify
    against the pandas version in use.
    """
    stat_columns = [
        "cluster",
        "kwh_tot_median",
        "kwh_tot_mean",
        "kwh_tot_CI_low",
        "kwh_tot_CI_high",
    ]

    clusters = clusdf['cluster'].unique()
    clusters.sort()

    if len(clusters) == 0:
        # p.concat([]) would raise "No objects to concatenate"; hand back an
        # empty result that goes through the same reset_index() so the index
        # column(s) are named exactly as in the non-empty case.
        return p.DataFrame(index=widedf.index[:0], columns=stat_columns).reset_index()

    frames = []
    for c in tqdm(clusters):
        icps = clusdf.loc[clusdf['cluster'] == c, 'icp_id'].unique()
        members = widedf[icps]
        # Vectorised row-wise statistics: equivalent to applying
        # Series.quantile / median / mean to every row, without a Python-level
        # call per row.
        summary = p.DataFrame(data={
            "cluster":         c,
            "kwh_tot_median":  members.median(axis=1),
            "kwh_tot_mean":    members.mean(axis=1),
            "kwh_tot_CI_low":  members.quantile(0.25, axis=1),
            "kwh_tot_CI_high": members.quantile(0.75, axis=1),
        })
        frames.append(summary.reset_index())

    return p.concat(frames, axis=0, ignore_index=True)


def main():
    """Command-line entry point: load inputs, aggregate by cluster, save the result.

    Reads the data frame and the cluster table from pickle files, pivots the
    data frame to wide form first when ``-p/--pivot`` is given, runs
    ``aggregator`` on the pair and pickles the outcome to the output path.
    """
    cli = ArgumentParser(description='Aggregate dataframe by specified clusters')
    cli.add_argument(
        "-i", "--input",
        dest="input",
        metavar="PATH",
        default="../data/2017-5k-wide.pkl",
        help="input pickle path; default: ../data/2017-5k-wide.pkl",
    )
    cli.add_argument(
        "-c", "--clusters",
        dest="clusfile",
        metavar="PATH",
        default="../data/5kclustable.pkl",
        help="cluster pickle path; default: ../data/5kclustable.pkl",
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        metavar="PATH",
        default="../data/5k-ag.pkl",
        help="output pickle path; default: ../data/5k-ag.pkl",
    )
    cli.add_argument(
        "-p", "--pivot",
        dest="istall",
        action="store_true",
        help="input dataframe is in tall format and must be pivoted",
    )
    opts = cli.parse_args()

    # Load both inputs before any reshaping.
    frame = p.read_pickle(opts.input)
    cluster_table = p.read_pickle(opts.clusfile)

    # Tall input: one row per (read_time, icp_id) reading -> one column per icp_id.
    if opts.istall:
        frame = frame.pivot(index='read_time', columns='icp_id', values='kwh_tot')

    result = aggregator(frame, cluster_table)
    result.to_pickle(opts.output)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()