# Aggregate a dataframe by the given clusters
from argparse import ArgumentParser
import pandas as p
from tqdm import tqdm

def aggregator(widedf, clusdf, drop_misc=False):
    """Aggregate a wide-form dataframe by the cluster mappings in a second dataframe.

    For every cluster, the columns of ``widedf`` belonging to that cluster
    are summarised row by row (i.e. across the cluster's members).

    Args:
        widedf: Wide-form dataframe whose column labels are the ``icp_id``
            values found in ``clusdf``.
        clusdf: Dataframe with a ``cluster`` column and an ``icp_id`` column
            mapping each id to its cluster.
        drop_misc: If true, the pseudocluster labelled ``-1`` is skipped.

    Returns:
        A dataframe with one row per (cluster, ``widedf`` index entry),
        clusters in ascending order. Columns are the former index of
        ``widedf`` (via ``reset_index``), ``cluster``, ``kwh_tot_median``,
        ``kwh_tot_mean``, ``kwh_tot_CI_low`` and ``kwh_tot_CI_high``.
        Despite their names, the ``CI`` columns hold the 25th and 75th
        percentiles, not a confidence interval.

    Raises:
        ValueError: If there is no cluster left to aggregate.
    """
    clusters = list(clusdf['cluster'].unique())
    if drop_misc and -1 in clusters:
        clusters.remove(-1)
    if not clusters:
        # Fail with an explicit message instead of pandas' opaque
        # "No objects to concatenate" from the concat below.
        raise ValueError("no clusters to aggregate")
    clusters.sort()

    frames = []
    for c in tqdm(clusters):
        icps = clusdf.loc[clusdf['cluster'] == c, 'icp_id'].unique()
        subdf = widedf[icps]
        # Use the vectorised row-wise reductions rather than calling a
        # Python lambda once per row; the default (linear) interpolation
        # is the same one Series.quantile uses.
        agged = p.DataFrame(data={
            "cluster":         c,
            "kwh_tot_median":  subdf.median(axis=1),
            "kwh_tot_mean":    subdf.mean(axis=1),
            "kwh_tot_CI_low":  subdf.quantile(0.25, axis=1),
            "kwh_tot_CI_high": subdf.quantile(0.75, axis=1),
        }).reset_index()
        frames.append(agged)
    return p.concat(frames, axis=0, ignore_index=True)


def main():
    """Command-line entry point: load the inputs, aggregate, save the result.

    Reads the input and cluster dataframes from the pickle files given on
    the command line, optionally pivots a tall-format input into wide form
    (``read_time`` rows by ``icp_id`` columns, ``kwh_tot`` values), runs
    ``aggregator`` and pickles its result to the output path.
    """
    cli = ArgumentParser(description='Aggregate dataframe by specified clusters')
    cli.add_argument(
        "-i", "--input",
        dest="input",
        help="input pickle path",
        metavar="PATH",
        required=True,
    )
    cli.add_argument(
        "-c", "--clusters",
        dest="clusfile",
        help="cluster pickle path",
        metavar="PATH",
        required=True,
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        help="output pickle path",
        metavar="PATH",
        required=True,
    )
    cli.add_argument(
        "-d", "--drop-misc",
        dest="drop_misc",
        help="drop 'misc' (-1) pseudocluster",
        action="store_true",
    )
    cli.add_argument(
        "-p", "--pivot",
        dest="istall",
        help="input dataframe is in tall format and must be pivoted",
        action="store_true",
    )
    opts = cli.parse_args()

    # NOTE: unpickling can execute arbitrary code; only pass trusted files.
    wide = p.read_pickle(opts.input)
    mapping = p.read_pickle(opts.clusfile)
    if opts.istall:
        # Tall input: one row per reading, so spread the ids out into columns.
        wide = wide.pivot(index='read_time', columns='icp_id', values='kwh_tot')

    result = aggregator(wide, mapping, opts.drop_misc)
    result.to_pickle(opts.output)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()