from argparse import ArgumentParser, FileType import numpy as np import pandas as p import statsmodels.formula.api as smf import datetime as dt import pickle from tqdm import tqdm epoch = dt.datetime(2017, 1, 1) def thirtyoffset(date, epoch=epoch): """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time """ return (date - epoch).total_seconds() / (60 * 30) def harmonic(length, period, start=0, harmonics=1, prefix=""): x = np.arange(length) + start y = np.ndarray((harmonics * 2, length), dtype=np.float64) ns = [] nc = [] for i in range(harmonics): ysi = np.sin(x/period * 2 * np.pi * (i + 1)) yci = np.cos(x/period * 2 * np.pi * (i + 1)) y[i] = ysi ns.append("{}sin{}".format(prefix, i + 1)) y[i + harmonics] = yci nc.append("{}cos{}".format(prefix, i + 1)) y = p.DataFrame.from_records(y.transpose(), columns=ns + nc) return y def fitWeather(weather, harmonics): """Fit a weather model with a specified number of harmonics in order to detrend Returns models for minimum and maximum rolling temperature, along with a summary dataframe """ # Make sure weather dataset is complete wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq="30 min") wantedtimes = p.Series(wantedtimes.round(freq="30 min")) wanteddf = p.DataFrame(index=wantedtimes) weather = wanteddf.join(weather.set_index(['temp_timestamp']), how='left').drop('temp_date', axis=1) weather.index.name = "read_time" weather = weather.fillna(method='ffill', axis=0) # Add rolling columns weather['min_rolling'] = weather['tmin_c'].rolling("1D").min() weather['max_rolling'] = weather['tmax_c'].rolling("1D").max() # Calculate harmonics wharmstart = thirtyoffset(weather.index.min()) wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart, harmonics=harmonics) # Weather harmonic column names in format for model weatherharms = " + ".join(wharm.columns.tolist()) # Set up dataframe for modeling wharm.index = weather.index wharm['min_rolling'] = weather['min_rolling'] wharm['max_rolling'] = weather['max_rolling'] # Fitting models minmodel = smf.ols("min_rolling ~ " + weatherharms, data=wharm).fit() maxmodel = smf.ols("max_rolling ~ " + weatherharms, data=wharm).fit() # Residuals and fitted values sumdf = p.DataFrame(data={ "max_rolling": wharm["max_rolling"], "max_resid": maxmodel.resid, "max_fitted": maxmodel.fittedvalues, "min_rolling": wharm["min_rolling"], "min_resid": minmodel.resid, "min_fitted": minmodel.fittedvalues }) return minmodel, maxmodel, sumdf def predweather(model, datestart, dateend, harmonics=2): """Predict weather values from a weather model and date range Note: it's the residuals (true value - prediction) that go into the demand model """ drange = p.date_range(datestart, dateend, freq="30min") pharm = harmonic(len(drange), period=365.25 * 48, start=thirtyoffset(drange.min()), harmonics=harmonics) pharm.index = drange return model.predict(pharm) def fitdemand(df, wmodsum, harmonics=[2, 3, 3]): harmstart = thirtyoffset(df.index.min()) clusters = df.cluster.unique() clusters.sort() utv = df.index.unique() harmlen = len(utv) yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart, harmonics=harmonics[0], prefix="y") yharm.index = utv wharm = harmonic(harmlen, period=7 * 48, start=harmstart, harmonics=harmonics[1], prefix="w") wharm.index = utv dharm = harmonic(harmlen, period=48, start=harmstart, harmonics=harmonics[2], prefix="d") dharm.index = utv y_params = " + ".join(yharm.columns.tolist()) w_params = " + ".join(wharm.columns.tolist()) d_params = " + ".join(dharm.columns.tolist()) hcomb = p.concat([yharm, wharm, dharm], axis=1) modellist = {} modelstring = f"""kwh_tot_mean ~ {y_params} + {w_params} + {d_params} + ({y_params}):({d_params}) + ({w_params}):({d_params}) + max_resid + max_resid:({y_params}) + max_resid:({w_params}) + max_resid:({d_params}) + min_resid + min_resid:({y_params}) + min_resid:({w_params}) + min_resid:({d_params}) """.replace("\n", "").replace(" ", "") for c in tqdm(clusters): dfc = df[df['cluster'] == c].join(hcomb, how='left').join(wmodsum, how='left') # print(dfc) cmod = smf.ols(modelstring, data=dfc).fit() modellist[c] = cmod return modellist def main(): parser = ArgumentParser(description='Harmonic models of all ' 'clusters in specified pickle') parser.add_argument("-i", "--input", dest="input", help="input aggregated ICP pickle path", metavar="PATH", required=True, type=FileType('rb')) parser.add_argument("-w", "--weather", dest="weather", help="input weather pickle path", metavar="PATH", required=True, type=FileType('rb')) parser.add_argument("-m", "--model-file", dest="model_file", help="filename for model to store", required=True, type=FileType('wb')) parser.add_argument("--weather-harmonics", dest="weather_harmonics", help="number of harmonics for weather; default: 2", type=int, default=2, metavar="NUM") parser.add_argument("--icp-harmonics", dest="icp_harmonics", nargs=3, help="harmonics for icp fitting, default 2 3 3", default=[2, 3, 3], type=int, metavar="NUM") args = parser.parse_args() wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis=1) icpdat = p.read_pickle(args.input).set_index('read_time') minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics) clusters = icpdat.cluster.unique() clusters.sort() mods = { "icp": fitdemand(icpdat, sumdf, args.icp_harmonics), "min_temp": minm, "max_temp": maxm, "clusters": clusters, "weather_harmonics": args.weather_harmonics, "icp_harmonics": args.icp_harmonics } pickle.dump(mods, args.model_file) args.input.close() args.weather.close() args.model_file.close() if __name__ == "__main__": main()