from argparse import ArgumentParser, FileType import numpy as np import pandas as p import statsmodels.formula.api as smf import statsmodels.api as sm import datetime as dt epoch = dt.datetime(2017, 1, 1) def thirtyoffset(date, epoch = epoch): """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time """ return (date - epoch).total_seconds() / (60 * 30) def harmonic(length, period, start = 0, harmonics = 1, prefix = ""): x = np.arange(length) + start y = np.ndarray((harmonics * 2, length), dtype = np.float64) ns = [] nc = [] for i in range(harmonics): ysi = np.sin(x/period * 2 * np.pi * (i + 1)) yci = np.cos(x/period * 2 * np.pi * (i + 1)) y[i] = ysi ns.append("{}sin{}".format(prefix, i + 1)) y[i + harmonics] = yci nc.append("{}cos{}".format(prefix, i + 1)) y = p.DataFrame.from_records(y.transpose(), columns = ns + nc) return y def fitWeather(weather, harmonics): """Fit a weather model with a specified number of harmonics in order to detrend Returns models for minimum and maximum rolling temperature, along with a summary dataframe """ # Make sure weather dataset is complete wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min") wantedtimes = p.Series(wantedtimes.round(freq = "30 min")) wanteddf = p.DataFrame(index = wantedtimes) weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1) weather.index.name = "read_time" weather = weather.fillna(method = 'ffill', axis = 0) # Add rolling columns weather['min_rolling'] = weather['tmin_c'].rolling("1D").min() weather['max_rolling'] = weather['tmax_c'].rolling("1D").max() # Calculate harmonics wharmstart = thirtyoffset(weather.index.min()) wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart, harmonics = harmonics) # Weather harmonic column names in format for model weatherharms = " + ".join(wharm.columns.tolist()) # Set up dataframe for modeling wharm.index = weather.index wharm['min_rolling'] = weather['min_rolling'] wharm['max_rolling'] = weather['max_rolling'] # Fitting models minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit() maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit() # Residuals and fitted values sumdf = p.DataFrame(data = { "max_rolling" : wharm["max_rolling"], "max_resid" : maxmodel.resid, "max_fitted" : maxmodel.fittedvalues, "min_rolling" : wharm["min_rolling"], "min_resid" : minmodel.resid, "min_fitted" : minmodel.fittedvalues }) return minmodel, maxmodel, sumdf def predweather(model, datestart, dateend, harmonics = 2): """Predict weather values from a weather model and date range Note: it's the residuals (true value - prediction) that go into the demand model """ drange = p.date_range(datestart, dateend, freq = "30min") pharm = harmonic(len(drange), period = 365.25 * 48, start = thirtyoffset(drange.min()), harmonics = harmonics) pharm.index = drange return model.predict(pharm) def fitdemand(df, harmonics = [2, 3, 3]): print(harmonics) def main(): parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle') parser.add_argument("-i", "--input", dest="input", help = "input aggregated ICP pickle path", metavar="PATH", default = "../data/test1kagg.pkl", type = FileType('rb')) parser.add_argument("-w", "--weather", dest="weather", help = "input weather pickle path", metavar="PATH", default = "../data/weathertest.pkl", type = FileType('rb')) parser.add_argument("--weather-harmonics", dest="weather_harmonics", help = "number of harmonics for weather; default: 2", type = int, default = 2, metavar = "NUM") parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3, help = "harmonics for icp fitting, default 2 3 3", default = [2, 3, 3], type = int, metavar = "NUM") args = parser.parse_args() print(args) wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1) icpdat = p.read_pickle(args.input).set_index('read_time') minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics) icpdat = icpdat.join(sumdf, how = 'left') # fitdemand(icpdat, args # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics)) args.input.close() args.weather.close() if __name__ == "__main__": main()