123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- from argparse import ArgumentParser, FileType
- import numpy as np
- import pandas as p
- import statsmodels.formula.api as smf
- import datetime as dt
- import pickle
-
- epoch = dt.datetime(2017, 1, 1)
-
-
- def thirtyoffset(date, epoch=epoch):
- """Get the offset from the epoch, in multiples of 30 minutes,
- for a particular date/time
- """
- return (date - epoch).total_seconds() / (60 * 30)
-
-
- def harmonic(length, period, start=0, harmonics=1, prefix=""):
- x = np.arange(length) + start
- y = np.ndarray((harmonics * 2, length), dtype=np.float64)
- ns = []
- nc = []
- for i in range(harmonics):
- ysi = np.sin(x/period * 2 * np.pi * (i + 1))
- yci = np.cos(x/period * 2 * np.pi * (i + 1))
- y[i] = ysi
- ns.append("{}sin{}".format(prefix, i + 1))
- y[i + harmonics] = yci
- nc.append("{}cos{}".format(prefix, i + 1))
- y = p.DataFrame.from_records(y.transpose(), columns=ns + nc)
- return y
-
-
- def fitWeather(weather, harmonics):
- """Fit a weather model with a specified number of harmonics
- in order to detrend Returns models for minimum and maximum
- rolling temperature, along with a summary dataframe
- """
- # Make sure weather dataset is complete
- wantedtimes = p.date_range(weather.temp_timestamp.min(),
- weather.temp_timestamp.max(), freq="30 min")
- wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
- wanteddf = p.DataFrame(index=wantedtimes)
- weather = wanteddf.join(weather.set_index(['temp_timestamp']),
- how='left').drop('temp_date', axis=1)
- weather.index.name = "read_time"
- weather = weather.fillna(method='ffill', axis=0)
-
- # Add rolling columns
- weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
- weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
-
- # Calculate harmonics
- wharmstart = thirtyoffset(weather.index.min())
- wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
- harmonics=harmonics)
-
- # Weather harmonic column names in format for model
- weatherharms = " + ".join(wharm.columns.tolist())
-
- # Set up dataframe for modeling
- wharm.index = weather.index
- wharm['min_rolling'] = weather['min_rolling']
- wharm['max_rolling'] = weather['max_rolling']
-
- # Fitting models
- minmodel = smf.ols("min_rolling ~ " + weatherharms, data=wharm).fit()
- maxmodel = smf.ols("max_rolling ~ " + weatherharms, data=wharm).fit()
-
- # Residuals and fitted values
- sumdf = p.DataFrame(data={
- "max_rolling": wharm["max_rolling"],
- "max_resid": maxmodel.resid,
- "max_fitted": maxmodel.fittedvalues,
- "min_rolling": wharm["min_rolling"],
- "min_resid": minmodel.resid,
- "min_fitted": minmodel.fittedvalues
- })
-
- return minmodel, maxmodel, sumdf
-
-
- def predweather(model, datestart, dateend, harmonics=2):
- """Predict weather values from a weather model and date range
- Note: it's the residuals (true value - prediction) that go
- into the demand model
- """
- drange = p.date_range(datestart, dateend, freq="30min")
- pharm = harmonic(len(drange),
- period=365.25 * 48,
- start=thirtyoffset(drange.min()),
- harmonics=harmonics)
- pharm.index = drange
- return model.predict(pharm)
-
-
- def fitdemand(df, wmodsum, harmonics=[2, 3, 3]):
- harmstart = thirtyoffset(df.index.min())
- clusters = df.cluster.unique()
- clusters.sort()
- utv = df.index.unique()
- harmlen = len(utv)
-
- yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
- harmonics=harmonics[0], prefix="y")
- yharm.index = utv
-
- wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
- harmonics=harmonics[1], prefix="w")
- wharm.index = utv
-
- dharm = harmonic(harmlen, period=48, start=harmstart,
- harmonics=harmonics[2], prefix="d")
- dharm.index = utv
-
- y_params = " + ".join(yharm.columns.tolist())
- w_params = " + ".join(wharm.columns.tolist())
- d_params = " + ".join(dharm.columns.tolist())
-
- hcomb = p.concat([yharm, wharm, dharm], axis=1)
-
- modellist = {}
-
- modelstring = f"""kwh_tot_mean ~ {y_params} + {w_params} + {d_params}
- + ({y_params}):({d_params})
- + ({w_params}):({d_params})
- + max_resid + max_resid:({y_params})
- + max_resid:({w_params}) + max_resid:({d_params})
- + min_resid + min_resid:({y_params})
- + min_resid:({w_params}) + min_resid:({d_params})
- """.replace("\n", "").replace(" ", "")
-
- for c in clusters:
- dfc = df[df['cluster'] == c].join(hcomb,
- how='left').join(wmodsum,
- how='left')
- # print(dfc)
- cmod = smf.ols(modelstring, data=dfc).fit()
- modellist[c] = cmod
-
- return modellist
-
-
- def main():
- parser = ArgumentParser(description='Harmonic models of all '
- 'clusters in specified pickle')
- parser.add_argument("-i", "--input", dest="input",
- help="input aggregated ICP pickle path",
- metavar="PATH", required=True,
- type=FileType('rb'))
- parser.add_argument("-w", "--weather", dest="weather",
- help="input weather pickle path",
- metavar="PATH", required=True,
- type=FileType('rb'))
- parser.add_argument("-m", "--model-file", dest="model_file",
- help="filename for model to store",
- required=True,
- type=FileType('wb'))
- parser.add_argument("--weather-harmonics", dest="weather_harmonics",
- help="number of harmonics for weather; default: 2",
- type=int, default=2, metavar="NUM")
- parser.add_argument("--icp-harmonics", dest="icp_harmonics", nargs=3,
- help="harmonics for icp fitting, default 2 3 3",
- default=[2, 3, 3], type=int,
- metavar="NUM")
- args = parser.parse_args()
-
- wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis=1)
-
- icpdat = p.read_pickle(args.input).set_index('read_time')
-
- minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
-
- clusters = icpdat.cluster.unique()
- clusters.sort()
-
- mods = {
- "icp": fitdemand(icpdat, sumdf, args.icp_harmonics),
- "min_temp": minm,
- "max_temp": maxm,
- "clusters": clusters,
- "weather_harmonics": args.weather_harmonics,
- "icp_harmonics": args.icp_harmonics
- }
-
- pickle.dump(mods, args.model_file)
-
- args.input.close()
- args.weather.close()
- args.model_file.close()
-
-
- if __name__ == "__main__":
- main()
|