123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- from argparse import ArgumentParser, FileType
- import numpy as np
- import pandas as p
- import statsmodels.formula.api as smf
- import statsmodels.api as sm
- import datetime as dt
-
- epoch = dt.datetime(2017, 1, 1)
-
- def thirtyoffset(date, epoch = epoch):
- """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time
- """
- return (date - epoch).total_seconds() / (60 * 30)
-
- def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
- x = np.arange(length) + start
- y = np.ndarray((harmonics * 2, length), dtype = np.float64)
- ns = []
- nc = []
- for i in range(harmonics):
- ysi = np.sin(x/period * 2 * np.pi * (i + 1))
- yci = np.cos(x/period * 2 * np.pi * (i + 1))
- y[i] = ysi
- ns.append("{}sin{}".format(prefix, i + 1))
- y[i + harmonics] = yci
- nc.append("{}cos{}".format(prefix, i + 1))
- y = p.DataFrame.from_records(y.transpose(), columns = ns + nc)
- return y
-
- def fitWeather(weather, harmonics):
- """Fit a weather model with a specified number of harmonics in order to detrend
- Returns models for minimum and maximum rolling temperature, along with a summary dataframe
- """
- # Make sure weather dataset is complete
- wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
- wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
- wanteddf = p.DataFrame(index = wantedtimes)
- weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
- weather.index.name = "read_time"
- weather = weather.fillna(method = 'ffill', axis = 0)
-
- # Add rolling columns
- weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
- weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
-
- # Calculate harmonics
- wharmstart = thirtyoffset(weather.index.min())
- wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart,
- harmonics = harmonics)
-
- # Weather harmonic column names in format for model
- weatherharms = " + ".join(wharm.columns.tolist())
-
- # Set up dataframe for modeling
- wharm.index = weather.index
- wharm['min_rolling'] = weather['min_rolling']
- wharm['max_rolling'] = weather['max_rolling']
-
- # Fitting models
- minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
- maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()
-
- # Residuals and fitted values
- sumdf = p.DataFrame(data = {
- "max_rolling" : wharm["max_rolling"],
- "max_resid" : maxmodel.resid,
- "max_fitted" : maxmodel.fittedvalues,
- "min_rolling" : wharm["min_rolling"],
- "min_resid" : minmodel.resid,
- "min_fitted" : minmodel.fittedvalues
- })
-
- return minmodel, maxmodel, sumdf
-
- def predweather(model, datestart, dateend, harmonics = 2):
- """Predict weather values from a weather model and date range
- Note: it's the residuals (true value - prediction) that go
- into the demand model
- """
- drange = p.date_range(datestart, dateend, freq = "30min")
- pharm = harmonic(len(drange),
- period = 365.25 * 48,
- start = thirtyoffset(drange.min()),
- harmonics = harmonics)
- pharm.index = drange
- return model.predict(pharm)
-
- def fitdemand(df, harmonics = [2, 3, 3]):
- print(harmonics)
-
-
-
-
- def main():
- parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
- parser.add_argument("-i", "--input", dest="input",
- help = "input aggregated ICP pickle path",
- metavar="PATH", default = "../data/test1kagg.pkl",
- type = FileType('rb'))
- parser.add_argument("-w", "--weather", dest="weather",
- help = "input weather pickle path",
- metavar="PATH", default = "../data/weathertest.pkl",
- type = FileType('rb'))
- parser.add_argument("--weather-harmonics", dest="weather_harmonics",
- help = "number of harmonics for weather; default: 2",
- type = int, default = 2, metavar = "NUM")
- parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
- help = "harmonics for icp fitting, default 2 3 3",
- default = [2, 3, 3], type = int,
- metavar = "NUM")
- args = parser.parse_args()
-
- print(args)
-
- wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)
-
- icpdat = p.read_pickle(args.input).set_index('read_time')
-
- minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
-
- icpdat = icpdat.join(sumdf, how = 'left')
-
- # fitdemand(icpdat, args
-
- # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
-
-
-
- args.input.close()
- args.weather.close()
-
-
- if __name__ == "__main__":
- main()
|