123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
- from argparse import ArgumentParser, FileType
- import numpy as np
- import pandas as p
- import statsmodels.formula.api as smf
- import datetime as dt
- import pickle
- from sys import stdout
- from pymodels import thirtyoffset, predweather, harmonic
- from pprint import pprint
- from util import datevalid
- from signal import signal, SIGPIPE, SIG_DFL
-
def preddemand(model, weather, harmonics):
    """Predict demand from a fitted model, weather data and harmonic terms.

    Three harmonic frames are generated (prefixes ``"y"``, ``"w"`` and
    ``"d"``, with periods ``365.25 * 48``, ``7 * 48`` and ``48``), aligned
    to the index of ``weather``, concatenated column-wise with ``weather``
    and handed to ``model.predict``.

    Args:
        model: Object exposing ``predict(frame)``.
        weather: Frame of weather regressors; its index supplies the
            prediction times and its minimum is used to derive the
            harmonic start offset.
        harmonics: Indexable holding the harmonic specification for the
            yearly (``[0]``), weekly (``[1]``) and daily (``[2]``) terms.

    Returns:
        Whatever ``model.predict`` returns for the assembled frame.
    """
    times = weather.index
    offset = thirtyoffset(times.min())
    n_rows = len(times)

    # (period, column prefix) for each seasonal component, in the same order
    # as the entries of ``harmonics``.
    # NOTE(review): the factor 48 presumably means 48 samples per day
    # (half-hourly data) -- confirm against ``harmonic``.
    components = (
        (365.25 * 48, "y"),
        (7 * 48, "w"),
        (48, "d"),
    )

    frames = []
    for position, (period, prefix) in enumerate(components):
        terms = harmonic(n_rows, period=period, start=offset,
                         harmonics=harmonics[position], prefix=prefix)
        # Re-index so the harmonic columns line up with the weather rows.
        terms.index = times
        frames.append(terms)

    frames.append(weather)
    design = p.concat(frames, axis=1)
    return model.predict(design)
-
def _build_parser():
    """Create the argument parser for the cluster prediction command."""
    parser = ArgumentParser(description=
                            'Predict from harmonic model of cluster')
    parser.add_argument("-m", "--model-file", dest="model_file",
                        help="filename for models",
                        required=True,
                        type=FileType('rb'))
    parser.add_argument("-w", "--weather", dest="weather_file",
                        help="input weather pickle path",
                        required=False,
                        type=FileType('rb'))
    parser.add_argument("-o", "--output", dest="output_file",
                        help="file to save result (default stdout)")
    parser.add_argument("-t", "--temperature", dest="temp",
                        help="min and max temperature, if not using "
                             "weather dataset, e.g. 2.0 10.5",
                        required=False,
                        type=float, nargs=2)
    parser.add_argument("-s", "--start-date",
                        dest="startdate",
                        help="start date for prediction; format: YYYY-MM-DD",
                        metavar="START_DATE",
                        required=True,
                        type=datevalid)
    parser.add_argument("-e", "--end-date",
                        dest="enddate",
                        help="end date for prediction; format: YYYY-MM-DD",
                        metavar="END_DATE",
                        required=True,
                        type=datevalid)
    parser.add_argument("-c", "--cluster",
                        dest="cluster",
                        help="cluster to predict for",
                        type=int,
                        required=True)
    parser.add_argument("--pkl",
                        help="output as pkl rather than csv",
                        dest="csv",
                        action="store_false")
    return parser


def _weather_residuals(parser, args, mods):
    """Return (max_resid, min_resid) computed from the weather pickle.

    The weather frame is re-sampled onto a 30-minute grid between the
    requested start and end dates, forward-filled, and reduced to one-day
    rolling min/max temperatures; the seasonal temperature predictions from
    ``mods["max_temp"]`` / ``mods["min_temp"]`` are then subtracted.
    Exits via ``parser.error`` when the requested range is not covered by
    the ``temp_timestamp`` column.
    """
    weather = p.read_pickle(args.weather_file)
    stamps = weather['temp_timestamp']
    if args.startdate < stamps.min() or args.enddate > stamps.max():
        parser.error("Start and or end date not in supplied weather dataset")

    wantedtimes = p.date_range(args.startdate,
                               args.enddate, freq="30 min")
    wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
    wanteddf = p.DataFrame(index=wantedtimes)
    weather = wanteddf.join(weather.set_index(['temp_timestamp']),
                            how='left').drop('temp_date', axis=1)
    weather.index.name = "read_time"
    # ``fillna(method='ffill')`` is deprecated in pandas; ``ffill`` is the
    # supported spelling of the same forward fill.
    weather = weather.ffill(axis=0)

    # Add rolling columns
    weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
    weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()

    wharmstart = thirtyoffset(weather.index.min())
    wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
                     harmonics=mods["weather_harmonics"])
    wharm.index = wantedtimes
    pred_max = weather['max_rolling'] - mods["max_temp"].predict(wharm)
    pred_min = weather['min_rolling'] - mods["min_temp"].predict(wharm)
    return pred_max, pred_min


def main():
    """Command-line entry point: predict demand for one cluster.

    Loads the pickled model bundle, derives temperature residuals either
    from a weather pickle (``--weather``) or from a fixed min/max
    temperature pair (``--temperature``), runs ``preddemand`` for the
    requested cluster and writes the ``"predicted"`` column as CSV
    (default) or pickle (``--pkl``) to the output path or stdout.

    Exits through ``parser.error`` when neither weather source is given,
    when the cluster is not in the model, or when the weather data does
    not cover the requested dates.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # The FileType arguments are already open; make sure they are closed on
    # every exit path, including parser.error() and unexpected exceptions.
    try:
        if args.temp is None and args.weather_file is None:
            parser.error("Either the temperature range or a weather "
                         "dataset must be specified")

        # NOTE(security): pickle.load can execute arbitrary code; only use
        # model files from a trusted source.
        mods = pickle.load(args.model_file)

        if args.cluster not in mods["clusters"]:
            parser.error(f"cluster ('{args.cluster}') not in model")

        to_stdout = args.output_file is None or args.output_file == "-"

        if args.weather_file is not None:
            pred_max, pred_min = _weather_residuals(parser, args, mods)
        else:
            mint = min(args.temp)
            maxt = max(args.temp)
            pred_max = maxt - predweather(mods["max_temp"],
                                          args.startdate,
                                          args.enddate,
                                          harmonics=mods["weather_harmonics"])
            pred_min = mint - predweather(mods["min_temp"],
                                          args.startdate,
                                          args.enddate,
                                          harmonics=mods["weather_harmonics"])

        wdat = p.DataFrame({
            "max_resid": pred_max,
            "min_resid": pred_min
        })

        dpred = p.DataFrame(
            {
                "predicted": preddemand(mods["icp"][args.cluster], wdat,
                                        mods["icp_harmonics"])
            })

        if args.csv:
            # Prevent broken pipe errors when piping to less or head
            signal(SIGPIPE, SIG_DFL)
            dpred.to_csv(stdout if to_stdout else args.output_file)
        else:
            # Pickle output is bytes, so it must go to the binary layer of
            # stdout rather than the text wrapper.
            dpred.to_pickle(stdout.buffer if to_stdout else args.output_file)
    finally:
        args.model_file.close()
        if args.weather_file is not None:
            args.weather_file.close()
-
-
# Only run the command-line entry point when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
|