Repository for Petra's work at ampli Jan-Feb 2019

predict.py 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. from argparse import ArgumentParser, FileType
  2. import numpy as np
  3. import pandas as p
  4. import statsmodels.formula.api as smf
  5. import datetime as dt
  6. import pickle
  7. from pymodels import thirtyoffset, predweather, harmonic
  8. from pprint import pprint
  9. from util import datevalid
  10. from signal import signal, SIGPIPE, SIG_DFL
  11. def preddemand(model, weather, harmonics):
  12. harmstart = thirtyoffset(weather.index.min())
  13. utv = weather.index
  14. harmlen = len(utv)
  15. yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
  16. harmonics=harmonics[0], prefix="y")
  17. yharm.index = utv
  18. wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
  19. harmonics=harmonics[1], prefix="w")
  20. wharm.index = utv
  21. dharm = harmonic(harmlen, period=48, start=harmstart,
  22. harmonics=harmonics[2], prefix="d")
  23. dharm.index = utv
  24. df = p.concat([yharm, wharm, dharm, weather], axis = 1)
  25. return model.predict(df)
  26. def main():
  27. parser = ArgumentParser(description=
  28. 'Predict from harmonic model of cluster')
  29. parser.add_argument("-m", "--model-file", dest="model_file",
  30. help="filename for models",
  31. required = True,
  32. type=FileType('rb'))
  33. parser.add_argument("-w", "--weather", dest="weather_file",
  34. help="input weather pickle path",
  35. required=False,
  36. type=FileType('rb'))
  37. parser.add_argument("-o", "--output", dest="output_file",
  38. help="file to save result",
  39. required=True, type=FileType('w'))
  40. parser.add_argument("-t", "--temperature", dest="temp",
  41. help = "min and max temperature, if not using "
  42. "weather dataset, e.g. 2.0 10.5",
  43. required=False,
  44. type=float, nargs=2)
  45. parser.add_argument("-s", "--start-date",
  46. dest = "startdate",
  47. help = "start date for prediction; format: YYYY-MM-DD; default: 2018-01-01",
  48. metavar="START_DATE",
  49. required = True,
  50. type = datevalid)
  51. parser.add_argument("-e", "--end-date",
  52. dest = "enddate",
  53. help = "end date for prediction; format: YYYY-MM-DD; default: 2018-02-01",
  54. metavar="END_DATE",
  55. required = True,
  56. type = datevalid)
  57. parser.add_argument("-c", "--cluster",
  58. dest = "cluster",
  59. help = "cluster to predict for",
  60. type = int,
  61. required = True)
  62. parser.add_argument("--csv",
  63. help="output as csv",
  64. action="store_true")
  65. args = parser.parse_args()
  66. if args.temp is None and args.weather_file is None:
  67. parser.error("Either the temperature range or a weather "
  68. "dataset must be specified")
  69. mods = pickle.load(args.model_file)
  70. if args.cluster not in mods["clusters"]:
  71. parser.error(f"cluster ('{args.cluster}') not in model")
  72. wdat = []
  73. if args.weather_file is not None:
  74. weather = p.read_pickle(args.weather_file)
  75. if (args.startdate < weather['temp_timestamp'].min() or
  76. args.enddate > weather['temp_timestamp'].max()):
  77. parser.error("Start and or end date not in supplied weather dataset")
  78. wantedtimes = p.date_range(args.startdate,
  79. args.enddate, freq="30 min")
  80. wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
  81. wanteddf = p.DataFrame(index=wantedtimes)
  82. weather = wanteddf.join(weather.set_index(['temp_timestamp']),
  83. how='left').drop('temp_date', axis=1)
  84. weather.index.name = "read_time"
  85. weather = weather.fillna(method='ffill', axis=0)
  86. # Add rolling columns
  87. weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
  88. weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
  89. wharmstart = thirtyoffset(weather.index.min())
  90. wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
  91. harmonics=mods["weather_harmonics"])
  92. wharm.index = wantedtimes
  93. pred_max = weather['max_rolling'] - mods["max_temp"].predict(wharm)
  94. pred_min = weather['min_rolling'] - mods["min_temp"].predict(wharm)
  95. wdat = p.DataFrame({
  96. "max_resid": pred_max,
  97. "min_resid": pred_min
  98. })
  99. else:
  100. mint = min(args.temp)
  101. maxt = max(args.temp)
  102. pred_max = maxt - predweather(mods["max_temp"],
  103. args.startdate,
  104. args.enddate,
  105. harmonics = mods["weather_harmonics"])
  106. pred_min = mint - predweather(mods["min_temp"],
  107. args.startdate,
  108. args.enddate,
  109. harmonics = mods["weather_harmonics"])
  110. wdat = p.DataFrame({
  111. "max_resid": pred_max,
  112. "min_resid": pred_min
  113. })
  114. dpred = p.DataFrame(
  115. {
  116. "predicted": preddemand(mods["icp"][args.cluster], wdat, mods["icp_harmonics"])
  117. })
  118. if args.csv:
  119. signal(SIGPIPE, SIG_DFL) # Prevent broken pipe errors when piping to less or head
  120. dpred.to_csv(args.output_file)
  121. else:
  122. dpred.to_pickle(args.output_file)
  123. args.model_file.close()
  124. if args.weather_file is not None:
  125. args.weather_file.close()
  126. if __name__ == "__main__":
  127. main()