Repository for Petra's work at ampli Jan-Feb 2019

predict.py 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. from argparse import ArgumentParser, FileType
  2. import numpy as np
  3. import pandas as p
  4. import statsmodels.formula.api as smf
  5. import datetime as dt
  6. import pickle
  7. from sys import stdout
  8. from pymodels import thirtyoffset, predweather, harmonic
  9. from pprint import pprint
  10. from util import datevalid
  11. from signal import signal, SIGPIPE, SIG_DFL
  12. def preddemand(model, weather, harmonics):
  13. harmstart = thirtyoffset(weather.index.min())
  14. utv = weather.index
  15. harmlen = len(utv)
  16. yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
  17. harmonics=harmonics[0], prefix="y")
  18. yharm.index = utv
  19. wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
  20. harmonics=harmonics[1], prefix="w")
  21. wharm.index = utv
  22. dharm = harmonic(harmlen, period=48, start=harmstart,
  23. harmonics=harmonics[2], prefix="d")
  24. dharm.index = utv
  25. df = p.concat([yharm, wharm, dharm, weather], axis = 1)
  26. return model.predict(df)
  27. def main():
  28. parser = ArgumentParser(description=
  29. 'Predict from harmonic model of cluster')
  30. parser.add_argument("-m", "--model-file", dest="model_file",
  31. help="filename for models",
  32. required = True,
  33. type=FileType('rb'))
  34. parser.add_argument("-w", "--weather", dest="weather_file",
  35. help="input weather pickle path",
  36. required=False,
  37. type=FileType('rb'))
  38. parser.add_argument("-o", "--output", dest="output_file",
  39. help="file to save result (default stdout)")
  40. parser.add_argument("-t", "--temperature", dest="temp",
  41. help = "min and max temperature, if not using "
  42. "weather dataset, e.g. 2.0 10.5",
  43. required=False,
  44. type=float, nargs=2)
  45. parser.add_argument("-s", "--start-date",
  46. dest = "startdate",
  47. help = "start date for prediction; format: YYYY-MM-DD",
  48. metavar="START_DATE",
  49. required = True,
  50. type = datevalid)
  51. parser.add_argument("-e", "--end-date",
  52. dest = "enddate",
  53. help = "end date for prediction; format: YYYY-MM-DD",
  54. metavar="END_DATE",
  55. required = True,
  56. type = datevalid)
  57. parser.add_argument("-c", "--cluster",
  58. dest = "cluster",
  59. help = "cluster to predict for",
  60. type = int,
  61. required = True)
  62. parser.add_argument("--pkl",
  63. help="output as pkl rather than csv",
  64. dest = "csv",
  65. action="store_false")
  66. args = parser.parse_args()
  67. if args.temp is None and args.weather_file is None:
  68. parser.error("Either the temperature range or a weather "
  69. "dataset must be specified")
  70. mods = pickle.load(args.model_file)
  71. if args.cluster not in mods["clusters"]:
  72. parser.error(f"cluster ('{args.cluster}') not in model")
  73. if args.output_file is None or args.output_file == "-":
  74. args.output_file = stdout
  75. wdat = []
  76. if args.weather_file is not None:
  77. weather = p.read_pickle(args.weather_file)
  78. if (args.startdate < weather['temp_timestamp'].min() or
  79. args.enddate > weather['temp_timestamp'].max()):
  80. parser.error("Start and or end date not in supplied weather dataset")
  81. wantedtimes = p.date_range(args.startdate,
  82. args.enddate, freq="30 min")
  83. wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
  84. wanteddf = p.DataFrame(index=wantedtimes)
  85. weather = wanteddf.join(weather.set_index(['temp_timestamp']),
  86. how='left').drop('temp_date', axis=1)
  87. weather.index.name = "read_time"
  88. weather = weather.fillna(method='ffill', axis=0)
  89. # Add rolling columns
  90. weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
  91. weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
  92. wharmstart = thirtyoffset(weather.index.min())
  93. wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
  94. harmonics=mods["weather_harmonics"])
  95. wharm.index = wantedtimes
  96. pred_max = weather['max_rolling'] - mods["max_temp"].predict(wharm)
  97. pred_min = weather['min_rolling'] - mods["min_temp"].predict(wharm)
  98. wdat = p.DataFrame({
  99. "max_resid": pred_max,
  100. "min_resid": pred_min
  101. })
  102. else:
  103. mint = min(args.temp)
  104. maxt = max(args.temp)
  105. pred_max = maxt - predweather(mods["max_temp"],
  106. args.startdate,
  107. args.enddate,
  108. harmonics = mods["weather_harmonics"])
  109. pred_min = mint - predweather(mods["min_temp"],
  110. args.startdate,
  111. args.enddate,
  112. harmonics = mods["weather_harmonics"])
  113. wdat = p.DataFrame({
  114. "max_resid": pred_max,
  115. "min_resid": pred_min
  116. })
  117. dpred = p.DataFrame(
  118. {
  119. "predicted": preddemand(mods["icp"][args.cluster], wdat, mods["icp_harmonics"])
  120. })
  121. if args.csv:
  122. signal(SIGPIPE, SIG_DFL) # Prevent broken pipe errors when piping to less or head
  123. dpred.to_csv(args.output_file)
  124. else:
  125. dpred.to_pickle(args.output_file)
  126. args.model_file.close()
  127. if args.weather_file is not None:
  128. args.weather_file.close()
# Run the command-line interface only when this file is executed as a
# script, so importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()