Repository for Petra's work at ampli Jan-Feb 2019

pymodels.py 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. from argparse import ArgumentParser, FileType
  2. import numpy as np
  3. import pandas as p
  4. import statsmodels.formula.api as smf
  5. import datetime as dt
  6. import pickle
  7. epoch = dt.datetime(2017, 1, 1)
  8. def thirtyoffset(date, epoch=epoch):
  9. """Get the offset from the epoch, in multiples of 30 minutes,
  10. for a particular date/time
  11. """
  12. return (date - epoch).total_seconds() / (60 * 30)
  13. def harmonic(length, period, start=0, harmonics=1, prefix=""):
  14. x = np.arange(length) + start
  15. y = np.ndarray((harmonics * 2, length), dtype=np.float64)
  16. ns = []
  17. nc = []
  18. for i in range(harmonics):
  19. ysi = np.sin(x/period * 2 * np.pi * (i + 1))
  20. yci = np.cos(x/period * 2 * np.pi * (i + 1))
  21. y[i] = ysi
  22. ns.append("{}sin{}".format(prefix, i + 1))
  23. y[i + harmonics] = yci
  24. nc.append("{}cos{}".format(prefix, i + 1))
  25. y = p.DataFrame.from_records(y.transpose(), columns=ns + nc)
  26. return y
  27. def fitWeather(weather, harmonics):
  28. """Fit a weather model with a specified number of harmonics
  29. in order to detrend Returns models for minimum and maximum
  30. rolling temperature, along with a summary dataframe
  31. """
  32. # Make sure weather dataset is complete
  33. wantedtimes = p.date_range(weather.temp_timestamp.min(),
  34. weather.temp_timestamp.max(), freq="30 min")
  35. wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
  36. wanteddf = p.DataFrame(index=wantedtimes)
  37. weather = wanteddf.join(weather.set_index(['temp_timestamp']),
  38. how='left').drop('temp_date', axis=1)
  39. weather.index.name = "read_time"
  40. weather = weather.fillna(method='ffill', axis=0)
  41. # Add rolling columns
  42. weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
  43. weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
  44. # Calculate harmonics
  45. wharmstart = thirtyoffset(weather.index.min())
  46. wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
  47. harmonics=harmonics)
  48. # Weather harmonic column names in format for model
  49. weatherharms = " + ".join(wharm.columns.tolist())
  50. # Set up dataframe for modeling
  51. wharm.index = weather.index
  52. wharm['min_rolling'] = weather['min_rolling']
  53. wharm['max_rolling'] = weather['max_rolling']
  54. # Fitting models
  55. minmodel = smf.ols("min_rolling ~ " + weatherharms, data=wharm).fit()
  56. maxmodel = smf.ols("max_rolling ~ " + weatherharms, data=wharm).fit()
  57. # Residuals and fitted values
  58. sumdf = p.DataFrame(data={
  59. "max_rolling": wharm["max_rolling"],
  60. "max_resid": maxmodel.resid,
  61. "max_fitted": maxmodel.fittedvalues,
  62. "min_rolling": wharm["min_rolling"],
  63. "min_resid": minmodel.resid,
  64. "min_fitted": minmodel.fittedvalues
  65. })
  66. return minmodel, maxmodel, sumdf
  67. def predweather(model, datestart, dateend, harmonics=2):
  68. """Predict weather values from a weather model and date range
  69. Note: it's the residuals (true value - prediction) that go
  70. into the demand model
  71. """
  72. drange = p.date_range(datestart, dateend, freq="30min")
  73. pharm = harmonic(len(drange),
  74. period=365.25 * 48,
  75. start=thirtyoffset(drange.min()),
  76. harmonics=harmonics)
  77. pharm.index = drange
  78. return model.predict(pharm)
  79. def fitdemand(df, wmodsum, harmonics=[2, 3, 3]):
  80. harmstart = thirtyoffset(df.index.min())
  81. clusters = df.cluster.unique()
  82. clusters.sort()
  83. utv = df.index.unique()
  84. harmlen = len(utv)
  85. yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
  86. harmonics=harmonics[0], prefix="y")
  87. yharm.index = utv
  88. wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
  89. harmonics=harmonics[1], prefix="w")
  90. wharm.index = utv
  91. dharm = harmonic(harmlen, period=48, start=harmstart,
  92. harmonics=harmonics[2], prefix="d")
  93. dharm.index = utv
  94. y_params = " + ".join(yharm.columns.tolist())
  95. w_params = " + ".join(wharm.columns.tolist())
  96. d_params = " + ".join(dharm.columns.tolist())
  97. hcomb = p.concat([yharm, wharm, dharm], axis=1)
  98. modellist = {}
  99. modelstring = f"""kwh_tot_mean ~ {y_params} + {w_params} + {d_params}
  100. + ({y_params}):({d_params})
  101. + ({w_params}):({d_params})
  102. + max_resid + max_resid:({y_params})
  103. + max_resid:({w_params}) + max_resid:({d_params})
  104. + min_resid + min_resid:({y_params})
  105. + min_resid:({w_params}) + min_resid:({d_params})
  106. """.replace("\n", "").replace(" ", "")
  107. for c in clusters:
  108. dfc = df[df['cluster'] == c].join(hcomb,
  109. how='left').join(wmodsum,
  110. how='left')
  111. # print(dfc)
  112. cmod = smf.ols(modelstring, data=dfc).fit()
  113. modellist[c] = cmod
  114. return modellist
  115. def main():
  116. parser = ArgumentParser(description='Harmonic models of all '
  117. 'clusters in specified pickle')
  118. parser.add_argument("-i", "--input", dest="input",
  119. help="input aggregated ICP pickle path",
  120. metavar="PATH", required=True,
  121. type=FileType('rb'))
  122. parser.add_argument("-w", "--weather", dest="weather",
  123. help="input weather pickle path",
  124. metavar="PATH", required=True,
  125. type=FileType('rb'))
  126. parser.add_argument("-m", "--model-file", dest="model_file",
  127. help="filename for model to store",
  128. required=True,
  129. type=FileType('wb'))
  130. parser.add_argument("--weather-harmonics", dest="weather_harmonics",
  131. help="number of harmonics for weather; default: 2",
  132. type=int, default=2, metavar="NUM")
  133. parser.add_argument("--icp-harmonics", dest="icp_harmonics", nargs=3,
  134. help="harmonics for icp fitting, default 2 3 3",
  135. default=[2, 3, 3], type=int,
  136. metavar="NUM")
  137. args = parser.parse_args()
  138. wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis=1)
  139. icpdat = p.read_pickle(args.input).set_index('read_time')
  140. minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
  141. clusters = icpdat.cluster.unique()
  142. clusters.sort()
  143. mods = {
  144. "icp": fitdemand(icpdat, sumdf, args.icp_harmonics),
  145. "min_temp": minm,
  146. "max_temp": maxm,
  147. "clusters": clusters,
  148. "weather_harmonics": args.weather_harmonics,
  149. "icp_harmonics": args.icp_harmonics
  150. }
  151. pickle.dump(mods, args.model_file)
  152. args.input.close()
  153. args.weather.close()
  154. args.model_file.close()
  155. if __name__ == "__main__":
  156. main()