Repository for Petra's work at ampli Jan-Feb 2019

pymodels.py 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. from argparse import ArgumentParser, FileType
  2. import numpy as np
  3. import pandas as p
  4. import statsmodels.formula.api as smf
  5. import statsmodels.api as sm
  6. import datetime as dt
  # Reference instant from which all half-hour offsets are measured.
  epoch = dt.datetime(2017, 1, 1)


  def thirtyoffset(date, epoch = epoch):
      """Return the number of 30-minute intervals between ``epoch`` and ``date``.

      The result is a float: it is fractional when ``date`` does not fall on
      a half-hour boundary relative to ``epoch`` and negative when ``date``
      precedes ``epoch``. ``epoch`` defaults to the module-level epoch
      (2017-01-01 00:00).
      """
      seconds_per_step = 60 * 30
      elapsed = date - epoch
      return elapsed.total_seconds() / seconds_per_step
  def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
      """Build a DataFrame of sine/cosine regressors for a periodic signal.

      Positions ``start, start + 1, ..., start + length - 1`` are evaluated
      for each harmonic order ``k = 1..harmonics`` as
      ``sin(2*pi*k*x/period)`` and ``cos(2*pi*k*x/period)``.

      The returned frame has ``length`` rows and ``2 * harmonics`` columns:
      first ``<prefix>sin1..<prefix>sinN``, then ``<prefix>cos1..<prefix>cosN``.
      """
      positions = np.arange(length) + start
      orders = range(1, harmonics + 1)

      # Rows [0, harmonics) hold the sines, rows [harmonics, 2*harmonics) the
      # cosines; every row is overwritten below, so no initialisation needed.
      table = np.empty((2 * harmonics, length), dtype = np.float64)
      for row, order in enumerate(orders):
          angle = positions/period * 2 * np.pi * order
          table[row] = np.sin(angle)
          table[row + harmonics] = np.cos(angle)

      sin_names = ["{}sin{}".format(prefix, order) for order in orders]
      cos_names = ["{}cos{}".format(prefix, order) for order in orders]
      return p.DataFrame.from_records(table.transpose(), columns = sin_names + cos_names)
  def fitWeather(weather, harmonics):
      """Fit a weather model with a specified number of harmonics in order to detrend.

      ``weather`` is a DataFrame with the columns ``temp_timestamp``,
      ``temp_date``, ``tmin_c`` and ``tmax_c``. It is re-indexed onto a
      complete 30-minute grid running from its first to its last timestamp,
      gaps are forward-filled, and one-day rolling minimum/maximum columns
      are derived from ``tmin_c``/``tmax_c``.

      Returns ``(minmodel, maxmodel, sumdf)``: the fitted OLS results for the
      rolling minimum and the rolling maximum, plus a summary DataFrame
      (indexed by ``read_time``) holding the rolling values, the fitted values
      and the residuals of both models.
      """
      # Make sure weather dataset is complete: left-join the observations onto
      # the full 30-minute grid so missing readings show up as NaN rows.
      wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
      wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
      wanteddf = p.DataFrame(index = wantedtimes)
      weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
      weather.index.name = "read_time"
      # Carry the last observation forward into the gaps. DataFrame.ffill is
      # used instead of fillna(method = 'ffill'), which newer pandas deprecates.
      weather = weather.ffill(axis = 0)

      # Add rolling columns
      weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
      weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()

      # Calculate harmonics; the period is 365.25 * 48 half-hour steps, so the
      # first harmonic repeats once per year.
      wharmstart = thirtyoffset(weather.index.min())
      wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart,
                       harmonics = harmonics)

      # Weather harmonic column names in format for model
      weatherharms = " + ".join(wharm.columns.tolist())

      # Set up dataframe for modeling: regressors and responses share the
      # weather index so fitted values and residuals line up with it.
      wharm.index = weather.index
      wharm['min_rolling'] = weather['min_rolling']
      wharm['max_rolling'] = weather['max_rolling']

      # Fitting models
      minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
      maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()

      # Residuals and fitted values
      sumdf = p.DataFrame(data = {
          "max_rolling" : wharm["max_rolling"],
          "max_resid" : maxmodel.resid,
          "max_fitted" : maxmodel.fittedvalues,
          "min_rolling" : wharm["min_rolling"],
          "min_resid" : minmodel.resid,
          "min_fitted" : minmodel.fittedvalues,
      })
      return minmodel, maxmodel, sumdf
  def predweather(model, datestart, dateend, harmonics = 2):
      """Predict weather values from a weather model over a date range.

      A 30-minute grid from ``datestart`` to ``dateend`` is generated, the
      harmonic regressors for that grid are built, and ``model.predict`` is
      evaluated on them. ``harmonics`` is the number of harmonic orders to
      generate for the prediction frame.

      Note: it's the residuals (true value - prediction) that go
      into the demand model.
      """
      timestamps = p.date_range(datestart, dateend, freq = "30min")
      first_step = thirtyoffset(timestamps.min())
      regressors = harmonic(
          len(timestamps),
          period = 365.25 * 48,
          start = first_step,
          harmonics = harmonics,
      )
      regressors.index = timestamps
      return model.predict(regressors)
  def fitdemand(df, harmonics = None):
      """Placeholder for the demand-model fit; currently only prints ``harmonics``.

      ``df`` is accepted but not used yet. ``harmonics`` defaults to
      ``[2, 3, 3]``; the list is created on each call rather than in the
      signature so that no mutable default is shared between calls.
      """
      if harmonics is None:
          harmonics = [2, 3, 3]
      print(harmonics)
  def main():
      """Command-line entry point: load the pickles and fit the weather model.

      Parses the command line, reads the weather and aggregated ICP pickles,
      fits the weather harmonic models and joins the resulting summary frame
      onto the ICP data. Both input files are opened by argparse and are
      closed on exit, including when loading or fitting raises.
      """
      parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
      parser.add_argument("-i", "--input", dest="input",
                          help = "input aggregated ICP pickle path",
                          metavar="PATH", default = "../data/test1kagg.pkl",
                          type = FileType('rb'))
      parser.add_argument("-w", "--weather", dest="weather",
                          help = "input weather pickle path",
                          metavar="PATH", default = "../data/weathertest.pkl",
                          type = FileType('rb'))
      parser.add_argument("--weather-harmonics", dest="weather_harmonics",
                          help = "number of harmonics for weather; default: 2",
                          type = int, default = 2, metavar = "NUM")
      parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
                          help = "harmonics for icp fitting, default 2 3 3",
                          default = [2, 3, 3], type = int,
                          metavar = "NUM")
      args = parser.parse_args()
      print(args)

      # FileType has already opened both files; the with-block guarantees they
      # are closed (input first, then weather) even if an exception is raised.
      with args.weather, args.input:
          # NOTE(security): unpickling executes arbitrary code from the file;
          # only point these options at pickles from a trusted source.
          wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)
          icpdat = p.read_pickle(args.input).set_index('read_time')
          minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
          icpdat = icpdat.join(sumdf, how = 'left')
          # TODO: demand fitting and weather prediction are not wired up yet.
          # fitdemand(icpdat, args
          # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
  # Run the command-line interface only when this file is executed directly,
  # not when it is imported as a module.
  if __name__ == "__main__":
      main()