|
@@ -2,10 +2,16 @@ from argparse import ArgumentParser, FileType
|
2
|
2
|
import numpy as np
|
3
|
3
|
import pandas as p
|
4
|
4
|
import statsmodels.formula.api as smf
|
|
5
|
+import statsmodels.api as sm
|
5
|
6
|
import datetime as dt
|
6
|
7
|
|
7
|
8
|
# Reference instant (naive datetime) used as the default origin when
# converting timestamps to half-hour offsets.
epoch = dt.datetime(year = 2017, month = 1, day = 1)
|
8
|
9
|
|
|
10
|
def thirtyoffset(date, epoch = epoch):
    """Express how far `date` lies from `epoch` as a count of 30-minute steps.

    The elapsed time is divided by 1800 seconds, so the result is fractional
    when `date` is not a whole number of half-hours away from `epoch`, and
    negative when `date` precedes `epoch`.
    """
    seconds_per_step = 30 * 60
    elapsed = date - epoch
    return elapsed.total_seconds() / seconds_per_step
|
|
14
|
+
|
9
|
15
|
def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
|
10
|
16
|
x = np.arange(length) + start
|
11
|
17
|
y = np.ndarray((harmonics * 2, length), dtype = np.float64)
|
|
@@ -21,43 +27,104 @@ def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
|
21
|
27
|
y = p.DataFrame.from_records(y.transpose(), columns = ns + nc)
|
22
|
28
|
return y
|
23
|
29
|
|
|
30
|
def fitWeather(weather, harmonics):
    """Fit harmonic regressions to rolling daily temperature extremes.

    The weather records are first placed on a complete 30-minute grid running
    from their earliest to their latest `temp_timestamp`; grid slots without
    an observation are filled by carrying the previous row forward.  A
    trailing one-day rolling minimum of `tmin_c` and rolling maximum of
    `tmax_c` are then each regressed (OLS) on harmonic terms whose period is
    one year expressed in half-hour steps (365.25 * 48).

    Parameters:
        weather: DataFrame with `temp_timestamp`, `temp_date`, `tmin_c` and
            `tmax_c` columns.  The caller's frame is not modified.
        harmonics: number of harmonics passed through to `harmonic`.

    Returns:
        A tuple `(minmodel, maxmodel, sumdf)`: the fitted results for the
        rolling minimum and rolling maximum, and a DataFrame on the gridded
        time index holding the observed rolling value, the fitted value and
        the residual for each of the two models.
    """
    # Make sure weather dataset is complete: build the full half-hourly grid
    # and left-join the observations onto it
    wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
    wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
    wanteddf = p.DataFrame(index = wantedtimes)
    weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
    weather.index.name = "read_time"
    # DataFrame.ffill is the supported spelling of the deprecated
    # fillna(method = 'ffill'); the result is the same forward fill
    weather = weather.ffill(axis = 0)

    # Add rolling columns (time-based window over the preceding day)
    weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
    weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()

    # Calculate harmonics, phased by the grid's offset from the epoch so the
    # terms line up with those generated for other date ranges
    wharmstart = thirtyoffset(weather.index.min())
    wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart,
                     harmonics = harmonics)

    # Weather harmonic column names in format for model
    # (must be captured before the response columns are added below)
    weatherharms = " + ".join(wharm.columns.tolist())

    # Set up dataframe for modeling
    wharm.index = weather.index
    wharm['min_rolling'] = weather['min_rolling']
    wharm['max_rolling'] = weather['max_rolling']

    # Fitting models
    minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
    maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()

    # Residuals and fitted values
    sumdf = p.DataFrame(data = {
        "max_rolling" : wharm["max_rolling"],
        "max_resid" : maxmodel.resid,
        "max_fitted" : maxmodel.fittedvalues,
        "min_rolling" : wharm["min_rolling"],
        "min_resid" : minmodel.resid,
        "min_fitted" : minmodel.fittedvalues
    })

    return minmodel, maxmodel, sumdf
|
|
74
|
+
|
|
75
|
def predweather(model, datestart, dateend, harmonics = 2):
    """Evaluate a fitted weather model on a 30-minute grid between two dates.

    Harmonic regressors are generated for every half-hour from `datestart`
    to `dateend`, phased by the first timestamp's offset from the epoch, and
    handed to `model.predict`.  `harmonics` is forwarded to `harmonic`.

    Note: it's the residuals (true value - prediction) that go
    into the demand model
    """
    timestamps = p.date_range(datestart, dateend, freq = "30min")
    n_steps = len(timestamps)
    year_in_steps = 365.25 * 48
    first_step = thirtyoffset(timestamps.min())
    regressors = harmonic(n_steps, period = year_in_steps,
                          start = first_step, harmonics = harmonics)
    regressors.index = timestamps
    return model.predict(regressors)
|
|
87
|
+
|
|
88
|
def fitdemand(df, harmonics = None):
    """Placeholder for the demand-model fit; currently it only prints `harmonics`.

    Parameters:
        df: input data; not used by the current body.
        harmonics: list of harmonic counts.  When omitted (or None) the
            value [2, 3, 3] is used.

    Returns:
        None.
    """
    # Build the default list per call so no list object is shared between
    # calls (a list literal in the signature would be created only once)
    if harmonics is None:
        harmonics = [2, 3, 3]
    print(harmonics)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
24
|
93
|
|
25
|
94
|
def main():
    """Command-line entry point: load the pickles and fit the weather models.

    Parses the command line, reads the weather and aggregated ICP pickles,
    fits the weather models with `fitWeather`, and joins the weather summary
    onto the ICP data by `read_time`.  Both input files are closed on exit,
    including when loading or fitting raises.
    """
    parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
    parser.add_argument("-i", "--input", dest="input",
                        help = "input aggregated ICP pickle path",
                        metavar="PATH", default = "../data/test1kagg.pkl",
                        type = FileType('rb'))
    parser.add_argument("-w", "--weather", dest="weather",
                        help = "input weather pickle path",
                        metavar="PATH", default = "../data/weathertest.pkl",
                        type = FileType('rb'))
    parser.add_argument("--weather-harmonics", dest="weather_harmonics",
                        help = "number of harmonics for weather; default: 2",
                        type = int, default = 2, metavar = "NUM")
    parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
                        help = "harmonics for icp fitting, default 2 3 3",
                        default = [2, 3, 3], type = int,
                        metavar = "NUM")
    args = parser.parse_args()

    # The with-block closes both file handles even if a step below fails
    with args.input, args.weather:
        print(args)

        # NOTE: unpickling can execute arbitrary code from the file; only
        # point these options at pickles from a trusted source
        wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)

        icpdat = p.read_pickle(args.input).set_index('read_time')

        minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)

        # Attach the weather observations, fits and residuals to each ICP row
        icpdat = icpdat.join(sumdf, how = 'left')

        # TODO: fit the demand model on icpdat (presumably
        # fitdemand(icpdat, args.icp_harmonics) -- confirm once implemented)

        # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
|