|
@@ -2,19 +2,22 @@ from argparse import ArgumentParser, FileType
|
2
|
2
|
import numpy as np
|
3
|
3
|
import pandas as p
|
4
|
4
|
import statsmodels.formula.api as smf
|
5
|
|
-import statsmodels.api as sm
|
6
|
5
|
import datetime as dt
|
|
6
|
+import pickle
|
7
|
7
|
|
8
|
8
|
epoch = dt.datetime(2017, 1, 1)
|
9
|
9
|
|
10
|
|
-def thirtyoffset(date, epoch = epoch):
|
11
|
|
- """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time
|
|
10
|
+
|
|
11
|
+def thirtyoffset(date, epoch=epoch):
|
|
12
|
+ """Get the offset from the epoch, in multiples of 30 minutes,
|
|
13
|
+ for a particular date/time
|
12
|
14
|
"""
|
13
|
15
|
return (date - epoch).total_seconds() / (60 * 30)
|
14
|
16
|
|
15
|
|
-def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
|
|
17
|
+
|
|
18
|
+def harmonic(length, period, start=0, harmonics=1, prefix=""):
|
16
|
19
|
x = np.arange(length) + start
|
17
|
|
- y = np.ndarray((harmonics * 2, length), dtype = np.float64)
|
|
20
|
+ y = np.ndarray((harmonics * 2, length), dtype=np.float64)
|
18
|
21
|
ns = []
|
19
|
22
|
nc = []
|
20
|
23
|
for i in range(harmonics):
|
|
@@ -24,29 +27,33 @@ def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
|
24
|
27
|
ns.append("{}sin{}".format(prefix, i + 1))
|
25
|
28
|
y[i + harmonics] = yci
|
26
|
29
|
nc.append("{}cos{}".format(prefix, i + 1))
|
27
|
|
- y = p.DataFrame.from_records(y.transpose(), columns = ns + nc)
|
|
30
|
+ y = p.DataFrame.from_records(y.transpose(), columns=ns + nc)
|
28
|
31
|
return y
|
29
|
32
|
|
|
33
|
+
|
30
|
34
|
def fitWeather(weather, harmonics):
|
31
|
|
- """Fit a weather model with a specified number of harmonics in order to detrend
|
32
|
|
- Returns models for minimum and maximum rolling temperature, along with a summary dataframe
|
|
35
|
+ """Fit a weather model with a specified number of harmonics
|
|
36
|
+ in order to detrend Returns models for minimum and maximum
|
|
37
|
+ rolling temperature, along with a summary dataframe
|
33
|
38
|
"""
|
34
|
39
|
# Make sure weather dataset is complete
|
35
|
|
- wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
|
36
|
|
- wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
|
37
|
|
- wanteddf = p.DataFrame(index = wantedtimes)
|
38
|
|
- weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
|
|
40
|
+ wantedtimes = p.date_range(weather.temp_timestamp.min(),
|
|
41
|
+ weather.temp_timestamp.max(), freq="30 min")
|
|
42
|
+ wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
|
|
43
|
+ wanteddf = p.DataFrame(index=wantedtimes)
|
|
44
|
+ weather = wanteddf.join(weather.set_index(['temp_timestamp']),
|
|
45
|
+ how='left').drop('temp_date', axis=1)
|
39
|
46
|
weather.index.name = "read_time"
|
40
|
|
- weather = weather.fillna(method = 'ffill', axis = 0)
|
|
47
|
+ weather = weather.fillna(method='ffill', axis=0)
|
41
|
48
|
|
42
|
49
|
# Add rolling columns
|
43
|
|
- weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
|
44
|
|
- weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
|
|
50
|
+ weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
|
|
51
|
+ weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
|
45
|
52
|
|
46
|
53
|
# Calculate harmonics
|
47
|
54
|
wharmstart = thirtyoffset(weather.index.min())
|
48
|
|
- wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart,
|
49
|
|
- harmonics = harmonics)
|
|
55
|
+ wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
|
|
56
|
+ harmonics=harmonics)
|
50
|
57
|
|
51
|
58
|
# Weather harmonic column names in format for model
|
52
|
59
|
weatherharms = " + ".join(wharm.columns.tolist())
|
|
@@ -57,77 +64,124 @@ def fitWeather(weather, harmonics):
|
57
|
64
|
wharm['max_rolling'] = weather['max_rolling']
|
58
|
65
|
|
59
|
66
|
# Fitting models
|
60
|
|
- minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
|
61
|
|
- maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()
|
|
67
|
+ minmodel = smf.ols("min_rolling ~ " + weatherharms, data=wharm).fit()
|
|
68
|
+ maxmodel = smf.ols("max_rolling ~ " + weatherharms, data=wharm).fit()
|
62
|
69
|
|
63
|
70
|
# Residuals and fitted values
|
64
|
|
- sumdf = p.DataFrame(data = {
|
65
|
|
- "max_rolling" : wharm["max_rolling"],
|
66
|
|
- "max_resid" : maxmodel.resid,
|
67
|
|
- "max_fitted" : maxmodel.fittedvalues,
|
68
|
|
- "min_rolling" : wharm["min_rolling"],
|
69
|
|
- "min_resid" : minmodel.resid,
|
70
|
|
- "min_fitted" : minmodel.fittedvalues
|
|
71
|
+ sumdf = p.DataFrame(data={
|
|
72
|
+ "max_rolling": wharm["max_rolling"],
|
|
73
|
+ "max_resid": maxmodel.resid,
|
|
74
|
+ "max_fitted": maxmodel.fittedvalues,
|
|
75
|
+ "min_rolling": wharm["min_rolling"],
|
|
76
|
+ "min_resid": minmodel.resid,
|
|
77
|
+ "min_fitted": minmodel.fittedvalues
|
71
|
78
|
})
|
72
|
79
|
|
73
|
80
|
return minmodel, maxmodel, sumdf
|
74
|
81
|
|
75
|
|
-def predweather(model, datestart, dateend, harmonics = 2):
|
|
82
|
+
|
|
83
|
+def predweather(model, datestart, dateend, harmonics=2):
|
76
|
84
|
"""Predict weather values from a weather model and date range
|
77
|
85
|
Note: it's the residuals (true value - prediction) that go
|
78
|
86
|
into the demand model
|
79
|
87
|
"""
|
80
|
|
- drange = p.date_range(datestart, dateend, freq = "30min")
|
81
|
|
- pharm = harmonic(len(drange),
|
82
|
|
- period = 365.25 * 48,
|
83
|
|
- start = thirtyoffset(drange.min()),
|
84
|
|
- harmonics = harmonics)
|
|
88
|
+ drange = p.date_range(datestart, dateend, freq="30min")
|
|
89
|
+ pharm = harmonic(len(drange),
|
|
90
|
+ period=365.25 * 48,
|
|
91
|
+ start=thirtyoffset(drange.min()),
|
|
92
|
+ harmonics=harmonics)
|
85
|
93
|
pharm.index = drange
|
86
|
94
|
return model.predict(pharm)
|
87
|
95
|
|
88
|
|
-def fitdemand(df, harmonics = [2, 3, 3]):
|
89
|
|
- print(harmonics)
|
90
|
96
|
|
|
97
|
+def fitdemand(df, wmodsum, harmonics=[2, 3, 3]):
|
|
98
|
+ harmstart = thirtyoffset(df.index.min())
|
|
99
|
+ clusters = df.cluster.unique()
|
|
100
|
+ clusters.sort()
|
|
101
|
+ utv = df.index.unique()
|
|
102
|
+ harmlen = len(utv)
|
|
103
|
+
|
|
104
|
+ yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
|
|
105
|
+ harmonics=harmonics[0], prefix="y")
|
|
106
|
+ yharm.index = utv
|
|
107
|
+
|
|
108
|
+ wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
|
|
109
|
+ harmonics=harmonics[1], prefix="w")
|
|
110
|
+ wharm.index = utv
|
|
111
|
+
|
|
112
|
+ dharm = harmonic(harmlen, period=48, start=harmstart,
|
|
113
|
+ harmonics=harmonics[2], prefix="d")
|
|
114
|
+ dharm.index = utv
|
91
|
115
|
|
|
116
|
+ y_params = " + ".join(yharm.columns.tolist())
|
|
117
|
+ w_params = " + ".join(wharm.columns.tolist())
|
|
118
|
+ d_params = " + ".join(dharm.columns.tolist())
|
|
119
|
+
|
|
120
|
+ hcomb = p.concat([yharm, wharm, dharm], axis=1)
|
|
121
|
+
|
|
122
|
+ modellist = {}
|
|
123
|
+
|
|
124
|
+ modelstring = f"""kwh_tot_mean ~ {y_params} + {w_params} + {d_params}
|
|
125
|
+ + ({y_params}):({d_params})
|
|
126
|
+ + ({w_params}):({d_params})
|
|
127
|
+ + max_resid + max_resid:({y_params})
|
|
128
|
+ + max_resid:({w_params}) + max_resid:({d_params})
|
|
129
|
+ + min_resid + min_resid:({y_params})
|
|
130
|
+ + min_resid:({w_params}) + min_resid:({d_params})
|
|
131
|
+ """.replace("\n", "").replace(" ", "")
|
|
132
|
+
|
|
133
|
+ for c in clusters:
|
|
134
|
+ dfc = df[df['cluster'] == c].join(hcomb,
|
|
135
|
+ how='left').join(wmodsum,
|
|
136
|
+ how='left')
|
|
137
|
+ # print(dfc)
|
|
138
|
+ cmod = smf.ols(modelstring, data=dfc).fit()
|
|
139
|
+ modellist[c] = cmod
|
|
140
|
+
|
|
141
|
+ return modellist
|
92
|
142
|
|
93
|
143
|
|
94
|
144
|
def main():
|
95
|
|
- parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
|
96
|
|
- parser.add_argument("-i", "--input", dest="input",
|
97
|
|
- help = "input aggregated ICP pickle path",
|
98
|
|
- metavar="PATH", default = "../data/test1kagg.pkl",
|
99
|
|
- type = FileType('rb'))
|
100
|
|
- parser.add_argument("-w", "--weather", dest="weather",
|
101
|
|
- help = "input weather pickle path",
|
102
|
|
- metavar="PATH", default = "../data/weathertest.pkl",
|
103
|
|
- type = FileType('rb'))
|
|
145
|
+ parser = ArgumentParser(description='Harmonic models of all '
|
|
146
|
+ 'clusters in specified pickle')
|
|
147
|
+ parser.add_argument("-i", "--input", dest="input",
|
|
148
|
+ help="input aggregated ICP pickle path",
|
|
149
|
+ metavar="PATH", default="../data/test1kagg.pkl",
|
|
150
|
+ type=FileType('rb'))
|
|
151
|
+ parser.add_argument("-w", "--weather", dest="weather",
|
|
152
|
+ help="input weather pickle path",
|
|
153
|
+ metavar="PATH", default="../data/weathertest.pkl",
|
|
154
|
+ type=FileType('rb'))
|
|
155
|
+ parser.add_argument("-m", "--model-file", dest="model_file",
|
|
156
|
+ help="filename for model to store",
|
|
157
|
+ default="../models/testmod.pkl",
|
|
158
|
+ type=FileType('wb'))
|
104
|
159
|
parser.add_argument("--weather-harmonics", dest="weather_harmonics",
|
105
|
|
- help = "number of harmonics for weather; default: 2",
|
106
|
|
- type = int, default = 2, metavar = "NUM")
|
107
|
|
- parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
|
108
|
|
- help = "harmonics for icp fitting, default 2 3 3",
|
109
|
|
- default = [2, 3, 3], type = int,
|
110
|
|
- metavar = "NUM")
|
|
160
|
+ help="number of harmonics for weather; default: 2",
|
|
161
|
+ type=int, default=2, metavar="NUM")
|
|
162
|
+ parser.add_argument("--icp-harmonics", dest="icp_harmonics", nargs=3,
|
|
163
|
+ help="harmonics for icp fitting, default 2 3 3",
|
|
164
|
+ default=[2, 3, 3], type=int,
|
|
165
|
+ metavar="NUM")
|
111
|
166
|
args = parser.parse_args()
|
112
|
|
-
|
113
|
|
- print(args)
|
114
|
167
|
|
115
|
|
- wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)
|
|
168
|
+ wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis=1)
|
116
|
169
|
|
117
|
170
|
icpdat = p.read_pickle(args.input).set_index('read_time')
|
118
|
171
|
|
119
|
172
|
minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
|
120
|
173
|
|
121
|
|
- icpdat = icpdat.join(sumdf, how = 'left')
|
122
|
|
-
|
123
|
|
- # fitdemand(icpdat, args
|
124
|
|
-
|
125
|
|
- # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
|
|
174
|
+ mods = {
|
|
175
|
+ "icp": fitdemand(icpdat, sumdf, args.icp_harmonics),
|
|
176
|
+ "min_temp": minm,
|
|
177
|
+ "max_temp": maxm
|
|
178
|
+ }
|
126
|
179
|
|
|
180
|
+ pickle.dump(mods, args.model_file)
|
127
|
181
|
|
128
|
|
-
|
129
|
182
|
args.input.close()
|
130
|
183
|
args.weather.close()
|
|
184
|
+ args.model_file.close()
|
131
|
185
|
|
132
|
186
|
|
133
|
187
|
if __name__ == "__main__":
|