Petra Lamborn 5 years ago
parent
commit
ad1f41ff87
1 changed files with 112 additions and 58 deletions
  1. 112
    58
      py/pymodels.py

+ 112
- 58
py/pymodels.py View File

@@ -2,19 +2,22 @@ from argparse import ArgumentParser, FileType
2 2
 import numpy as np
3 3
 import pandas as p
4 4
 import statsmodels.formula.api as smf
5
-import statsmodels.api as sm
6 5
 import datetime as dt
6
+import pickle
7 7
 
8 8
 epoch = dt.datetime(2017, 1, 1)
9 9
 
10
-def thirtyoffset(date, epoch = epoch):
11
-    """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time
10
+
11
+def thirtyoffset(date, epoch=epoch):
12
+    """Get the offset from the epoch, in multiples of 30 minutes,
13
+    for a particular date/time
12 14
     """
13 15
     return (date - epoch).total_seconds() / (60 * 30)
14 16
 
15
-def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
17
+
18
+def harmonic(length, period, start=0, harmonics=1, prefix=""):
16 19
     x = np.arange(length) + start
17
-    y = np.ndarray((harmonics * 2, length), dtype = np.float64)
20
+    y = np.ndarray((harmonics * 2, length), dtype=np.float64)
18 21
     ns = []
19 22
     nc = []
20 23
     for i in range(harmonics):
@@ -24,29 +27,33 @@ def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
24 27
         ns.append("{}sin{}".format(prefix, i + 1))
25 28
         y[i + harmonics] = yci
26 29
         nc.append("{}cos{}".format(prefix, i + 1))
27
-    y = p.DataFrame.from_records(y.transpose(), columns = ns + nc)
30
+    y = p.DataFrame.from_records(y.transpose(), columns=ns + nc)
28 31
     return y
29 32
 
33
+
30 34
 def fitWeather(weather, harmonics):
31
-    """Fit a weather model with a specified number of harmonics in order to detrend
32
-    Returns models for minimum and maximum rolling temperature, along with a summary dataframe
35
+    """Fit a weather model with a specified number of harmonics
36
+    in order to detrend Returns models for minimum and maximum
37
+    rolling temperature, along with a summary dataframe
33 38
     """
34 39
     # Make sure weather dataset is complete
35
-    wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
36
-    wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
37
-    wanteddf = p.DataFrame(index = wantedtimes)
38
-    weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
40
+    wantedtimes = p.date_range(weather.temp_timestamp.min(),
41
+                               weather.temp_timestamp.max(), freq="30 min")
42
+    wantedtimes = p.Series(wantedtimes.round(freq="30 min"))
43
+    wanteddf = p.DataFrame(index=wantedtimes)
44
+    weather = wanteddf.join(weather.set_index(['temp_timestamp']),
45
+                            how='left').drop('temp_date', axis=1)
39 46
     weather.index.name = "read_time"
40
-    weather = weather.fillna(method = 'ffill', axis = 0)
47
+    weather = weather.fillna(method='ffill', axis=0)
41 48
 
42 49
     # Add rolling columns
43
-    weather['min_rolling'] = weather['tmin_c'].rolling("1D").min() 
44
-    weather['max_rolling'] = weather['tmax_c'].rolling("1D").max() 
50
+    weather['min_rolling'] = weather['tmin_c'].rolling("1D").min()
51
+    weather['max_rolling'] = weather['tmax_c'].rolling("1D").max()
45 52
 
46 53
     # Calculate harmonics
47 54
     wharmstart = thirtyoffset(weather.index.min())
48
-    wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart, 
49
-                     harmonics = harmonics)
55
+    wharm = harmonic(weather.shape[0], period=365.25 * 48, start=wharmstart,
56
+                     harmonics=harmonics)
50 57
 
51 58
     # Weather harmonic column names in format for model
52 59
     weatherharms = " + ".join(wharm.columns.tolist())
@@ -57,77 +64,124 @@ def fitWeather(weather, harmonics):
57 64
     wharm['max_rolling'] = weather['max_rolling']
58 65
 
59 66
     # Fitting models
60
-    minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
61
-    maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()
67
+    minmodel = smf.ols("min_rolling ~ " + weatherharms, data=wharm).fit()
68
+    maxmodel = smf.ols("max_rolling ~ " + weatherharms, data=wharm).fit()
62 69
 
63 70
     # Residuals and fitted values
64
-    sumdf = p.DataFrame(data = {
65
-        "max_rolling" : wharm["max_rolling"],
66
-        "max_resid"   : maxmodel.resid,
67
-        "max_fitted"  : maxmodel.fittedvalues,
68
-        "min_rolling" : wharm["min_rolling"],
69
-        "min_resid"   : minmodel.resid,
70
-        "min_fitted"  : minmodel.fittedvalues
71
+    sumdf = p.DataFrame(data={
72
+        "max_rolling": wharm["max_rolling"],
73
+        "max_resid":   maxmodel.resid,
74
+        "max_fitted":  maxmodel.fittedvalues,
75
+        "min_rolling": wharm["min_rolling"],
76
+        "min_resid":   minmodel.resid,
77
+        "min_fitted":  minmodel.fittedvalues
71 78
     })
72 79
 
73 80
     return minmodel, maxmodel, sumdf
74 81
 
75
-def predweather(model, datestart, dateend, harmonics = 2):
82
+
83
+def predweather(model, datestart, dateend, harmonics=2):
76 84
     """Predict weather values from a weather model and date range
77 85
     Note: it's the residuals (true value - prediction) that go
78 86
     into the demand model
79 87
     """
80
-    drange = p.date_range(datestart, dateend, freq = "30min")
81
-    pharm = harmonic(len(drange), 
82
-                     period = 365.25 * 48, 
83
-                     start = thirtyoffset(drange.min()), 
84
-                     harmonics = harmonics)
88
+    drange = p.date_range(datestart, dateend, freq="30min")
89
+    pharm = harmonic(len(drange),
90
+                     period=365.25 * 48,
91
+                     start=thirtyoffset(drange.min()),
92
+                     harmonics=harmonics)
85 93
     pharm.index = drange
86 94
     return model.predict(pharm)
87 95
 
88
-def fitdemand(df, harmonics = [2, 3, 3]):
89
-    print(harmonics)
90 96
 
97
+def fitdemand(df, wmodsum, harmonics=[2, 3, 3]):
98
+    harmstart = thirtyoffset(df.index.min())
99
+    clusters = df.cluster.unique()
100
+    clusters.sort()
101
+    utv = df.index.unique()
102
+    harmlen = len(utv)
103
+
104
+    yharm = harmonic(harmlen, period=365.25 * 48, start=harmstart,
105
+                     harmonics=harmonics[0], prefix="y")
106
+    yharm.index = utv
107
+
108
+    wharm = harmonic(harmlen, period=7 * 48, start=harmstart,
109
+                     harmonics=harmonics[1], prefix="w")
110
+    wharm.index = utv
111
+
112
+    dharm = harmonic(harmlen, period=48, start=harmstart,
113
+                     harmonics=harmonics[2], prefix="d")
114
+    dharm.index = utv
91 115
 
116
+    y_params = " + ".join(yharm.columns.tolist())
117
+    w_params = " + ".join(wharm.columns.tolist())
118
+    d_params = " + ".join(dharm.columns.tolist())
119
+
120
+    hcomb = p.concat([yharm, wharm, dharm], axis=1)
121
+
122
+    modellist = {}
123
+
124
+    modelstring = f"""kwh_tot_mean ~ {y_params} + {w_params} + {d_params}
125
+                   + ({y_params}):({d_params})
126
+                   + ({w_params}):({d_params})
127
+                   + max_resid + max_resid:({y_params})
128
+                   + max_resid:({w_params}) + max_resid:({d_params})
129
+                   + min_resid + min_resid:({y_params})
130
+                   + min_resid:({w_params}) + min_resid:({d_params})
131
+                   """.replace("\n", "").replace("  ", "")
132
+
133
+    for c in clusters:
134
+        dfc = df[df['cluster'] == c].join(hcomb,
135
+                                          how='left').join(wmodsum,
136
+                                                           how='left')
137
+        # print(dfc)
138
+        cmod = smf.ols(modelstring, data=dfc).fit()
139
+        modellist[c] = cmod
140
+
141
+    return modellist
92 142
 
93 143
 
94 144
 def main():
95
-    parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
96
-    parser.add_argument("-i", "--input",  dest="input",  
97
-                        help = "input aggregated ICP pickle path",  
98
-                        metavar="PATH", default = "../data/test1kagg.pkl",
99
-                        type = FileType('rb'))
100
-    parser.add_argument("-w", "--weather",  dest="weather",  
101
-                        help = "input weather pickle path",  
102
-                        metavar="PATH", default = "../data/weathertest.pkl",
103
-                        type = FileType('rb'))
145
+    parser = ArgumentParser(description='Harmonic models of all '
146
+                            'clusters in specified pickle')
147
+    parser.add_argument("-i", "--input",  dest="input",
148
+                        help="input aggregated ICP pickle path",
149
+                        metavar="PATH", default="../data/test1kagg.pkl",
150
+                        type=FileType('rb'))
151
+    parser.add_argument("-w", "--weather",  dest="weather",
152
+                        help="input weather pickle path",
153
+                        metavar="PATH", default="../data/weathertest.pkl",
154
+                        type=FileType('rb'))
155
+    parser.add_argument("-m", "--model-file", dest="model_file",
156
+                        help="filename for model to store",
157
+                        default="../models/testmod.pkl",
158
+                        type=FileType('wb'))
104 159
     parser.add_argument("--weather-harmonics", dest="weather_harmonics",
105
-                        help = "number of harmonics for weather; default: 2",
106
-                        type = int, default = 2, metavar = "NUM")
107
-    parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
108
-                        help = "harmonics for icp fitting, default 2 3 3",
109
-                        default = [2, 3, 3], type = int,
110
-                        metavar = "NUM")
160
+                        help="number of harmonics for weather; default: 2",
161
+                        type=int, default=2, metavar="NUM")
162
+    parser.add_argument("--icp-harmonics", dest="icp_harmonics", nargs=3,
163
+                        help="harmonics for icp fitting, default 2 3 3",
164
+                        default=[2, 3, 3], type=int,
165
+                        metavar="NUM")
111 166
     args = parser.parse_args()
112
-    
113
-    print(args)
114 167
 
115
-    wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)
168
+    wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis=1)
116 169
 
117 170
     icpdat = p.read_pickle(args.input).set_index('read_time')
118 171
 
119 172
     minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
120 173
 
121
-    icpdat = icpdat.join(sumdf, how = 'left')
122
-
123
-    # fitdemand(icpdat, args
124
-
125
-    # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
174
+    mods = {
175
+        "icp": fitdemand(icpdat, sumdf, args.icp_harmonics),
176
+        "min_temp": minm,
177
+        "max_temp": maxm
178
+    }
126 179
 
180
+    pickle.dump(mods, args.model_file)
127 181
 
128
-    
129 182
     args.input.close()
130 183
     args.weather.close()
184
+    args.model_file.close()
131 185
 
132 186
 
133 187
 if __name__ == "__main__":