Browse Source

More models in python

Petra Lamborn 5 years ago
parent
commit
4366c84fa3
1 changed file with 88 additions and 21 deletions
  1. 88
    21
      py/pymodels.py

+ 88
- 21
py/pymodels.py View File

@@ -2,10 +2,16 @@ from argparse import ArgumentParser, FileType
2 2
 import numpy as np
3 3
 import pandas as p
4 4
 import statsmodels.formula.api as smf
5
+import statsmodels.api as sm
5 6
 import datetime as dt
6 7
 
7 8
 epoch = dt.datetime(2017, 1, 1)
8 9
 
10
+def thirtyoffset(date, epoch = epoch):
11
+    """Get the offset from the epoch, in multiples of 30 minutes, for a particular date/time
12
+    """
13
+    return (date - epoch).total_seconds() / (60 * 30)
14
+
9 15
 def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
10 16
     x = np.arange(length) + start
11 17
     y = np.ndarray((harmonics * 2, length), dtype = np.float64)
@@ -21,43 +27,104 @@ def harmonic(length, period, start = 0, harmonics = 1, prefix = ""):
21 27
     y = p.DataFrame.from_records(y.transpose(), columns = ns + nc)
22 28
     return y
23 29
 
30
+def fitWeather(weather, harmonics):
31
+    """Fit a weather model with a specified number of harmonics in order to detrend
32
+    Returns models for minimum and maximum rolling temperature, along with a summary dataframe
33
+    """
34
+    # Make sure weather dataset is complete
35
+    wantedtimes = p.date_range(weather.temp_timestamp.min(), weather.temp_timestamp.max(), freq = "30 min")
36
+    wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
37
+    wanteddf = p.DataFrame(index = wantedtimes)
38
+    weather = wanteddf.join(weather.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
39
+    weather.index.name = "read_time"
40
+    weather = weather.fillna(method = 'ffill', axis = 0)
41
+
42
+    # Add rolling columns
43
+    weather['min_rolling'] = weather['tmin_c'].rolling("1D").min() 
44
+    weather['max_rolling'] = weather['tmax_c'].rolling("1D").max() 
45
+
46
+    # Calculate harmonics
47
+    wharmstart = thirtyoffset(weather.index.min())
48
+    wharm = harmonic(weather.shape[0], period = 365.25 * 48, start = wharmstart, 
49
+                     harmonics = harmonics)
50
+
51
+    # Weather harmonic column names in format for model
52
+    weatherharms = " + ".join(wharm.columns.tolist())
53
+
54
+    # Set up dataframe for modeling
55
+    wharm.index = weather.index
56
+    wharm['min_rolling'] = weather['min_rolling']
57
+    wharm['max_rolling'] = weather['max_rolling']
58
+
59
+    # Fitting models
60
+    minmodel = smf.ols("min_rolling ~ " + weatherharms, data = wharm).fit()
61
+    maxmodel = smf.ols("max_rolling ~ " + weatherharms, data = wharm).fit()
62
+
63
+    # Residuals and fitted values
64
+    sumdf = p.DataFrame(data = {
65
+        "max_rolling" : wharm["max_rolling"],
66
+        "max_resid"   : maxmodel.resid,
67
+        "max_fitted"  : maxmodel.fittedvalues,
68
+        "min_rolling" : wharm["min_rolling"],
69
+        "min_resid"   : minmodel.resid,
70
+        "min_fitted"  : minmodel.fittedvalues
71
+    })
72
+
73
+    return minmodel, maxmodel, sumdf
74
+
75
+def predweather(model, datestart, dateend, harmonics = 2):
76
+    """Predict weather values from a weather model and date range
77
+    Note: it's the residuals (true value - prediction) that go
78
+    into the demand model
79
+    """
80
+    drange = p.date_range(datestart, dateend, freq = "30min")
81
+    pharm = harmonic(len(drange), 
82
+                     period = 365.25 * 48, 
83
+                     start = thirtyoffset(drange.min()), 
84
+                     harmonics = harmonics)
85
+    pharm.index = drange
86
+    return model.predict(pharm)
87
+
88
+def fitdemand(df, harmonics = [2, 3, 3]):
89
+    print(harmonics)
90
+
91
+
92
+
24 93
 
25 94
 def main():
26 95
     parser = ArgumentParser(description='Harmonic models of all clusters in specified pickle')
27 96
     parser.add_argument("-i", "--input",  dest="input",  
28 97
                         help = "input aggregated ICP pickle path",  
29
-                        metavar="PATH", required = True,
98
+                        metavar="PATH", default = "../data/test1kagg.pkl",
30 99
                         type = FileType('rb'))
31 100
     parser.add_argument("-w", "--weather",  dest="weather",  
32 101
                         help = "input weather pickle path",  
33
-                        metavar="PATH", required = True,
102
+                        metavar="PATH", default = "../data/weathertest.pkl",
34 103
                         type = FileType('rb'))
35 104
     parser.add_argument("--weather-harmonics", dest="weather_harmonics",
36 105
                         help = "number of harmonics for weather; default: 2",
37
-                        type = int, default = 2)
106
+                        type = int, default = 2, metavar = "NUM")
107
+    parser.add_argument("--icp-harmonics", dest = "icp_harmonics", nargs=3,
108
+                        help = "harmonics for icp fitting, default 2 3 3",
109
+                        default = [2, 3, 3], type = int,
110
+                        metavar = "NUM")
38 111
     args = parser.parse_args()
39 112
     
40
-    # print(args)
113
+    print(args)
41 114
 
42
-    # print(harmonic(17, 8, 0, 2, "test"))
43
-    # print(epoch)
44
-
45
-    # print(p.read_pickle(args.input))
46 115
     wdat = p.read_pickle(args.weather).drop(['record_no', 'station'], axis = 1)
47 116
 
48
-    wantedtimes = p.date_range(wdat.temp_timestamp.min(), wdat.temp_timestamp.max(), freq = "30 min")
49
-    wantedtimes = p.Series(wantedtimes.round(freq = "30 min"))
50
-    wanteddf = p.DataFrame(index = wantedtimes)
51
-    wdat = wanteddf.join(wdat.set_index(['temp_timestamp']), how = 'left').drop('temp_date', axis = 1)
52
-    wdat.index.name = "read_time"
53
-    wdat = wdat.fillna(method = 'ffill', axis = 0)
54
-    wdat['min_rolling'] = wdat['tmin_c'].rolling("1D").min() 
55
-    wdat['max_rolling'] = wdat['tmax_c'].rolling("1D").max() 
56
-    print(wdat)
57
-    wharmstart = (wdat.index.min() - epoch).total_seconds() / (60 * 30)
58
-    wharm = harmonic(wdat.shape[0], period = 365.25, start = wharmstart, 
59
-                     harmonics = args.weather_harmonics)
60
-    print(wharm)
117
+    icpdat = p.read_pickle(args.input).set_index('read_time')
118
+
119
+    minm, maxm, sumdf = fitWeather(wdat, args.weather_harmonics)
120
+
121
+    icpdat = icpdat.join(sumdf, how = 'left')
122
+
123
+    # fitdemand(icpdat, args
124
+
125
+    # print(predweather(minm, "2019-01-01", "2019-02-01", args.weather_harmonics))
126
+
127
+
61 128
     
62 129
     args.input.close()
63 130
     args.weather.close()