Browse Source

Graph predictions

Petra Lamborn 5 years ago
parent
commit
e0bf20d2bd
4 changed files with 80 additions and 8 deletions
  1. 42
    4
      R/combmodels.R
  2. 2
    2
      py/downkwh.py
  3. 34
    0
      py/projprocess.py
  4. 2
    2
      py/util.py

+ 42
- 4
R/combmodels.R View File

@@ -21,15 +21,22 @@ mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>%
21 21
     mutate(x = as.POSIXct(x, tz = "UTC")) %>%
22 22
     rename(read_time = x, rollingmin = y, fitmin = f, resmin = r)
23 23
 str(mtempdf)
24
+sns <- import("seaborn")
25
+cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
24 26
 
25 27
 ntps <- length(unique(aggdf$read_time))
26 28
 
27 29
 clus = "9"
28 30
 
31
+yfreq <- floor(48 * 365.25)
32
+wfreq <- floor(48 * 7)
33
+dfreq <- floor(48)
34
+harmonics <- c(2, 3, 3)
35
+
29 36
 
30
-harm.y <- ts(1:ntps, frequency = floor(48 * 365.25)) %>% harmonic(2)
31
-harm.w <- ts(1:ntps, frequency = floor(48 * 7))      %>% harmonic(3)
32
-harm.d <- ts(1:ntps, frequency = floor(48))          %>% harmonic(3)
37
+harm.y <- ts(1:ntps, frequency = yfreq) %>% harmonic(harmonics[1])
38
+harm.w <- ts(1:ntps, frequency = wfreq) %>% harmonic(harmonics[2])
39
+harm.d <- ts(1:ntps, frequency = dfreq) %>% harmonic(harmonics[3])
33 40
 colnames(harm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(harm.y)/2), rep(1:(ncol(harm.y)/2), times = 2))
34 41
 colnames(harm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(harm.w)/2), rep(1:(ncol(harm.w)/2), times = 2))
35 42
 colnames(harm.d) <- sprintf("%s.%s.%s", "day",  rep(c("cos", "sin"), each = ncol(harm.d)/2), rep(1:(ncol(harm.d)/2), times = 2))
@@ -72,9 +79,40 @@ cmplot <-ggplot(cmdf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue",
72 79
 
73 80
 cmplot
74 81
 
75
-cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-01", tz = "UTC")))
82
+cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-03-01", tz = "UTC"), as.POSIXct("2017-04-01", tz = "UTC")))
76 83
 
77 84
 # sres <- stepAIC(charmmod, scope = list(upper = nform.full, lower = nform.min),
78 85
 #                 direction = "both", steps = 300)
79 86
 
80 87
 
88
+newagg <- p$read_pickle("../data/9-proj-agg.pkl")
89
+newagg$cluster <- factor(newagg$cluster)
90
+str(newagg)
91
+
92
+ptps <- length(unique(newagg$read_time))
93
+perdiff <- as.numeric(min(newagg$read_time) - min(aggdf$read_time), units = "mins") / 30
94
+
95
+pharm.y <- ts(1:ptps, frequency = yfreq, start = c(perdiff %/% yfreq + 1, perdiff %% yfreq + 1)) %>% harmonic(harmonics[1])
96
+pharm.w <- ts(1:ptps, frequency = wfreq, start = c(perdiff %/% wfreq + 1, perdiff %% wfreq + 1)) %>% harmonic(harmonics[2])
97
+pharm.d <- ts(1:ptps, frequency = dfreq, start = c(perdiff %/% dfreq + 1, perdiff %% dfreq + 1)) %>% harmonic(harmonics[3])
98
+colnames(pharm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(pharm.y)/2), rep(1:(ncol(pharm.y)/2), times = 2))
99
+colnames(pharm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(pharm.w)/2), rep(1:(ncol(pharm.w)/2), times = 2))
100
+colnames(pharm.d) <- sprintf("%s.%s.%s", "day",  rep(c("cos", "sin"), each = ncol(pharm.d)/2), rep(1:(ncol(pharm.d)/2), times = 2))
101
+
102
+pclusdf <- filter(newagg, cluster == clus) %>% 
103
+    dplyr::select(read_time, kwh = kwh_tot_mean) %>% 
104
+    left_join(mtempdf, by = "read_time") %>% cbind(pharm.y, pharm.w, pharm.d)
105
+str(pclusdf)
106
+
107
+ptestdata <- dplyr::select(pclusdf, -kwh)
108
+str(ptestdata)
109
+
110
+predvals <- predict(charmmod, ptestdata)
111
+
112
+predf <- data.frame(x = pclusdf$read_time, y = pclusdf$kwh, f = predvals, r = pclusdf$kwh - predvals)
113
+predplot <-ggplot(predf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue", size = 2) + geom_point() +
114
+    geom_point(aes(y = r), color = "darkgreen")
115
+
116
+predplot
117
+
118
+predplot + coord_cartesian(xlim = c(as.POSIXct("2018-03-01", tz = "UTC"), as.POSIXct("2018-04-01", tz = "UTC")))

+ 2
- 2
py/downkwh.py View File

@@ -39,9 +39,9 @@ import seaborn as sns
39 39
 # 
40 40
 # pickleQuery(query, "../data/jan19s.pkl")
41 41
 
42
-kwhdata = getkwh('2016-04-01', '2018-01-01', '2016-04-01 00:30:00', '2018-01-01 00:00:00', '%%1')
42
+kwhdata = getkwh('2018-01-01', '2018-04-01', '2018-01-01 00:30:00', '2018-04-01 00:00:00', '%%1')
43 43
 
44 44
 print(kwhdata.info())
45 45
 
46 46
 print("Pickling")
47
-kwhdata.to_pickle("../data/2016-17-sample.pkl")
47
+kwhdata.to_pickle("../data/2018-proj-sample.pkl")

+ 34
- 0
py/projprocess.py View File

@@ -0,0 +1,34 @@
1
+# This file simply takes future kwh data for the thousand previously 
2
+# sampled ICPs and calculates new aggregated measures for each cluster
3
+
4
+import pandas as p
5
+
6
+# Load new data
7
+newdat = p.read_pickle("../data/2018-proj-sample.pkl")
8
+print(newdat.info())
9
+
10
+# Get cluseters dataframe
11
+clusters = p.read_pickle("../data/9-clusters.pkl")
12
+print(clusters)
13
+print(clusters.info())
14
+clusters = clusters.drop(['read_time', 'kwh_tot'], 
15
+                         axis = 1).drop_duplicates().reset_index(drop = True)
16
+
17
+# Join dataframes
18
+newdat = clusters.set_index('icp_id').join(newdat.set_index('icp_id'), how = 'left').reset_index()
19
+
20
+print(newdat)
21
+print(newdat.info())
22
+
23
+# Aggregate median and mean (only really want mean)
24
+newagg = newdat.groupby(['read_time', 'cluster']).agg({
25
+        'kwh_tot': ['median', 'mean']
26
+})
27
+newagg.columns = ['_'.join(x) for x in newagg.columns.ravel()]
28
+newagg = newagg.reset_index()
29
+
30
+print(newagg)
31
+print(newagg.info())
32
+
33
+# Save data
34
+newagg.to_pickle("../data/9-proj-agg.pkl")

+ 2
- 2
py/util.py View File

@@ -79,7 +79,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
79 79
             SELECT read_time 
80 80
             FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
81 81
                 '30 minutes'::interval) read_time
82
-        ) AS tsdata CROSS JOIN public.icp_sample_1618
82
+        ) AS tsdata CROSS JOIN public.icp_sample
83 83
     ) AS comb
84 84
     LEFT JOIN
85 85
     (
@@ -97,7 +97,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
97 97
              and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
98 98
              and   a.content_code  ~ ('UN|CN|EG')
99 99
              AND   a.icp_id IN (
100
-                SELECT icp_id FROM public.icp_sample_1618
100
+                SELECT icp_id FROM public.icp_sample
101 101
              )
102 102
             GROUP BY 1, 2, 3
103 103
         ) AS coup_tall