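# Combined models
# Continuation of clusterviz.R and weathmod.R
#
# Fits linear models of one cluster's kwh on yearly / weekly / daily harmonic
# regressors and the `resmin` weather term, then plots fitted values and
# residuals.
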
# Packages ----
library(TSA)         # harmonic()
library(caTools)
library(dplyr)
library(ggplot2)
library(reticulate)  # use_virtualenv(), import()
library(tidyr)
# MASS is attached after dplyr, so MASS::select masks dplyr::select; the
# script therefore calls dplyr::select() with an explicit namespace.
library(MASS)        # stepAIC() (only referenced in commented-out code)

# Session configuration ----
theme_set(theme_bw())

# required = TRUE makes a missing or broken virtualenv an immediate error.
# Without it, some reticulate versions treat the path only as a hint and
# silently fall back to whichever Python they find first.
use_virtualenv("../venv/", required = TRUE)

# Python modules (loaded through reticulate) ----
p <- import("pandas")
sns <- import("seaborn")

# Input data ----
# Aggregated table read from a pandas pickle. The rest of the script uses its
# `cluster`, `read_time` and `kwh_tot_mean` columns.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
aggdf[["cluster"]] <- factor(aggdf[["cluster"]])
clusters <- levels(aggdf[["cluster"]])
str(aggdf)

# Weather table: generic column names x / y / f / r are mapped to descriptive
# ones, and the timestamp is parsed as UTC so it can serve as the join key.
# (presumably the output of weathmod.R -- confirm)
mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>%
  rename(read_time = x, rollingmin = y, fitmin = f, resmin = r) %>%
  mutate(read_time = as.POSIXct(read_time, tz = "UTC"))
str(mtempdf)

# Number of distinct timestamps in the aggregated data; the harmonic
# regressors below are generated for exactly this many consecutive readings.
ntps <- length(unique(aggdf$read_time))

# Cluster to model. Validate it against the factor levels that actually exist
# so a wrong label fails here with a clear message instead of further down.
clus <- "9"
if (!clus %in% clusters) {
  stop(
    "cluster '", clus, "' not found; available clusters: ",
    paste(clusters, collapse = ", "),
    call. = FALSE
  )
}

# Build the harmonic regressor matrix for one seasonal period.
#   n      number of consecutive readings
#   period readings per seasonal cycle (the ts frequency)
#   m      number of harmonics passed to TSA::harmonic()
#   prefix label used in the column names
# Returns the matrix from harmonic() with columns renamed to
# "<prefix>.cos.<k>" / "<prefix>.sin.<k>".
make_harmonics <- function(n, period, m, prefix) {
  harm <- harmonic(ts(seq_len(n), frequency = period), m)
  # The naming assumes the first half of the columns are the cosine terms and
  # the second half the sine terms, as in the original script.
  pairs <- ncol(harm) / 2
  colnames(harm) <- sprintf(
    "%s.%s.%s",
    prefix,
    rep(c("cos", "sin"), each = pairs),
    rep(seq_len(pairs), times = 2)
  )
  harm
}

# Readings per day (assumes half-hourly data -- TODO confirm).
per_day <- 48

harm.y <- make_harmonics(ntps, floor(per_day * 365.25), 2, "year")
harm.w <- make_harmonics(ntps, per_day * 7, 3, "week")
harm.d <- make_harmonics(ntps, per_day, 3, "day")

# Modelling frame for the selected cluster: load (kwh), the weather columns
# joined on timestamp, and the three harmonic matrices.
clusdf <- filter(aggdf, cluster == clus) %>%
  dplyr::select(read_time, kwh = kwh_tot_mean) %>%
  left_join(mtempdf, by = "read_time") %>%
  # The harmonic matrices are indexed 1..ntps in time order and attached by
  # row position, so the rows must be in time order too.
  arrange(read_time)

# Positional binding is only valid with exactly one row per time point. This
# guards against gaps in the cluster series and against join fan-out from
# duplicated timestamps in the weather table.
if (nrow(clusdf) != ntps) {
  stop(
    sprintf(
      "cluster '%s' has %d rows after the weather join but %d were expected",
      clus, nrow(clusdf), ntps
    ),
    call. = FALSE
  )
}

clusdf <- cbind(clusdf, harm.y, harm.w, harm.d)
str(clusdf)

# Model formulas ----
# Additive term groups, one per seasonal period, e.g. "year.cos.1 + year.cos.2 + ...".
ycols <- paste(colnames(harm.y), collapse = " + ")
wcols <- paste(colnames(harm.w), collapse = " + ")
dcols <- paste(colnames(harm.d), collapse = " + ")

# Main seasonal effects shared by every non-trivial formula.
seasonal <- c(ycols, wcols, dcols)

# "(a):(b)" -- all pairwise interactions between two term groups.
cross <- function(lhs, rhs) sprintf("(%s):(%s)", lhs, rhs)

# resmin main effect plus its interaction with each seasonal group.
weather <- c("resmin", sprintf("resmin:(%s)", seasonal))

# Assemble "kwh ~ t1 + t2 + ..." from a character vector of terms.
kwh_on <- function(terms) paste("kwh ~", paste(terms, collapse = " + "))

# full : seasonal + all three seasonal interactions + weather terms
nform.full <- formula(kwh_on(c(
  seasonal,
  cross(ycols, wcols), cross(ycols, dcols), cross(wcols, dcols),
  weather
)))
# comp : as full, without the year:week interaction
nform.comp <- formula(kwh_on(c(
  seasonal,
  cross(ycols, dcols), cross(wcols, dcols),
  weather
)))
# now  : as comp, without any weather terms
nform.now <- formula(kwh_on(c(
  seasonal,
  cross(ycols, dcols), cross(wcols, dcols)
)))
# min / start : intercept-only model and main-effects-only model
nform.min <- formula("kwh ~ 1")
nform.start <- formula(kwh_on(c(seasonal, "resmin")))

# Model fits ----
# Each candidate is fitted exactly once and reused below; refitting these
# models for every statistic only repeats the same work.
nowmod <- lm(nform.now, data = clusdf)
compmod <- lm(nform.comp, data = clusdf)
fullmod <- lm(nform.full, data = clusdf)

# Working model used for the summary and the plots. Alternatives tried:
# charmmod <- lm(kwh ~ resmin + harm.y * harm.w * harm.d + resmin:harm.y, data = clusdf)
# charmmod <- fullmod
# charmmod <- lm(kwh ~ ., data = clusdf)
charmmod <- compmod
summary(charmmod)

# Mean absolute residual per candidate (now, comp, full).
mean(abs(nowmod$residuals))
mean(abs(compmod$residuals))
mean(abs(fullmod$residuals))

# Residual standard deviation per candidate (now, comp, full).
sd(nowmod$residuals)
sd(compmod$residuals)
sd(fullmod$residuals)

# Diagnostics plot ----
# One row per reading: timestamp, observed kwh, fitted value and residual of
# the working model.
cmdf <- data.frame(
  x = clusdf[["read_time"]],
  y = clusdf[["kwh"]],
  f = fitted(charmmod),
  r = residuals(charmmod)
)

# Blue line: fitted values; default points: observed kwh; dark-green points:
# residuals.
cmplot <- ggplot(data = cmdf, mapping = aes(x = x, y = y)) +
  geom_line(mapping = aes(y = f), color = "blue", size = 2) +
  geom_point() +
  geom_point(mapping = aes(y = r), color = "darkgreen")

cmplot

# Same plot zoomed to August 2017. coord_cartesian() only changes the visible
# window and does not drop data.
zoom_from <- as.POSIXct("2017-08-01", tz = "UTC")
zoom_to <- as.POSIXct("2017-09-01", tz = "UTC")
cmplot + coord_cartesian(xlim = c(zoom_from, zoom_to))

# sres <- stepAIC(charmmod, scope = list(upper = nform.full, lower = nform.min),
#                 direction = "both", steps = 300)