Browse Source

Workable harmonic/weather model

Petra Lamborn 5 years ago
parent
commit
c82da5a058
1 changed files with 80 additions and 0 deletions
  1. 80
    0
      R/combmodels.R

+ 80
- 0
R/combmodels.R View File

@@ -0,0 +1,80 @@
1
+# Combined models
2
+# Continuation of clusterviz.R and weathmod.R
3
+
4
+library(TSA)
5
+library(caTools)
6
+library(dplyr)
7
+library(ggplot2)
8
+library(reticulate)
9
+library(tidyr)
10
+library(MASS)
11
+theme_set(theme_bw())  # use the black-and-white ggplot2 theme for every plot in this script
12
+use_virtualenv("../venv/")  # point reticulate at the Python virtualenv at this relative path
13
+
14
+p <- import("pandas")  # pandas handle (via reticulate), used below to read the pickle
15
+sns <- import("seaborn")  # NOTE(review): sns is not referenced again in the visible code
16
+aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")  # load the pickle through pandas
17
+aggdf$cluster <- factor(aggdf$cluster)  # cluster labels become a factor
18
+clusters <- levels(aggdf$cluster)  # NOTE(review): clusters is not referenced again in the visible code
19
+str(aggdf)
20
+mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>% 
21
+    mutate(x = as.POSIXct(x, tz = "UTC")) %>%  # parse the x column as UTC timestamps
22
+    rename(read_time = x, rollingmin = y, fitmin = f, resmin = r)  # give the x/y/f/r columns descriptive names
23
+str(mtempdf)
24
+
25
+ntps <- length(unique(aggdf$read_time))  # number of distinct timestamps; length of the harmonic series built below
26
+
27
+clus <- "9"  # label of the cluster to model; compared against aggdf$cluster when clusdf is built
28
+
29
+
30
+harm.y <- ts(1:ntps, frequency = floor(48 * 365.25)) %>% harmonic(2)  # yearly period, 2 harmonics; assumes 48 readings per day -- TODO confirm
31
+harm.w <- ts(1:ntps, frequency = floor(48 * 7))      %>% harmonic(3)  # weekly period, 3 harmonics
32
+harm.d <- ts(1:ntps, frequency = floor(48))          %>% harmonic(3)  # daily period, 3 harmonics
33
+colnames(harm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(harm.y)/2), rep(1:(ncol(harm.y)/2), times = 2))  # names like year.cos.1 ... year.sin.2; assumes harmonic() returns all cos columns before the sin columns -- TODO confirm
34
+colnames(harm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(harm.w)/2), rep(1:(ncol(harm.w)/2), times = 2))  # same naming scheme with prefix "week"
35
+colnames(harm.d) <- sprintf("%s.%s.%s", "day",  rep(c("cos", "sin"), each = ncol(harm.d)/2), rep(1:(ncol(harm.d)/2), times = 2))  # same naming scheme with prefix "day"
36
+
37
+clusdf <- filter(aggdf, cluster == clus) %>% 
38
+    dplyr::select(read_time, kwh = kwh_tot_mean) %>%  # explicit dplyr:: because MASS is loaded after dplyr and masks select()
39
+    left_join(mtempdf, by = "read_time") %>% cbind(harm.y, harm.w, harm.d)  # NOTE(review): cbind pairs rows by position, so this assumes exactly ntps rows in time order -- confirm
40
+str(clusdf)
41
+
42
+ycols <- paste(colnames(harm.y), collapse = " + ")  # yearly harmonic column names joined as formula terms
43
+wcols <- paste(colnames(harm.w), collapse = " + ")  # weekly harmonic terms
44
+dcols <- paste(colnames(harm.d), collapse = " + ")  # daily harmonic terms
45
+
46
+nform.full <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s) + (%s):(%s) + resmin + resmin:(%s) + resmin:(%s) + resmin:(%s)", 
47
+        ycols, wcols, dcols, ycols, wcols, ycols, dcols, wcols, dcols, ycols, wcols, dcols) %>% formula()  # full: main effects, all three pairwise harmonic interactions, resmin and resmin x harmonics
48
+nform.comp <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s) + resmin + resmin:(%s) + resmin:(%s) + resmin:(%s)", 
49
+        ycols, wcols, dcols, ycols, dcols, wcols, dcols, ycols, wcols, dcols) %>% formula()  # comp: same as full but without the year:week interaction
50
+nform.now <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s)", 
51
+        ycols, wcols, dcols, ycols, dcols, wcols, dcols) %>% formula()  # now: same as comp but with every resmin term removed
52
+nform.min <- formula("kwh ~ 1")  # intercept-only model; only referenced by the commented-out stepAIC call
53
+nform.start <- sprintf("kwh ~ %s + %s + %s + resmin", 
54
+        ycols, wcols, dcols) %>% formula()  # start: additive harmonics plus resmin; NOTE(review): not used in the visible code
55
+
56
+# charmmod <- lm(kwh ~ resmin + harm.y * harm.w * harm.d + resmin:harm.y, data = clusdf)
57
+charmmod <- lm(nform.comp, data = clusdf)  # working model: the comp formula fitted to the selected cluster
58
+# charmmod <- lm(nform.full, data = clusdf)
59
+# charmmod <- lm(kwh ~ ., data = clusdf)
60
+summary(charmmod)
61
+
62
+mean(abs(lm(nform.now,  data = clusdf)$residuals))  # mean absolute residual per candidate formula; NOTE(review): every line in this group refits its model from scratch
63
+mean(abs(lm(nform.comp, data = clusdf)$residuals))
64
+mean(abs(lm(nform.full, data = clusdf)$residuals))
65
+sd(lm(nform.now,  data = clusdf)$residuals)  # residual standard deviation for the same three formulas
66
+sd(lm(nform.comp, data = clusdf)$residuals)
67
+sd(lm(nform.full, data = clusdf)$residuals)
68
+
69
+cmdf <- data.frame(x = clusdf$read_time, y = clusdf$kwh, f = fitted(charmmod), r = resid(charmmod))  # observed (y), fitted (f) and residual (r) by time; NOTE(review): if lm dropped rows with NA predictors, f and r will be shorter than x and y -- confirm
70
+cmplot <-ggplot(cmdf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue", size = 2) + geom_point() +  # blue line = fitted values, default points = observed kwh
71
+    geom_point(aes(y = r), color = "darkgreen")  # dark green points = residuals
72
+
73
+cmplot  # whole series
74
+
75
+cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-01", tz = "UTC")))  # zoom the x axis to 2017-08-01 .. 2017-09-01 (UTC)
76
+
77
+# sres <- stepAIC(charmmod, scope = list(upper = nform.full, lower = nform.min),
78
+#                 direction = "both", steps = 300)
79
+
80
+