Browse Source

Working on seasonality; very much a WIP

Petra Lamborn 5 years ago
parent
commit
3e423aea81
3 changed files with 55 additions and 0 deletions
  1. 55
    0
      R/clusterviz.R
  2. BIN
      img/full-autocorr.png
  3. BIN
      img/week-autocorr.png

+ 55
- 0
R/clusterviz.R View File

@@ -1,6 +1,9 @@
1 1
 library(reticulate)
2 2
 library(ggplot2)
3 3
 library(dplyr)
4
+library(tidyr)
5
+library(TSA)
6
+library(forecast)
4 7
 theme_set(theme_bw())
5 8
 use_virtualenv("../venv/")
6 9
 
@@ -11,6 +14,7 @@ aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
11 14
 aggdf <- as.data.frame(aggdf)
12 15
 aggdf$cluster <- factor(aggdf$cluster)
13 16
 str(aggdf)
17
+clusters <- levels(aggdf$cluster)  # cluster labels; reused by the per-cluster loop and the reshape further down
14 18
 
15 19
 ggplot(aggdf, aes(y = kwh_tot_mean, x = cluster)) + geom_boxplot()
16 20
 
@@ -91,3 +95,54 @@ ggsave("jul-9-fre.png", julfre, path = "../img/", dpi = "retina", width = 40, he
91 95
 ggsave("oct-9-fix.png", octcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
92 96
 ggsave("oct-9-fre.png", octfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
93 97
 
98
+
99
+# ----
100
+
101
+
102
+cacf <- list()  # per-cluster autocorrelation values, keyed by cluster label
103
+perd <- list()  # per-cluster periodogram data frames, keyed by cluster label
104
+
105
+for (clus in clusters) {  # named `clus`, not `c`, so no global `c` is left behind to shadow base::c()
106
+    cagg <- filter(aggdf, cluster == clus)  # rows belonging to this cluster only
107
+    cacf[[clus]] <- acf(cagg$kwh_tot_mean, lag.max = 48 * 365, plot = FALSE)$acf  # keep only the $acf component
108
+    per <- periodogram(cagg$kwh_tot_mean, plot = FALSE)
109
+    perd[[clus]] <- data.frame(freq = per$freq, spec = per$spec) %>% mutate(cluster = clus, period = (1 / freq))  # period is the reciprocal of frequency
110
+    #%>% arrange(desc(spec)) %>% head(5)
111
+}
112
+
113
+acfm <- sapply(cacf, as.numeric) %>% as.data.frame() %>% mutate(hour = (row_number() - 1) / 2) %>%  # row i holds lag i - 1; /2 assumes half-hourly readings (TODO confirm)
114
+    gather(key = "cluster", value = "acorr", all_of(clusters)) %>% mutate(day = hour / 24, week = hour / (24 * 7))  # long format: one row per (lag, cluster); all_of() marks `clusters` as an external vector
115
+
116
+
117
+fcorr <- ggplot(acfm, aes(x = week, y = acorr, color = cluster)) + geom_line(size = 1.5) +  # autocorrelation against lag in weeks
118
+    scale_color_manual(values = cbp) + facet_grid(cluster ~ .) + coord_cartesian(expand = FALSE) +  # cbp is defined outside this hunk; one facet row per cluster
119
+    theme(legend.position = "none") + labs(title = "Autocorrelation plot (full year)", 
120
+                                           y = "Autocorrelation", x = "lag (weeks)")
121
+
122
+wcorr <- ggplot(acfm, aes(x = day, y = acorr, color = cluster)) + geom_line(size = 1.5) +  # same data as fcorr, with lag in days on the x axis
123
+    scale_color_manual(values = cbp) + facet_grid(cluster ~ .) +
124
+    scale_x_continuous(breaks = unique(floor(acfm$day / 7)) * 7) +  # axis breaks at multiples of 7 days
125
+    theme(legend.position = "none") + coord_cartesian(xlim = c(0, 15), expand = FALSE) +  # view limited to lags of 0-15 days
126
+    labs(title = "Autocorrelation plot (two weeks)", y = "Autocorrelation", x = "lag (days)")
127
+
128
+ggsave("full-autocorr.png", fcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")  # write the full-year plot under ../img/
129
+ggsave("week-autocorr.png", wcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")  # write the two-week plot under ../img/
130
+
131
+perd <- bind_rows(perd)  # stack the per-cluster periodogram frames into one data frame
132
+
133
+ggplot(perd, aes(x = freq, y = spec)) + # periodogram plot; the result is not assigned or saved here
134
+    geom_line() + facet_grid(cluster ~ ., scales = "free") +  # one facet row per cluster, with free scales
135
+    scale_x_continuous(breaks = 1 / (c(48, 48*7, 48*7*4, 48*365)))  # breaks at the frequencies whose periods are 48, 48*7, 48*7*4 and 48*365 samples
136
+
137
+c1ts <- filter(aggdf, cluster == "1")$kwh_tot_mean  # kwh_tot_mean series for cluster "1"
138
+cts <- ts(c1ts, frequency = 48, start = c(1, 1))  # only referenced by the commented-out auto.arima lines below
139
+# carima <- auto.arima(cts, trace = TRUE)
140
+# plot(forecast(carima, h = 480))
141
+ctsnp <- msts(c1ts, c(48, 48*7))  # multi-seasonal series with seasonal periods 48 and 48*7 (presumably daily and weekly for half-hourly data; confirm)
142
+ctbats <- tbats(ctsnp)
143
+plot(forecast(ctbats, h = 48 * 7 * 4))  # forecast horizon of 48*7*4 steps
144
+
145
+c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean  # same steps repeated for cluster "9"
146
+ctsnp <- msts(c9ts, c(48, 48*7))  # overwrites the cluster "1" ctsnp
147
+ctbats <- tbats(ctsnp)  # overwrites the cluster "1" ctbats
148
+plot(forecast(ctbats, h = 48 * 7 * 4))

BIN
img/full-autocorr.png View File


BIN
img/week-autocorr.png View File