|
@@ -1,3 +1,16 @@
|
|
1
|
+#!/usr/bin/env Rscript
|
|
2
|
+library(argparse)
|
|
3
|
+
|
|
4
|
+parser <- ArgumentParser(description="Create plots for aggregated cluster data")
|
|
5
|
+parser$add_argument("cluster_file", help = "file to visualise")
|
|
6
|
+parser$add_argument("-i", "--img-path", dest = "img_path", default = "../img/", help = "path to store plots in; default: ../img/")
|
|
7
|
+parser$add_argument("-p", "--postfix", dest = "postfix", default = "_plot", help = "postfix for files, default: _plot")
|
|
8
|
+parser$add_argument("-v", "--virtualenv", dest = "virtualenv", default = "../venv/", help = "path of virtualenv; default: ../venv/; '-' for none")
|
|
9
|
+parser$add_argument("--width", dest = "width", default = 40, help = "width (cm), default 40", type = "double")
|
|
10
|
+parser$add_argument("--height", dest = "height", default = 25, help = "height (cm), default 25", type = "double")
|
|
11
|
+
|
|
12
|
+args <- parser$parse_args()
|
|
13
|
+
|
1
|
14
|
library(reticulate)
|
2
|
15
|
library(ggplot2)
|
3
|
16
|
library(dplyr)
|
|
@@ -5,36 +18,21 @@ library(tidyr)
|
5
|
18
|
library(TSA)
|
6
|
19
|
library(forecast)
|
7
|
20
|
theme_set(theme_bw())
|
8
|
|
-use_virtualenv("../venv/")
|
9
|
21
|
|
10
|
|
-fextract <- function(x, y, keep = 1, top = TRUE) {
|
11
|
|
- sdy <- sd(y)
|
12
|
|
- my <- mean(y)
|
13
|
|
- stany <- (y - my) / sdy
|
14
|
|
- ftf <- fft(stany)
|
15
|
|
- if (top) {
|
16
|
|
- ftf[rank(-abs(ftf)) > keep] <- 0
|
17
|
|
- } else {
|
18
|
|
- ftf[(keep + 1):length(ftf)] <- 0
|
19
|
|
- }
|
20
|
|
- rfv <- Re(fft(ftf, inverse = TRUE))
|
21
|
|
- dfr <- data.frame(x = x, y = y, f = (rfv - mean(rfv)) / sd(rfv) * sdy + my)
|
22
|
|
- dfr$res <- dfr$y - dfr$f
|
23
|
|
- return(dfr)
|
|
22
|
+if (args$virtualenv != "-") {
|
|
23
|
+ print(args$virtualenv)
|
|
24
|
+ use_virtualenv(args$virtualenv)
|
24
|
25
|
}
|
25
|
26
|
|
26
|
|
-
|
27
|
27
|
p <- import("pandas")
|
28
|
28
|
sns <- import("seaborn")
|
29
|
29
|
cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
|
30
|
|
-aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
|
|
30
|
+aggdf <- p$read_pickle(args$cluster_file)
|
31
|
31
|
# aggdf <- as.data.frame(aggdf)
|
32
|
32
|
aggdf$cluster <- factor(aggdf$cluster)
|
33
|
33
|
str(aggdf)
|
34
|
34
|
clusters = levels(aggdf$cluster)
|
35
|
35
|
|
36
|
|
-mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>% mutate(x = as.POSIXct(x, tz = "UTC"))
|
37
|
|
-
|
38
|
36
|
ggplot(aggdf, aes(y = kwh_tot_mean, x = cluster)) + geom_boxplot()
|
39
|
37
|
|
40
|
38
|
facall <- ggplot(aggdf, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
|
|
@@ -103,210 +101,15 @@ facoct <- ggplot(midoct, aes(x = read_time, y = kwh_tot_mean, color = cluster, f
|
103
|
101
|
octcon <- facoct + facet_grid(cluster ~ .)
|
104
|
102
|
octfre <- facoct + facet_grid(cluster ~ ., scales = "free")
|
105
|
103
|
|
106
|
|
-# ggsave("all-9-fix-1617.png", allcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
107
|
|
-# ggsave("all-9-fre-1617.png", allfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
108
|
|
-# ggsave("jan-9-fix-1617.png", jancon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
109
|
|
-# ggsave("jan-9-fre-1617.png", janfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
110
|
|
-# ggsave("apr-9-fix-1617.png", apcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
111
|
|
-# ggsave("apr-9-fre-1617.png", apfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
112
|
|
-# ggsave("jul-9-fix-1617.png", julcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
113
|
|
-# ggsave("jul-9-fre-1617.png", julfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
114
|
|
-# ggsave("oct-9-fix-1617.png", octcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
115
|
|
-# ggsave("oct-9-fre-1617.png", octfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
116
|
|
-
|
117
|
|
-
|
118
|
|
-# ----
|
119
|
|
-
|
120
|
|
-
|
121
|
|
-cacf <- list()
|
122
|
|
-perd <- list()
|
123
|
|
-
|
124
|
|
-for (c in clusters) {
|
125
|
|
- cagg <- filter(aggdf, cluster == c)
|
126
|
|
- cacf[[c]] <- acf(cagg$kwh_tot_mean, lag.max = 48*365, plot = FALSE)$acf
|
127
|
|
- per <- periodogram(cagg$kwh_tot_mean, plot = FALSE)
|
128
|
|
- perd[[c]] <- data.frame(freq = per$freq, spec = per$spec) %>% mutate(cluster = c, period = (1/freq))
|
129
|
|
- #%>% arrange(desc(spec)) %>% head(5)
|
130
|
|
-}
|
131
|
|
-
|
132
|
|
-acfm <- sapply(cacf, as.numeric) %>% as.data.frame() %>% mutate(hour = ((1:length(`1`)) - 1)/2) %>%
|
133
|
|
- gather(key = "cluster", value = "acorr", clusters) %>% mutate(day = hour / 24, week = hour / (24 * 7))
|
134
|
|
-
|
135
|
|
-
|
136
|
|
-fcorr <- ggplot(acfm, aes(x = week, y = acorr, color = cluster)) + geom_line(size = 1.5) +
|
137
|
|
- scale_color_manual(values = cbp) + facet_grid(cluster ~ .) + coord_cartesian(expand = FALSE) +
|
138
|
|
- theme(legend.position = "none") + labs(title = "Autocorrelation plot (full year)",
|
139
|
|
- y = "Autocorrelation", x = "lag (weeks)")
|
140
|
|
-
|
141
|
|
-wcorr <- ggplot(acfm, aes(x = day, y = acorr, color = cluster)) + geom_line(size = 1.5) +
|
142
|
|
- scale_color_manual(values = cbp) + facet_grid(cluster ~ .) +
|
143
|
|
- scale_x_continuous(breaks = unique(floor(acfm$day / 7)) * 7) +
|
144
|
|
- theme(legend.position = "none") + coord_cartesian(xlim = c(0, 15), expand = FALSE) +
|
145
|
|
- labs(title = "Autocorrelation plot (two weeks)", y = "Autocorrelation", x = "lag (days)")
|
146
|
|
-
|
147
|
|
-# ggsave("full-autocorr-1617.png", fcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
148
|
|
-# ggsave("week-autocorr-1617.png", wcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
|
149
|
|
-
|
150
|
|
-perd <- bind_rows(perd)
|
151
|
|
-
|
152
|
|
-ggplot(perd, aes(x = freq, y = spec)) +
|
153
|
|
- geom_line() + facet_grid(cluster ~ ., scales = "free") +
|
154
|
|
- scale_x_continuous(breaks = 1 / (c(48, 48*7, 48*7*4, 48*365)))
|
155
|
|
-
|
156
|
|
-c1ts <- filter(aggdf, cluster == "1")$kwh_tot_mean
|
157
|
|
-cts <- ts(c1ts, frequency = 48, start = c(1, 1))
|
158
|
|
-# carima <- auto.arima(cts, trace = TRUE)
|
159
|
|
-# plot(forecast(carima, h = 480))
|
160
|
|
-ctsnp <- msts(c1ts, c(48, 48*7))
|
161
|
|
-ctbats <- tbats(ctsnp)
|
162
|
|
-plot(forecast(ctbats, h = 48 * 7 * 4))
|
163
|
|
-
|
164
|
|
-c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean
|
165
|
|
-ctsnp <- msts(c9ts, c(48, 48*7, 48*7*365.25))
|
166
|
|
-ctbats <- tbats(ctsnp)
|
167
|
|
-plot(forecast(ctbats, h = 48 * 7))
|
168
|
|
-
|
169
|
|
-p <- periodogram(c1ts)
|
170
|
|
-dd <- data.frame(freq = p$freq, spec = p$spec) %>% mutate(per = 1/freq)
|
171
|
|
-dd %>% arrange(desc(spec)) %>% mutate(d = per / 48) %>% head()
|
172
|
|
-
|
173
|
|
-c9ts <- filter(aggdf, cluster == "9")
|
174
|
|
-
|
175
|
|
-ggplot(c9ts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
|
176
|
|
-
|
177
|
|
-nft <- fextract(c9ts$read_time, c9ts$kwh_tot_mean, keep = 15)
|
178
|
|
-ggplot(nft, aes(x, y)) + geom_line() +
|
179
|
|
- geom_line(aes(x, f), color = "blue") +
|
180
|
|
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
|
181
|
|
- coord_cartesian(xlim = c(as.POSIXct("2016-07-16", tz = "UTC"), as.POSIXct("2016-07-23", tz = "UTC")), expand = TRUE)
|
182
|
|
-
|
183
|
|
-clus <- "9"
|
184
|
|
-kp <- 50
|
185
|
|
-cts <- filter(aggdf, cluster == clus)
|
186
|
|
-# ggplot(cts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
|
187
|
|
-nft <- fextract(cts$read_time, cts$kwh_tot_mean, keep = kp)
|
188
|
|
-ggplot(nft, aes(x, y)) + geom_line() +
|
189
|
|
- geom_line(aes(x, f), color = "blue") +
|
190
|
|
- scale_x_datetime(date_breaks = "2 days", date_labels = "%a, %-d %b %Y") +
|
191
|
|
- coord_cartesian(xlim = c(as.POSIXct("2017-09-14", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
|
192
|
|
-
|
193
|
|
-ggplot(nft, aes(x, res)) + geom_line() + scale_x_datetime(date_breaks = "1 week") +
|
194
|
|
- coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
|
195
|
|
-
|
196
|
|
-ggplot(nft, aes(y = res)) + geom_boxplot()
|
197
|
|
-ggplot(nft, aes(x = res)) + geom_histogram()
|
198
|
|
-
|
199
|
|
-
|
200
|
|
-c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean
|
201
|
|
-c9ft <- as.numeric(abs(fft(c9ts)))
|
202
|
|
-c9ftdf <- data.frame(x = 1:length(c9ft), ft = c9ft)
|
203
|
|
-# %>% arrange(desc(ft)) %>% mutate(xt = 1/x)
|
204
|
|
-
|
205
|
|
-c9df <- data.frame(x = 1:length(c9ts), y = c9ts, ly = log(c9ts + min(c9ts) + 1))
|
206
|
|
-c9harm. <- harmonic(ts(c9ts, frequency = 17544), 2)
|
207
|
|
-sinmod <- lm(c9df$y ~ c9harm.)
|
208
|
|
-c9df$fit <- fitted(sinmod)
|
209
|
|
-c9df$res <- resid(sinmod)
|
210
|
|
-
|
211
|
|
-ggplot(c9df, aes(x = x, y = ly)) + geom_point() +
|
212
|
|
- geom_line(aes(y = fit), size = 2, colour = "blue")
|
213
|
|
-
|
214
|
|
-plot(c(c9df$res))
|
215
|
|
-hist(log(c9df$y), breaks = 30)
|
216
|
|
-
|
217
|
|
-n <- 1.5
|
218
|
|
-ytra <- c9df$y / c9df$fit^n - 1/c9df$fit^(n-1)
|
219
|
|
-plot(ytra)
|
220
|
|
-
|
221
|
|
-
|
222
|
|
-yts <- msts(ytra, seasonal.periods = c(48, 48*7))
|
223
|
|
-autoplot(mstl(yts))
|
224
|
|
-
|
225
|
|
-ytsd <- ts(ytra, frequency = 48)
|
226
|
|
-ydharm. <- harmonic(ytsd, 10)
|
227
|
|
-sdmod <- lm(ytra ~ ydharm.)
|
228
|
|
-sddf <- data.frame(x = 1:length(ytra), y = ytra, f = fitted(sdmod), r = resid(sdmod))
|
229
|
|
-ggplot(sddf[1:(48*7*2), ], aes(x = x, y = y)) + geom_point() +
|
230
|
|
- geom_line(aes(y = f), colour = "blue", size = 2)
|
231
|
|
-
|
232
|
|
-plot(resid(sdmod)[1:(48*7*2)])
|
233
|
|
-
|
234
|
|
-
|
235
|
|
-
|
236
|
|
-
|
237
|
|
-
|
238
|
|
-
|
239
|
|
-c9tsy <- ts(c9ts, frequency = floor(48 * 365.25))
|
240
|
|
-c9tsw <- ts(c9ts, frequency = floor(48 * 7))
|
241
|
|
-c9tsd <- ts(c9ts, frequency = floor(48))
|
242
|
|
-
|
243
|
|
-harm.y <- harmonic(c9tsy, 2)
|
244
|
|
-harm.w <- harmonic(c9tsw, 2)
|
245
|
|
-harm.d <- harmonic(c9tsd, 6)
|
246
|
|
-
|
247
|
|
-cmod <- lm(c9ts ~ harm.y + harm.w + harm.d)
|
248
|
|
-summary(cmod)
|
249
|
|
-plot(fitted(cmod))
|
250
|
|
-plot(resid(cmod))
|
251
|
|
-
|
252
|
|
-
|
253
|
|
-cmodi <- lm(c9ts ~ harm.y * harm.w * harm.d)
|
254
|
|
-
|
255
|
|
-summary(cmodi)
|
256
|
|
-
|
257
|
|
-plot(fitted(cmodi))
|
258
|
|
-plot(c9ts)
|
259
|
|
-plot(resid(cmodi))
|
260
|
|
-hist(resid(cmodi), breaks = 100)
|
261
|
|
-# Heavy tails
|
262
|
|
-qqnorm(resid(cmodi))
|
263
|
|
-
|
264
|
|
-
|
265
|
|
-clust = "5"
|
266
|
|
-
|
267
|
|
-cts <- filter(aggdf, cluster == clust)$kwh_tot_mean
|
268
|
|
-ctsy <- ts(cts, frequency = floor(48 * 365.25))
|
269
|
|
-ctsw <- ts(cts, frequency = floor(48 * 7))
|
270
|
|
-ctsd <- ts(cts, frequency = floor(48))
|
271
|
|
-harm.y <- harmonic(ctsy, 4)
|
272
|
|
-harm.w <- harmonic(ctsw, 3)
|
273
|
|
-harm.d <- harmonic(ctsd, 4)
|
274
|
|
-cmodi <- lm(cts ~ harm.y * harm.w * harm.d)
|
275
|
|
-summary(cmodi)
|
276
|
|
-cldf <- data.frame(x = unique(aggdf$read_time), y = cts, f = fitted(cmodi), r = resid(cmodi))
|
277
|
|
-ggplot(cldf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue", size = 2) + geom_point() +
|
278
|
|
- geom_point(aes(y = r), color = "darkgreen") +
|
279
|
|
- coord_cartesian(xlim = c(as.POSIXct("2017-01-01", tz = "UTC"), as.POSIXct("2017-12-31", tz = "UTC")))
|
280
|
|
-
|
281
|
|
-cwcombdf <- left_join(cldf, mtempdf, by = "x")
|
282
|
|
-str(cwcombdf)
|
283
|
|
-
|
284
|
|
-
|
285
|
|
-# A variety of models
|
286
|
|
-plot(f.x ~ f.y, data = cwcombdf)
|
287
|
|
-plot(r.x ~ r.y, data = cwcombdf)
|
288
|
|
-plot(r.x ~ y.y, data = cwcombdf)
|
289
|
|
-plot(y.x ~ r.y, data = cwcombdf)
|
290
|
|
-plot(y.x ~ x, data = cwcombdf)
|
291
|
|
-plot(y.x ~ y.y, data = cwcombdf)
|
292
|
|
-plot(y.x ~ y.y, data = cwcombdf)
|
293
|
|
-plot(y.y ~ r.x, data = cwcombdf)
|
294
|
|
-plot(y.y ~ x, data = cwcombdf)
|
295
|
|
-
|
|
104
|
+ggsave(paste0("all_fix", args$postfix, ".png"), allcon, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
105
|
+ggsave(paste0("all_fre", args$postfix, ".png"), allfre, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
106
|
+ggsave(paste0("jan_fix", args$postfix, ".png"), jancon, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
107
|
+ggsave(paste0("jan_fre", args$postfix, ".png"), janfre, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
108
|
+ggsave(paste0("apr_fix", args$postfix, ".png"), apcon, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
109
|
+ggsave(paste0("apr_fre", args$postfix, ".png"), apfre, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
110
|
+ggsave(paste0("jul_fix", args$postfix, ".png"), julcon, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
111
|
+ggsave(paste0("jul_fre", args$postfix, ".png"), julfre, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
112
|
+ggsave(paste0("oct_fix", args$postfix, ".png"), octcon, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
|
113
|
+ggsave(paste0("oct_fre", args$postfix, ".png"), octfre, path = args$img_path, dpi = "retina", width = args$width, height = args$height, units = "cm")
|
296
|
114
|
|
297
|
|
-summary(lm(f.x ~ f.y, data = cwcombdf))
|
298
|
|
-summary(lm(r.x ~ r.y, data = cwcombdf))
|
299
|
|
-summary(lm(r.x ~ y.y, data = cwcombdf))
|
300
|
|
-summary(lm(y.x ~ r.y, data = cwcombdf))
|
301
|
|
-summary(lm(y.x ~ x, data = cwcombdf))
|
302
|
|
-summary(lm(y.x ~ y.y, data = cwcombdf))
|
303
|
|
-summary(lm(y.x ~ y.y, data = cwcombdf))
|
304
|
|
-summary(lm(y.y ~ r.x, data = cwcombdf))
|
305
|
|
-summary(lm(y.y ~ x, data = cwcombdf))
|
306
|
115
|
|
307
|
|
-summary(lm(r.x ~ r.y * f.y, data = cwcombdf))
|
308
|
|
-imod <- lm(r.x ~ r.y * f.y, data = cwcombdf)
|
309
|
|
-plot(fitted(imod))
|
310
|
|
-plot(resid(imod))
|
311
|
|
-plot(cwcombdf$r.x)
|
312
|
|
-plot(cwcombdf$r.x ~ resid(imod))
|