123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- library(reticulate)
- library(ggplot2)
- library(dplyr)
- library(tidyr)
- library(TSA)
- library(forecast)
- theme_set(theme_bw())
- use_virtualenv("../venv/")
-
- fextract <- function(x, y, keep = 1, top = TRUE) {
- sdy <- sd(y)
- my <- mean(y)
- stany <- (y - my) / sdy
- ftf <- fft(stany)
- if (top) {
- ftf[rank(-abs(ftf)) > keep] <- 0
- } else {
- ftf[(keep + 1):length(ftf)] <- 0
- }
- rfv <- Re(fft(ftf, inverse = TRUE))
- dfr <- data.frame(x = x, y = y, f = (rfv - mean(rfv)) / sd(rfv) * sdy + my)
- dfr$res <- dfr$y - dfr$f
- return(dfr)
- }
-
-
- p <- import("pandas")
- sns <- import("seaborn")
- cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
- aggdf <- p$read_pickle("../data/9-clusters-1617.agg.pkl")
- # aggdf <- as.data.frame(aggdf)
- aggdf$cluster <- factor(aggdf$cluster)
- str(aggdf)
- clusters = levels(aggdf$cluster)
-
- ggplot(aggdf, aes(y = kwh_tot_mean, x = cluster)) + geom_boxplot()
-
- facall <- ggplot(aggdf, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
- geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
- labs(title = "Cluster behaviour over 2016 and 2017", x = "Date", y = "kwh") +
- scale_color_manual(values = cbp) +
- scale_fill_manual(values = cbp) +
- theme(legend.position = "none") +
- scale_x_datetime(date_breaks = "1 month", date_labels = "%-d %b %y")
-
- allcon <- facall + facet_grid(cluster ~ .)
- allfre <- facall + facet_grid(cluster ~ ., scales = "free")
-
- midjan <- filter(aggdf, read_time >= as.POSIXct("2017-01-15", tz = "UTC"), read_time <= as.POSIXct("2017-01-22", tz = "UTC"))
-
- facjan <- ggplot(midjan, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
- geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
- labs(title = "Cluster behaviour over third week of January", x = "Date", y = "kwh") +
- scale_color_manual(values = cbp) +
- scale_fill_manual(values = cbp) +
- theme(legend.position = "none") +
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y")
-
- jancon <- facjan + facet_grid(cluster ~ .)
- janfre <- facjan + facet_grid(cluster ~ ., scales = "free")
-
-
- midap <- filter(aggdf, read_time >= as.POSIXct("2017-04-16", tz = "UTC"), read_time <= as.POSIXct("2017-04-23", tz = "UTC"))
-
- facap <- ggplot(midap, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
- geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
- labs(title = "Cluster behaviour over third week of April 2017", x = "Date", y = "kwh") +
- scale_color_manual(values = cbp) +
- scale_fill_manual(values = cbp) +
- theme(legend.position = "none") +
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y")
-
- apcon <- facap + facet_grid(cluster ~ .)
- apfre <- facap + facet_grid(cluster ~ ., scales = "free")
-
-
- midjul <- filter(aggdf, read_time >= as.POSIXct("2017-07-16", tz = "UTC"), read_time <= as.POSIXct("2017-07-23", tz = "UTC"))
-
- facjul <- ggplot(midjul, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
- geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
- labs(title = "Cluster behaviour over third week of July 2017", x = "Date", y = "kwh") +
- scale_color_manual(values = cbp) +
- scale_fill_manual(values = cbp) +
- theme(legend.position = "none") +
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y")
-
- julcon <- facjul + facet_grid(cluster ~ .)
- julfre <- facjul + facet_grid(cluster ~ ., scales = "free")
-
-
- midoct <- filter(aggdf, read_time >= as.POSIXct("2017-10-15", tz = "UTC"), read_time <= as.POSIXct("2017-10-22", tz = "UTC"))
-
- facoct <- ggplot(midoct, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) +
- geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
- labs(title = "Cluster behaviour over third week of October 2017", x = "Date", y = "kwh") +
- scale_color_manual(values = cbp) +
- scale_fill_manual(values = cbp) +
- theme(legend.position = "none") +
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y")
-
- octcon <- facoct + facet_grid(cluster ~ .)
- octfre <- facoct + facet_grid(cluster ~ ., scales = "free")
-
- ggsave("all-9-fix-1617.png", allcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("all-9-fre-1617.png", allfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("jan-9-fix-1617.png", jancon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("jan-9-fre-1617.png", janfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("apr-9-fix-1617.png", apcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("apr-9-fre-1617.png", apfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("jul-9-fix-1617.png", julcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("jul-9-fre-1617.png", julfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("oct-9-fix-1617.png", octcon, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("oct-9-fre-1617.png", octfre, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
-
-
- # ----
-
-
- cacf <- list()
- perd <- list()
-
- for (c in clusters) {
- cagg <- filter(aggdf, cluster == c)
- cacf[[c]] <- acf(cagg$kwh_tot_mean, lag.max = 48*365, plot = FALSE)$acf
- per <- periodogram(cagg$kwh_tot_mean, plot = FALSE)
- perd[[c]] <- data.frame(freq = per$freq, spec = per$spec) %>% mutate(cluster = c, period = (1/freq))
- #%>% arrange(desc(spec)) %>% head(5)
- }
-
- acfm <- sapply(cacf, as.numeric) %>% as.data.frame() %>% mutate(hour = ((1:length(`1`)) - 1)/2) %>%
- gather(key = "cluster", value = "acorr", clusters) %>% mutate(day = hour / 24, week = hour / (24 * 7))
-
-
- fcorr <- ggplot(acfm, aes(x = week, y = acorr, color = cluster)) + geom_line(size = 1.5) +
- scale_color_manual(values = cbp) + facet_grid(cluster ~ .) + coord_cartesian(expand = FALSE) +
- theme(legend.position = "none") + labs(title = "Autocorrelation plot (full year)",
- y = "Autocorrelation", x = "lag (weeks)")
-
- wcorr <- ggplot(acfm, aes(x = day, y = acorr, color = cluster)) + geom_line(size = 1.5) +
- scale_color_manual(values = cbp) + facet_grid(cluster ~ .) +
- scale_x_continuous(breaks = unique(floor(acfm$day / 7)) * 7) +
- theme(legend.position = "none") + coord_cartesian(xlim = c(0, 15), expand = FALSE) +
- labs(title = "Autocorrelation plot (two weeks)", y = "Autocorrelation", x = "lag (days)")
-
- ggsave("full-autocorr-1617.png", fcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
- ggsave("week-autocorr-1617.png", wcorr, path = "../img/", dpi = "retina", width = 40, height = 25, units = "cm")
-
- perd <- bind_rows(perd)
-
- ggplot(perd, aes(x = freq, y = spec)) +
- geom_line() + facet_grid(cluster ~ ., scales = "free") +
- scale_x_continuous(breaks = 1 / (c(48, 48*7, 48*7*4, 48*365)))
-
- c1ts <- filter(aggdf, cluster == "1")$kwh_tot_mean
- cts <- ts(c1ts, frequency = 48, start = c(1, 1))
- # carima <- auto.arima(cts, trace = TRUE)
- # plot(forecast(carima, h = 480))
- ctsnp <- msts(c1ts, c(48, 48*7))
- ctbats <- tbats(ctsnp)
- plot(forecast(ctbats, h = 48 * 7 * 4))
-
- c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean
- ctsnp <- msts(c9ts, c(48, 48*7, 48*7*365.25))
- ctbats <- tbats(ctsnp)
- plot(forecast(ctbats, h = 48 * 7))
-
- p <- periodogram(c1ts)
- dd <- data.frame(freq = p$freq, spec = p$spec) %>% mutate(per = 1/freq)
- dd %>% arrange(desc(spec)) %>% mutate(d = per / 48) %>% head()
-
- c9ts <- filter(aggdf, cluster == "9")
-
- ggplot(c9ts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
-
- nft <- fextract(c9ts$read_time, c9ts$kwh_tot_mean, keep = 15)
- ggplot(nft, aes(x, y)) + geom_line() +
- geom_line(aes(x, f), color = "blue") +
- scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
- coord_cartesian(xlim = c(as.POSIXct("2016-07-16", tz = "UTC"), as.POSIXct("2016-07-23", tz = "UTC")), expand = TRUE)
-
- clus <- "9"
- kp <- 50
- cts <- filter(aggdf, cluster == clus)
- # ggplot(cts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
- nft <- fextract(cts$read_time, cts$kwh_tot_mean, keep = kp)
- ggplot(nft, aes(x, y)) + geom_line() +
- geom_line(aes(x, f), color = "blue") +
- scale_x_datetime(date_breaks = "2 days", date_labels = "%a, %-d %b %Y") +
- coord_cartesian(xlim = c(as.POSIXct("2017-09-14", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
-
- ggplot(nft, aes(x, res)) + geom_line() + scale_x_datetime(date_breaks = "1 week") +
- coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
-
- ggplot(nft, aes(y = res)) + geom_boxplot()
- ggplot(nft, aes(x = res)) + geom_histogram()
|