|
@@ -7,11 +7,28 @@ library(forecast)
|
7
|
7
|
theme_set(theme_bw())
|
8
|
8
|
use_virtualenv("../venv/")
|
9
|
9
|
|
|
10
|
+fextract <- function(x, y, keep = 1, top = TRUE) {
|
|
11
|
+ sdy <- sd(y)
|
|
12
|
+ my <- mean(y)
|
|
13
|
+ stany <- (y - my) / sdy
|
|
14
|
+ ftf <- fft(stany)
|
|
15
|
+ if (top) {
|
|
16
|
+ ftf[rank(-abs(ftf)) > keep] <- 0
|
|
17
|
+ } else {
|
|
18
|
+ ftf[(keep + 1):length(ftf)] <- 0
|
|
19
|
+ }
|
|
20
|
+ rfv <- Re(fft(ftf, inverse = TRUE))
|
|
21
|
+ dfr <- data.frame(x = x, y = y, f = (rfv - mean(rfv)) / sd(rfv) * sdy + my)
|
|
22
|
+ dfr$res <- dfr$y - dfr$f
|
|
23
|
+ return(dfr)
|
|
24
|
+}
|
|
25
|
+
|
|
26
|
+
|
10
|
27
|
p <- import("pandas")
|
11
|
28
|
sns <- import("seaborn")
|
12
|
29
|
cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
|
13
|
30
|
aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
|
14
|
|
-aggdf <- as.data.frame(aggdf)
|
|
31
|
+# aggdf <- as.data.frame(aggdf)
|
15
|
32
|
aggdf$cluster <- factor(aggdf$cluster)
|
16
|
33
|
str(aggdf)
|
17
|
34
|
clusters = levels(aggdf$cluster)
|
|
@@ -146,3 +163,33 @@ c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean
|
146
|
163
|
ctsnp <- msts(c9ts, c(48, 48*7))
|
147
|
164
|
ctbats <- tbats(ctsnp)
|
148
|
165
|
plot(forecast(ctbats, h = 48 * 7 * 4))
|
|
166
|
+
|
|
167
|
+p <- periodogram(c1ts)
|
|
168
|
+dd <- data.frame(freq = p$freq, spec = p$spec) %>% mutate(per = 1/freq)
|
|
169
|
+dd %>% arrange(desc(spec)) %>% mutate(d = per / 48) %>% head()
|
|
170
|
+
|
|
171
|
+c9ts <- filter(aggdf, cluster == "9")
|
|
172
|
+
|
|
173
|
+ggplot(c9ts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
|
|
174
|
+
|
|
175
|
+nft <- fextract(c9ts$read_time, c9ts$kwh_tot_mean, keep = 10)
|
|
176
|
+ggplot(nft, aes(x, y)) + geom_line() +
|
|
177
|
+ geom_line(aes(x, f), color = "blue") +
|
|
178
|
+ scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
|
|
179
|
+ coord_cartesian(xlim = c(as.POSIXct("2017-07-16", tz = "UTC"), as.POSIXct("2017-07-23", tz = "UTC")), expand = TRUE)
|
|
180
|
+
|
|
181
|
+clus <- "9"
|
|
182
|
+kp <- 50
|
|
183
|
+cts <- filter(aggdf, cluster == clus)
|
|
184
|
+# ggplot(cts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
|
|
185
|
+nft <- fextract(cts$read_time, cts$kwh_tot_mean, keep = kp)
|
|
186
|
+ggplot(nft, aes(x, y)) + geom_line() +
|
|
187
|
+ geom_line(aes(x, f), color = "blue") +
|
|
188
|
+ scale_x_datetime(date_breaks = "2 days", date_labels = "%a, %-d %b %Y") +
|
|
189
|
+ coord_cartesian(xlim = c(as.POSIXct("2017-09-14", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
|
|
190
|
+
|
|
191
|
+ggplot(nft, aes(x, res)) + geom_line() + scale_x_datetime(date_breaks = "1 week") +
|
|
192
|
+ coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
|
|
193
|
+
|
|
194
|
+ggplot(nft, aes(y = res)) + geom_boxplot()
|
|
195
|
+ggplot(nft, aes(x = res)) + geom_histogram()
|