Browse Source

Fourier stuff

Petra Lamborn 5 years ago
parent
commit
a95ac06382
2 changed files with 77 additions and 1 deletions
  1. 48
    1
      R/clusterviz.R
  2. 29
    0
      R/fftest.R

+ 48
- 1
R/clusterviz.R View File

@@ -7,11 +7,28 @@ library(forecast)
7 7
 theme_set(theme_bw())
8 8
 use_virtualenv("../venv/")
9 9
 
10
+fextract <- function(x, y, keep = 1, top = TRUE) {
11
+    sdy <- sd(y)
12
+    my <- mean(y)
13
+    stany <- (y - my) / sdy
14
+    ftf <- fft(stany)
15
+    if (top) {
16
+        ftf[rank(-abs(ftf)) > keep] <- 0
17
+    } else {
18
+        ftf[(keep + 1):length(ftf)] <- 0
19
+    }
20
+    rfv <- Re(fft(ftf, inverse = TRUE))
21
+    dfr <- data.frame(x = x, y = y, f = (rfv - mean(rfv)) / sd(rfv) * sdy + my)
22
+    dfr$res <- dfr$y - dfr$f
23
+    return(dfr)
24
+}
25
+
26
+
10 27
 p <- import("pandas")
11 28
 sns <- import("seaborn")
12 29
 cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
13 30
 aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
14
-aggdf <- as.data.frame(aggdf)
31
+# aggdf <- as.data.frame(aggdf)
15 32
 aggdf$cluster <- factor(aggdf$cluster)
16 33
 str(aggdf)
17 34
 clusters = levels(aggdf$cluster)
@@ -146,3 +163,33 @@ c9ts <- filter(aggdf, cluster == "9")$kwh_tot_mean
146 163
 ctsnp <- msts(c9ts, c(48, 48*7))
147 164
 ctbats <- tbats(ctsnp)
148 165
 plot(forecast(ctbats, h = 48 * 7 * 4))
166
+
167
+p <- periodogram(c1ts)
168
+dd <- data.frame(freq = p$freq, spec = p$spec) %>% mutate(per = 1/freq)
169
+dd %>% arrange(desc(spec)) %>% mutate(d = per / 48) %>% head()
170
+
171
+c9ts <- filter(aggdf, cluster == "9")
172
+
173
+ggplot(c9ts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
174
+
175
+nft <- fextract(c9ts$read_time, c9ts$kwh_tot_mean, keep = 10)
176
+ggplot(nft, aes(x, y)) + geom_line() + 
177
+    geom_line(aes(x, f), color = "blue") +
178
+    scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
179
+    coord_cartesian(xlim = c(as.POSIXct("2017-07-16", tz = "UTC"), as.POSIXct("2017-07-23", tz = "UTC")), expand = TRUE)
180
+
181
+clus <- "9"
182
+kp <- 50
183
+cts <- filter(aggdf, cluster == clus)
184
+# ggplot(cts, aes(x = read_time, y = kwh_tot_mean)) + geom_line()
185
+nft <- fextract(cts$read_time, cts$kwh_tot_mean, keep = kp)
186
+ggplot(nft, aes(x, y)) + geom_line() + 
187
+    geom_line(aes(x, f), color = "blue") +
188
+    scale_x_datetime(date_breaks = "2 days", date_labels = "%a, %-d %b %Y") +
189
+    coord_cartesian(xlim = c(as.POSIXct("2017-09-14", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
190
+
191
+ggplot(nft, aes(x, res)) + geom_line() + scale_x_datetime(date_breaks = "1 week") +
192
+    coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-28", tz = "UTC")), expand = TRUE)
193
+
194
+ggplot(nft, aes(y = res)) + geom_boxplot()
195
+ggplot(nft, aes(x = res)) + geom_histogram()

+ 29
- 0
R/fftest.R View File

@@ -0,0 +1,29 @@
1
+# Testing the reconstruction of arbitrary sequencies using fourier transforms
2
+library(ggplot2)
3
+library(scales)
4
+library(dplyr)
5
+theme_set(theme_bw())
6
+
7
+fextract <- function(x, y, keep = 1, top = TRUE) {
8
+    sdy <- sd(y)
9
+    my <- mean(y)
10
+    stany <- (y - my) / sdy
11
+    ftf <- fft(stany)
12
+    if (top) {
13
+        ftf[rank(-abs(ftf)) > keep] <- 0
14
+    } else {
15
+        ftf[(keep + 1):length(ftf)] <- 0
16
+    }
17
+    rfv <- Re(fft(ftf, inverse = TRUE))
18
+    return(data.frame(x = x, y = y, f = (rfv - mean(rfv)) / sd(rfv) * sdy + my))
19
+}
20
+
21
+n <- 100
22
+k <- 1
23
+dat1 <- data.frame(x = 1:n, yo = 10 * sin(rescale(1:n, 2 * c(-pi, pi) + rnorm(2))))
24
+dat1$y <- dat1$yo + rnorm(n)
25
+fnew <- fextract(dat1$x, dat1$y, keep = k, top = TRUE)
26
+# ggplot(rdat1, aes(x, y)) + geom_line()
27
+ggplot(fnew, aes(x, y)) + geom_line() + geom_point() + 
28
+    geom_line(aes(x, f), color = "blue") + geom_point(aes(x, f), color = "blue")
29
+