Repository for Petra's work at ampli Jan-Feb 2019

combmodels.R 3.3KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. # Combined models
  2. # Continuation of clusterviz.R and weathmod.R
  3. library(TSA)
  4. library(caTools)
  5. library(dplyr)
  6. library(ggplot2)
  7. library(reticulate)
  8. library(tidyr)
  9. library(MASS)
  10. theme_set(theme_bw())
  11. use_virtualenv("../venv/")
  12. p <- import("pandas")
  13. sns <- import("seaborn")
  14. aggdf <- p$read_pickle("../data/9-clusters.agg.pkl")
  15. aggdf$cluster <- factor(aggdf$cluster)
  16. clusters <- levels(aggdf$cluster)
  17. str(aggdf)
  18. mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>%
  19. mutate(x = as.POSIXct(x, tz = "UTC")) %>%
  20. rename(read_time = x, rollingmin = y, fitmin = f, resmin = r)
  21. str(mtempdf)
  22. ntps <- length(unique(aggdf$read_time))
  23. clus = "9"
  24. harm.y <- ts(1:ntps, frequency = floor(48 * 365.25)) %>% harmonic(2)
  25. harm.w <- ts(1:ntps, frequency = floor(48 * 7)) %>% harmonic(3)
  26. harm.d <- ts(1:ntps, frequency = floor(48)) %>% harmonic(3)
  27. colnames(harm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(harm.y)/2), rep(1:(ncol(harm.y)/2), times = 2))
  28. colnames(harm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(harm.w)/2), rep(1:(ncol(harm.w)/2), times = 2))
  29. colnames(harm.d) <- sprintf("%s.%s.%s", "day", rep(c("cos", "sin"), each = ncol(harm.d)/2), rep(1:(ncol(harm.d)/2), times = 2))
  30. clusdf <- filter(aggdf, cluster == clus) %>%
  31. dplyr::select(read_time, kwh = kwh_tot_mean) %>%
  32. left_join(mtempdf, by = "read_time") %>% cbind(harm.y, harm.w, harm.d)
  33. str(clusdf)
  34. ycols <- paste(colnames(harm.y), collapse = " + ")
  35. wcols <- paste(colnames(harm.w), collapse = " + ")
  36. dcols <- paste(colnames(harm.d), collapse = " + ")
  37. nform.full <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s) + (%s):(%s) + resmin + resmin:(%s) + resmin:(%s) + resmin:(%s)",
  38. ycols, wcols, dcols, ycols, wcols, ycols, dcols, wcols, dcols, ycols, wcols, dcols) %>% formula()
  39. nform.comp <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s) + resmin + resmin:(%s) + resmin:(%s) + resmin:(%s)",
  40. ycols, wcols, dcols, ycols, dcols, wcols, dcols, ycols, wcols, dcols) %>% formula()
  41. nform.now <- sprintf("kwh ~ %s + %s + %s + (%s):(%s) + (%s):(%s)",
  42. ycols, wcols, dcols, ycols, dcols, wcols, dcols) %>% formula()
  43. nform.min <- formula("kwh ~ 1")
  44. nform.start <- sprintf("kwh ~ %s + %s + %s + resmin",
  45. ycols, wcols, dcols) %>% formula()
  46. # charmmod <- lm(kwh ~ resmin + harm.y * harm.w * harm.d + resmin:harm.y, data = clusdf)
  47. charmmod <- lm(nform.comp, data = clusdf)
  48. # charmmod <- lm(nform.full, data = clusdf)
  49. # charmmod <- lm(kwh ~ ., data = clusdf)
  50. summary(charmmod)
  51. mean(abs(lm(nform.now, data = clusdf)$residuals))
  52. mean(abs(lm(nform.comp, data = clusdf)$residuals))
  53. mean(abs(lm(nform.full, data = clusdf)$residuals))
  54. sd(lm(nform.now, data = clusdf)$residuals)
  55. sd(lm(nform.comp, data = clusdf)$residuals)
  56. sd(lm(nform.full, data = clusdf)$residuals)
  57. cmdf <- data.frame(x = clusdf$read_time, y = clusdf$kwh, f = fitted(charmmod), r = resid(charmmod))
  58. cmplot <-ggplot(cmdf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue", size = 2) + geom_point() +
  59. geom_point(aes(y = r), color = "darkgreen")
  60. cmplot
  61. cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-01", tz = "UTC")))
  62. # sres <- stepAIC(charmmod, scope = list(upper = nform.full, lower = nform.min),
  63. # direction = "both", steps = 300)