Repository for Petra's work at ampli Jan-Feb 2019

clusterviz.R 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. #!/usr/bin/env Rscript
  2. library(argparse)
  # Command-line interface: one positional input file, plus options for the
  # output directory, the file-name postfix, the plot size and the input format.
  parser <- ArgumentParser(
    description = "Create plots for aggregated cluster data"
  )
  parser$add_argument(
    "cluster_file",
    help = "file to visualise"
  )
  parser$add_argument(
    "-i", "--img-path",
    dest = "img_path",
    default = "../img/",
    help = "path to store plots in; default: ../img/"
  )
  parser$add_argument(
    "-p", "--postfix",
    dest = "postfix",
    default = "_plot",
    help = "postfix for files, default: _plot"
  )
  # Plot dimensions are given in centimetres and parsed as doubles.
  parser$add_argument(
    "--width",
    dest = "width",
    type = "double",
    default = 40,
    help = "width (cm), default 40"
  )
  parser$add_argument(
    "--height",
    dest = "height",
    type = "double",
    default = 25,
    help = "height (cm), default 25"
  )
  # `--pkl` is a store_false switch writing into `csv`; the loader below reads
  # CSV when `args$csv` is TRUE and a pandas pickle otherwise.
  parser$add_argument(
    "--pkl",
    dest = "csv",
    action = "store_false",
    help = "load from pickle instead of csv"
  )

  # Parse the command line and echo the resulting settings.
  args <- parser$parse_args()
  print(args)
  12. library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
  13. library(dplyr, warn.conflicts = FALSE, quietly = TRUE)
  14. library(tidyr, warn.conflicts = FALSE, quietly = TRUE)
  15. library(TSA, warn.conflicts = FALSE, quietly = TRUE)
  16. library(forecast, warn.conflicts = FALSE, quietly = TRUE)
  # Use the black-and-white ggplot2 theme for every plot created below.
  theme_set(theme_bw())

  # Load the aggregated cluster table into `aggdf`: from CSV by default, or
  # from a pandas pickle (via reticulate) when `--pkl` was given.
  if (args$csv) {
    aggdf <- read.csv(
      args$cluster_file,
      header = TRUE,
      stringsAsFactors = FALSE
    )
    # Parse timestamps explicitly as UTC. The window filters further down
    # compare `read_time` against boundaries built with tz = "UTC"; without an
    # explicit `tz` the strings would be read in the session's local time
    # zone and the windows would be shifted by the local UTC offset.
    aggdf$read_time <- as.POSIXct(aggdf$read_time, tz = "UTC")
  } else {
    # NOTE(review): unpickling can execute code embedded in the file; only
    # use --pkl with files from a trusted source.
    library(reticulate, warn.conflicts = FALSE, quietly = TRUE)
    p <- import("pandas")
    aggdf <- p$read_pickle(args$cluster_file)
  }
  # Treat the cluster label as a factor; its levels define the set of clusters.
  aggdf$cluster <- factor(aggdf$cluster)
  clusters <- levels(aggdf$cluster)

  # One colour per cluster, taken from a ten-colour base palette that is
  # recycled as often as needed. rep_len() gives the same colours as the
  # previous hand-repeated 40-entry vector for up to 40 clusters, and also
  # handles more than 40 clusters (no NA colours) and zero clusters.
  cbp <- rep_len(
    c(
      "#0173b2", "#de8f05", "#029e73", "#d55e00", "#cc78bc",
      "#ca9161", "#fbafe4", "#949494", "#ece133", "#56b4e9"
    ),
    length(clusters)
  )
  # Base plot over the complete table: mean kWh per cluster as a line, with
  # the confidence-interval columns drawn as a translucent band. Colours come
  # from `cbp`; the legend is hidden because each cluster gets its own facet.
  facall <- ggplot(
    data = aggdf,
    mapping = aes(
      x = read_time,
      y = kwh_tot_mean,
      colour = cluster,
      fill = cluster
    )
  ) +
    geom_line(size = 1.5) +
    geom_ribbon(
      mapping = aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high),
      colour = NA,
      alpha = 0.2
    ) +
    scale_colour_manual(values = cbp) +
    scale_fill_manual(values = cbp) +
    scale_x_datetime(date_breaks = "1 month", date_labels = "%-d %b %y") +
    labs(title = "Cluster behaviour over 2017", x = "Date", y = "kwh") +
    theme(legend.position = "none")

  # One facet row per cluster: shared axis scales ...
  allcon <- facall + facet_grid(cluster ~ .)
  # ... and scales that are free per facet.
  allfre <- facall + facet_grid(cluster ~ ., scales = "free")
  # Rows with read_time between 2017-01-15 00:00 and 2017-01-22 00:00 UTC,
  # both boundaries inclusive. `filter` is namespace-qualified so that a
  # package attached after dplyr cannot mask it.
  midjan <- dplyr::filter(
    aggdf,
    read_time >= as.POSIXct("2017-01-15", tz = "UTC"),
    read_time <= as.POSIXct("2017-01-22", tz = "UTC")
  )

  # Base plot for the January window: mean kWh per cluster as a line, with
  # the confidence-interval columns as a translucent band and daily axis
  # breaks. The title now carries the year, matching the April, July and
  # October plots.
  facjan <- ggplot(
    midjan,
    aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)
  ) +
    geom_line(size = 1.5) +
    geom_ribbon(
      aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high),
      alpha = 0.2,
      color = NA
    ) +
    labs(
      title = "Cluster behaviour over third week of January 2017",
      x = "Date",
      y = "kwh"
    ) +
    scale_color_manual(values = cbp) +
    scale_fill_manual(values = cbp) +
    theme(legend.position = "none") +
    scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y")

  # One facet row per cluster, with shared and with free axis scales.
  jancon <- facjan + facet_grid(cluster ~ .)
  janfre <- facjan + facet_grid(cluster ~ ., scales = "free")
  # Rows with read_time between 2017-04-16 00:00 and 2017-04-23 00:00 UTC,
  # both boundaries inclusive.
  midap <- dplyr::filter(
    aggdf,
    read_time >= as.POSIXct("2017-04-16", tz = "UTC") &
      read_time <= as.POSIXct("2017-04-23", tz = "UTC")
  )

  # Base plot for the April window: line for the mean, translucent band for
  # the confidence-interval columns, one axis break per day.
  facap <- ggplot(
    data = midap,
    mapping = aes(
      x = read_time,
      y = kwh_tot_mean,
      colour = cluster,
      fill = cluster
    )
  ) +
    geom_line(size = 1.5) +
    geom_ribbon(
      mapping = aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high),
      colour = NA,
      alpha = 0.2
    ) +
    scale_colour_manual(values = cbp) +
    scale_fill_manual(values = cbp) +
    scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
    labs(
      title = "Cluster behaviour over third week of April 2017",
      x = "Date",
      y = "kwh"
    ) +
    theme(legend.position = "none")

  # One facet row per cluster, with shared and with free axis scales.
  apcon <- facap + facet_grid(cluster ~ .)
  apfre <- facap + facet_grid(cluster ~ ., scales = "free")
  # Rows with read_time between 2017-07-16 00:00 and 2017-07-23 00:00 UTC,
  # both boundaries inclusive.
  midjul <- dplyr::filter(
    aggdf,
    read_time >= as.POSIXct("2017-07-16", tz = "UTC") &
      read_time <= as.POSIXct("2017-07-23", tz = "UTC")
  )

  # Base plot for the July window: line for the mean, translucent band for
  # the confidence-interval columns, one axis break per day.
  facjul <- ggplot(
    data = midjul,
    mapping = aes(
      x = read_time,
      y = kwh_tot_mean,
      colour = cluster,
      fill = cluster
    )
  ) +
    geom_line(size = 1.5) +
    geom_ribbon(
      mapping = aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high),
      colour = NA,
      alpha = 0.2
    ) +
    scale_colour_manual(values = cbp) +
    scale_fill_manual(values = cbp) +
    scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
    labs(
      title = "Cluster behaviour over third week of July 2017",
      x = "Date",
      y = "kwh"
    ) +
    theme(legend.position = "none")

  # One facet row per cluster, with shared and with free axis scales.
  julcon <- facjul + facet_grid(cluster ~ .)
  julfre <- facjul + facet_grid(cluster ~ ., scales = "free")
  # Rows with read_time between 2017-10-15 00:00 and 2017-10-22 00:00 UTC,
  # both boundaries inclusive.
  midoct <- dplyr::filter(
    aggdf,
    read_time >= as.POSIXct("2017-10-15", tz = "UTC") &
      read_time <= as.POSIXct("2017-10-22", tz = "UTC")
  )

  # Base plot for the October window: line for the mean, translucent band for
  # the confidence-interval columns, one axis break per day.
  facoct <- ggplot(
    data = midoct,
    mapping = aes(
      x = read_time,
      y = kwh_tot_mean,
      colour = cluster,
      fill = cluster
    )
  ) +
    geom_line(size = 1.5) +
    geom_ribbon(
      mapping = aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high),
      colour = NA,
      alpha = 0.2
    ) +
    scale_colour_manual(values = cbp) +
    scale_fill_manual(values = cbp) +
    scale_x_datetime(date_breaks = "1 day", date_labels = "%a, %-d %B %Y") +
    labs(
      title = "Cluster behaviour over third week of October 2017",
      x = "Date",
      y = "kwh"
    ) +
    theme(legend.position = "none")

  # One facet row per cluster, with shared and with free axis scales.
  octcon <- facoct + facet_grid(cluster ~ .)
  octfre <- facoct + facet_grid(cluster ~ ., scales = "free")
  # Write every plot as "<stem><postfix>.png" into `args$img_path`, using the
  # width and height (in cm) given on the command line.

  # Make sure the output directory exists before saving; ggsave() may fail on
  # a missing directory. An already existing directory is left untouched
  # (showWarnings = FALSE silences the "already exists" warning).
  dir.create(args$img_path, recursive = TRUE, showWarnings = FALSE)

  # File-name stem -> plot, in the order the files are written.
  # "fix" = shared facet scales, "fre" = free facet scales.
  plots <- list(
    all_fix = allcon,
    all_fre = allfre,
    jan_fix = jancon,
    jan_fre = janfre,
    apr_fix = apcon,
    apr_fre = apfre,
    jul_fix = julcon,
    jul_fre = julfre,
    oct_fix = octcon,
    oct_fre = octfre
  )

  for (stem in names(plots)) {
    ggsave(
      filename = paste0(stem, args$postfix, ".png"),
      plot = plots[[stem]],
      path = args$img_path,
      dpi = "retina",
      width = args$width,
      height = args$height,
      units = "cm"
    )
  }