Browse Source

Clusterviz

Petra Lamborn 5 years ago
parent
commit
92d05749ae

+ 16
- 17
R/clusterviz.R View File

5
 parser$add_argument("cluster_file", help = "file to visualise")
5
 parser$add_argument("cluster_file", help = "file to visualise")
6
 parser$add_argument("-i", "--img-path", dest = "img_path", default = "../img/", help = "path to store plots in; default: ../img/")
6
 parser$add_argument("-i", "--img-path", dest = "img_path", default = "../img/", help = "path to store plots in; default: ../img/")
7
 parser$add_argument("-p", "--postfix", dest = "postfix", default = "_plot", help = "postfix for files, default: _plot")
7
 parser$add_argument("-p", "--postfix", dest = "postfix", default = "_plot", help = "postfix for files, default: _plot")
8
-parser$add_argument("-v", "--virtualenv", dest = "virtualenv", default = "../venv/", help = "path of virtualenv; default: ../venv/; '-' for none")
9
 parser$add_argument("--width", dest = "width", default = 40, help = "width (cm), default 40", type = "double")
8
 parser$add_argument("--width", dest = "width", default = 40, help = "width (cm), default 40", type = "double")
10
 parser$add_argument("--height", dest = "height", default = 25, help = "height (cm), default 25", type = "double")
9
 parser$add_argument("--height", dest = "height", default = 25, help = "height (cm), default 25", type = "double")
10
+parser$add_argument("--pkl", dest = "csv", help = "load from pickle instead of csv", action = "store_false")
11
 
11
 
12
 args <- parser$parse_args()
12
 args <- parser$parse_args()
13
 
13
 
14
-library(reticulate)
15
-library(ggplot2)
16
-library(dplyr)
17
-library(tidyr)
18
-library(TSA)
19
-library(forecast)
14
+print(args)
15
+
16
+library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
17
+library(dplyr, warn.conflicts = FALSE, quietly = TRUE)
18
+library(tidyr, warn.conflicts = FALSE, quietly = TRUE)
19
+library(TSA, warn.conflicts = FALSE, quietly = TRUE)
20
+library(forecast, warn.conflicts = FALSE, quietly = TRUE)
20
 theme_set(theme_bw())
21
 theme_set(theme_bw())
21
 
22
 
22
-if (args$virtualenv != "-") {
23
-    print(args$virtualenv)
24
-    use_virtualenv(args$virtualenv)
23
+if (args$csv) {
24
+    aggdf <- read.csv(args$cluster_file, header = TRUE, stringsAsFactors = FALSE)
25
+    aggdf$read_time <- as.POSIXct(aggdf$read_time)
26
+} else {
27
+    library(reticulate, warn.conflicts = FALSE, quietly = TRUE)
28
+    p <- import("pandas")
29
+    aggdf <- p$read_pickle(args$cluster_file)
25
 }
30
 }
26
-
27
-p <- import("pandas")
28
-aggdf <- p$read_pickle(args$cluster_file)
29
 aggdf$cluster <- factor(aggdf$cluster)
31
 aggdf$cluster <- factor(aggdf$cluster)
30
-str(aggdf)
31
 clusters = levels(aggdf$cluster)
32
 clusters = levels(aggdf$cluster)
32
 cbp <- c('#0173b2', '#de8f05', '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', 
33
 cbp <- c('#0173b2', '#de8f05', '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', 
33
          '#de8f05', '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', '#de8f05', 
34
          '#de8f05', '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', '#de8f05', 
34
          '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', '#de8f05', '#029e73', 
35
          '#029e73', '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9', '#0173b2', '#de8f05', '#029e73', 
35
-         '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9')[1:length(clusters)
36
-
37
-ggplot(aggdf, aes(y = kwh_tot_mean, x = cluster)) + geom_boxplot()
36
+         '#d55e00', '#cc78bc', '#ca9161', '#fbafe4', '#949494', '#ece133', '#56b4e9')[1:length(clusters)]
38
 
37
 
39
 facall <- ggplot(aggdf, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) + 
38
 facall <- ggplot(aggdf, aes(x = read_time, y = kwh_tot_mean, color = cluster, fill = cluster)) + 
40
     geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +
39
     geom_line(size = 1.5) + geom_ribbon(aes(ymin = kwh_tot_CI_low, ymax = kwh_tot_CI_high), alpha = 0.2, color = NA) +

+ 5
- 0
README.md View File

202
 ```R
202
 ```R
203
 install.packages(c("dplyr", "tidyr", "ggplot2", "forecast", "TSA", "reticulate", "caTools", "scales", "argparse"))
203
 install.packages(c("dplyr", "tidyr", "ggplot2", "forecast", "TSA", "reticulate", "caTools", "scales", "argparse"))
204
 ```
204
 ```
205
+
206
+### `clusterviz.R`
207
+
208
+This script visualises the patterns of each cluster in an aggregated file.
209
+

BIN
img/all_fix_5k.png View File


BIN
img/all_fre_5k.png View File


BIN
img/apr_fix_5k.png View File


BIN
img/apr_fre_5k.png View File


BIN
img/jan_fix_5k.png View File


BIN
img/jan_fre_5k.png View File


BIN
img/jul_fix_5k.png View File


BIN
img/jul_fre_5k.png View File


BIN
img/oct_fix_5k.png View File


BIN
img/oct_fre_5k.png View File