5 years ago · e0bf20d2bd
--- a/R/combmodels.R
+++ b/R/combmodels.R
@@ -21,15 +21,22 @@ mtempdf <- read.csv("../data/weatherharm.csv", stringsAsFactors = FALSE) %>%
 
				     mutate(x = as.POSIXct(x, tz = "UTC")) %>%
			
 
				     rename(read_time = x, rollingmin = y, fitmin = f, resmin = r)
			
 
				 str(mtempdf)
			
 
				+sns <- import("seaborn")
			
 
				+cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_hex()))
			
 
				 
			
 
				 ntps <- length(unique(aggdf$read_time))
			
 
				 
			
 
				 clus = "9"
			
 
				 
			
 
				+yfreq <- floor(48 * 365.25)
			
 
				+wfreq <- floor(48 * 7)
			
 
				+dfreq <- floor(48)
			
 
				+harmonics <- c(2, 3, 3)
			
 
				+
			
 
				 
			
 
				-harm.y <- ts(1:ntps, frequency = floor(48 * 365.25)) %>% harmonic(2)
			
 
				-harm.w <- ts(1:ntps, frequency = floor(48 * 7))      %>% harmonic(3)
			
 
				-harm.d <- ts(1:ntps, frequency = floor(48))          %>% harmonic(3)
			
 
				+harm.y <- ts(1:ntps, frequency = yfreq) %>% harmonic(harmonics[1])
			
 
				+harm.w <- ts(1:ntps, frequency = wfreq) %>% harmonic(harmonics[2])
			
 
				+harm.d <- ts(1:ntps, frequency = dfreq) %>% harmonic(harmonics[3])
			
 
				 colnames(harm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(harm.y)/2), rep(1:(ncol(harm.y)/2), times = 2))
			
 
				 colnames(harm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(harm.w)/2), rep(1:(ncol(harm.w)/2), times = 2))
			
 
				 colnames(harm.d) <- sprintf("%s.%s.%s", "day",  rep(c("cos", "sin"), each = ncol(harm.d)/2), rep(1:(ncol(harm.d)/2), times = 2))
			
@@ -72,9 +79,40 @@ cmplot <-ggplot(cmdf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue",
 
				 
			
 
				 cmplot
			
 
				 
			
 
				-cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-08-01", tz = "UTC"), as.POSIXct("2017-09-01", tz = "UTC")))
			
 
				+cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-03-01", tz = "UTC"), as.POSIXct("2017-04-01", tz = "UTC")))
			
 
				 
			
 
				 # sres <- stepAIC(charmmod, scope = list(upper = nform.full, lower = nform.min),
			
 
				 #                 direction = "both", steps = 300)
			
 
				 
			
 
				 
			
 
				+newagg <- p$read_pickle("../data/9-proj-agg.pkl")
			
 
				+newagg$cluster <- factor(newagg$cluster)
			
 
				+str(newagg)
			
 
				+
			
 
				+ptps <- length(unique(newagg$read_time))
			
 
				+perdiff <- as.numeric(min(newagg$read_time) - min(aggdf$read_time), units = "mins") / 30
			
 
				+
			
 
				+pharm.y <- ts(1:ptps, frequency = yfreq, start = c(perdiff %/% yfreq + 1, perdiff %% yfreq + 1)) %>% harmonic(harmonics[1])
			
 
				+pharm.w <- ts(1:ptps, frequency = wfreq, start = c(perdiff %/% wfreq + 1, perdiff %% wfreq + 1)) %>% harmonic(harmonics[2])
			
 
				+pharm.d <- ts(1:ptps, frequency = dfreq, start = c(perdiff %/% dfreq + 1, perdiff %% dfreq + 1)) %>% harmonic(harmonics[3])
			
 
				+colnames(pharm.y) <- sprintf("%s.%s.%s", "year", rep(c("cos", "sin"), each = ncol(pharm.y)/2), rep(1:(ncol(pharm.y)/2), times = 2))
			
 
				+colnames(pharm.w) <- sprintf("%s.%s.%s", "week", rep(c("cos", "sin"), each = ncol(pharm.w)/2), rep(1:(ncol(pharm.w)/2), times = 2))
			
 
				+colnames(pharm.d) <- sprintf("%s.%s.%s", "day",  rep(c("cos", "sin"), each = ncol(pharm.d)/2), rep(1:(ncol(pharm.d)/2), times = 2))
			
 
				+
			
 
				+pclusdf <- filter(newagg, cluster == clus) %>% 
			
 
				+    dplyr::select(read_time, kwh = kwh_tot_mean) %>% 
			
 
				+    left_join(mtempdf, by = "read_time") %>% cbind(pharm.y, pharm.w, pharm.d)
			
 
				+str(pclusdf)
			
 
				+
			
 
				+ptestdata <- dplyr::select(pclusdf, -kwh)
			
 
				+str(ptestdata)
			
 
				+
			
 
				+predvals <- predict(charmmod, ptestdata)
			
 
				+
			
 
				+predf <- data.frame(x = pclusdf$read_time, y = pclusdf$kwh, f = predvals, r = pclusdf$kwh - predvals)
			
 
				+predplot <-ggplot(predf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blue", size = 2) + geom_point() +
			
 
				+    geom_point(aes(y = r), color = "darkgreen")
			
 
				+
			
 
				+predplot
			
 
				+
			
 
				+predplot + coord_cartesian(xlim = c(as.POSIXct("2018-03-01", tz = "UTC"), as.POSIXct("2018-04-01", tz = "UTC")))
			
--- a/py/downkwh.py
+++ b/py/downkwh.py
@@ -39,9 +39,9 @@ import seaborn as sns
 
				 # 
			
 
				 # pickleQuery(query, "../data/jan19s.pkl")
			
 
				 
			
 
				-kwhdata = getkwh('2016-04-01', '2018-01-01', '2016-04-01 00:30:00', '2018-01-01 00:00:00', '%%1')
			
 
				+kwhdata = getkwh('2018-01-01', '2018-04-01', '2018-01-01 00:30:00', '2018-04-01 00:00:00', '%%1')
			
 
				 
			
 
				 print(kwhdata.info())
			
 
				 
			
 
				 print("Pickling")
			
 
				-kwhdata.to_pickle("../data/2016-17-sample.pkl")
			
 
				+kwhdata.to_pickle("../data/2018-proj-sample.pkl")
			
--- a/py/projprocess.py
+++ b/py/projprocess.py
@@ -0,0 +1,34 @@
 
				+# This file simply takes future kwh data for the thousand previously 
			
 
				+# sampled ICPs and calculates new aggregated measures for each cluster
			
 
				+
			
 
				+import pandas as p
			
 
				+
			
 
				+# Load new data
			
 
				+newdat = p.read_pickle("../data/2018-proj-sample.pkl")
			
 
				+print(newdat.info())
			
 
				+
			
 
				+# Get clusters dataframe
			
 
				+clusters = p.read_pickle("../data/9-clusters.pkl")
			
 
				+print(clusters)
			
 
				+print(clusters.info())
			
 
				+clusters = clusters.drop(['read_time', 'kwh_tot'], 
			
 
				+                         axis = 1).drop_duplicates().reset_index(drop = True)
			
 
				+
			
 
				+# Join dataframes
			
 
				+newdat = clusters.set_index('icp_id').join(newdat.set_index('icp_id'), how = 'left').reset_index()
			
 
				+
			
 
				+print(newdat)
			
 
				+print(newdat.info())
			
 
				+
			
 
				+# Aggregate median and mean (only really want mean)
			
 
				+newagg = newdat.groupby(['read_time', 'cluster']).agg({
			
 
				+        'kwh_tot': ['median', 'mean']
			
 
				+})
			
 
				+newagg.columns = ['_'.join(x) for x in newagg.columns.ravel()]
			
 
				+newagg = newagg.reset_index()
			
 
				+
			
 
				+print(newagg)
			
 
				+print(newagg.info())
			
 
				+
			
 
				+# Save data
			
 
				+newagg.to_pickle("../data/9-proj-agg.pkl")
			
--- a/py/util.py
+++ b/py/util.py
@@ -79,7 +79,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
 
				             SELECT read_time 
			
 
				             FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
			
 
				                 '30 minutes'::interval) read_time
			
 
				-        ) AS tsdata CROSS JOIN public.icp_sample_1618
			
 
				+        ) AS tsdata CROSS JOIN public.icp_sample
			
 
				     ) AS comb
			
 
				     LEFT JOIN
			
 
				     (
			
@@ -97,7 +97,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
 
				              and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
			
 
				              and   a.content_code  ~ ('UN|CN|EG')
			
 
				              AND   a.icp_id IN (
			
 
				-                SELECT icp_id FROM public.icp_sample_1618
			
 
				+                SELECT icp_id FROM public.icp_sample
			
 
				              )
			
 
				             GROUP BY 1, 2, 3
			
 
				         ) AS coup_tall