Browse Source

Assign a new sample to clusters; look at how well it is modeled

Petra Lamborn 5 years ago
parent
commit
9ed4f3828e

+ 10
- 3
R/combmodels.R View File

@@ -27,7 +27,7 @@ cbp <- as.character(p$Series(sns$color_palette("colorblind", as.integer(9))$as_h
27 27
 
28 28
 ntps <- length(unique(aggdf$read_time))
29 29
 
30
-clus = "9"
30
+clus = "1"
31 31
 
32 32
 yfreq <- floor(48 * 365.25)
33 33
 wfreq <- floor(48 * 7)
@@ -89,7 +89,7 @@ cmplot + coord_cartesian(xlim = c(as.POSIXct("2017-03-01", tz = "UTC"), as.POSIX
89 89
 #                 direction = "both", steps = 300)
90 90
 
91 91
 
92
-newagg <- p$read_pickle("../data/9-proj-agg.pkl")
92
+newagg <- p$read_pickle("../data/1617-agg.pkl")
93 93
 newagg$cluster <- factor(newagg$cluster)
94 94
 str(newagg)
95 95
 
@@ -119,7 +119,14 @@ predplot <-ggplot(predf, aes(x = x, y = y)) + geom_line(aes(y = f), color = "blu
119 119
 
120 120
 predplot
121 121
 
122
-predplot + coord_cartesian(xlim = c(as.POSIXct("2018-03-01", tz = "UTC"), as.POSIXct("2018-04-01", tz = "UTC")))
122
+predplot + coord_cartesian(xlim = c(as.POSIXct("2017-03-01", tz = "UTC"), as.POSIXct("2017-04-01", tz = "UTC")))
123 123
 
124 124
 mean(abs(predf$r))
125 125
 sd(predf$r)
126
+
127
+
128
+# Number of ICPs per cluster: original clustering (ocdf) vs. newly assigned 2016-17 sample (ncdf)
129
+ocdf <- p$read_pickle('../data/9-clusters-sample-table.pkl')
130
+ncdf <- p$read_pickle('../data/1617-asgn-table.pkl')
131
+table(ocdf$cluster)
132
+table(ncdf$cluster)

BIN
img/all-9-fix-1617-asigned.png View File


BIN
img/all-9-fre-1617-asigned.png View File


BIN
img/apr-9-fix-1617-asigned.png View File


BIN
img/apr-9-fre-1617-asigned.png View File


BIN
img/jan-9-fix-1617-asigned.png View File


BIN
img/jan-9-fre-1617-asigned.png View File


BIN
img/jul-9-fix-1617-asigned.png View File


BIN
img/jul-9-fre-1617-asigned.png View File


BIN
img/oct-9-fix-1617-asigned.png View File


BIN
img/oct-9-fre-1617-asigned.png View File


+ 24
- 4
py/clusAssign.py View File

@@ -8,6 +8,12 @@ clusfile = '../data/9-clusters.agg.pkl'
8 8
 # A new dataset
9 9
 ndsfile = '../data/2016-17-sample.pkl'
10 10
 
11
+# Table of assigned clusters
12
+aclusfile = '../data/1617-asgn-table.pkl'
13
+
14
+# Aggregated dataset
15
+aggfile = '../data/1617-agg.pkl'
16
+
11 17
 
12 18
 clusdf = p.read_pickle(clusfile)
13 19
 clusdf = clusdf.pivot(index = 'read_time', columns = 'cluster', values = 'kwh_tot_mean')
@@ -44,14 +50,28 @@ for i in icps:
44 50
         print('ICP {} has constant value; assigning to cluster -1'.format(i))
45 51
     clusdict[i] = bestc
46 52
 
47
-# Need to make sure cluster is integer ZZZ
48 53
 newclusdf = p.DataFrame.from_dict(clusdict, orient = 'index', columns = ['cluster'])
49 54
 newclusdf.index.name = 'icp_id'
50 55
 newclusdf = newclusdf.reset_index()
51
-# print(newclusdf)
56
+print(newclusdf)
57
+newclusdf.to_pickle(aclusfile)
58
+
52 59
 
53 60
 newdf = p.melt(newdf.reset_index(), 'read_time', var_name = 'icp_id', value_name = 'kwh')
54 61
 
55
-######## ZZZ Something isn't working here
56
-anndf = newdf.set_index('icp_id').join(newclusdf)
62
+print(newdf.info())
63
+print(newclusdf.info())
64
+
65
+anndf = newdf.set_index('icp_id').join(newclusdf.set_index('icp_id')).reset_index()
57 66
 print(anndf)
67
+
68
+qlow  = lambda x: x.quantile(0.250)
69
+qhigh = lambda x: x.quantile(0.750)
70
+newagg = anndf.groupby(['read_time', 'cluster']).agg({
71
+        'kwh': ['median', 'mean', ('CI_low', qlow), ('CI_high', qhigh)]
72
+})
73
+newagg.columns = ['_tot_'.join(x) for x in newagg.columns.ravel()]
74
+newagg = newagg.reset_index()
75
+
76
+print(newagg)
77
+newagg.to_pickle(aggfile)