Browse Source

Download only 5k sample

Petra Lamborn 5 years ago
parent
commit
91e0ad4943
2 changed files with 7 additions and 7 deletions
  1. py/downkwh.py (5 additions, 5 deletions)
  2. py/util.py (2 additions, 2 deletions)

py/downkwh.py (+5, -5) View File

@@ -2,6 +2,7 @@ from util import getQuery, pickleQuery, getkwh
2 2
 import pandas as p
3 3
 import gc
4 4
 from datetime import datetime
5
+from tqdm import tqdm
5 6
 
6 7
 months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
7 8
 mstarts = list(range(1, 13))
@@ -10,10 +11,9 @@ mends.append(1)
10 11
 yends = [2017] * 11
11 12
 yends.append(2018)
12 13
 
13
-for i, m in enumerate(months):
14
-    if i < 11:
15
-        continue
16
-    print(i)
14
+for i, m in tqdm(enumerate(months)):
15
+    # if i < 11:
16
+    #     continue
17 17
     print(m)
18 18
     print(datetime.now().time())
19 19
     kwhdata = getkwh('2017-{:02d}-01'.format(mstarts[i]), 
@@ -24,7 +24,7 @@ for i, m in enumerate(months):
24 24
     print("Pivoting")
25 25
     kwhpiv = kwhdata.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
26 26
     print("Pickling")
27
-    kwhpiv.to_pickle('../data/2017-{}-all.pkl'.format(m))
27
+    kwhpiv.to_pickle('../data/2017-{}-5k.pkl'.format(m))
28 28
     del kwhdata
29 29
     del kwhpiv
30 30
     gc.collect()

py/util.py (+2, -2) View File

@@ -79,7 +79,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
79 79
             SELECT read_time 
80 80
             FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
81 81
                 '30 minutes'::interval) read_time
82
-        ) AS tsdata CROSS JOIN public.best_icp
82
+        ) AS tsdata CROSS JOIN public.icp_sample_5k
83 83
     ) AS comb
84 84
     LEFT JOIN
85 85
     (
@@ -97,7 +97,7 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
97 97
              and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
98 98
              and   a.content_code  ~ ('UN|CN|EG')
99 99
              AND   a.icp_id IN (
100
-                SELECT icp_id FROM public.best_icp
100
+                SELECT icp_id FROM public.icp_sample_5k
101 101
              )
102 102
             GROUP BY 1, 2, 3
103 103
         ) AS coup_tall