Browse Source

Further optimisation

Petra Lamborn 5 years ago
parent
commit
036f012d25
4 changed files with 28 additions and 20 deletions
  1. 1
    1
      py/clustering.py
  2. 1
    1
      py/downkwh.py
  3. 6
    18
      py/util.py
  4. 20
    0
      sql/queries.pgsql

+ 1
- 1
py/clustering.py View File

36
 # plt.show()
36
 # plt.show()
37
 
37
 
38
 numclusts = 7
38
 numclusts = 7
39
-df = p.read_pickle('../data/2017-1s.pkl')
39
+df = p.read_pickle('../data/2017-sample.pkl')
40
 dforig = df
40
 dforig = df
41
 
41
 
42
 print(df.info())
42
 print(df.info())

+ 1
- 1
py/downkwh.py View File

44
 print(kwhdata.info())
44
 print(kwhdata.info())
45
 
45
 
46
 print("Pickling")
46
 print("Pickling")
47
-kwhdata.to_pickle("../data/2017-1s.pkl")
47
+kwhdata.to_pickle("../data/2017-sample.pkl")

+ 6
- 18
py/util.py View File

77
             SELECT read_time 
77
             SELECT read_time 
78
             FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
78
             FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
79
                 '30 minutes'::interval) read_time
79
                 '30 minutes'::interval) read_time
80
-        ) AS tsdata CROSS JOIN
81
-        (
82
-            SELECT *
83
-            FROM
84
-            (
85
-                SELECT icp_id, COUNT(DISTINCT read_date) AS data_days 
86
-                FROM coup_prd.coupdatamaster
87
-                WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
88
-                    AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
89
-                    AND content_code = 'UN'
90
-                    AND icp_id LIKE %(subset)s
91
-                GROUP BY icp_id
92
-            ) AS cir 
93
-            WHERE data_days >= 360
94
-        ) AS qual_icp
80
+        ) AS tsdata CROSS JOIN public.icp_sample
95
     ) AS comb
81
     ) AS comb
96
     LEFT JOIN
82
     LEFT JOIN
97
     (
83
     (
108
             WHERE   a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
94
             WHERE   a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
109
              and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
95
              and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
110
              and   a.content_code  ~ ('UN|CN|EG')
96
              and   a.content_code  ~ ('UN|CN|EG')
111
-             AND   a.icp_id LIKE %(subset)s
97
+             AND   a.icp_id IN (
98
+                SELECT icp_id FROM public.icp_sample
99
+             )
112
             GROUP BY 1, 2, 3
100
             GROUP BY 1, 2, 3
113
         ) AS coup_tall
101
         ) AS coup_tall
114
     ) AS tall_timestamp 
102
     ) AS tall_timestamp 
118
         'datestart': datestart,
106
         'datestart': datestart,
119
         'dateend': dateend,
107
         'dateend': dateend,
120
         'tsstart': timestart,
108
         'tsstart': timestart,
121
-        'tsend': timeend,
122
-        'subset': subset
109
+        'tsend': timeend
110
+        # 'subset': subset
123
     }
111
     }
124
     print("Getting data with parameters:")
112
     print("Getting data with parameters:")
125
     print(pdict)
113
     print(pdict)

+ 20
- 0
sql/queries.pgsql View File

344
     ) AS coup_tall
344
     ) AS coup_tall
345
 ) AS tall_timestamp 
345
 ) AS tall_timestamp 
346
 ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
346
 ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
347
+
348
+-- Produces a view of all the ICPs with at least 360 days of data in 2017, along with a column (data_days) giving each ICP's number of days of data
349
+CREATE VIEW public.best_icp AS
350
+SELECT *
351
+FROM
352
+(
353
+    SELECT icp_id, COUNT(DISTINCT read_date) AS data_days 
354
+    FROM coup_prd.coupdatamaster
355
+    WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
356
+        AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
357
+        AND content_code = 'UN'
358
+    GROUP BY icp_id
359
+) AS cir 
360
+WHERE data_days >= 360;
361
+
362
+-- Produces a sample table: 1000 ICPs chosen at random from public.best_icp
363
+CREATE TABLE public.icp_sample AS
364
+SELECT * FROM public.best_icp
365
+ORDER BY random()
366
+LIMIT 1000;