Browse Source

More sql and python

Petra Lamborn 5 years ago
parent
commit
0b4da456dd
2 changed files with 52 additions and 5 deletions
  1. 16
    5
      py/clustering.py
  2. 36
    0
      sql/queries.pgsql

+ 16
- 5
py/clustering.py View File

4
 import seaborn as sns
4
 import seaborn as sns
5
 
5
 
6
 
6
 
7
+# query = """
8
+# SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
9
+# FROM public.coup_tall_april WHERE icp_id LIKE (%s) AND read_date = to_date(%s, 'dd/mm/yyyy') 
10
+# ORDER BY icp_id, read_time;
11
+# """
12
+# 
13
+# qparams = ['%%1117', '20/04/2017']
14
+
7
 query = """
15
 query = """
8
-SELECT icp_id, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time,
9
-    kwh_tot
10
-FROM public.coup_tall_april WHERE icp_id LIKE (%s) ORDER BY icp_id, read_time;
16
+SELECT read_date, period, AVG(kwh_tot) AS average
17
+FROM public.coup_tall_april
18
+GROUP BY read_date, period
19
+ORDER BY read_date, period;
11
 """
20
 """
12
 
21
 
13
-qparams = ['%%1117']
22
+qparams = []
14
 
23
 
15
 df = getQuery(query, qparams)
24
 df = getQuery(query, qparams)
16
 
25
 
18
 
27
 
19
 sns.set()
28
 sns.set()
20
 
29
 
21
-sns.scatterplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', style = 'icp_id', data = df)
30
+#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
+sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
22
 
32
 
23
 plt.show()
33
 plt.show()
34
+

+ 36
- 0
sql/queries.pgsql View File

97
 -- Possible subset for further testing
97
 -- Possible subset for further testing
98
 SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
98
 SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
99
 FROM public.coup_tall_april WHERE icp_id LIKE '%1117' ORDER BY icp_id, read_time;
99
 FROM public.coup_tall_april WHERE icp_id LIKE '%1117' ORDER BY icp_id, read_time;
100
+
101
+-- Calculate averages for each half-hour
102
+SELECT read_date, period, AVG(kwh_tot) AS average
103
+FROM public.coup_tall_april 
104
+GROUP BY read_date, period
105
+ORDER BY read_date, period;
106
+
107
+-- Generate timestamp list
108
+SELECT dd FROM GENERATE_SERIES('2017-04-01'::timestamp, '2017-04-02'::timestamp, '30 minutes'::interval) dd;
109
+
110
+-- Fraction of ICPs in April with 1440 entries (30 days x 48 half-hour periods)
111
+SELECT SUM(CASE WHEN isum.c = 1440 THEN 1 ELSE 0 END)::numeric / COUNT(*)::numeric As frac FROM
112
+(
113
+    SELECT COUNT(*) AS c, icp_id FROM public.coup_tall_april 
114
+    GROUP BY icp_id
115
+) AS isum;
116
+
117
+-- Get numbers of ICPs present over whole dataset
118
+SELECT COUNT(*) as c, icp_id, read_date FROM coup_prd.coupdatamaster
119
+    WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
120
+    AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
121
+    AND content_code = 'UN'
122
+GROUP BY icp_id, read_date ORDER BY c DESC, read_date LIMIT 40;
123
+
124
+
125
+SELECT MIN(read_date) as min_date, MAX(read_date) as max_date FROM coup_prd.coupdatamaster;
126
+
127
+SELECT SUM(CASE WHEN cir.c > 365 THEN 1 ELSE 0 END), COUNT(*)
128
+FROM
129
+(
130
+    SELECT COUNT(*) as c, icp_id FROM coup_prd.coupdatamaster
131
+        WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
132
+        AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
133
+        AND content_code = 'UN'
134
+    GROUP BY icp_id ORDER BY c DESC
135
+) AS cir;