Browse Source

More sql and python

Petra Lamborn 5 years ago
parent
commit
0b4da456dd
2 changed files with 52 additions and 5 deletions
  1. 16
    5
      py/clustering.py
  2. 36
    0
      sql/queries.pgsql

+ 16
- 5
py/clustering.py View File

@@ -4,13 +4,22 @@ import matplotlib.pyplot as plt
4 4
 import seaborn as sns
5 5
 
6 6
 
7
+# query = """
8
+# SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
9
+# FROM public.coup_tall_april WHERE icp_id LIKE (%s) AND read_date = to_date(%s, 'dd/mm/yyyy') 
10
+# ORDER BY icp_id, read_time;
11
+# """
12
+# 
13
+# qparams = ['%%1117', '20/04/2017']
14
+
7 15
 query = """
8
-SELECT icp_id, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time,
9
-    kwh_tot
10
-FROM public.coup_tall_april WHERE icp_id LIKE (%s) ORDER BY icp_id, read_time;
16
+SELECT read_date, period, AVG(kwh_tot) AS average
17
+FROM public.coup_tall_april
18
+GROUP BY read_date, period
19
+ORDER BY read_date, period;
11 20
 """
12 21
 
13
-qparams = ['%%1117']
22
+qparams = []
14 23
 
15 24
 df = getQuery(query, qparams)
16 25
 
@@ -18,6 +27,8 @@ print(df.info())
18 27
 
19 28
 sns.set()
20 29
 
21
-sns.scatterplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', style = 'icp_id', data = df)
30
+#sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
31
+sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
22 32
 
23 33
 plt.show()
34
+

+ 36
- 0
sql/queries.pgsql View File

@@ -97,3 +97,39 @@ FROM public.coup_tall_april ORDER BY icp_id, read_time limit 50;
97 97
 -- Possible subset for further testing
98 98
 SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
99 99
 FROM public.coup_tall_april WHERE icp_id LIKE '%1117' ORDER BY icp_id, read_time;
100
+
101
+-- Calculate averages for each half-hour
102
+SELECT read_date, period, AVG(kwh_tot) AS average
103
+FROM public.coup_tall_april 
104
+GROUP BY read_date, period
105
+ORDER BY read_date, period;
106
+
107
+-- Generate timestamp list
108
+SELECT dd FROM GENERATE_SERIES('2017-04-01'::timestamp, '2017-04-02'::timestamp, '30 minutes'::interval) dd;
109
+
110
+-- Fraction of ICPs in April with 1440 entries (30 days x 48 half-hour periods)
111
+SELECT SUM(CASE WHEN isum.c = 1440 THEN 1 ELSE 0 END)::numeric / COUNT(*)::numeric As frac FROM
112
+(
113
+    SELECT COUNT(*) AS c, icp_id FROM public.coup_tall_april 
114
+    GROUP BY icp_id
115
+) AS isum;
116
+
117
+-- Count rows per ICP and read date during 2017 ('UN' content code), largest 40 counts first
118
+SELECT COUNT(*) as c, icp_id, read_date FROM coup_prd.coupdatamaster
119
+    WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
120
+    AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
121
+    AND content_code = 'UN'
122
+GROUP BY icp_id, read_date ORDER BY c DESC, read_date LIMIT 40;
123
+
124
+
125
+SELECT MIN(read_date) as min_date, MAX(read_date) as max_date FROM coup_prd.coupdatamaster;
126
+
127
+SELECT SUM(CASE WHEN cir.c > 365 THEN 1 ELSE 0 END), COUNT(*)
128
+FROM
129
+(
130
+    SELECT COUNT(*) as c, icp_id FROM coup_prd.coupdatamaster
131
+        WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
132
+        AND read_date <  to_date('01/01/2018','dd/mm/yyyy')
133
+        AND content_code = 'UN'
134
+    GROUP BY icp_id ORDER BY c DESC
135
+) AS cir;