Browse Source

Optimisations

Petra Lamborn 5 years ago
parent
commit
a99d1fc52f
3 changed files with 8 additions and 4 deletions
  1. 1
    1
      py/clustering.py
  2. 2
    2
      py/downkwh.py
  3. 5
    1
      py/util.py

+ 1
- 1
py/clustering.py View File

@@ -36,7 +36,7 @@ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
36 36
 # plt.show()
37 37
 
38 38
 numclusts = 7
39
-df = p.read_pickle('../data/2017-20s.pkl')
39
+df = p.read_pickle('../data/2017-1s.pkl')
40 40
 dforig = df
41 41
 
42 42
 print(df.info())

+ 2
- 2
py/downkwh.py View File

@@ -39,9 +39,9 @@ import seaborn as sns
39 39
 # 
40 40
 # pickleQuery(query, "../data/jan19s.pkl")
41 41
 
42
-kwhdata = getkwh('2017-01-01', '2018-01-01', '2017-01-01 00:30:00', '2018-01-01 00:00:00', '%%20')
42
+kwhdata = getkwh('2017-01-01', '2018-01-01', '2017-01-01 00:30:00', '2018-01-01 00:00:00', '%%1')
43 43
 
44 44
 print(kwhdata.info())
45 45
 
46 46
 print("Pickling")
47
-kwhdata.to_pickle("../data/2017-20s.pkl")
47
+kwhdata.to_pickle("../data/2017-1s.pkl")

+ 5
- 1
py/util.py View File

@@ -2,6 +2,7 @@ import psycopg2 as pg
2 2
 from configparser import ConfigParser
3 3
 import pandas.io.sql as psql
4 4
 import datetime as dt
5
+import numpy as np
5 6
 
6 7
 
7 8
 def config(filename='database.ini', section='postgresql'):
@@ -67,7 +68,7 @@ def pickleQuery(query, path, qparams=[]):
67 68
 
68 69
 def getkwh(datestart, dateend, timestart, timeend, subset):
69 70
     query = """
70
-    SELECT comb.icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
71
+    SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
71 72
     FROM
72 73
     (
73 74
         SELECT read_time, icp_id
@@ -123,6 +124,9 @@ def getkwh(datestart, dateend, timestart, timeend, subset):
123 124
     print("Getting data with parameters:")
124 125
     print(pdict)
125 126
     qdf = getQuery(query, pdict)
127
+    print("Optimising")
128
+    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
129
+    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float32)
126 130
     print("Done")
127 131
     return(qdf)
128 132