
Bunch of proper python scripts

Petra Lamborn, 5 years ago · commit d4e459128d
8 changed files with 212 additions and 206 deletions:

  1. py/agg.py (+33, -8)
  2. py/clustering.py (+1, -87)
  3. py/collate.py (+1, -0)
  4. py/dcorr.py (+7, -6)
  5. py/downkwh.py (+127, -29)
  6. py/downweather.py (+14, -6)
  7. py/projprocess.py (+2, -0)
  8. py/util.py (+27, -70)

py/agg.py (+33, -8)

@@ -1,24 +1,49 @@
 # Aggregate given clusters
+from argparse import ArgumentParser
 import pandas as p
+from tqdm import tqdm
 
 def aggregator(widedf, clusdf):
+    """Aggregate a (wide-form) dataframe by the cluster mappings in a second dataframe
+    """
     clusters = clusdf['cluster'].unique()
     clusters.sort()
-    aggv = {}
+    dflis = []
     qlow  = lambda x: x.quantile(0.250)
     qhigh = lambda x: x.quantile(0.750)
-    for c in clusters:
-        icps = clusdf[clusdf.cluster == c].icp_id.values
+    for c in tqdm(clusters):
+        icps = clusdf[clusdf.cluster == c].icp_id.unique()
         subdf = widedf[icps]
-        agged = subdf.agg(func = 'median', axis = 1)
-        print(agged)
+        aggmed = subdf.agg(func = 'median', axis = 1)
+        aggmen = subdf.agg(func = 'mean', axis = 1)
+        aggupq = subdf.agg(func = qlow, axis = 1)
+        aggdwq = subdf.agg(func = qhigh, axis = 1)
+        agged = p.DataFrame(data = {
+            "cluster":         c,
+            "kwh_tot_median":  aggmed,
+            "kwh_tot_mean":    aggmen,
+            "kwh_tot_CI_low":  aggupq,
+            "kwh_tot_CI_high": aggdwq,
+            }).reset_index()
+        dflis.append(agged)
+    adf = p.concat(dflis, axis = 0, ignore_index = True)
+    return adf
 
 
 def main():
-    wd = p.read_pickle("../data/2017-5k-wide.pkl")
-    cd = p.read_pickle("../data/5kclustable.pkl")
-    aggregator(wd, cd)
+    parser = ArgumentParser(description='Aggregate dataframe by specified clusters')
+    parser.add_argument("-i", "--input",  dest="input",      help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
+    parser.add_argument("-c", "--clusters", dest="clusfile", help = "cluster pickle path; default: ../data/5kclustable.pkl", metavar="[PATH]", default = "../data/5kclustable.pkl")
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/5k-ag.pkl", metavar="[PATH]", default = "../data/5k-ag.pkl")
+    parser.add_argument("-p", "--pivot", dest = "istall",    help = "input dataframe is in tall format and must be pivoted", action ="store_true")
+    args = parser.parse_args()
+    wd = p.read_pickle(args.input)
+    cd = p.read_pickle(args.clusfile)
+    if (args.istall):
+        wd = wd.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
 
+    agged = aggregator(wd, cd)
+    agged.to_pickle(args.output)
 
 
 if __name__ == "__main__":
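
For reference, a minimal sketch of what the new aggregator() computes, on hypothetical toy data (the icp_id values and cluster labels below are invented for illustration):

import pandas as p
from agg import aggregator  # assumes agg.py is importable as a module

# Toy wide-form data: one column per icp_id, half-hourly read_time index.
widedf = p.DataFrame(
    {1: [0.2, 0.4, 0.3], 2: [0.1, 0.5, 0.2],
     3: [0.9, 0.8, 0.7], 4: [1.0, 0.6, 0.8]},
    index=p.date_range("2017-01-01 00:30", periods=3, freq="30min"))
widedf.index.name = "read_time"

# Cluster table mapping each icp_id to a cluster label.
clusdf = p.DataFrame({"icp_id": [1, 2, 3, 4], "cluster": [1, 1, 2, 2]})

adf = aggregator(widedf, clusdf)
# adf holds one row per (read_time, cluster) with kwh_tot_median,
# kwh_tot_mean, kwh_tot_CI_low (25th pctl) and kwh_tot_CI_high (75th pctl).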

py/clustering.py (+1, -87)

@@ -14,23 +14,6 @@ from itertools import combinations
 from math import factorial as f
 
 
-
-
-numclusts = 9
-Sourcedata =   '../data/2017-5k-wide.pkl'
-Sourcecorr =   '../data/5kdcorrmatrix.pkl'
-lableddata =   '../data/9-clusters-5k.pkl'
-aggdata =      '../data/9-clusters-5k-agg.pkl'
-clustertable = '../data/9-clusters-5k-table.pkl'
-
-# sourcep = p.read_pickle(Sourcecorr)
-
-# lmat = squareform(sourcep)
-
-# lobj = linkage(lmat, method = 'ward')
-# print(lobj)
-# print(cophenet(lobj, lmat))
-
 def cluster(dcmat, method, nclusters):
     """Cluster provided correlation dataframe
     """
@@ -41,6 +24,7 @@ def cluster(dcmat, method, nclusters):
     clustdf = p.DataFrame({'icp_id' : dcmat.index.values, 'cluster' : clusts})
     return lobj, clustdf
 
+
 def dendro(lobj, clustdf, numclusts, icps, fname):
     clabs = [x + 1 for x in range(numclusts)]
     cpal = dict(zip(clabs, sns.color_palette("colorblind", numclusts).as_hex()))
@@ -90,75 +74,5 @@ def main():
         dendro(l, c, args.numclusters, icps, args.treepath)
 
 
-
-
-# clabs = [x + 1 for x in range(numclusts)]
-# cpal = dict(zip(clabs, sns.color_palette("colorblind", numclusts).as_hex()))
-
-# clusts = fcluster(lobj, numclusts, criterion='maxclust')
-# print(clusts)
-# print(cmat.index.values)
-# clustdf = p.DataFrame({'icp_id' : cmat.index.values, 'cluster' : clusts})
-# print(clustdf)
-# clustdf.to_pickle(clustertable)
-# mdf = p.merge(clustdf, dforig, on = 'icp_id', how = 'left')
-# print(mdf)
-# print(mdf.info())
-# qlow  = lambda x: x.quantile(0.250)
-# qhigh = lambda x: x.quantile(0.750)
-# print(mdf.cluster.describe())
-# mdagg = mdf.groupby(['read_time', 'cluster']).agg({
-#         'kwh_tot': ['median', 'mean', ('CI_low', qlow), ('CI_high', qhigh)]
-# }, q = 0.025)
-# mdagg.columns = ['_'.join(x) for x in mdagg.columns.ravel()]
-# mdagg = mdagg.reset_index()
-# print(mdagg)
-# print(mdagg.info())
-# print(mdagg.describe())
-# # mdf.to_csv('~/windows/Documents/clusters-ward.csv')
-# print("Saving")
-# mdf.to_pickle(lableddata)
-# mdagg.to_pickle(aggdata)
-# print("saved")
-
-# # Algorithm via 
-# # <https://stackoverflow.com/questions/38153829/custom-cluster-colors-of-scipy-dendrogram-in-python-link-color-func>
-# ldict = {icp_id:cpal[cluster] for icp_id, cluster in zip(clustdf.icp_id, clustdf.cluster)}
-# link_cols = {}
-# for i, i12 in enumerate(lobj[:,:2].astype(int)):
-#   c1, c2 = (link_cols[x] if x > len(lobj) else ldict[clustdf.icp_id[x]]
-#     for x in i12)
-#   link_cols[i+1+len(lobj)] = c1 if c1 == c2 else '#000000'
-
-# plt.figure(figsize = (25, 10))
-# plt.title('ICP Clustering Dendrogram')
-# plt.xlabel('ICP ID/(Number of ICPs)')
-# plt.ylabel('distance')
-# dendrogram(
-#     lobj,
-#     labels = cmat.index.values,
-#     leaf_rotation=90,
-#     leaf_font_size=8,
-#     # show_leaf_counts = True,
-#     # truncate_mode = 'lastp',
-#     # p = 50,
-#     # show_contracted = True,
-#     link_color_func = lambda x: link_cols[x],
-#     color_threshold = None
-# )
-# # plt.show()
-# plt.savefig("../img/sample-9-dendro.png")
-
-# sns.set()
-
-# f, axes = plt.subplots(3,3)
-
-# for i, c in enumerate(clabs):
-#     fds = mdagg[mdagg.cluster == c]
-#     sns.lineplot(x = 'read_time', y = 'kwh_tot_mean', color = cpal[c], ax = axes[i//3][i%3], data = fds)
-#     axes[i//3][i%3].fill_between(fds.read_time.dt.to_pydatetime(), fds.kwh_tot_CI_low, fds.kwh_tot_CI_high, alpha = 0.1, color = cpal[c])
-# # plt.show()
-# plt.savefig("../img/sample-9-panedtrends.png")
-
 if __name__ == "__main__":
     main()
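
The surviving entry point is cluster(); a hedged sketch of a call, assuming dcm is the square distance-correlation dataframe produced by dcorr.py (the method and cluster count are taken from the constants deleted above, not from the new CLI):

from clustering import cluster  # assumes clustering.py is importable

# lobj is the scipy linkage object; clustdf maps each icp_id to a cluster.
lobj, clustdf = cluster(dcm, method="ward", nclusters=9)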

py/collate.py (+1, -0)

@@ -1,4 +1,5 @@
 # Collate 12 dataframes into one (wide) combined dataframe
+# Made redundant by downkwh.py
 import pandas as p
 import gc
 from tqdm import tqdm

py/dcorr.py (+7, -6)

@@ -12,10 +12,8 @@ def tqcorr(df):
     cols = df.columns
     ncols = len(cols)
     cdf = p.DataFrame(index = cols, columns = cols, dtype = np.float16)
-    print(cdf.info())
     for c in tqdm(cols):
         cdf.loc[c, c] = 0
-    print(cdf.info())
     comb = combinations(cols, 2)
     ncomb = f(ncols) // f(2) // f(ncols - 2)
     for c1, c2 in tqdm(comb, total = ncomb):
@@ -26,10 +24,12 @@ def tqcorr(df):
     return cdf
 
 
-def createCorr(source, output):
+def createCorr(source, output, piv):
     """Load a pkl in wide form from source, process, run tqcorr() and save response to output
     """
     df = p.read_pickle(source)
+    if piv:
+        df = df.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
     df = df[df.columns[df.max() != df.min()]]
     cmat = tqcorr(df)
     cmat.to_pickle(output)
@@ -37,7 +37,8 @@ def createCorr(source, output):
 
 if __name__ == "__main__":
     parser = ArgumentParser(description='Create distance correlation matrix from pickled wideform pandas dataframe')
-    parser.add_argument("-i", "--input",  dest="input",  help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
-    parser.add_argument("-o", "--output", dest="output", help = "output pickle path; default: ../data/5kdcorrmatrix.pkl", metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
+    parser.add_argument("-i", "--input",  dest="input",   help = "input pickle path; default: ../data/2017-5k-wide.pkl",  metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
+    parser.add_argument("-o", "--output", dest="output",  help = "output pickle path; default: ../data/5kdcorrmatrix.pkl", metavar="[PATH]", default = "../data/5kdcorrmatrix.pkl")
+    parser.add_argument("-p", "--pivot", dest = "istall", help = "input dataframe is in tall format and must be pivoted", action ="store_true")
     args = parser.parse_args()
-    createCorr(args.input, args.output)
+    createCorr(args.input, args.output, args.istall)
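
A hedged sketch of the new pivot path, assuming a tall-format pickle with read_time, icp_id and kwh_tot columns (the input path here is illustrative, not one of the repo's defaults):

from dcorr import createCorr  # assumes dcorr.py is importable

# Equivalent to: python dcorr.py -i ../data/2017-tall.pkl -p
createCorr("../data/2017-tall.pkl", "../data/5kdcorrmatrix.pkl", piv=True)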

py/downkwh.py (+127, -29)

@@ -1,33 +1,131 @@
-from util import getQuery, pickleQuery, getkwh
+from argparse import ArgumentParser
+# from psycopg2 import sql
+import gc
+from util import getQuery, datevalid
 import pandas as p
 import gc
 from datetime import datetime
-from tqdm import tqdm
-
-months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
-mstarts = list(range(1, 13))
-mends = mstarts[1:13]
-mends.append(1)
-yends = [2017] * 11
-yends.append(2018)
-
-for i, m in tqdm(enumerate(months)):
-    # if i < 11:
-    #     continue
-    print(m)
-    print(datetime.now().time())
-    kwhdata = getkwh('2017-{:02d}-01'.format(mstarts[i]), 
-                     '{}-{:02d}-01'.format(yends[i], mends[i]), 
-                     '2017-{:02d}-01 00:30:00'.format(mstarts[i]), 
-                     '{}-{:02d}-01 00:00:00'.format(yends[i], mends[i]), 
-                     '%%1')
-    print("Pivoting")
-    kwhpiv = kwhdata.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
-    print("Pickling")
-    kwhpiv.to_pickle('../data/2017-{}-5k.pkl'.format(m))
-    del kwhdata
-    del kwhpiv
-    gc.collect()
-
-print('Done')
+from tqdm import tqdm, trange
+from pprint import pprint
+from tempfile import TemporaryDirectory
+import numpy as np
+
+tables = [
+    'public.best_icp', # All icps with at least 360 days of data in 2017
+    'public.best_icp_1618', # All icps with at least 720 days of data in 2 years from 1 April 2016
+    'public.best_icp_18m', # All icps with at least 540 days of data from July 2016 to end of 2017
+    'public.icp_sample', # A pre-generated 1k sample from best_icp
+    'public.icp_sample_5k', # A pre-generated 5k sample from best_icp
+    'public.icp_sample_1618', # A pre-generated 1k sample from best_icp_1618
+    'public.icp_sample_18m' # A pre-generated 1k sample from best_icp_18m
+]
+
+
+def getkwh(datestart, dateend, timestart, timeend, icp_tab, verbose = True):
+    """Get kwh data from database
+    """
+    query = """
+    SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
+    FROM
+    (
+        SELECT read_time, icp_id
+        FROM
+        (
+            SELECT read_time 
+            FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
+                '30 minutes'::interval) read_time
+        ) AS tsdata CROSS JOIN {}
+    ) AS comb
+    LEFT JOIN
+    (
+        SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
+        FROM (
+            SELECT  a.icp_id
+                 , a.read_date
+                 , c.period
+                 , sum(c.read_kwh) as kwh_tot
+                 , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
+                 , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
+            FROM    coup_prd.coupdatamaster a,
+                unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
+            WHERE   a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
+             and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
+             and   a.content_code  ~ ('UN|CN|EG')
+             AND   a.icp_id IN (
+                SELECT icp_id FROM {}
+             )
+            GROUP BY 1, 2, 3
+        ) AS coup_tall
+    ) AS tall_timestamp 
+    ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
+    """
+    query = query.format(icp_tab, icp_tab)
+    pdict = {
+        'datestart': datestart,
+        'dateend': dateend,
+        'tsstart': timestart,
+        'tsend': timeend
+        # 'subset': subset
+    }
+
+    if verbose:
+        print("Getting data with parameters:")
+        pprint(pdict)
+    qdf = getQuery(query, pdict, verbose)
+    if verbose:
+        print("Optimising")
+    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
+    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float16)
+    # print("Done")
+    return(qdf)
+
+
+def collateddownload(startd, endd, numdivis, icp_tab, pivot, verbose):
+    """
+    Download dataset in pieces, then combine
+    """
+    with TemporaryDirectory() as tempdir:
+        divset = p.date_range(startd, endd, periods = numdivis + 1).strftime("%Y-%m-%d")
+        divlow = divset[:-1]
+        divhig = divset[1:]
+        for i in trange(numdivis):
+            datestart = divlow[i]
+            dateend   = divhig[i]
+            datetimeend = dateend + " 00:00:00"
+            datetimestart = datestart + " 00:30:00"
+            filename = "{}/{}temp.pkl".format(tempdir, i)
+            tdf = getkwh(datestart, dateend, datetimestart, datetimeend, icp_tab, verbose)
+            if pivot:
+                if verbose:
+                    print("Pivoting")
+                tdf = tdf.pivot(index = 'read_time', columns = 'icp_id', values = 'kwh_tot')
+            tdf.to_pickle(filename)
+            del tdf
+        coldf = p.read_pickle("{}/{}temp.pkl".format(tempdir, 0))
+        for i in trange(1, numdivis):
+            filename = "{}/{}temp.pkl".format(tempdir, i)
+            tdf = p.read_pickle(filename) 
+            coldf = p.concat([coldf, tdf])
+            del tdf
+            gc.collect()
+        return coldf
+
+
+def main():
+    parser = ArgumentParser(description='Download kwh data from database')
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2017-5k-wide.pkl", metavar="[PATH]", default = "../data/2017-5k-wide.pkl")
+    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2017-01-01", metavar="[DATE]", default = "2017-01-01", type = datevalid)
+    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2018-01-01", metavar="[DATE]", default = "2018-01-01", type = datevalid)
+    parser.add_argument("-t", "--table", dest = "table", help = "table for download (constrained to specific values in source); default: public.icp_sample", metavar="[TABLE]", default = "public.icp_sample", choices = tables)
+    parser.add_argument("-n", "--num-div",  dest="numdiv",  help = "number of segments to divide download into", metavar = "[NUM]", default = 12, type = int)
+    parser.add_argument("--no-pivot", dest = "pivot",    help = "output dataframe in tall (non-pivoted) format", action ="store_false")
+    parser.add_argument("-v", "--verbose", dest = "verbose", action ="store_true")
+    args = parser.parse_args()
+    cdata = collateddownload(args.startdate, args.enddate, args.numdiv, args.table, args.pivot, args.verbose)
+    cdata.to_pickle(args.output)
+
+
+
 
+if __name__ == "__main__":
+    main()
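
A hedged sketch of calling the new downloader directly rather than through the CLI (the table name comes from the tables whitelist above; the output path is illustrative):

from downkwh import collateddownload  # assumes downkwh.py is importable

# Roughly what `python downkwh.py -t public.icp_sample_5k` does:
cdata = collateddownload("2017-01-01", "2018-01-01", 12,
                         "public.icp_sample_5k", pivot=True, verbose=False)
cdata.to_pickle("../data/2017-5k-wide.pkl")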

py/downweather.py (+14, -6)

@@ -1,10 +1,18 @@
-from util import gettemp
+from argparse import ArgumentParser
+from util import gettemp, datevalid
 import pandas as p
 
-tempdata = gettemp('2016-04-01', '2019-01-01', 2006)
 
-print(tempdata.info())
-print(tempdata.describe())
+def main():
+    parser = ArgumentParser(description='Download weather data from database')
+    parser.add_argument("-o", "--output", dest="output",     help = "output pickle path; default: ../data/2016-18-weather.pkl", metavar="[PATH]", default = "../data/2016-18-weather.pkl")
+    parser.add_argument("-s", "--start-date", dest = "startdate", help = "start date for download; format: YYYY-MM-DD; default: 2016-04-01", metavar="[DATE]", default = "2016-04-01", type = datevalid)
+    parser.add_argument("-e", "--end-date", dest = "enddate", help = "end date for download; format: YYYY-MM-DD; default: 2019-01-01", metavar="[DATE]", default = "2019-01-01", type = datevalid)
+    parser.add_argument("--station", dest = "station", help = "weather station to get data from; default: 2006", metavar="[STATION]", default = "2006")
+    parser.add_argument("-v", "--verbose", dest = "verbose", action ="store_true")
+    args = parser.parse_args()
+    tempdata = gettemp(args.startdate.strftime("%Y-%m-%d"), args.enddate.strftime("%Y-%m-%d"), args.station, args.verbose)
+    tempdata.to_pickle(args.output)
 
-print("Pickling")
-tempdata.to_pickle("../data/2016-18-weather.pkl")
+if __name__ == "__main__":
+    main()
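
A hedged equivalent of running the script with its defaults, which mirrors exactly what the deleted top-level code did (same date range, station and output path):

from util import gettemp  # assumes util.py is on the path

tempdata = gettemp("2016-04-01", "2019-01-01", "2006", verbose=True)
tempdata.to_pickle("../data/2016-18-weather.pkl")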

py/projprocess.py (+2, -0)

@@ -1,5 +1,7 @@
 # This file simply takes future kwh data for the thousand previously 
 # sampled ICPs and calculates new aggregated measures for each cluster
+# 
+# Note: This is now redundant with agg.py
 
 import pandas as p
 

py/util.py (+27, -70)

@@ -1,3 +1,4 @@
+from argparse import ArgumentTypeError
 import psycopg2 as pg
 from configparser import ConfigParser
 import pandas.io.sql as psql
@@ -6,6 +7,12 @@ import datetime as dt
 import numpy as np
 from pprint import pprint
 
+def datevalid(d):
+    try:
+        return dt.datetime.strptime(d, "%Y-%m-%d")
+    except ValueError:
+        raise ArgumentTypeError("Invalid date: {}".format(d))
+
 
 def config(filename='database.ini', section='postgresql'):
     """Config parser from
@@ -28,7 +35,7 @@ def config(filename='database.ini', section='postgresql'):
     return db
 
 
-def getQuery(query, qparams=[]):
+def getQuery(query, qparams=[], verbose = True):
     """
     Get single query
     """
@@ -37,29 +44,33 @@ def getQuery(query, qparams=[]):
     try:
         params = config()
 
-        print("Connecting to database")
+        if verbose:
+            print("Connecting to database")
 
         conn = pg.connect(**params)
        cur = conn.cursor()
 
         # Get table
-        print("Retrieving table")
+        if verbose:
+            print("Retrieving table")
+
         dataframe = psql.read_sql(query, conn, params=qparams)
 
         cur.close()
 
-        print("Table received")
+        if verbose:
+            print("Table received")
 
         return dataframe
 
     except (Exception, pg.DatabaseError) as error:
-        print(error)
-        return None
+        raise error
 
     finally:
         if conn is not False:
             conn.close()
-            print('Database connection closed')
+            if verbose:
+                print('Database connection closed')
 
 
 def pickleQuery(query, path, qparams=[]):
@@ -68,60 +79,7 @@ def pickleQuery(query, path, qparams=[]):
     print("Table pickled")
 
 
-def getkwh(datestart, dateend, timestart, timeend, subset):
-    query = """
-    SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
-    FROM
-    (
-        SELECT read_time, icp_id
-        FROM
-        (
-            SELECT read_time 
-            FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp, 
-                '30 minutes'::interval) read_time
-        ) AS tsdata CROSS JOIN public.icp_sample_5k
-    ) AS comb
-    LEFT JOIN
-    (
-        SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
-        FROM (
-            SELECT  a.icp_id
-                 , a.read_date
-                 , c.period
-                 , sum(c.read_kwh) as kwh_tot
-                 , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
-                 , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
-            FROM    coup_prd.coupdatamaster a,
-                unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
-            WHERE   a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
-             and   a.read_date <  to_date(%(dateend)s,'yyyy-mm-dd')
-             and   a.content_code  ~ ('UN|CN|EG')
-             AND   a.icp_id IN (
-                SELECT icp_id FROM public.icp_sample_5k
-             )
-            GROUP BY 1, 2, 3
-        ) AS coup_tall
-    ) AS tall_timestamp 
-    ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
-    """
-    pdict = {
-        'datestart': datestart,
-        'dateend': dateend,
-        'tsstart': timestart,
-        'tsend': timeend
-        # 'subset': subset
-    }
-    print("Getting data with parameters:")
-    pprint(pdict)
-    qdf = getQuery(query, pdict)
-    print("Optimising")
-    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
-    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float16)
-    print("Done")
-    return(qdf)
-
-
-def gettemp(datestart, dateend, station):
+def gettemp(datestart, dateend, station, verbose = True):
     query = """
     SELECT record_no, station, temp_date, temp_date + temp_time AS temp_timestamp, tmax_c, tmin_c,
         tgmin, tmean, rhmean
@@ -136,19 +94,18 @@ def gettemp(datestart, dateend, station):
         'dateend':   dateend,
         'station':   station
     }
-    print("Getting data with parameters:")
-    pprint(pdict)
-    qdf = getQuery(query, pdict)
-    print("converting")
+    if verbose:
+        print("Getting data with parameters:")
+        pprint(pdict)
+    qdf = getQuery(query, pdict, verbose)
+    if verbose:
+        print("converting")
     qdf.temp_date = p.to_datetime(qdf.temp_date)
-    # qdf.temp_time = qdf.temp_time.to_timestamp()
-    print('Done')
+    if verbose:
+        print('Done')
     return qdf
 
 
-
-
-
 if __name__ == "__main__":
     dv = getQuery('SELECT version()').version[0]
     print('PostgreSQL database version:')
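
The new datevalid() helper is meant to be passed as an argparse type, as the download scripts above do; a small hypothetical demonstration:

from argparse import ArgumentParser
from util import datevalid  # assumes util.py is on the path

parser = ArgumentParser()
parser.add_argument("-s", "--start-date", dest="startdate", type=datevalid)

args = parser.parse_args(["-s", "2017-01-01"])
print(args.startdate)  # 2017-01-01 00:00:00 (a datetime.datetime)
# parser.parse_args(["-s", "not-a-date"]) exits with "Invalid date: not-a-date"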