"""Helpers for pulling query results from PostgreSQL into pandas DataFrames."""

from configparser import ConfigParser
import datetime as dt
from pprint import pprint

import numpy as np
import pandas as p
import pandas.io.sql as psql
import psycopg2 as pg


def config(filename='database.ini', section='postgresql'):
    """Read connection parameters from one section of an INI file.

    Config parser from
    http://www.postgresqltutorial.com/postgresql-python/connect/

    Args:
        filename: Path of the INI file to read.
        section: Name of the section holding the connection settings.

    Returns:
        A dict mapping each option name in the section to its string value.

    Raises:
        Exception: If the section is not present. A missing or unreadable
            file also ends up here, because ``ConfigParser.read`` skips
            files it cannot open.
    """
    parser = ConfigParser()
    parser.read(filename)

    if not parser.has_section(section):
        raise Exception(
            'Section {0} not found in the {1} file'.format(section, filename))

    return dict(parser.items(section))


def getQuery(query, qparams=None):
    """Run a single query and return its result as a DataFrame.

    Connection settings come from ``config()``. Errors are handled on a
    best-effort basis: any exception raised while reading the
    configuration, connecting, or querying is printed and ``None`` is
    returned instead of propagating.

    Args:
        query: SQL text, optionally containing driver-style placeholders.
        qparams: Parameters for the placeholders (sequence or mapping).
            ``None`` means no parameters and is passed on as an empty list.

    Returns:
        The query result as a DataFrame, or ``None`` if anything failed.
    """
    # Avoid a shared mutable default; an empty list is what used to be sent.
    if qparams is None:
        qparams = []

    conn = None
    try:
        params = config()
        print("Connecting to database")
        conn = pg.connect(**params)
        print("Retrieving table")
        dataframe = psql.read_sql(query, conn, params=qparams)
        print("Table recieved")
        return dataframe
    except Exception as error:
        # Deliberate best-effort boundary: report and signal failure with None.
        print(error)
        return None
    finally:
        # conn stays None when config() or connect() failed.
        if conn is not None:
            conn.close()
            print('Database connection closed')


def pickleQuery(query, path, qparams=None):
    """Run a query and pickle the resulting DataFrame to ``path``.

    Args:
        query: SQL text passed to ``getQuery``.
        path: Destination file for ``DataFrame.to_pickle``.
        qparams: Optional query parameters passed to ``getQuery``.

    Raises:
        RuntimeError: If the query failed (``getQuery`` returned ``None``),
            so that a failed run is never reported as pickled.
    """
    dq = getQuery(query, qparams)
    if dq is None:
        raise RuntimeError(
            "Query failed; nothing was pickled to {0}".format(path))
    dq.to_pickle(path)
    print("Table pickled")


def getkwh(datestart, dateend, timestart, timeend, subset):
    """Fetch half-hourly kWh totals for every ICP in ``public.icp_sample``.

    The query builds a 30-minute timestamp series from ``timestart`` to
    ``timeend``, cross joins it with ``public.icp_sample``, and left joins
    the summed readings from ``coup_prd.coupdatamaster`` (content codes
    UN, CN and EG). Slots with no matching reading get ``kwh_tot`` of 0.

    Args:
        datestart: First reading date to include, as a 'yyyy-mm-dd' string.
        dateend: Reading dates strictly before this 'yyyy-mm-dd' date are
            included.
        timestart: Start of the generated series; cast to a timestamp by
            the query.
        timeend: End of the generated series; cast to a timestamp by the
            query.
        subset: Currently unused by the query; kept so existing callers
            keep working.

    Returns:
        A DataFrame with columns ``icp_id`` (int32, six characters of the
        ICP id starting at position 2), ``read_time`` and ``kwh_tot``
        (float32), or ``None`` if the query failed.
    """
    # '%%' is a literal percent sign escaped for the driver's parameter
    # substitution.
    query = """
        SELECT
            SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id,
            comb.read_time,
            COALESCE(kwh_tot, 0) AS kwh_tot
        FROM
        (
            SELECT read_time, icp_id
            FROM
            (
                SELECT read_time
                FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
                    '30 minutes'::interval) read_time
            ) AS tsdata
            CROSS JOIN public.icp_sample
        ) AS comb
        LEFT JOIN
        (
            SELECT *,
                read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
            FROM (
                SELECT a.icp_id
                     , a.read_date
                     , c.period
                     , sum(c.read_kwh) as kwh_tot
                     , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
                     , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
                FROM coup_prd.coupdatamaster a,
                    unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
                WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
                  and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
                  and a.content_code ~ ('UN|CN|EG')
                  AND a.icp_id IN (
                    SELECT icp_id
                    FROM public.icp_sample
                )
                GROUP BY 1, 2, 3
            ) AS coup_tall
        ) AS tall_timestamp
        ON comb.read_time = tall_timestamp.read_time
            AND comb.icp_id = tall_timestamp.icp_id;
    """
    pdict = {
        'datestart': datestart,
        'dateend': dateend,
        'tsstart': timestart,
        'tsend': timeend,
    }
    print("Getting data with parameters:")
    pprint(pdict)
    qdf = getQuery(query, pdict)
    if qdf is None:
        # getQuery already printed the error; pass its failure value on.
        return None

    print("Optimising")
    # Downcast to 32-bit types to shrink the frame's memory footprint.
    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float32)
    print("Done")
    return qdf


def gettemp(datestart, dateend, station):
    """Fetch temperature records for one station over a date range.

    Args:
        datestart: First date to include, as a 'yyyy-mm-dd' string.
        dateend: Dates strictly before this 'yyyy-mm-dd' date are included.
        station: Value matched against the ``station`` column of
            ``weather.temperature_fact``.

    Returns:
        A DataFrame ordered by date and time with columns ``record_no``,
        ``station``, ``temp_date`` (converted with ``pandas.to_datetime``),
        ``temp_timestamp``, ``tmax_c``, ``tmin_c``, ``tgmin``, ``tmean``
        and ``rhmean``, or ``None`` if the query failed.
    """
    query = """
        SELECT record_no, station, temp_date, temp_date + temp_time AS temp_timestamp,
            tmax_c, tmin_c, tgmin, tmean, rhmean
        FROM weather.temperature_fact
        WHERE station = %(station)s
            AND temp_date >= to_date(%(datestart)s, 'yyyy-mm-dd')
            AND temp_date < to_date(%(dateend)s, 'yyyy-mm-dd')
        ORDER BY temp_date, temp_time;
    """
    pdict = {
        'datestart': datestart,
        'dateend': dateend,
        'station': station,
    }
    print("Getting data with parameters:")
    pprint(pdict)
    qdf = getQuery(query, pdict)
    if qdf is None:
        # getQuery already printed the error; pass its failure value on.
        return None

    print("converting")
    qdf['temp_date'] = p.to_datetime(qdf['temp_date'])
    print('Done')
    return qdf


if __name__ == "__main__":
    # Smoke test: connect and report the server version.
    version_df = getQuery('SELECT version()')
    if version_df is None:
        raise SystemExit('Could not retrieve the PostgreSQL version')
    print('PostgreSQL database version:')
    print(version_df['version'].iloc[0])