123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- import psycopg2 as pg
- from configparser import ConfigParser
- import pandas.io.sql as psql
- import datetime as dt
- import numpy as np
-
-
def config(filename='database.ini', section='postgresql'):
    """Load one section of an INI file as a dict of connection settings.

    Adapted from
    http://www.postgresqltutorial.com/postgresql-python/connect/

    Args:
        filename: Path of the INI file to parse.
        section: Name of the section whose options are returned.

    Returns:
        A dict mapping every option name found in ``section`` to its
        string value.

    Raises:
        Exception: If ``section`` is not present after parsing ``filename``.
            ``ConfigParser.read`` skips files it cannot open, so a missing
            file also ends up here.
    """
    ini = ConfigParser()
    ini.read(filename)

    # Guard clause: fail early when the requested section is absent.
    if not ini.has_section(section):
        raise Exception(
            'Section {0} not found in the {1} file'.format(section, filename))

    return {option: value for option, value in ini.items(section)}
-
-
def getQuery(query, qparams=None):
    """Run a single SQL query and return the result as a DataFrame.

    Connection settings are read with ``config()``. Any failure (reading
    the configuration, connecting, or executing the query) is printed and
    reported to the caller by returning ``None`` rather than raising.

    Args:
        query: SQL text to execute.
        qparams: Optional query parameters (sequence or mapping) forwarded
            to ``read_sql``. When omitted, an empty list is forwarded.

    Returns:
        The DataFrame produced by ``read_sql``, or ``None`` if any step
        failed.
    """
    # Use None as the default instead of a shared mutable list; an empty
    # list is still what gets forwarded when the caller gives no params.
    if qparams is None:
        qparams = []

    conn = None
    try:
        params = config()

        print("Connecting to database")
        conn = pg.connect(**params)

        # read_sql drives the connection itself, so no separate cursor
        # needs to be opened here.
        print("Retrieving table")
        dataframe = psql.read_sql(query, conn, params=qparams)
        print("Table recieved")

        return dataframe

    except Exception as error:
        # Deliberate best-effort: report the problem and signal failure
        # with None so existing callers keep working.
        print(error)
        return None

    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed')
-
-
def pickleQuery(query, path, qparams=None):
    """Run ``query`` and pickle the resulting DataFrame to ``path``.

    Args:
        query: SQL text to execute via ``getQuery``.
        path: Destination passed to ``DataFrame.to_pickle``.
        qparams: Optional query parameters forwarded to ``getQuery``.
            When omitted, an empty list is forwarded.

    Raises:
        RuntimeError: If ``getQuery`` returned ``None`` (the query failed),
            so there is nothing to pickle.
    """
    # Use None as the default instead of a shared mutable list.
    if qparams is None:
        qparams = []

    dq = getQuery(query, qparams)
    if dq is None:
        # getQuery signals failure with None; stop with a clear error
        # rather than an AttributeError on the next line.
        raise RuntimeError("Query failed; no table to pickle")

    dq.to_pickle(path)
    print("Table pickled")
-
-
def getkwh(datestart, dateend, timestart, timeend, subset):
    """Fetch half-hourly kWh totals for every ICP in ``public.icp_sample``.

    The query builds a 30-minute timestamp series between ``timestart`` and
    ``timeend``, cross joins it with ``public.icp_sample``, and left joins
    the per-period totals unnested from ``coup_prd.coupdatamaster`` for
    read dates in ``[datestart, dateend)``. Combinations with no matching
    reading get ``kwh_tot = 0`` through ``COALESCE``.

    Args:
        datestart: Inclusive lower bound on ``read_date``, 'yyyy-mm-dd'.
        dateend: Exclusive upper bound on ``read_date``, 'yyyy-mm-dd'.
        timestart: First timestamp of the generated 30-minute series.
        timeend: Last timestamp of the generated 30-minute series.
        subset: Currently unused; it is not passed to the query.

    Returns:
        A DataFrame with columns ``icp_id`` (cast to int32), ``read_time``
        and ``kwh_tot`` (cast to float32).

    Raises:
        RuntimeError: If ``getQuery`` returned ``None`` (the query failed).
    """
    # NOTE: '%%' is an escaped literal '%' (SQL modulo); this relies on the
    # query being executed with parameters, as it is below.
    query = """
    SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
    FROM
    (
        SELECT read_time, icp_id
        FROM
        (
            SELECT read_time
            FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
                '30 minutes'::interval) read_time
        ) AS tsdata CROSS JOIN public.icp_sample
    ) AS comb
    LEFT JOIN
    (
        SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
        FROM (
            SELECT a.icp_id
                , a.read_date
                , c.period
                , sum(c.read_kwh) as kwh_tot
                , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
                , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
            FROM coup_prd.coupdatamaster a,
                unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
            WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
                and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
                and a.content_code ~ ('UN|CN|EG')
                AND a.icp_id IN (
                    SELECT icp_id FROM public.icp_sample
                )
            GROUP BY 1, 2, 3
        ) AS coup_tall
    ) AS tall_timestamp
    ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
    """
    pdict = {
        'datestart': datestart,
        'dateend': dateend,
        'tsstart': timestart,
        'tsend': timeend,
    }
    print("Getting data with parameters:")
    print(pdict)

    qdf = getQuery(query, pdict)
    if qdf is None:
        # getQuery signals failure with None; stop with a clear error
        # rather than a TypeError when indexing below.
        raise RuntimeError("Query failed; no kWh data retrieved")

    # Downcast to 32-bit dtypes to reduce the frame's memory footprint.
    print("Optimising")
    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float32)
    print("Done")
    return qdf
-
-
-
-
if __name__ == "__main__":
    # Smoke test: connect with the configured settings and print the
    # server version string.
    version_frame = getQuery('SELECT version()')
    if version_frame is None:
        # getQuery already printed the underlying error; exit non-zero
        # with a clear message instead of an AttributeError on None.
        raise SystemExit('Could not retrieve the PostgreSQL database version')
    dv = version_frame.version[0]
    print('PostgreSQL database version:')
    print(dv)
|