Repository for Petra's work at ampli, Jan–Feb 2019.

util.py (3.5 KB)
  1. import psycopg2 as pg
  2. from configparser import ConfigParser
  3. import pandas.io.sql as psql
  4. import datetime as dt
  5. import numpy as np
  6. from pprint import pprint
  7. def config(filename='database.ini', section='postgresql'):
  8. """Config parser from
  9. http://www.postgresqltutorial.com/postgresql-python/connect/"""
  10. # create a parser
  11. parser = ConfigParser()
  12. # read config file
  13. parser.read(filename)
  14. # get section, default to postgresql
  15. db = {}
  16. if parser.has_section(section):
  17. params = parser.items(section)
  18. for param in params:
  19. db[param[0]] = param[1]
  20. else:
  21. raise Exception(
  22. 'Section {0} not found in the {1} file'.format(section, filename))
  23. return db
  24. def getQuery(query, qparams=[]):
  25. """
  26. Get single query
  27. """
  28. conn = False
  29. try:
  30. params = config()
  31. print("Connecting to database")
  32. conn = pg.connect(**params)
  33. cur = conn.cursor()
  34. # Get table
  35. print("Retrieving table")
  36. dataframe = psql.read_sql(query, conn, params=qparams)
  37. cur.close()
  38. print("Table recieved")
  39. return dataframe
  40. except (Exception, pg.DatabaseError) as error:
  41. print(error)
  42. return None
  43. finally:
  44. if conn is not False:
  45. conn.close()
  46. print('Database connection closed')
  47. def pickleQuery(query, path, qparams=[]):
  48. dq = getQuery(query, qparams)
  49. dq.to_pickle(path)
  50. print("Table pickled")
  51. def getkwh(datestart, dateend, timestart, timeend, subset):
  52. query = """
  53. SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
  54. FROM
  55. (
  56. SELECT read_time, icp_id
  57. FROM
  58. (
  59. SELECT read_time
  60. FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
  61. '30 minutes'::interval) read_time
  62. ) AS tsdata CROSS JOIN public.icp_sample_1618
  63. ) AS comb
  64. LEFT JOIN
  65. (
  66. SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
  67. FROM (
  68. SELECT a.icp_id
  69. , a.read_date
  70. , c.period
  71. , sum(c.read_kwh) as kwh_tot
  72. , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
  73. , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
  74. FROM coup_prd.coupdatamaster a,
  75. unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
  76. WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
  77. and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
  78. and a.content_code ~ ('UN|CN|EG')
  79. AND a.icp_id IN (
  80. SELECT icp_id FROM public.icp_sample_1618
  81. )
  82. GROUP BY 1, 2, 3
  83. ) AS coup_tall
  84. ) AS tall_timestamp
  85. ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
  86. """
  87. pdict = {
  88. 'datestart': datestart,
  89. 'dateend': dateend,
  90. 'tsstart': timestart,
  91. 'tsend': timeend
  92. # 'subset': subset
  93. }
  94. print("Getting data with parameters:")
  95. pprint(pdict)
  96. qdf = getQuery(query, pdict)
  97. print("Optimising")
  98. qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
  99. qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float32)
  100. print("Done")
  101. return(qdf)
  102. if __name__ == "__main__":
  103. dv = getQuery('SELECT version()').version[0]
  104. print('PostgreSQL database version:')
  105. print(dv)