Repository for Petra's work at ampli Jan-Feb 2019

util.py 3.5KB

import psycopg2 as pg
from configparser import ConfigParser
import pandas.io.sql as psql
import datetime as dt
import numpy as np


def config(filename='database.ini', section='postgresql'):
    """Config parser from
    http://www.postgresqltutorial.com/postgresql-python/connect/"""
    # create a parser
    parser = ConfigParser()
    # read config file
    parser.read(filename)
    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception(
            'Section {0} not found in the {1} file'.format(section, filename))
    return db
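
# An illustrative database.ini for config() (the section name matches the
# default above; all values are placeholders):
#
#   [postgresql]
#   host = localhost
#   port = 5432
#   database = ampli
#   user = petra
#   password = secret
#
# Every key in the section is passed straight to psycopg2.connect(), so any
# connection keyword psycopg2 accepts (e.g. sslmode) can be added here.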

def getQuery(query, qparams=[]):
    """
    Run a single query and return the result as a pandas DataFrame
    (None if the query fails).
    """
    conn = False
    try:
        params = config()
        print("Connecting to database")
        conn = pg.connect(**params)
        cur = conn.cursor()
        # Get table
        print("Retrieving table")
        dataframe = psql.read_sql(query, conn, params=qparams)
        cur.close()
        print("Table received")
        return dataframe
    except (Exception, pg.DatabaseError) as error:
        print(error)
        return None
    finally:
        if conn is not False:
            conn.close()
            print('Database connection closed')
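
# Illustrative usage (the table name is borrowed from the sample query in
# getkwh() below; assumes database.ini describes a reachable server):
#   df = getQuery("SELECT * FROM public.icp_sample LIMIT 10")
# Named parameters can be supplied as a dict, mirroring how getkwh() calls
# this function with %(name)s placeholders in the SQL.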

def pickleQuery(query, path, qparams=[]):
    """Run a query via getQuery and pickle the resulting DataFrame to path."""
    dq = getQuery(query, qparams)
    dq.to_pickle(path)
    print("Table pickled")
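
# Illustrative usage (query and path are placeholders):
#   pickleQuery("SELECT * FROM public.icp_sample", "icp_sample.pkl")
# Note that getQuery() returns None when the query fails, in which case the
# to_pickle() call above raises rather than writing a file.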

def getkwh(datestart, dateend, timestart, timeend, subset):
    query = """
    SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
    FROM
    (
        SELECT read_time, icp_id
        FROM
        (
            SELECT read_time
            FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
                                 '30 minutes'::interval) read_time
        ) AS tsdata CROSS JOIN public.icp_sample
    ) AS comb
    LEFT JOIN
    (
        SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
        FROM (
            SELECT a.icp_id
                 , a.read_date
                 , c.period
                 , sum(c.read_kwh) as kwh_tot
                 , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
                 , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
            FROM coup_prd.coupdatamaster a,
                 unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
            WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
              and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
              and a.content_code ~ ('UN|CN|EG')
              AND a.icp_id IN (
                  SELECT icp_id FROM public.icp_sample
              )
            GROUP BY 1, 2, 3
        ) AS coup_tall
    ) AS tall_timestamp
    ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
    """
    pdict = {
        'datestart': datestart,
        'dateend': dateend,
        'tsstart': timestart,
        'tsend': timeend
        # 'subset': subset
    }
    print("Getting data with parameters:")
    print(pdict)
    qdf = getQuery(query, pdict)
    print("Optimising")
    qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
    qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float32)
    print("Done")
    return qdf
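
# Notes on getkwh():
# - datestart/dateend are compared via to_date(..., 'yyyy-mm-dd') and
#   timestart/timeend are cast with ::timestamp, so ISO-formatted strings
#   (e.g. '2017-01-01' and '2017-01-01 00:30:00') are the expected inputs.
# - The subset argument is accepted but not currently used; the matching
#   entry in pdict is commented out.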

if __name__ == "__main__":
    dv = getQuery('SELECT version()').version[0]
    print('PostgreSQL database version:')
    print(dv)
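
When imported from another script or notebook, the same helpers can be chained together. A minimal sketch, assuming util.py and a valid database.ini sit in the working directory (the dates and pickle path below are illustrative):

    import util

    # Pull half-hourly kWh readings for an illustrative window and cache them locally.
    kwh = util.getkwh('2017-01-01', '2017-02-01',
                      '2017-01-01 00:30:00', '2017-02-01 00:00:00', None)
    kwh.to_pickle('kwh_demand.pkl')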