Repository for Petra's work at ampli, Jan–Feb 2019.

util.py (3.7 KB)
  1. import psycopg2 as pg
  2. from configparser import ConfigParser
  3. import pandas.io.sql as psql
  4. import datetime as dt
  5. def config(filename='database.ini', section='postgresql'):
  6. """Config parser from
  7. http://www.postgresqltutorial.com/postgresql-python/connect/"""
  8. # create a parser
  9. parser = ConfigParser()
  10. # read config file
  11. parser.read(filename)
  12. # get section, default to postgresql
  13. db = {}
  14. if parser.has_section(section):
  15. params = parser.items(section)
  16. for param in params:
  17. db[param[0]] = param[1]
  18. else:
  19. raise Exception(
  20. 'Section {0} not found in the {1} file'.format(section, filename))
  21. return db
  22. def getQuery(query, qparams=[]):
  23. """
  24. Get single query
  25. """
  26. conn = False
  27. try:
  28. params = config()
  29. print("Connecting to database")
  30. conn = pg.connect(**params)
  31. cur = conn.cursor()
  32. # Get table
  33. print("Retrieving table")
  34. dataframe = psql.read_sql(query, conn, params=qparams)
  35. cur.close()
  36. print("Table recieved")
  37. return dataframe
  38. except (Exception, pg.DatabaseError) as error:
  39. print(error)
  40. return None
  41. finally:
  42. if conn is not False:
  43. conn.close()
  44. print('Database connection closed')
  45. def pickleQuery(query, path, qparams=[]):
  46. dq = getQuery(query, qparams)
  47. dq.to_pickle(path)
  48. print("Table pickled")
  49. def getkwh(datestart, dateend, timestart, timeend, subset):
  50. query = """
  51. SELECT comb.icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
  52. FROM
  53. (
  54. SELECT read_time, icp_id
  55. FROM
  56. (
  57. SELECT read_time
  58. FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
  59. '30 minutes'::interval) read_time
  60. ) AS tsdata CROSS JOIN
  61. (
  62. SELECT *
  63. FROM
  64. (
  65. SELECT icp_id, COUNT(DISTINCT read_date) AS data_days
  66. FROM coup_prd.coupdatamaster
  67. WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
  68. AND read_date < to_date('01/01/2018','dd/mm/yyyy')
  69. AND content_code = 'UN'
  70. AND icp_id LIKE %(subset)s
  71. GROUP BY icp_id
  72. ) AS cir
  73. WHERE data_days >= 360
  74. ) AS qual_icp
  75. ) AS comb
  76. LEFT JOIN
  77. (
  78. SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
  79. FROM (
  80. SELECT a.icp_id
  81. , a.read_date
  82. , c.period
  83. , sum(c.read_kwh) as kwh_tot
  84. , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
  85. , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
  86. FROM coup_prd.coupdatamaster a,
  87. unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
  88. WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
  89. and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
  90. and a.content_code ~ ('UN|CN|EG')
  91. AND a.icp_id LIKE %(subset)s
  92. GROUP BY 1, 2, 3
  93. ) AS coup_tall
  94. ) AS tall_timestamp
  95. ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
  96. """
  97. pdict = {
  98. 'datestart': datestart,
  99. 'dateend': dateend,
  100. 'tsstart': timestart,
  101. 'tsend': timeend,
  102. 'subset': subset
  103. }
  104. print("Getting data with parameters:")
  105. print(pdict)
  106. qdf = getQuery(query, pdict)
  107. print("Done")
  108. return(qdf)
  109. if __name__ == "__main__":
  110. dv = getQuery('SELECT version()').version[0]
  111. print('PostgreSQL database version:')
  112. print(dv)