# Repository for Petra's work at ampli, Jan-Feb 2019
#
# util.py
import datetime as dt
from configparser import ConfigParser
from pprint import pprint

import numpy as np
import pandas as p
import pandas.io.sql as psql
import psycopg2 as pg
  8. def config(filename='database.ini', section='postgresql'):
  9. """Config parser from
  10. http://www.postgresqltutorial.com/postgresql-python/connect/"""
  11. # create a parser
  12. parser = ConfigParser()
  13. # read config file
  14. parser.read(filename)
  15. # get section, default to postgresql
  16. db = {}
  17. if parser.has_section(section):
  18. params = parser.items(section)
  19. for param in params:
  20. db[param[0]] = param[1]
  21. else:
  22. raise Exception(
  23. 'Section {0} not found in the {1} file'.format(section, filename))
  24. return db
  25. def getQuery(query, qparams=[]):
  26. """
  27. Get single query
  28. """
  29. conn = False
  30. try:
  31. params = config()
  32. print("Connecting to database")
  33. conn = pg.connect(**params)
  34. cur = conn.cursor()
  35. # Get table
  36. print("Retrieving table")
  37. dataframe = psql.read_sql(query, conn, params=qparams)
  38. cur.close()
  39. print("Table recieved")
  40. return dataframe
  41. except (Exception, pg.DatabaseError) as error:
  42. print(error)
  43. return None
  44. finally:
  45. if conn is not False:
  46. conn.close()
  47. print('Database connection closed')
  48. def pickleQuery(query, path, qparams=[]):
  49. dq = getQuery(query, qparams)
  50. dq.to_pickle(path)
  51. print("Table pickled")
  52. def getkwh(datestart, dateend, timestart, timeend, subset):
  53. query = """
  54. SELECT SUBSTRING(comb.icp_id FROM 2 FOR 6)::int AS icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
  55. FROM
  56. (
  57. SELECT read_time, icp_id
  58. FROM
  59. (
  60. SELECT read_time
  61. FROM GENERATE_SERIES(%(tsstart)s::timestamp, %(tsend)s::timestamp,
  62. '30 minutes'::interval) read_time
  63. ) AS tsdata CROSS JOIN public.icp_sample_5k
  64. ) AS comb
  65. LEFT JOIN
  66. (
  67. SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
  68. FROM (
  69. SELECT a.icp_id
  70. , a.read_date
  71. , c.period
  72. , sum(c.read_kwh) as kwh_tot
  73. , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
  74. , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
  75. FROM coup_prd.coupdatamaster a,
  76. unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
  77. WHERE a.read_date >= to_date(%(datestart)s,'yyyy-mm-dd')
  78. and a.read_date < to_date(%(dateend)s,'yyyy-mm-dd')
  79. and a.content_code ~ ('UN|CN|EG')
  80. AND a.icp_id IN (
  81. SELECT icp_id FROM public.icp_sample_5k
  82. )
  83. GROUP BY 1, 2, 3
  84. ) AS coup_tall
  85. ) AS tall_timestamp
  86. ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
  87. """
  88. pdict = {
  89. 'datestart': datestart,
  90. 'dateend': dateend,
  91. 'tsstart': timestart,
  92. 'tsend': timeend
  93. # 'subset': subset
  94. }
  95. print("Getting data with parameters:")
  96. pprint(pdict)
  97. qdf = getQuery(query, pdict)
  98. print("Optimising")
  99. qdf['icp_id'] = qdf['icp_id'].astype(np.int32)
  100. qdf['kwh_tot'] = qdf['kwh_tot'].astype(np.float16)
  101. print("Done")
  102. return(qdf)
  103. def gettemp(datestart, dateend, station):
  104. query = """
  105. SELECT record_no, station, temp_date, temp_date + temp_time AS temp_timestamp, tmax_c, tmin_c,
  106. tgmin, tmean, rhmean
  107. FROM weather.temperature_fact
  108. WHERE station = %(station)s AND
  109. temp_date >= to_date(%(datestart)s, 'yyyy-mm-dd') AND
  110. temp_date < to_date(%(dateend)s, 'yyyy-mm-dd')
  111. ORDER BY temp_date, temp_time;
  112. """
  113. pdict = {
  114. 'datestart': datestart,
  115. 'dateend': dateend,
  116. 'station': station
  117. }
  118. print("Getting data with parameters:")
  119. pprint(pdict)
  120. qdf = getQuery(query, pdict)
  121. print("converting")
  122. qdf.temp_date = p.to_datetime(qdf.temp_date)
  123. # qdf.temp_time = qdf.temp_time.to_timestamp()
  124. print('Done')
  125. return qdf
  126. if __name__ == "__main__":
  127. dv = getQuery('SELECT version()').version[0]
  128. print('PostgreSQL database version:')
  129. print(dv)