Repository for Petra's work at ampli Jan-Feb 2019

clustering.py 1.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. from util import getQuery, pickleQuery
  2. import pandas as p
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. # query = """
  6. # SELECT *, read_date + CONCAT(period / 2, ':', period %% 2 * 30, ':00')::time AS read_time
  7. # FROM public.coup_tall_april WHERE icp_id LIKE (%s) AND read_date = to_date(%s, 'dd/mm/yyyy')
  8. # ORDER BY icp_id, read_time;
  9. # """
  10. #
  11. # qparams = ['%%1117', '20/04/2017']
  12. #query = """
  13. #SELECT read_date, period, AVG(kwh_tot) AS average
  14. #FROM public.coup_tall_april
  15. #GROUP BY read_date, period
  16. #ORDER BY read_date, period;
  17. #"""
  18. #
  19. #qparams = []
  20. #
  21. #df = getQuery(query, qparams)
  22. #
  23. #print(df.info())
  24. #
  25. #sns.set()
  26. #
  27. ##sns.lineplot(x = 'read_time', y = 'kwh_tot', hue = 'icp_id', data = df)
  28. #sns.lineplot(x = 'period', y = 'average', hue = 'read_date', data = df)
  29. #
  30. #plt.show()
  31. query = """
  32. SELECT comb.icp_id, comb.read_time, COALESCE(kwh_tot, 0) AS kwh_tot
  33. FROM
  34. (
  35. SELECT read_time, icp_id
  36. FROM
  37. (
  38. SELECT read_time
  39. FROM GENERATE_SERIES('2017-04-01 00:30:00'::timestamp, '2017-05-01 00:00:00'::timestamp,
  40. '30 minutes'::interval) read_time
  41. ) AS tsdata CROSS JOIN
  42. (
  43. SELECT *
  44. FROM
  45. (
  46. SELECT icp_id, COUNT(DISTINCT read_date) AS data_days
  47. FROM coup_prd.coupdatamaster
  48. WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
  49. AND read_date < to_date('01/01/2018','dd/mm/yyyy')
  50. AND content_code = 'UN'
  51. GROUP BY icp_id
  52. ) AS cir
  53. WHERE data_days >= 360
  54. ) AS qual_icp
  55. ) AS comb
  56. LEFT JOIN
  57. (
  58. SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
  59. FROM public.coup_tall_april
  60. ) AS tall_timestamp
  61. ON comb.read_time = tall_timestamp.read_time AND comb.icp_id = tall_timestamp.icp_id;
  62. """
  63. pickleQuery(query, "April.pkl")