Repository for Petra's work at ampli Jan-Feb 2019

queries.pgsql 5.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. -- Create "tall" table for april 2017 (from Jason)
  2. CREATE TABLE public.coup_tall_april AS
  3. SELECT a.icp_id
  4. , a.read_date
  5. , c.period
  6. , sum(c.read_kwh) as kwh_tot
  7. , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
  8. , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
  9. FROM coup_prd.coupdatamaster a,
  10. unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
  11. WHERE a.read_date >= to_date('01/04/2017','dd/mm/yyyy')
  12. and a.read_date < to_date('01/05/2017','dd/mm/yyyy')
  13. and a.content_code ~ ('UN|CN|EG')
  14. GROUP BY 1, 2, 3
  15. ORDER BY 1, 2, 3;
  16. -- "Head" of the data
  17. SELECT * FROM public.coup_tall_april limit 10;
  18. -- Count distinct ICPs for each date
  19. SELECT read_date, COUNT(DISTINCT icp_id) AS d_icp
  20. FROM public.coup_tall_april
  21. GROUP BY read_date;
  22. -- Calculate mininmums, maximums, averages for each date
  23. SELECT read_date, MIN(kwh_tot), AVG(kwh_tot), MAX(kwh_tot)
  24. FROM public.coup_tall_april
  25. GROUP BY read_date;
  26. -- Find negative values
  27. SELECT * FROM public.coup_tall_april WHERE kwh_tot < 0 OR kwh_un < 0 OR kwh_cn < 0;
  28. -- Count number of ICPs that end in "17"
  29. SELECT COUNT (DISTINCT icp_id) FROM public.coup_tall_april WHERE icp_id LIKE '%17';
  30. -- List first 10 ICPs that end with number 17, sorted
  31. SELECT DISTINCT icp_id FROM public.coup_tall_april WHERE icp_id LIKE '%17' ORDER BY icp_id LIMIT 10;
  32. -- Verify that all ICP IDs are of length 7
  33. SELECT MIN(CHAR_LENGTH(icp_id)) as minl, MAX(CHAR_LENGTH(icp_id)) as maxl FROM public.coup_tall_april;
  34. -- String conversion
  35. SELECT DISTINCT CAST(SUBSTRING(icp_id FROM 2 FOR 6) AS int) AS icp_val, icp_id FROM public.coup_tall_april
  36. -- Count nulls in cast (none)
  37. SELECT COUNT(*) - COUNT(CAST(SUBSTRING(icp_id FROM 2 FOR 6) AS int)) FROM public.coup_tall_april;
  38. -- Verify that there are only "period" values from 1 to 48
  39. SELECT DISTINCT period FROM public.coup_tall_april ORDER BY period;
  40. -- Look at the 2nd specifically (DST ends, 2017)
  41. SELECT * FROM public.coup_tall_april
  42. WHERE read_date = to_date('02/04/2017', 'dd/mm/yy')
  43. ORDER BY icp_id, period
  44. LIMIT 60;
  45. -- September interlude STARTS
  46. -- Create "tall" table for Septeber 24 2017 (mod from Jason, above) to look at DST starting
  47. CREATE TABLE public.coup_dst_2017 AS
  48. SELECT a.icp_id
  49. , a.read_date
  50. , c.period
  51. , sum(c.read_kwh) as kwh_tot
  52. , sum(case when a.content_code = 'UN' then c.read_kwh else 0 end) as kwh_un
  53. , sum(case when a.content_code in ('CN','EG') then c.read_kwh else 0 end) as kwh_cn
  54. FROM coup_prd.coupdatamaster a,
  55. unnest(a.read_array) WITH ORDINALITY c(read_kwh, period)
  56. WHERE a.read_date = to_date('24/09/2017','dd/mm/yyyy')
  57. and a.content_code ~ ('UN|CN|EG')
  58. GROUP BY 1, 2, 3
  59. ORDER BY 1, 2, 3;
  60. -- Look at September 24 2017 table
  61. SELECT * FROM public.coup_dst_2017
  62. ORDER BY icp_id, period
  63. LIMIT 60;
  64. -- Verify that there are only "period" values from 1 to 48
  65. SELECT COUNT(DISTINCT period) FROM public.coup_dst_2017;
  66. -- September interlude ENDS
  67. -- Timestamp conversion
  68. SELECT DISTINCT DATE_PART('year', read_date), DATE_PART('month', read_date),
  69. DATE_PART('day', read_date) FROM public.coup_tall_april ORDER BY 1, 2, 3;
  70. -- Creating "time" values
  71. SELECT *, (CONCAT(q.h, ':', q.m, ':00'))::time AS t FROM
  72. (
  73. SELECT DISTINCT period, period / 2 as h, period % 2 * 30 AS m
  74. FROM public.coup_tall_april
  75. ) AS q ORDER BY period;
  76. -- Creating timestamp column
  77. SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
  78. FROM public.coup_tall_april ORDER BY icp_id, read_time limit 50;
  79. -- Possible subset for further testing
  80. SELECT *, read_date + CONCAT(period / 2, ':', period % 2 * 30, ':00')::time AS read_time
  81. FROM public.coup_tall_april WHERE icp_id LIKE '%1117' ORDER BY icp_id, read_time;
  82. -- Calculate averages for each half-hour
  83. SELECT read_date, period, AVG(kwh_tot) AS average
  84. FROM public.coup_tall_april
  85. GROUP BY read_date, period
  86. ORDER BY read_date, period;
  87. -- Gerate timestamp list
  88. SELECT dd FROM GENERATE_SERIES('2017-04-01'::timestamp, '2017-04-02'::timestamp, '30 minutes'::interval) dd;
  89. -- Fraction of icp's in april with 1440 entries
  90. SELECT SUM(CASE WHEN isum.c = 1440 THEN 1 ELSE 0 END)::numeric / COUNT(*)::numeric As frac FROM
  91. (
  92. SELECT COUNT(*) AS c, icp_id FROM public.coup_tall_april
  93. GROUP BY icp_id
  94. ) AS isum;
  95. -- Get numbers of ICPs present over whole dataset
  96. SELECT COUNT(*) as c, icp_id, read_date FROM coup_prd.coupdatamaster
  97. WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
  98. AND read_date < to_date('01/01/2018','dd/mm/yyyy')
  99. AND content_code = 'UN'
  100. GROUP BY icp_id, read_date ORDER BY c DESC, read_date LIMIT 40;
  101. SELECT MIN(read_date) as min_date, MAX(read_date) as max_date FROM coup_prd.coupdatamaster;
  102. SELECT SUM(CASE WHEN cir.c > 365 THEN 1 ELSE 0 END), COUNT(*)
  103. FROM
  104. (
  105. SELECT COUNT(*) as c, icp_id FROM coup_prd.coupdatamaster
  106. WHERE read_date >= to_date('01/01/2017','dd/mm/yyyy')
  107. AND read_date < to_date('01/01/2018','dd/mm/yyyy')
  108. AND content_code = 'UN'
  109. GROUP BY icp_id ORDER BY c DESC
  110. ) AS cir;