HashAggregate has slow performance - PostgreSQL

I have a table like this:
my index is id_station_date_hour
My query:
select id_station,area_type,
sum(case when goinside = 1 and(zone1+zone2+zone3+cashiertime+special) > 0 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_time,
sum(case when goinside = 1 and(zone1+zone2+zone3+cashiertime+special) > 0 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num,
sum(case when goinside = 1 and(zone1+zone2+zone3+cashiertime+special) > 0 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_time_prev,
sum(case when goinside = 1 and(zone1+zone2+zone3+cashiertime+special) > 0 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 180 and (zone1+zone2+zone3+cashiertime+special) < 360 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_3to6,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 180 and (zone1+zone2+zone3+cashiertime+special) < 360 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_3to6_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 360 and (zone1+zone2+zone3+cashiertime+special) < 600 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_6to10,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 360 and (zone1+zone2+zone3+cashiertime+special) < 600 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_6to10_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) < 180 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_less3,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) < 180 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_less3_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 60 and (zone1+zone2+zone3+cashiertime+special) < 180 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_1to3,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 60 and (zone1+zone2+zone3+cashiertime+special) < 180 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_1to3_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 600 and (zone1+zone2+zone3+cashiertime+special) < 900 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_10to15,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 600 and (zone1+zone2+zone3+cashiertime+special) < 900 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_10to15_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) < 60 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_less1,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) < 60 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_less1_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 900 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_over15,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 900 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_over15_prev,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 600 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then zone1+zone2+zone3+cashiertime+special else 0 end) as ex_z1z2z3z4z5_num_over10,
sum(case when goinside = 1 and (zone1+zone2+zone3+cashiertime+special) >= 600 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as ex_z1z2z3z4z5_num_over10_prev,
sum(case when goinside = 1 and type = 1 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as man,
sum(case when goinside = 1 and type = 2 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as woman,
sum(case when goinside = 1 and type = 3 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as couple,
sum(case when goinside = 1 and type = 4 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as boy,
sum(case when goinside = 1 and type = 5 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as girl,
sum(case when goinside = 1 and type = 6 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as young_couple,
sum(case when goinside = 1 and type = 7 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as old_man,
sum(case when goinside = 1 and type = 8 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as old_woman,
sum(case when goinside = 1 and type = 9 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as old_couple,
sum(case when goinside = 1 and type = 10 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as family,
sum(case when goinside = 1 and type = 10 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then family_total else 0 end) as family_total,
sum(case when goinside = 1 and type = 11 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as group,
sum(case when goinside = 1 and type = 11 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then group_man else 0 end) as group_man,
sum(case when goinside = 1 and type = 11 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then group_woman else 0 end) as group_woman,
sum(case when goinside = 1 and type = 12 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as empty,
sum(case when goinside = 1 and date >= '2017-10-01' and date <= '2017-10-31' and hour >= 9 and hour < 22 then 1 else 0 end) as total_entries,
sum(case when goinside = 1 and type = 1 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as man_prev,
sum(case when goinside = 1 and type = 2 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as woman_prev,
sum(case when goinside = 1 and type = 3 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as couple_prev,
sum(case when goinside = 1 and type = 4 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as boy_prev,
sum(case when goinside = 1 and type = 5 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as girl_prev,
sum(case when goinside = 1 and type = 6 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as young_couple_prev,
sum(case when goinside = 1 and type = 7 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as old_man_prev,
sum(case when goinside = 1 and type = 8 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as old_woman_prev,
sum(case when goinside = 1 and type = 9 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as old_couple_prev,
sum(case when goinside = 1 and type = 10 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as family_prev,
sum(case when goinside = 1 and type = 10 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then family_total else 0 end) as family_total_prev,
sum(case when goinside = 1 and type = 11 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as grou_prev,
sum(case when goinside = 1 and type = 11 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then group_man else 0 end) as group_man_prev,
sum(case when goinside = 1 and type = 11 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then group_woman else 0 end) as group_woman_prev,
sum(case when goinside = 1 and type = 12 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as empty_prev,
sum(case when goinside = 1 and date >= '2017-08-31' and date <= '2017-09-30' and hour >= 9 and hour < 22 then 1 else 0 end) as total_entries_prev
from data_1034_detail where id_station IN (2399,2397) AND ((date >= '2017-10-01' and date <= '2017-10-31' AND hour >= 9 and hour < 22) OR (date >= '2017-08-31' and date <= '2017-09-30' AND hour >= 9 and hour < 22)) group by id_station, area_type
Actually, all selected fields have different conditions, so the query will get slower as I add more conditions and selected fields. How can I set up a proper index in this case?
Explain:
'HashAggregate (cost=304337.96..304338.36 rows=40 width=428) (actual time=7233.402..7233.449 rows=37 loops=1)'
' Group Key: id_station, area_type'
' -> Seq Scan on data_1034_detail (cost=0.00..17573.72 rows=218904 width=64) (actual time=0.035..394.334 rows=282899 loops=1)'
' Filter: ((id_station = ANY ('{2399,2397}'::bigint[])) AND (hour >= 9) AND (hour < 22) AND (((date >= '2017-10-01'::date) AND (date <= '2017-10-31'::date)) OR ((date >= '2017-08-31'::date) AND (date <= '2017-09-30'::date))))'
'Planning time: 0.735 ms'
'Execution time: 7234.169 ms'
And any chance to make this query execute in 1s?

It is the arithmetic that makes the query slow.
I am not sure, but maybe you can win some if you use a FILTER clause with the summation.
Instead of
sum(CASE WHEN goinside = 1
AND (zone1 + zone2 + zone3 + cashiertime + special) > 0
AND date >= '2017-10-01'
AND date <= '2017-10-31'
AND hour >= 9
AND hour < 22
THEN zone1 + zone2 + zone3 + cashiertime + special
ELSE 0
END) AS ex_z1z2z3z4z5_time
you could write:
-- Sum zone1 + zone2 + zone3 + cashiertime + special, aggregating only the
-- rows that satisfy the FILTER predicate below.
sum(zone1 + zone2 + zone3 + cashiertime + special)
FILTER (WHERE goinside = 1
-- only rows whose combined value is positive
AND (zone1 + zone2 + zone3 + cashiertime + special) > 0
-- date range, inclusive on both ends
AND date >= '2017-10-01'
AND date <= '2017-10-31'
-- hours 9 through 21 (upper bound is exclusive)
AND hour >= 9
AND hour < 22) AS ex_z1z2z3z4z5_time
You will still have to do all the calculations, but the sum will be over fewer items.
Perhaps PG-Strom can help you here – this extension delegates operations that can be parallelized to the GPU.

Related

Sum is not working in postgres. (Switching from mysql syntax to postgres)

I have this in Mysql and it works perfectly fine. Switching my project from mysql to postgres:
SELECT
CASE
WHEN EXTRACT(HOUR FROM t_stamp) >= 5 AND EXTRACT(HOUR FROM t_stamp) < 6 THEN '5-6'
WHEN EXTRACT(HOUR FROM t_stamp) >= 6 AND EXTRACT(HOUR FROM t_stamp) < 7 THEN '6-7'
WHEN EXTRACT(HOUR FROM t_stamp) >= 7 AND EXTRACT(HOUR FROM t_stamp) < 8 THEN '7-8'
WHEN EXTRACT(HOUR FROM t_stamp) >= 8 AND EXTRACT(HOUR FROM t_stamp) < 9 THEN '8-9'
WHEN EXTRACT(HOUR FROM t_stamp) >= 9 AND EXTRACT(HOUR FROM t_stamp) < 10 THEN '9-10'
WHEN EXTRACT(HOUR FROM t_stamp) >= 10 AND EXTRACT(HOUR FROM t_stamp) < 11 THEN '10-11'
WHEN EXTRACT(HOUR FROM t_stamp) >= 11 AND EXTRACT(HOUR FROM t_stamp) < 12 THEN '11-12'
WHEN EXTRACT(HOUR FROM t_stamp) >= 12 AND EXTRACT(HOUR FROM t_stamp) < 13 THEN '12-13'
WHEN EXTRACT(HOUR FROM t_stamp) >= 13 AND EXTRACT(HOUR FROM t_stamp) < 14 THEN '13-14'
WHEN EXTRACT(HOUR FROM t_stamp) >= 14 AND EXTRACT(HOUR FROM t_stamp) < 15 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 15 AND EXTRACT(HOUR FROM t_stamp) < 16 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 16 AND EXTRACT(HOUR FROM t_stamp) < 17 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 17 AND EXTRACT(HOUR FROM t_stamp) < 18 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 18 AND EXTRACT(HOUR FROM t_stamp) < 19 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 19 AND EXTRACT(HOUR FROM t_stamp) < 20 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 20 AND EXTRACT(HOUR FROM t_stamp) < 21 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 21 AND EXTRACT(HOUR FROM t_stamp) < 22 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 22 AND EXTRACT(HOUR FROM t_stamp) < 23 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 23 AND EXTRACT(HOUR FROM t_stamp) < 24 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 24 AND EXTRACT(HOUR FROM t_stamp) < 0 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 0 AND EXTRACT(HOUR FROM t_stamp) < 1 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 1 AND EXTRACT(HOUR FROM t_stamp) < 2 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 2 AND EXTRACT(HOUR FROM t_stamp) < 3 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 3 AND EXTRACT(HOUR FROM t_stamp) < 4 THEN '14-15'
WHEN EXTRACT(HOUR FROM t_stamp) >= 4 AND EXTRACT(HOUR FROM t_stamp) < 5 THEN '14-15'
END as onehour,
CASE
WHEN EXTRACT(HOUR FROM t_stamp) >= 5 AND EXTRACT(HOUR FROM t_stamp) < 6 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 6 AND EXTRACT(HOUR FROM t_stamp) < 7 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 7 AND EXTRACT(HOUR FROM t_stamp) < 8 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 8 AND EXTRACT(HOUR FROM t_stamp) < 9 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 9 AND EXTRACT(HOUR FROM t_stamp) < 10 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 10 AND EXTRACT(HOUR FROM t_stamp) < 11 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 11 AND EXTRACT(HOUR FROM t_stamp) < 12 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 12 AND EXTRACT(HOUR FROM t_stamp) < 13 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 13 AND EXTRACT(HOUR FROM t_stamp) < 14 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 14 AND EXTRACT(HOUR FROM t_stamp) < 15 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 15 AND EXTRACT(HOUR FROM t_stamp) < 16 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 16 AND EXTRACT(HOUR FROM t_stamp) < 17 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >=17 AND EXTRACT(HOUR FROM t_stamp) < 18 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 18 AND EXTRACT(HOUR FROM t_stamp) < 19 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 19 AND EXTRACT(HOUR FROM t_stamp) < 20 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 20 AND EXTRACT(HOUR FROM t_stamp) < 21 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 21 AND EXTRACT(HOUR FROM t_stamp) < 22 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 22 AND EXTRACT(HOUR FROM t_stamp) < 23 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 23 AND EXTRACT(HOUR FROM t_stamp) < 24 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 24 AND EXTRACT(HOUR FROM t_stamp) < 0 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 0 AND EXTRACT(HOUR FROM t_stamp) < 1 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 1 AND EXTRACT(HOUR FROM t_stamp) < 2 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 2 AND EXTRACT(HOUR FROM t_stamp) < 3 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 3 AND EXTRACT(HOUR FROM t_stamp) < 4 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
WHEN EXTRACT(HOUR FROM t_stamp) >= 4 AND EXTRACT(HOUR FROM t_stamp) < 5 THEN sum(case when "FullSheetNumber" != 0 then 1 else 0 end)
END as total_eachhour
FROM group_table
GROUP BY onehour,t_stamp
ORDER BY onehour
and the current outcome:
enter image description here
But what I really want is something like:
onehour
total_eachhour
10-11
56
11-12
32
Your root problem is: GROUP BY onehour,t_stamp. It should only be GROUP BY onehour.
However, your query can be simplified. You don't need the case expression for the count(), and I would use a proper time value to define the hour range:
The conditional aggregation can be done using filter() in Postgres which I think is more readable:
So the query would look like this:
-- Label each row with a one-hour bucket based on the time-of-day part of
-- t_stamp, and count per bucket the rows whose "FullSheetNumber" is non-zero.
select case
-- each branch is a half-open range: lower bound inclusive, upper bound exclusive
when t_stamp::time >= '05:00' and t_stamp::time < '06:00' then '5-6'
when t_stamp::time >= '06:00' and t_stamp::time < '07:00' then '6-7'
when t_stamp::time >= '07:00' and t_stamp::time < '08:00' then '7-8'
when t_stamp::time >= '08:00' and t_stamp::time < '09:00' then '8-9'
when t_stamp::time >= '09:00' and t_stamp::time < '10:00' then '9-10'
-- the line below is an elision placeholder, not SQL: the remaining hour ranges go here
....
end as one_our,
-- FILTER restricts which rows this count(*) sees
count(*) filter (where "FullSheetNumber" <> 0 ) as total_eachhour
from group_table
-- group by the bucket label only, so each label produces a single row
group by one_our;

Performing Count() and Avg() within a Case statement

I am using this CASE statement to show a total count of visits and average length of visit grouped by age groups:
CASE WHEN AgeCalcSort = 0 AND AgeCalcSort <= 1 then (
Count(VisitID),
AVG(DATEDIFF(dd,StartDate,EndDate ))
)
WHEN AgeCalcSort >= 2 AND AgeCalcSort <= 17 then (
Count(VisitID),
AVG(DATEDIFF(dd,StartDate,EndDate ))
)
WHEN AgeCalcSort >= 18 AND AgeCalcSort <= 64 then (
Count(VisitID),
AVG(DATEDIFF(dd,StartDate,EndDate ))
)
WHEN AgeCalcSort >= 65 then (
Count(VisitID),
AVG(DATEDIFF(dd,StartDate,EndDate ))
)
END,
The result should look like this:
Age 1 Count Age 1 Avg LOS Age 2 Count Age 2 Avg LOS Age 3 Count Age 3 Avg LOS Age 4 Count Age 4 Avg LOS
5 5.3 18 9.2 20 12 0 0
Can anyone advise what I am doing wrong, or suggest a better way to achieve the end result? Thanks in advance.
You have mentioned 2 points:
1) Can anyone advise what am I doing wrong
You are using the CASE expression in your query incorrectly.
You cannot have 2 columns returned when some condition is satisfied in CASE statement.
As per Microsoft documentation, CASE evaluates a list of conditions and returns one of multiple possible result expressions.
Check this Microsoft white paper for further study Ref. URL
2) a better way to achieve the end result
There can be many ways to achieve this.
Below is one simple way. Try the query below:
-- Conditional aggregation: one visit count and one average length of stay
-- (in days) per age band. Bands follow the question: 0-1, 2-17, 18-64, 65+.
-- Each band uses an inclusive range (>= lower AND <= upper); testing the
-- lower bound with "=" would restrict the band to that single age.
SELECT
    -- SUM over a 1/0 flag counts the rows that fall in the band.
    SUM(CASE WHEN AgeCalcSort >= 0 AND AgeCalcSort <= 1 THEN 1 ELSE 0 END) AS [Age 1 Count],
    -- ELSE NULL keeps out-of-band rows out of the average (AVG skips NULLs).
    -- "* 1.0" makes the input decimal: AVG over an int expression returns an
    -- int, which would truncate a result such as 5.3 to 5.
    AVG(CASE WHEN AgeCalcSort >= 0 AND AgeCalcSort <= 1 THEN DATEDIFF(DAY, StartDate, EndDate) * 1.0 ELSE NULL END) AS [Age 1 Avg LOS],
    SUM(CASE WHEN AgeCalcSort >= 2 AND AgeCalcSort <= 17 THEN 1 ELSE 0 END) AS [Age 2 Count],
    AVG(CASE WHEN AgeCalcSort >= 2 AND AgeCalcSort <= 17 THEN DATEDIFF(DAY, StartDate, EndDate) * 1.0 ELSE NULL END) AS [Age 2 Avg LOS],
    SUM(CASE WHEN AgeCalcSort >= 18 AND AgeCalcSort <= 64 THEN 1 ELSE 0 END) AS [Age 3 Count],
    AVG(CASE WHEN AgeCalcSort >= 18 AND AgeCalcSort <= 64 THEN DATEDIFF(DAY, StartDate, EndDate) * 1.0 ELSE NULL END) AS [Age 3 Avg LOS],
    SUM(CASE WHEN AgeCalcSort >= 65 THEN 1 ELSE 0 END) AS [Age 4 Count],
    AVG(CASE WHEN AgeCalcSort >= 65 THEN DATEDIFF(DAY, StartDate, EndDate) * 1.0 ELSE NULL END) AS [Age 4 Avg LOS]
FROM [YourTableName]
Query explanation
I have achieved the count by using SUM: each count expression yields 1 when the row satisfies the condition and 0 otherwise, and those values are then added up by SUM, so the 1's ultimately give the count.
For the average, you will notice that when the condition is not satisfied I am using NULL, which is intentional, to avoid wrong math.
In T-SQL, AVG of (2,4,NULL) is 3, while AVG of (2,4,0) is 2, so in your case NULL keeps the non-matching rows from skewing the average.
A case expression returns a single value, which you may then count or average.
It is not possible to produce multiple columns of output from a single case expression. So, you need one case expression for each wanted column of output, like this:
-- One COUNT and one AVG per age band (0-1, 2-17, 18-64, 65+).
-- The CASE has no ELSE branch, so rows outside a band produce NULL; COUNT and
-- AVG both ignore NULLs, so only in-band rows contribute.
-- Bands are inclusive ranges; the last band is open-ended (>= 65).
select
      COUNT(CASE WHEN AgeCalcSort >= 0 AND AgeCalcSort <= 1 then VisitID end) as [Age 1 Count]
      -- "* 1.0" makes the input decimal; AVG over an int expression returns a truncated int
    , AVG(CASE WHEN AgeCalcSort >= 0 AND AgeCalcSort <= 1 then DATEDIFF(dd,StartDate,EndDate ) * 1.0 end) as [Age 1 Avg LOS]
    , COUNT(CASE WHEN AgeCalcSort >= 2 AND AgeCalcSort <= 17 then VisitID end) as [Age 2 Count]
    , AVG(CASE WHEN AgeCalcSort >= 2 AND AgeCalcSort <= 17 then DATEDIFF(dd,StartDate,EndDate ) * 1.0 end) as [Age 2 Avg LOS]
    , COUNT(CASE WHEN AgeCalcSort >= 18 AND AgeCalcSort <= 64 then VisitID end) as [Age 3 Count]
    , AVG(CASE WHEN AgeCalcSort >= 18 AND AgeCalcSort <= 64 then DATEDIFF(dd,StartDate,EndDate ) * 1.0 end) as [Age 3 Avg LOS]
    , COUNT(CASE WHEN AgeCalcSort >= 65 then VisitID end) as [Age 4 Count]
    , AVG(CASE WHEN AgeCalcSort >= 65 then DATEDIFF(dd,StartDate,EndDate ) * 1.0 end) as [Age 4 Avg LOS]
from ...
Notes:
place each case expression inside the relevant aggregate function
the count() function ignores NULLs so if the when conditions are not met the count will not increment
whilst T-SQL documentation can sometimes use the term "case statement" technically that is incorrect; e.g. a complete select query is a "statement", and "expressions" evaluate to single values, hence the correct term is case expression

redshift/postgresql - previously selected column in a case statement

I have an sql query that basically queries a table containing two columns: event_name and event_count.
event_count is an integer representing seconds the event took to execute. I wrote the query below that rolls up events into tiers, ie 1-5 seconds, 5-10 seconds, etc so we can determine how long each takes to execute.
I'm wondering if there is a better approach to do what I'm doing below as I don't think this is currently very optimal since it has to run sum() on every case.
I tried to put the previously selected column (d_event_count) in the case statement like this:
(CASE WHEN d_event_count >= 0 and d_event_count < 5 THEN d_event_count ELSE 0 END) as d_time_1_5,
but it fails indicating column does not exist in the table so I'm here looking for some help :)
select
event_name as d_event_name,
sum(event_count) as d_event_count,
(CASE WHEN sum(event_count) >= 0 and sum(event_count) < 5 THEN sum(event_count) ELSE 0 END) as d_time_1_5,
(CASE WHEN sum(event_count) >= 5 and sum(event_count) < 10 THEN sum(event_count) ELSE 0 END) as d_time_5_10,
(CASE WHEN sum(event_count) >= 10 and sum(event_count) < 15 THEN sum(event_count) ELSE 0 END) as d_time_10_15,
(CASE WHEN sum(event_count) >= 15 and sum(event_count) < 30 THEN sum(event_count) ELSE 0 END) as d_time_15_30,
(CASE WHEN sum(event_count) >= 30 and sum(event_count) < 45 THEN sum(event_count) ELSE 0 END) as d_time_30_45,
(CASE WHEN sum(event_count) >= 45 and sum(event_count) < 60 THEN sum(event_count) ELSE 0 END) as d_time_45_60,
(CASE WHEN sum(event_count) >= 60 and sum(event_count) < 120 THEN sum(event_count) ELSE 0 END) as d_time_60_120,
(CASE WHEN sum(event_count) >= 120 and sum(event_count) < 180 THEN sum(event_count) ELSE 0 END) as d_time_120_180,
(CASE WHEN sum(event_count) >= 180 and sum(event_count) < 240 THEN sum(event_count) ELSE 0 END) as d_time_180_240,
(CASE WHEN sum(event_count) >= 240 and sum(event_count) < 300 THEN sum(event_count) ELSE 0 END) as d_time_240_300,
(CASE WHEN sum(event_count) >= 300 THEN sum(event_count) ELSE 0 END) as d_time_300
from
product_events
where
...
group by 1
Changing the table structure is not a possibility, there are dozens of other columns, this is just a stripped down representation.
Can someone help me out?
Try in this way, it uses a cumulative sum of d_event_count:
-- Demo fixture: nine events, each with d_event_count = 5.
CREATE TEMP TABLE ev AS
SELECT
    src.event_name,
    src.d_event_count
FROM (
    VALUES
        ('event 1'::text, 5::int),
        ('event 2'::text, 5::int),
        ('event 3'::text, 5::int),
        ('event 4'::text, 5::int),
        ('event 5'::text, 5::int),
        ('event 6'::text, 5::int),
        ('event 7'::text, 5::int),
        ('event 8'::text, 5::int),
        ('event 9'::text, 5::int)
) AS src (event_name, d_event_count);

-- Running total of d_event_count in event_name order, with a label naming
-- the range the running total falls into.
WITH running AS (
    SELECT
        event_name,
        sum(d_event_count) OVER (ORDER BY event_name) AS total
    FROM ev
)
SELECT
    event_name,
    total,
    CASE
        WHEN total >= 0 AND total <= 5 THEN 'd_time_1_5'
        WHEN total >= 6 AND total <= 10 THEN 'd_time_6_10'
        WHEN total >= 11 AND total <= 15 THEN 'd_time_11_15'
        ELSE 'more than 15'
    END AS d_name
FROM running;
+------------+-------+--------------+
| event_name | total | d_name |
+------------+-------+--------------+
| event 1 | 5 | d_time_1_5 |
+------------+-------+--------------+
| event 2 | 10 | d_time_6_10 |
+------------+-------+--------------+
| event 3 | 15 | d_time_11_15 |
+------------+-------+--------------+
| event 4 | 20 | more than 15 |
+------------+-------+--------------+
| event 5 | 25 | more than 15 |
+------------+-------+--------------+
| event 6 | 30 | more than 15 |
+------------+-------+--------------+
| event 7 | 35 | more than 15 |
+------------+-------+--------------+
| event 8 | 40 | more than 15 |
+------------+-------+--------------+
| event 9 | 45 | more than 15 |
+------------+-------+--------------+
Or another solution using a WINDOW function:
-- Same running total and range label, computed without a CTE: the named
-- window is declared once and reused by every sum(...) OVER reference.
SELECT
    event_name,
    sum(d_event_count) OVER by_name AS total,
    CASE
        WHEN sum(d_event_count) OVER by_name >= 0
             AND sum(d_event_count) OVER by_name <= 5 THEN 'd_time_1_5'
        WHEN sum(d_event_count) OVER by_name >= 6
             AND sum(d_event_count) OVER by_name <= 10 THEN 'd_time_6_10'
        WHEN sum(d_event_count) OVER by_name >= 11
             AND sum(d_event_count) OVER by_name <= 15 THEN 'd_time_11_15'
        ELSE 'more than 15'
    END AS d_name
FROM ev
WINDOW by_name AS (ORDER BY event_name);
Check it here: http://rextester.com/BTR25807

Selecting data from 2 specific dates gives "Division by zero" error

I want to return data from yesterday and 8 days ago.
To do this I use the following line in my query:
WHERE (o.status_date::date = now()::date - INTERVAL '8 days')
OR (o.status_date::date = now()::date - INTERVAL '1 day')
However, this returns a "Division by zero" error. When I use only one of the two, so for example:
WHERE (o.status_date::date = now()::date - INTERVAL '8 days')
I get no error...
I don't understand where the error comes from, or perhaps I'm making a very straightforward mistake. Any help is appreciated!
Edited, these are the calculations done in my query:
SUM(CASE WHEN o.status_id = '12' THEN 1 ELSE 0 END) AS failed_63,
SUM(CASE WHEN o.status_id IN ('6','11','12','14','22','24') THEN 1 ELSE 0 END) AS total_orders,
ROUND(
(SUM(CASE WHEN o.status_id = '12' THEN 1 ELSE 0 END) * 100)::numeric /
(SUM(CASE WHEN o.status_id IN ('11','12','14','22','24') THEN 1 ELSE 0 END)), 2) AS perc_fail,
COUNT(DISTINCT i.order_id) AS order_issues,
ROUND(
(COUNT(DISTINCT i.order_id) * 100)::numeric / (SUM(CASE WHEN o.status_id IN ('11','12','14','22','24') THEN 1 ELSE 0 END)), 2) AS issue_rate,
SUM(CASE WHEN o.status_id = '6' THEN 1 ELSE 0 END) AS overdue_53,
ROUND(
(SUM(CASE WHEN o.status_id = '6' THEN 1 ELSE 0 END) * 100)::numeric /
(SUM(CASE WHEN o.status_id IN ('6','11','12','14','22','24') THEN 1 ELSE 0 END)), 2) AS perc_overdue,
ROUND(
(AVG(dop.vendor_confirmation_time)::numeric / 60), 2) AS avg_v_confirmation_time,
CASE
WHEN (AVG(dop.vendor_confirmation_time)::numeric / 60) < 3 THEN 'good'
WHEN (AVG(dop.vendor_confirmation_time)::numeric / 60) IS NULL THEN 'n/a'
ELSE 'bad'
END AS vendor_response
You have several cases in your query where your divisor might be 0, as in:
SUM(CASE WHEN o.status_id IN ('6','11','12','14','22','24') THEN 1 ELSE 0 END)
The best way to solve this is to use a sub-query to calculate all the sums, which are repeated anyway, and then do the division and rounding in the main query, where the divisor is not 0:
-- The subquery aggregates each status count once; the outer query derives the
-- totals and percentages from those counts.
SELECT
    sum12 AS failed_63,
    sum6 + sum12 + sum11_24 AS total_orders,
    -- NULLIF turns a zero divisor into NULL, so the ratio becomes NULL instead
    -- of raising "division by zero". Wrap in COALESCE(..., 0) if 0 is preferred.
    -- The "100." literal makes the division numeric rather than integer.
    round(sum12 * 100. / NULLIF(sum12 + sum11_24, 0), 2) AS perc_fail,
    order_issues,
    round(order_issues * 100. / NULLIF(sum12 + sum11_24, 0), 2) AS issue_rate,
    sum6 AS overdue_53,
    round(sum6 * 100. / NULLIF(sum6 + sum12 + sum11_24, 0), 2) AS perc_overdue,
    round(avg_v_confirmation_time, 2) AS avg_v_confirmation_time,
    CASE
        WHEN avg_v_confirmation_time < 3 THEN 'good'
        WHEN avg_v_confirmation_time IS NULL THEN 'n/a'
        ELSE 'bad'
    END AS vendor_response
FROM (
    SELECT
        sum(CASE WHEN o.status_id = '6' THEN 1 ELSE 0 END) AS sum6,
        sum(CASE WHEN o.status_id = '12' THEN 1 ELSE 0 END) AS sum12,
        -- statuses 11, 14, 22 and 24 only; status 12 is counted separately in sum12
        sum(CASE WHEN o.status_id IN ('11','14','22','24') THEN 1 ELSE 0 END) AS sum11_24,
        count(DISTINCT i.order_id) AS order_issues,
        avg(dop.vendor_confirmation_time::numeric / 60) AS avg_v_confirmation_time
    -- table list and conditions are elided here, as in the original answer
    FROM o, i, dop
    WHERE ... ) sub
In this case I set all columns where the divisor would be 0 to NULL; change as appropriate.
For future questions:
List your PostgreSQL version
Post the entire query with table qualifiers for all columns
Preferably, post the table structure
I still don't know why my first line didn't work, but I've now found a work-around by using the following:
WHERE o.status_date::date BETWEEN CURRENT_DATE - INTERVAL '8 days' AND CURRENT_DATE - INTERVAL '1 day'
AND o.status_date::date NOT BETWEEN CURRENT_DATE - INTERVAL '7 days' AND CURRENT_DATE - INTERVAL '2 days'

Sum up items between setup of custom times

We need to count the number of items that occur 10 minutes before and 10 minutes after the hour, by day. We have a table that tracks the items individually. Ideally I would like to have the output be something like the below, but am totally open to other suggestions.
Table - Attendance
Att_item timestamp
1 2012-09-12 18:08:00
2 2012-09-01 23:26:00
3 2012-09-23 09:33:00
4 2012-09-11 09:43:00
5 2012-09-06 05:57:00
6 2012-09-17 19:26:00
7 2012-09-06 10:51:00
8 2012-09-19 09:42:00
9 2012-09-06 13:55:00
10 2012-09-05 07:26:00
11 2012-09-02 03:08:00
12 2012-09-19 12:17:00
13 2012-09-12 18:14:00
14 2012-09-12 18:14:00
Output
Date Timeslot_5pm Timeslot_6pm Timeslot_7pm
9/11/2012 11 22 22
9/12/2012 30 21 55
9/13/2012 44 33 44
Your requirements are not totally clear, but if you only want to count the number of records in the 20 minute window:
-- Per calendar day, count the rows whose minute-of-hour is 50-59 or 0-10,
-- with one output column per clock hour from 1 to 23.
-- Each row is bucketed by its own hour value, so a 17:55 row is counted under
-- Timeslot_5pm; rows in hour 0 pass the filter but match no column.
SELECT
    CAST(tstmp AS date) AS date,
    SUM(CASE DATEPART(hour, tstmp) WHEN 1 THEN 1 ELSE 0 END) AS Timeslot_1am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 2 THEN 1 ELSE 0 END) AS Timeslot_2am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 3 THEN 1 ELSE 0 END) AS Timeslot_3am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 4 THEN 1 ELSE 0 END) AS Timeslot_4am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 5 THEN 1 ELSE 0 END) AS Timeslot_5am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 6 THEN 1 ELSE 0 END) AS Timeslot_6am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 7 THEN 1 ELSE 0 END) AS Timeslot_7am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 8 THEN 1 ELSE 0 END) AS Timeslot_8am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 9 THEN 1 ELSE 0 END) AS Timeslot_9am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 10 THEN 1 ELSE 0 END) AS Timeslot_10am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 11 THEN 1 ELSE 0 END) AS Timeslot_11am,
    SUM(CASE DATEPART(hour, tstmp) WHEN 12 THEN 1 ELSE 0 END) AS Timeslot_12pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 13 THEN 1 ELSE 0 END) AS Timeslot_1pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 14 THEN 1 ELSE 0 END) AS Timeslot_2pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 15 THEN 1 ELSE 0 END) AS Timeslot_3pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 16 THEN 1 ELSE 0 END) AS Timeslot_4pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 17 THEN 1 ELSE 0 END) AS Timeslot_5pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 18 THEN 1 ELSE 0 END) AS Timeslot_6pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 19 THEN 1 ELSE 0 END) AS Timeslot_7pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 20 THEN 1 ELSE 0 END) AS Timeslot_8pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 21 THEN 1 ELSE 0 END) AS Timeslot_9pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 22 THEN 1 ELSE 0 END) AS Timeslot_10pm,
    SUM(CASE DATEPART(hour, tstmp) WHEN 23 THEN 1 ELSE 0 END) AS Timeslot_11pm
FROM yourtable
-- minutes 11..49 are the ones outside the 50-59 / 0-10 window
WHERE DATEPART(minute, tstmp) NOT BETWEEN 11 AND 49
GROUP BY CAST(tstmp AS date)
If you want to count the number of records within each hour plus the records that are in the >=50 and <= 10 timeframe, then you will have to adjust this.
This does just one column (well 4 but you get my point).
-- One row per (year, month, day, hour) with the number of rows whose
-- minute-of-hour is 50-59 or 0-10.
SELECT
    DATEPART(year, FTSdate) AS [year],
    DATEPART(month, FTSdate) AS [month],
    DATEPART(day, FTSdate) AS [day],
    DATEPART(hour, FTSdate) AS [hour],
    COUNT(*)
FROM [Gabe2a].[dbo].[docSVsys]
-- minutes 11..49 are the ones outside the 50-59 / 0-10 window
WHERE DATEPART(minute, FTSdate) NOT BETWEEN 11 AND 49
GROUP BY
    DATEPART(year, FTSdate),
    DATEPART(month, FTSdate),
    DATEPART(day, FTSdate),
    DATEPART(hour, FTSdate)
ORDER BY
    DATEPART(year, FTSdate),
    DATEPART(month, FTSdate),
    DATEPART(day, FTSdate),
    DATEPART(hour, FTSdate)
Separate columns.
-- One row per (year, month, day); each hour shown (0 through 4) gets its own
-- column counting the rows in that hour whose minute-of-hour is 50-59 or 0-10.
SELECT
    DATEPART(year, FTSdate) AS [year],
    DATEPART(month, FTSdate) AS [month],
    DATEPART(day, FTSdate) AS [day],
    SUM(CASE WHEN DATEPART(hour, FTSdate) = '0' THEN 1 ELSE 0 END) AS [0:00], -- midnight
    SUM(CASE WHEN DATEPART(hour, FTSdate) = '1' THEN 1 ELSE 0 END) AS [1:00],
    SUM(CASE WHEN DATEPART(hour, FTSdate) = '2' THEN 1 ELSE 0 END) AS [2:00],
    SUM(CASE WHEN DATEPART(hour, FTSdate) = '3' THEN 1 ELSE 0 END) AS [3:00],
    SUM(CASE WHEN DATEPART(hour, FTSdate) = '4' THEN 1 ELSE 0 END) AS [4:00]
FROM [Gabe2a].[dbo].[docSVsys]
-- minutes 11..49 are the ones outside the 50-59 / 0-10 window
WHERE DATEPART(minute, FTSdate) NOT BETWEEN 11 AND 49
GROUP BY
    DATEPART(year, FTSdate),
    DATEPART(month, FTSdate),
    DATEPART(day, FTSdate)
ORDER BY
    DATEPART(year, FTSdate),
    DATEPART(month, FTSdate),
    DATEPART(day, FTSdate)