GROUP BY with CASE expression IN PostgreSQL

GROUP BY with CASE expression IN PostgreSQL - postgresql

This is my query:
SELECT userid,
case when extract(day from date )=1 then string_agg(status,'') end "1" ,
case when extract(day from date )=2 then string_agg(status,'') end "2" ,
.
.
.
case when extract(day from date )=31 then string_agg(status,'') end "31"
from attendance.attendance group by userid;
But I have the next error:
ERROR: column "attendance.date" must appear in the GROUP BY clause or be used in an aggregate function
When I group by userid and date, the output is:
userid 1 2 3 ... 31
1 P
1 A
1 P ...
1 P
I want to group by userid like this:
userid 1 2 3 ... 31
1 P A P ... P
2 P P P ... P
etc.
Any ideas? I'd really appreciate it.

You'll want to put the string_agg around the CASE expression:
-- Pivot each user's attendance into one column per day of the month.
-- The CASE yields status only for rows on the matching day and NULL otherwise;
-- string_agg skips NULL inputs, so each column holds just that day's status.
-- string_agg takes two arguments (value, delimiter); '' matches the call in the question.
SELECT userid,
       string_agg(case when extract(day from date) = 1 then status end, '') "1",
       string_agg(case when extract(day from date) = 2 then status end, '') "2",
       -- ... repeat the same expression for days 3 through 30 ...
       string_agg(case when extract(day from date) = 31 then status end, '') "31"
FROM attendance.attendance
GROUP BY userid;
A nicer way to write this query would be using a FILTER for the aggregate:
-- Same pivot as the CASE form, using an aggregate FILTER clause instead:
-- only rows whose day-of-month matches are fed into each string_agg.
-- string_agg takes two arguments (value, delimiter); '' matches the call in the question.
SELECT userid,
       string_agg(status, '') FILTER (WHERE extract(day from date) = 1) "1",
       string_agg(status, '') FILTER (WHERE extract(day from date) = 2) "2",
       -- ... repeat the same expression for days 3 through 30 ...
       string_agg(status, '') FILTER (WHERE extract(day from date) = 31) "31"
FROM attendance.attendance
GROUP BY userid;

Related

Subquery in 2 different time column

I tried this but it said that ERROR: subquery must return only one column
Select date_trunc('week', kyc.kyc_verify_date::timestamptz) as "week",
COUNT(*) filter (where kyc.status = 4) AS "A1",
COUNT(CASE WHEN kyc.status = 5
THEN
(Select date_trunc('week', kyc.last_update_time::timestamptz) AS "week",
count(*) filter (where kyc.status = 5)
From kyc
Group by 1)
END) AS "A2"
from KYC
Where kyc.kyc_verify_date >= date_trunc('week', CURRENT_TIMESTAMP - interval '4 week')
and kyc.kyc_verify_date < date_trunc('week', CURRENT_TIMESTAMP)
Group by 1
I wrote this query so that rows with status = 4 use kyc_verify_date as their date,
but rows with status = 5 use last_update_time instead.
What should I have to change in this query? or is there any way better?

A case expression returns a single value. Since you need 2 values from it you will need to repeat the expression.

How to subtract a seperate count from one grouping

I have a postgres query like this
select application.status as status, count(*) as "current_month" from application
where to_char(application.created, 'mon') = to_char('now'::timestamp - '1 month'::interval, 'mon')
and date_part('year',application.created) = date_part('year', CURRENT_DATE)
and application.job_status != 'expired'
group by application.status
It returns the table below, which has the number of applications grouped by status for the current month. However, I want to subtract the total count from a separate but related query from the internal_review number only. I want to count the number of rows with type = abc within the same table and for the same date range, and then subtract that amount from the internal_review number (type is a separate field). current_month_desired is how it should look.
status
current_month
current_month_desired
fail
22
22
internal_review
95
22
pass
146
146

UNTESTED: but maybe...
The intent here is to use an analytic and case expression to conditionally sum. This way, the subtraction is not needed in the first place as you are only "counting" the values needed.
-- Count applications per status, leaving out rows of type 'abc' when the
-- status is 'internal_review', so no separate subtraction is needed.
-- A plain conditional aggregate is used (no OVER clause): the query already
-- groups by status, and a window function at this level could not reference
-- the ungrouped "type" column.
SELECT application.status AS status
     , sum(case when application.type = 'abc'
                 and application.status = 'internal_review' then 0
                else 1 end) AS "current_month"
FROM application
WHERE to_char(application.created, 'mon') = to_char('now'::timestamp - '1 month'::interval, 'mon')
  and date_part('year',application.created) = date_part('year', CURRENT_DATE)
  and application.job_status != 'expired'
GROUP BY application.status

Window Function To Calculate Closing Percentage

I am working with a query in PSQL and I am trying to use a window function to divide two other window function counts. This is what I currently have:
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
CASE
WHEN date_quoted IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
CASE WHEN edocs_signed_date IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
CASE
WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales
The line that I am trying to get to work is the 3rd Case:
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
I am basically trying to divide the 2nd count window function by the 1st count window function and get a percentage for month_closing.
These are my current results:
"2020-08-01 00:00:00-04" 869272 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Jesus" 1 1 100.0
"2020-08-01 00:00:00-04" 875518 "2020-08-19 00:00:00" "2020-09-01 00:00:00" "Jim" 36 1 0.0
"2020-08-01 00:00:00-04" 876462 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Nick" 39 12 0.0
"2020-08-01 00:00:00-04" 873572 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Piero" 63 36 0.0
I am only getting either 0.00 or 1.00 in my last column where I am trying to calculate the closing percentage. How can I make this work to get a true percentage?
Thanks!

What is happening is that the database engine first evaluates the division as integer / integer (for example, 1 / 36, which yields 0 with data type integer). It then does the multiplication 0 * 100.0 (integer * numeric, whose result type is numeric), so the result is 0.0.
To fix it, either cast the first count(*) to numeric,
or multiply it by 1.0,
or, since you are calculating a percentage, multiply the first count(*) by 100.0 before dividing, like so:
-- Per-rep monthly quote count, sale count and closing percentage, joined onto a
-- generated list of month starts.
-- month_cte: one row per month start, from 12 months before the current month
-- through the current month.
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
-- DISTINCT ON keeps one row per (month_year, rep_name); which row is kept is
-- decided by the outer ORDER BY.
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
-- Number of rows for this rep in the month of date_quoted.
CASE
WHEN date_quoted IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
-- Number of rows for this rep in the month of edocs_signed_date.
CASE WHEN edocs_signed_date IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
-- Closing percentage: the sales count is multiplied by 100.0 BEFORE the
-- division, so the division is numeric rather than integer / integer
-- (which would truncate to 0).
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL
THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date)) * 100.0
/ COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted)) AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
-- A row matches a month if either its quote date or its signed date falls in it.
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales

DateDiff Rows Where UserID is a match

On Sql Server 2012 (T-SQL), I would like to analyse the date difference between the end dates and start dates for the same userid, and to see if there is a equal or greater than twelve month gap between times.
So, for which ContractID is the start date 12 months or more after the previous end date (for the same UserID)?
ContractID UserID StartDate EndDate 12m Lapse
1 779 01/01/2000 01/01/2010 False
2 779 01/01/2010 01/01/2015 False
3 779 01/01/2016 NULL True
4 1021 09/03/2008 NULL False
Things perhaps to note are the userID is not in order on the real table, only the contractID is.

Using a CTE and the LAG() window function it's quite easy:
Create sample data:
-- Sample data in a local temporary table.
-- A name starting with # denotes a temp table, which is created with
-- CREATE TABLE; DECLARE ... TABLE is only for table variables (names start with @).
CREATE TABLE #T
(
    ContractID int,
    UserID int,
    StartDate date,
    EndDate date
);

-- Date literals are written month/day/year, matching the results shown
-- (09/03/2008 -> 2008-09-03); how they are parsed depends on the session's
-- date format setting.
INSERT INTO #T (ContractID, UserID, StartDate, EndDate) VALUES
(1, 779, '01/01/2000', '01/01/2010'),
(2, 779, '01/01/2010', '01/01/2015'),
(3, 779, '01/01/2016', NULL),
(4, 1021, '09/03/2008', NULL);
The query:
-- Flag contracts that start 12 or more months after the same user's previous
-- contract ended.
;WITH CTE AS
(
    SELECT ContractID,
           UserID,
           StartDate,
           EndDate,
           -- End date of the user's preceding contract (by StartDate); NULL for the first one.
           LAG(EndDate) OVER(PARTITION BY UserId ORDER BY StartDate) As PreviousEndDate
    FROM #T
)
SELECT ContractID,
       UserID,
       StartDate,
       EndDate,
       -- Compare against PreviousEndDate + 12 months instead of DATEDIFF(MONTH, ...):
       -- DATEDIFF counts month boundaries crossed, so 2015-01-31 -> 2016-01-01
       -- would report 12 although fewer than 12 full months have passed.
       -- When PreviousEndDate is NULL the comparison is not true, giving 'False'.
       CASE WHEN StartDate >= DATEADD(MONTH, 12, PreviousEndDate) THEN
           'True'
       ELSE
           'False'
       END As [12m Lapse]
FROM CTE
Results:
ContractID UserID StartDate EndDate 12m Lapse
----------- ----------- ---------- ---------- ---------
1 779 2000-01-01 2010-01-01 False
2 779 2010-01-01 2015-01-01 False
3 779 2016-01-01 NULL True
4 1021 2008-09-03 NULL False

-- NOTE(review): this compares StartDate and EndDate within the same row (the
-- row's own span); it does not compare a row's StartDate with the previous
-- row's EndDate for the same UserID.
SELECT * FROM Table WHERE DATEDIFF(M,StartDate,EndDate) >=12

Starting with SQL Server 2012, there is a function called Lag that will help you get what you need.
The PARTITION BY of the window function makes sure the rows are separated by userID, and the ORDER BY makes sure they are in contractID order.
-- For each contract, look up the end date of the same user's previous contract
-- (in contractID order) and flag a lapse of 12 or more months.
with prevEndDate as
(
    select t.contractID
         , t.userID
         , t.startDate
         , t.endDate
         , lag(t.endDate,1,NULL) over (partition by t.userID order by t.contractID asc) as prevEndDate
    from db_name.dbo.myTable as t
)
select p.contractID
     , p.userID
     , p.startDate
     , p.endDate
     -- Compare against prevEndDate + 12 months rather than datediff(m, ...):
     -- datediff counts month boundaries crossed, so it can report 12 when fewer
     -- than 12 full months have passed. A NULL prevEndDate yields 'False'.
     , case when p.startDate >= dateadd(month, 12, p.prevEndDate) then 'True' else 'False' end as [12m Lapse]
from prevEndDate as p

Number of entries between dates

I have a table with the following structure: -
day, id
2016-03-13, 123
2016-03-13, 123
2016-03-13, 231
2016-03-14, 231
2016-03-14, 231
2016-03-15, 129
And I'd like to build a table that looks like: -
id, d1, d7, d14
123, 1, 1, 1
231, 1, 2, 2
129, 1, 1, 1
Essentially for a given id, list the number of days which have an entry within a time window. So if id 123 has 10 entries within the last 14 days - d14 would be 10.
So far I have: -
SELECT
day,
id
FROM
events
WHERE
datediff (DAY, day, getdate()) <= 7
GROUP BY
day,
id

This query will do:
-- For each id, count the distinct days that have an entry within the last
-- 1, 7 and 14 days.
-- The CASE must return day (not a constant): COUNT(DISTINCT <constant>) can
-- only ever be 0 or 1, whereas counting distinct day values gives the number
-- of days with at least one entry.
SELECT
    id,
    COUNT(DISTINCT CASE WHEN current_date - day <= 1 THEN day END) d1,
    COUNT(DISTINCT CASE WHEN current_date - day <= 7 THEN day END) d7,
    COUNT(DISTINCT CASE WHEN current_date - day <= 14 THEN day END) d14
FROM
    events
GROUP BY
    id
ORDER BY
    id
Or, since PostgreSQL 9.4, slightly more concise:
-- Distinct days with at least one entry per id, for the trailing 1-, 7- and
-- 14-day windows. Each aggregate FILTER restricts the rows fed to its COUNT.
SELECT e.id,
       COUNT(DISTINCT e.day) FILTER (WHERE current_date - e.day <= 1)  AS d1,
       COUNT(DISTINCT e.day) FILTER (WHERE current_date - e.day <= 7)  AS d7,
       COUNT(DISTINCT e.day) FILTER (WHERE current_date - e.day <= 14) AS d14
FROM events AS e
GROUP BY e.id
ORDER BY e.id

try this:
-- Count each id's entries dated today (d1), within the last 7 days (d7) and
-- within the last 14 days (d14).
-- datediff(DAY, day, getdate()) is the same day-difference expression the
-- question uses; it compares whole days, so the time-of-day part of getdate()
-- does not affect the result.
SELECT id
     , count(case when datediff(DAY, day, getdate()) = 0 then 1 end) as d1
     , count(case when datediff(DAY, day, getdate()) <= 7 then 1 end) as d7
     , count(case when datediff(DAY, day, getdate()) <= 14 then 1 end) as d14
FROM events
-- Rows older than 14 days cannot contribute to any column.
WHERE datediff(DAY, day, getdate()) <= 14
-- If day can be later than today, also add: AND datediff(DAY, day, getdate()) >= 0
GROUP BY id