How to group by when using a modulo - postgresql

For each company, I want to sum the revenue for the 4 most recent quarters, then the 4 subsequent ones, and so on (see screenshot attached for details). How can I do that?
SQL query and result - 1st attempt (failed)
https://i.stack.imgur.com/wWhhb.png
SELECT
ticker,
period_end_date,
revenue,
1+ ((rn - 1) % 4) AS test
FROM (
SELECT
ticker,
period_end_date,
revenue,
ROW_NUMBER() OVER (PARTITION BY ticker ORDER BY period_end_date DESC) rn
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN'
ORDER BY ticker
) q
EDIT: the following code meets my needs. The 'revenue' is summed using the most recent quarter and the 3 quarters thereafter.
SELECT
ticker,
period_end_date,
SUM(revenue) OVER (PARTITION BY ticker ORDER BY period_end_date DESC ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS total_revenue
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN'
ORDER BY ticker

You can try this :
SELECT ticker
, period_end_date
, total_revenue
FROM (
SELECT ticker
, period_end_date
, SUM(revenue) OVER (PARTITION BY ticker ORDER BY period_end_date DESC ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS total_revenue
, max(period_end_date) OVER (PARTITION BY ticker) AS period_end_date_max
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN
) q
WHERE EXTRACT(MONTH FROM period_end_date) = EXTRACT(MONTH FROM period_end_date_max)
ORDER BY ticker, period_end_date ASC

Related

How can I increment the numerical value in my WHERE clause using a loop?

I am currently using the UNION ALL workaround below to calculate old_eps_tfq regression slopes of each ticker based off its corresponding rownum value (see WHERE rownum < x). I am interested to know what the old_eps_tfq is when rownum < 4 then increment 4 by 1 to find out what old_eps_tfq is when rownum < 5, and so on (there are ~20 rownum)
Could I use PL/pgSQL for this?
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 4
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
UNION ALL
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 5
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
Here is my table
The top query SELECT * FROM (...) q sounds like useless.
Then you can try this :
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
max,
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
INNER JOIN generate_series(4, 24) AS max -- the range 4 to 24 can be adjusted to the need
ON rownum < max
GROUP BY max, ticker, current_period_end_date
ORDER BY max asc, ticker asc

SQL Debugging Help Needed

I am writing a query in Redshift to answer the question "Give the average lifetime spend of users who spent more on their first order than their second order." This is based off of an order_items table which has one row for every item ordered (so an order with 3 items would be represented in 3 rows). Here's a snapshot of the first 10 rows:
First 10 rows of order_items:
Here is my solution:
with
cte1_lifetime as (
select oi.user_id, sum(oi.sale_price) as lifetime_spend
from order_items as oi
group by oi.user_id
),
cte2_order as (
select oi.user_id, oi.order_id, sum(oi.sale_price) as order_total, rank() over(partition by oi.user_id order by oi.created_at) as order_rank
from order_items as oi
group by oi.user_id, oi.order_id, oi.created_at
order by oi.user_id, oi.order_id
),
cte3_first_order as (
select user_id, order_id, order_total
from cte2_order
where order_rank=1
order by user_id, order_id
),
cte4_second_order as (
select user_id, order_id, order_total
from cte2_order
where order_rank=2
order by user_id, order_id
)
select avg(cte1.lifetime_spend) as average_lifetime_spend
from cte1_lifetime as cte1
where exists (
select *
from cte3_first_order as cte3, cte4_second_order as cte4
where cte3.user_id=cte4.user_id
and cte1.user_id=cte3.user_id
and cte3.order_total > cte4.order_total)
And here is the answer key:
WITH
table1 AS
(SELECT user_id, order_id,
SUM(sale_price) OVER (PARTITION BY order_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as order_total,
RANK() OVER (PARTITION BY user_id ORDER BY created_at) AS "sequence"
FROM order_items)
,
table2 AS
(SELECT user_id, SUM(sale_price) AS lifetime_spend
FROM order_items
WHERE EXISTS
(SELECT t1.user_id
FROM table1 t1, table1 t2
WHERE t1.user_id = t2.user_id AND t1.sequence = 1 AND t2.sequence = 2 AND t1.order_total>t2.order_total
AND t1.user_id = order_items.user_id)
GROUP BY 1
ORDER BY 1)
SELECT AVG(lifetime_spend)
FROM table2
These answers yield slightly different results on the same data- an average lifetime spend of $215 vs $220. I'd really like to understand why they are different but so far I can't figure it out. Any ideas?

How to get number of consecutive days from current date using postgres?

I want to get the number of consecutive days from the current date using Postgres SQL.
enter image description here
Above is the scenario in which I have highlighted consecutive days count should be like this.
Below is the SQL query which I have created but it's not returning the expected result
with grouped_dates as (
select user_id, created_at::timestamp::date,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as grouping_date
from watch_history
)
select * , dense_rank() over (partition by grouping_date order by created_at::timestamp::date) as in_streak
from grouped_dates where user_id = 702
order by created_at::timestamp::date
Can anyone please help me to resolve this issue?
If anyhow we can able to apply distinct for created_at field to below query then I will get solutions for my issue.
WITH list AS
(
SELECT user_id,
(created_at::timestamp::date - (row_number() over (partition by user_id order by created_at::timestamp::date) || ' days')::interval)::date as next_day
FROM watch_history
)
SELECT user_id, count(*) AS number_of_consecutive_days
FROM list
WHERE next_day IS NOT NULL
GROUP BY user_id
Does anyone have an idea how to apply distinct to created_at for the above mentioned query ?
To get the "number of consecutive days" for the same user_id :
WITH list AS
(
SELECT user_id
, array_agg(created_at) OVER (PARTITION BY user_id ORDER BY created_at RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING) AS consecutive_days
FROM watch_history
)
SELECT user_id, count(DISTINCT d.day) AS number_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
To get the list of "consecutive days" for the same user_id :
WITH list AS
(
SELECT user_id
, array_agg(created_at) OVER (PARTITION BY user_id ORDER BY created_at RANGE BETWEEN CURRENT ROW AND '1 day' FOLLOWING) AS consecutive_days
FROM watch_history
)
SELECT user_id
, array_agg(DISTINCT d.day ORDER BY d.day) AS list_of_consecutive_days
FROM list
CROSS JOIN LATERAL unnest(consecutive_days) AS d(day)
WHERE array_length(consecutive_days, 1) > 1
GROUP BY user_id
full example & result in dbfiddle

Reset increment in PostgreSQL

I just started learning Postgres, and I'm trying to make an aggregation table that has the columns:
user_id
booking_sequence
booking_created_time
booking_paid_time
booking_price_amount
total_spent
All columns are provided, except for the booking_sequence column. I need to make a query that shows the first five flights of each user that has at least x purchases and has spent more than a certain amount of money, then sort it by the amount of money spent by the user, and then sort it by the booking sequence column.
I've tried :
select user_id,
row_number() over(partition by user_id order by user_id) as booking_sequence,
booking_created_time as booking_created_date,
booking_price_amount,
sum(booking_price_amount) as total_booking_price_amount
from fact_flight_sales
group by user_id, booking_created_time, booking_price_amount
having count(user_id) > 5
and total_booking_price_amount > 1000
order by total_booking_price_amount;
I got 0 when I added count(user_id) > 5, and total_booking_price_amount is not found when I add the second condition in the HAVING clause.
Edit:
I managed to make the code function correctly, for those who are curious:
select x.user_id, row_number() over(partition by x.user_id)
as booking_sequence, x.booking_created_time::date as booking_created_date, x.booking_price_amount,
sum(y.booking_price_amount) as total_booking_price_amount from
(
select user_id, booking_created_time, booking_price_amount from fact_flight_sales
group by user_id, booking_created_time, booking_price_amount
) as x
join
(
select user_id, booking_price_amount
from fact_flight_sales group by user_id, booking_price_amount
) as y
on x.user_id = y.user_id
group by x.user_id, x.booking_created_time, x.booking_price_amount
having count(x.user_id) >= 1 and sum(y.booking_price_amount) >250000
order by total_booking_price_amount desc, booking_sequence asc;
Big thanks to Laurenz for the help!
About count(user_id) > 5:
HAVING is calculated before window functions are evaluated, So result rows excluded by the HAVING clause will not be used to calculate the window function.
About total_booking_price_amount in HAVING:
You cannot use aliases from the SELECT list in the HAVING clause. You will have to repeat the expression (or use a subquery).

Selecting the 1st and 10th Records Only

Have a table with 3 columns: ID, Signature, and Datetime, and it's grouped by Signature Having Count(*) > 9.
select * from (
select s.Signature
from #Sigs s
group by s.Signature
having count(*) > 9
) b
join #Sigs o
on o.Signature = b.Signature
order by o.Signature desc, o.DateTime
I now want to select the 1st and 10th records only, per Signature. What determines rank is the Datetime descending. Thus, I would expect every Signature to have 2 rows.
Thanks,
I would go with a couple of common table expressions.
The first will select all records from the table as well as a count of records per signature, and the second one will select from the first where the record count > 9 and add row_number partitioned by signature - and then just select from that where the row_number is either 1 or 10:
With cte1 AS
(
SELECT ID, Signature, Datetime, COUNT(*) OVER(PARTITION BY Signature) As NumberOfRows
FROM #Sigs
), cte2 AS
(
SELECT ID, Signature, Datetime, ROW_NUMBER() OVER(PARTITION BY Signature ORDER BY DateTime DESC) As Rn
FROM cte1
WHERE NumberOfRows > 9
)
SELECT ID, Signature, Datetime
FROM cte2
WHERE Rn IN (1, 10)
ORDER BY Signature desc
Because I don't know what your data looks like, this might need some adjustment.
The simplest way here, since you already know your sort order (DateTime DESC) and partitioning (Signature), is probably to assign row numbers and then select the rows you want.
SELECT *
FROM
(
select o.Signature
,o.DateTime
,ROW_NUMBER() OVER (PARTITION BY o.Signature ORDER BY o.DateTime DESC) [Row]
from (
select s.Signature
from #Sigs s
group by s.Signature
having count(*) > 9
) b
join #Sigs o
on o.Signature = b.Signature
order by o.Signature desc, o.DateTime
)
WHERE [Row] IN (1,10)