postgresql monthly usage retention query - postgresql

I'm trying to write a query that checks customer retention.
This is my query:
with users_per_month as (
select count(distinct user_id) as count_active_users,
array_agg(distinct user_id) as active_users,
date_trunc('month', user_id) as month
from app_logs
group by month
),
users_per_4_month as (
select month,
active_users,
lead(active_users) over (order by month) as lead_1,
lead(active_users, 2) over (order by month) as lead_2,
lead(active_users, 3) over (order by month) as lead_3,
lead(active_users, 4) over (order by month) as lead_4
from users_per_month
)
select *
from users_per_4_month;
But I get users in a subsequent month who were not in the first month.
like user_id 3 in March (lead_3)
this is the result:
any help will be helpful, tnx :)

It works :)
with per_month as (
select to_char(a1.device_timestamp, 'yyyy-mm') as month,
array_agg(distinct a1.user_id) as active_users
from facetune2_usage_app_background a1
group by month
),
m2 as (
select month,
active_users,
lead(active_users) over (order by month) as january_users,
lead(active_users, 2) over (order by month) as february_users,
lead(active_users, 3) over (order by month) as march_users,
lead(active_users, 4) over (order by month) as april_users
from per_month
),
m3 as (
select month,
cardinality(active_users) as num_users,
(select count(*)
from (select unnest(active_users) intersect select unnest(january_users)) t) as january_retained,
(select count(*)
from (select unnest(active_users)
intersect
select unnest(february_users)) t) as febrary_retained,
(select count(*)
from (select unnest(active_users) intersect select unnest(march_users)) t) as march_retained,
(select count(*)
from (select unnest(active_users) intersect select unnest(april_users)) t) as april_retained
from m2
)
select month,
num_users,
'100' as "0",
january_retained * 100 / num_users as "1",
febrary_retained * 100 / num_users as "2",
march_retained * 100 / num_users as "3",
april_retained * 100 / num_users as "4"
from m3

Related

How can I increment the numerical value in my WHERE clause using a loop?

I am currently using the UNION ALL workaround below to calculate old_eps_tfq regression slopes of each ticker based off its corresponding rownum value (see WHERE rownum < x). I am interested to know what the old_eps_tfq is when rownum < 4 then increment 4 by 1 to find out what old_eps_tfq is when rownum < 5, and so on (there are ~20 rownum)
Could I use PL/pgSQL for this?
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 4
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
UNION ALL
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 5
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
Here is my table
The top query SELECT * FROM (...) q sounds like useless.
Then you can try this :
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
max,
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
INNER JOIN generate_series(4, 24) AS max -- the range 4 to 24 can be adjusted to the need
ON rownum < max
GROUP BY max, ticker, current_period_end_date
ORDER BY max asc, ticker asc

How to group by when using a modulo

For each company, I want to sum the revenue for the 4 most recent quarters, then the 4 subsequent ones, and so on (see screenshot attached for details). How can I do that?
SQL query and result - 1st attempt (failed)
https://i.stack.imgur.com/wWhhb.png
SELECT
ticker,
period_end_date,
revenue,
1+ ((rn - 1) % 4) AS test
FROM (
SELECT
ticker,
period_end_date,
revenue,
ROW_NUMBER() OVER (PARTITION BY ticker ORDER BY period_end_date DESC) rn
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN'
ORDER BY ticker
) q
EDIT: the following code meets my needs. The 'revenue' is summed using the most recent quarter and the 3 quarters thereafter.
SELECT
ticker,
period_end_date,
SUM(revenue) OVER (PARTITION BY ticker ORDER BY period_end_date DESC ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS total_revenue
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN'
ORDER BY ticker
You can try this :
SELECT ticker
, period_end_date
, total_revenue
FROM (
SELECT ticker
, period_end_date
, SUM(revenue) OVER (PARTITION BY ticker ORDER BY period_end_date DESC ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS total_revenue
, max(period_end_date) OVER (PARTITION BY ticker) AS period_end_date_max
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED"
--WHERE ticker = 'ACN
) q
WHERE EXTRACT(MONTH FROM period_end_date) = EXTRACT(MONTH FROM period_end_date_max)
ORDER BY ticker, period_end_date ASC

How to divide a period in columns

I am trying to create a query where the first column shows the list of the companies and the other 3 columns their revenues per month. This is what I do:
WITH time_frame AS
(SELECT date_trunc('month',NOW())-interval '0 week'),
time_frame1 AS
(SELECT date_trunc('month',NOW())-interval '1 month'),
time_frame2 AS
(SELECT date_trunc('month',NOW())-interval '2 month')
select table1.company_name,
(CASE
WHEN table2.date_of_transaction = (SELECT * FROM time_frame2) THEN sum(table2.amount)
ELSE NULL
END) AS "current week - 2",
(CASE
WHEN table2.date_of_transaction = (SELECT * FROM time_frame1) THEN sum(table2.amount)
ELSE NULL
END) AS "current week - 1",
(CASE
WHEN table2.date_of_transaction = (SELECT * FROM time_frame2) THEN
sum(table2.amount)
ELSE NULL
END) AS "current week - 2"
from table1
join table2 on table2.table1_id = table.id
where table1.company_joined >= '04-20-2019'
group by 1
When I execute the table this comes out: Error running query: column "table2.date_of_transaction" must appear in the GROUP BY clause or be used in an aggregate function LINE 15: WHEN table2.date_of_transaction = (SELECT * FROM time_frame) TH... ^
Do you have any ideas on how to solve it? Thank you.
company name
month1
month2
name 1
£233
£343
name 2
£243
£34
name 3
£133
£43
you can simplify the statement by using the filter() operator
select t1.company_name,
sum(t2.amount) filter (where t2.date_of_transaction = date_trunc('month',NOW())-interval '2 month'),
sum(t2.amount) filter (where t2.date_of_transaction = date_trunc('month',NOW())-interval '1 month'),
sum(t2.amount) filter (where t2.date_of_transaction = date_trunc('month',NOW()))
from table1 t1
join table2 t2 on t2.table1_id = t1.id
where t1.company_joined >= date '2019-04-20'
group by t1.company_name;
If you really want to put the date ranges into a CTE, you only need one:
with dates (r1, r2, r3) as (
values
(date_trunc('month',NOW())-interval '2 month',
date_trunc('month',NOW())-interval '1 month',
date_trunc('month',NOW()))
)
select t1.company_name,
sum(t2.amount) filter (where t2.date_of_transaction = d.r1),
sum(t2.amount) filter (where t2.date_of_transaction = d.r2),
sum(t2.amount) filter (where t2.date_of_transaction = d.r3)
from table1 t1
cross join dates d
join table2 t2 on t2.table1_id = t1.id
where t1.company_joined >= date '2019-04-20'
group by t1.company_name
;
The CTE dates returns a single row with three columns and thus the cross join doesn't change the resulting number of rows.

How to do percentage in Postgresql

I'm trying to create a query. I have two tables
customer (customer_id)
rental (rental_date (timestamp), customer_id)
SELECT to_char( rental_date, 'Month') AS month,
(count(Distinct customer.customer_id)/
(count (distinct rental.customer_id) * 100)
) AS percentage
FROM RENTAL,customer
GROUP BY month;
But the result is zero.
When all "numbers" involved in an operation are integers, the result will be an integer too.
Try this way:
SELECT
to_char( rental_date, 'Month') AS month,
(
count(distinct customer.customer_id)::float /
(count(distinct rental.customer_id)::float * 100::float)
)
as percentage
FROM
RENTAL,customer
GROUP BY month;

Join on generate_series and count

I'm trying to find the # users who did action A or action B on a monthly basis.
Table: User
- id
- "creationDate"
Table: action_A
- user_id (= user.id)
- "creationDate"
Table: action_B
- user_id (= user.id)
- "creationDate"
The general idea of what I was trying to do was that I'd find the list of users who did action A in Month X and the list of users who did action B in Month X, then count how many ids are there for every month based on a generate_series of monthly dates.
I tried the following, however, the query times out when running and I'm not sure if there's any way to optimize it (or if it is even correct).
SELECT monthseries."Month", count(*)
FROM
(SELECT to_char(DAY::date, 'YYYY-MM') AS "Month"
FROM generate_series('2014-01-01'::date, CURRENT_DATE, '1 month') DAY) monthseries
LEFT JOIN
(SELECT to_char("creationDate", 'YYYY-MM') AS "Month",
id
FROM action_A) did_action_A ON monthseries."Month" = did_action_A."Month"
LEFT JOIN
(SELECT to_char("creationDate", 'YYYY-MM') AS "Month",
id
FROM action_B) did_action_B ON monthseries."Month" = did_action_B."Month"
GROUP BY monthseries."Month"
Any comments/ help would be immensely helpful!
If you want to count distinct users:
select to_char(month, 'YYYY-MM') as "Month", count(*)
from
generate_series(
'2014-01-01'::date, current_date, '1 month'
) monthseries (month)
left join (
(
select distinct date_trunc('month', "creationDate") as month, id
from action_a
) a
full outer join (
select distinct date_trunc('month', "creationDate") as month, id
from action_b
) b using (month, id)
) s using (month)
group by 1
order by 1