Window Function To Calculate Closing Percentage - postgresql

I am working with a query in PSQL and I am trying to use a window function to divide two other window function counts. This is what I currently have:
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
CASE
WHEN date_quoted IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
CASE WHEN edocs_signed_date IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
CASE
WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales
The line that I am trying to get to work is the 3rd Case:
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
I am basically trying to divide the 2nd count window function by the 1st count window function and get a percentage for month_closing.
These are my current results:
"2020-08-01 00:00:00-04" 869272 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Jesus" 1 1 100.0
"2020-08-01 00:00:00-04" 875518 "2020-08-19 00:00:00" "2020-09-01 00:00:00" "Jim" 36 1 0.0
"2020-08-01 00:00:00-04" 876462 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Nick" 39 12 0.0
"2020-08-01 00:00:00-04" 873572 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Piero" 63 36 0.0
I am only getting either 0.00 or 1.00 in my last column where I am trying to calculate the closing percentage. How can I make this work to get a true percentage?
Thanks!

what is happening is that for ex using your data db engine first calculate division integer / integer ( ex 1/ 36 which result is 0 with data type of integer) then it does the multiply 0 * 100.0 ( integer * numeric which output data type is numeric but the result is 0.00
so either cast first count(*) to numeric
or multiply it by 1.0
or if you are calculating percentage multiply first count(*) by 100.00 first like so:
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
CASE
WHEN date_quoted IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
CASE WHEN edocs_signed_date IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL
THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date)) * 100.0
/ COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted)) AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales

Related

Postgresql how to get last year_month from previous year in where clause

I need to show how many active customers we had and the end of the year. Therefore I need to get always last year_month from the previous year. Working with PostgreSQL.
Here my SQL to get the customer base on monthly (year_month) view.
select *
from (
with data as (
select
a.brand,
a.d,
a.activations,
t.terminations,
a.activations-t.terminations count
from (select c.brand, dd.year_month d,
COALESCE(case when dd.year_month is not null then count(c.customer_number) else 0 end, 0) as activations
from generate_series(current_date - interval '8 years', current_date, '1 day') d
left join dim_date dd on dd."date" = d.d
left join r_contracts_report c on to_date(c.service_start_date, 'dd.mm.yyy') = d
where c.contract_status in ('aktiv', 'Kündigung vorgemerkt', 'gekündigt')
and c.contract in ('3048', '3049', '3050', '3055', '3056')
group by dd.year_month,
brand) a,
(select c.brand, dd.year_month d,
COALESCE(case when dd.year_month is not null then count(c.customer_number) else 0 end, 0) as terminations
from generate_series(current_date - interval '8 years', current_date, '1 day') d
left join dim_date dd on dd."date" = d.d
left join r_contracts_report c on to_date(c.termination_date, 'dd.mm.yyy') = d
where c.contract_status in ('aktiv', 'Kündigung vorgemerkt', 'gekündigt')
and c.contract in ('3048', '3049', '3050', '3055', '3056')
group by dd.year_month,
brand) t
where a.d = t.d
and a.brand = t.brand)
select
d.d year_month,
d.brand,
sum(count) over (order by d.d asc rows between unbounded preceding and current row) eop
from data d
where d.brand = '3'
) as foo
Using after "as foo" the following where clause I get the customer base for the last 12 months:
WHERE year_month >= to_char ((current_date - INTERVAL '12 months'), 'YYYY-MM')
And result looks like this:
But I always want to have only the December of the previous year. In this case it would be '2021-12'.
...
where year_month = '2021-12'
or automatically for the previous year:
...
where year_month = (extract(year from current_date) - 1)::text || '-12'
But this is a really inefficient way to get this data.

Postgresql, Get the top 5 products that have increased in value from yesterday to today, returning the delta

I have a pricing table that contains the pricing data for products. There are around 600 unique product_id, each currently having 4 days worth of pricing data, which will eventually go up to 30 days. The table below is a small subset of the data to represent that table structure:
date
product_id
price_trend
2022-08-21
1
0.08
2022-08-22
1
0.18
2022-08-23
1
0.30
2022-08-21
2
0.15
2022-08-22
2
0.20
2022-08-23
2
0.22
So in my script, for each product_id I am trying to get yesterdays price_trend and todays price_trend and then calculate the price_delta between the two. I then order by price_delta and limit the results to 5.
I am having some issues as in some cases yesterdays price_trend is 0 and then todays price trend is 0.50 for example. This does not mean that the price trend has increased, but mostly likely that price_trend was not gathered yesterday for whatever reason.
Now I would like to remove any records where price_trend for today or yesterday equals 0, however, when I add AND pricing.trend_price > 0 the value return is just null instead.
Script:
SELECT
magic_sets_cards.name,
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) - INTERVAL '2 DAY' FROM pricing)
AND pricing.trend_price > 0) AS price_yesterday,
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) FROM pricing)
AND pricing.trend_price > 0) AS price_today,
((SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) FROM pricing)) -
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) - INTERVAL '2 DAY' FROM pricing))) AS price_delta
FROM magic_sets
JOIN magic_sets_cards ON magic_sets_cards.set_id = magic_sets.id
JOIN magic_sets_cards_identifiers ON magic_sets_cards_identifiers.card_id = magic_sets_cards.id
JOIN pricing ON pricing.product_id = magic_sets_cards_identifiers.mcm_id
WHERE magic_sets.code = '2X2'
AND pricing.date = (SELECT MAX(date) FROM pricing)
ORDER BY price_delta DESC
LIMIT 5
Results:
name
price_yesterday
price_today
price_delta
"Fiery Justice"
null
0.50
0.50
"Hostage Taker"
3.50
4.00
0.50
"Damnation"
17.02
17.33
0.31
"Bring to Light"
0.42
0.72
0.30
"City of Brass"
17.41
17.68
0.27
I would like to get it so that the "Fiery Justice" in this example is just ignored.
with the use of rank() you can get the output ., Look into...
Query without null rows :
with cte as (Select
product_id,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE null END) today,
SUM(CASE WHEN rank = 2 THEN price_trend ELSE null END) yesterday,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) -
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) as diff
FROM (
SELECT
product_id,
price_trend,
date,
rank() OVER (PARTITION BY product_id ORDER BY date DESC) as rank
FROM tableName where price_trend>0 and date between current_date-5 and current_date-4) p
WHERE rank in (1,2)
GROUP BY product_id
) select * from cte where (case when today is null or yesterday is null then 'NULL' else 'VALID' end)!='NULL'
Query with null values :
Select
product_id,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) today,
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) yesterday,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) -
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) as diff
FROM (
SELECT
product_id,
price_trend,
date,
rank() OVER (PARTITION BY product_id ORDER BY date DESC) as rank
FROM tableName where date between current_date-5 and current_date-4) p
WHERE rank in (1,2)
GROUP BY product_id
Change the condition :
where date between current_date-3 and current_date-2
OUTPUT :
product_id today yesterday diff
1 0.06 0.02 0.04
2 0.64 0.62 0.02
CREATE TABLE tableName
(
date date,
product_id int,
price_trend numeric(9,2)
);
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-21 ', '1 ', '0.02');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-22 ', '1 ', '0.06');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-23 ', '1 ', '0.10');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-24 ', '1 ', '0.13');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-25 ', '1 ', '0.18');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-26 ', '1 ', '0.30');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-21 ', '2 ', '0.62');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-22 ', '2 ', '0.64');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-23 ', '2 ', '0.69');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-24 ', '2 ', '0.78');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-25 ', '2 ', '0.88');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-26 ', '2 ', '0.90');

Dynamic value passing in Postgres

Here is a complex query where i need to pass some dates as dynamic to this, As of now i have hardcoded this '2021-08-01' AND '2022-07-31' these 2 dates.
But i have to pass this dates dynamically in such a way that next dates ie, 2022-06 month , thew dates passed will be '2021-07-01' and '2022-06-30' , basically 12 months behind data.
if we take 2022-05 then the passed date should be '2021-06-01' and '2022-05-31'.
How can we achieve this ? Any suggestions or help will be much appreciated.
below is the query for reference
WITH base as
(
SELECT created_at as period ,order_number, TRIM(email) as email ,is_first_order
FROM orders
WHERE created_at::DATE BETWEEN '2021-08-01' AND '2022-07-31'
)
,base_agg as
(
select TO_CHAR(period,'YYYY-MM') as period
,COUNT(DISTINCT email)FILTER(WHERE is_first_order IS TRUE) as new_users
,COUNT(DISTINCT order_number)FILTER(WHERE is_first_order IS FALSE) as returning_orders
FROM base
GROUP BY 1
)
,base_cumulative as
(
SELECT ROW_NUMBER() OVER(ORDER BY PERIOD DESC ) as rno
,period
,new_users
,returning_orders
,sum("new_users")over (order by "period" asc rows between unbounded preceding and current row) as "cumulative_total"
from base_agg
)
SELECT
(SELECT period FROM base_cumulative WHERE rno=1) period
,(SELECT cumulative_total FROM base_cumulative WHERE rno=1) as cumulated_customers
,SUM(returning_orders) as returning_orders
,SUM(returning_orders)/NULLIF((SELECT cumulative_total FROM base_cumulative WHERE rno=1),0) as rate
FROM base_cumulative
You can calculate the end of current month based on NOW() and some logic, the same can be applied with the rest of the calculation
select date_trunc('month', now())::date + interval '1 month - 1 day' end_of_this_month,
date_trunc('month', now())::date + interval '1 month - 1 day'::interval - '1 year'::interval + '1 day'::interval first_day_of_prev_year_month
;
Result
end_of_this_month | first_day_of_prev_year_month
---------------------+------------------------------
2022-08-31 00:00:00 | 2021-09-01 00:00:00
(1 row)

Generating series Postgres

I want to be able to generate groups of row by days, weeks, month or depending on the interval I set
Following this solution, it works when granularity is by month. But trying the interval of 1 week, no records are being returned.
This is the rows on my table
This is the current query I have for per month interval, which works perfectly.
SELECT *
FROM (
SELECT day::date
FROM generate_series(timestamp '2018-09-01'
, timestamp '2018-12-01'
, interval '1 month') day
) d
LEFT JOIN (
SELECT date_trunc('month', created_date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date >= date '2018-09-01'
AND created_date <= date '2018-12-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
ORDER BY day;
Result from this query
And this is the per week interval query. I will reduce the range to two months for brevity.
SELECT *
FROM (
SELECT day::date
FROM generate_series(timestamp '2018-09-01'
, timestamp '2018-11-01'
, interval '1 week') day
) d
LEFT JOIN (
SELECT date_trunc('week', created_date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date >= date '2018-09-01'
AND created_date <= date '2018-11-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
ORDER BY day;
Take note that I have records from October, but the result here doesn't show anything for October dates.
Any idea what I am missing here?
Results from your first query are not truncated to the begin of the week.
date_trunc('2018-09-01'::date, 'week')::date
is equal to
'2018-08-27'::date
so your join using day is not working
'2018-09-01'::date <> '2018-08-27'::date
Your query should look more like that:
SELECT *
FROM (
SELECT day::date
FROM generate_series(date_trunc('week',timestamp '2018-09-01') --series begin trunc
, timestamp '2018-11-01'
, interval '1 week') day
) d
LEFT JOIN (
SELECT date_trunc('week', created_date::date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date::date >= date '2018-09-01'
AND created_date::date <= date '2018-11-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
WHERE day >= '2018-09-01' --to skip days from begining of the week to the begining of the series before trunc
ORDER BY day;

Find date sequence in PostgreSQL

I'm trying to find the maximum sequence of days by customer in my data. I want to understand what is the max sequence of days that specific customer made. If someone enter to my app in the 25/8/16 AND 26/08/16 AND 27/08/16 AND 01/09/16 AND 02/09/16 - The max sequence will be 3 days (25,26,27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table: custid | orderdate(timestemp)
For exmple:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using PostgreSQL 2014.
Thanks
Trying:
select custid, max(num_days) as longest
from (
select custid,rn, count (*) as num_days
from (
select custid, date(orderdate),
cast (row_number() over (partition by custid order by date(orderdate)) as varchar(5)) as rn
from table_
) x group by custid, CURRENT_DATE - INTERVAL rn|| ' day'
) y group by custid
Try:
SELECT custid, max( abc ) as max_sequence_of_days
FROM (
SELECT custid, yy, count(*) abc
FROM (
SELECT * ,
SUM( xx ) OVER (partition by custid order by orderdate ) yy
FROM (
select * ,
CASE WHEN
orderdate - lag( orderdate ) over (partition by custid order by orderdate )
<= 1
THEN 0 ELSE 1 END xx
from mytable
) x
) z
GROUP BY custid, yy
) q
GROUP BY custid
Demo: http://sqlfiddle.com/#!15/00422/11
===== EDIT ===========
Got "operator does not exist: interval <= integer"
This means that orderdate column is of type timestamp, not date.
In this case you need to use <= interval '1' day condition instead of <= 1:
Please see this link: https://www.postgresql.org/docs/9.0/static/functions-datetime.html to learn more about date arithmetic in PostgreSQL
Please see this demo:
http://sqlfiddle.com/#!15/7c2200/2
SELECT custid, max( abc ) as max_sequence_of_days
FROM (
SELECT custid, yy, count(*) abc
FROM (
SELECT * ,
SUM( xx ) OVER (partition by custid order by orderdate ) yy
FROM (
select * ,
CASE WHEN
orderdate - lag( orderdate ) over (partition by custid order by orderdate )
<= interval '1' day
THEN 0 ELSE 1 END xx
from mytable
) x
) z
GROUP BY custid, yy
) q
GROUP BY custid