Postgresql, Get the top 5 products that have increased in value from yesterday to today, returning the delta - postgresql

I have a pricing table that contains the pricing data for products. There are around 600 unique product_id, each currently having 4 days worth of pricing data, which will eventually go up to 30 days. The table below is a small subset of the data to represent that table structure:
date
product_id
price_trend
2022-08-21
1
0.08
2022-08-22
1
0.18
2022-08-23
1
0.30
2022-08-21
2
0.15
2022-08-22
2
0.20
2022-08-23
2
0.22
So in my script, for each product_id I am trying to get yesterdays price_trend and todays price_trend and then calculate the price_delta between the two. I then order by price_delta and limit the results to 5.
I am having some issues as in some cases yesterdays price_trend is 0 and then todays price trend is 0.50 for example. This does not mean that the price trend has increased, but mostly likely that price_trend was not gathered yesterday for whatever reason.
Now I would like to remove any records where price_trend for today or yesterday equals 0, however, when I add AND pricing.trend_price > 0 the value return is just null instead.
Script:
SELECT
magic_sets_cards.name,
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) - INTERVAL '2 DAY' FROM pricing)
AND pricing.trend_price > 0) AS price_yesterday,
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) FROM pricing)
AND pricing.trend_price > 0) AS price_today,
((SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) FROM pricing)) -
(SELECT pricing.trend_price
FROM pricing
WHERE pricing.product_id = magic_sets_cards_identifiers.mcm_id
AND pricing.date = (SELECT MAX(date) - INTERVAL '2 DAY' FROM pricing))) AS price_delta
FROM magic_sets
JOIN magic_sets_cards ON magic_sets_cards.set_id = magic_sets.id
JOIN magic_sets_cards_identifiers ON magic_sets_cards_identifiers.card_id = magic_sets_cards.id
JOIN pricing ON pricing.product_id = magic_sets_cards_identifiers.mcm_id
WHERE magic_sets.code = '2X2'
AND pricing.date = (SELECT MAX(date) FROM pricing)
ORDER BY price_delta DESC
LIMIT 5
Results:
name
price_yesterday
price_today
price_delta
"Fiery Justice"
null
0.50
0.50
"Hostage Taker"
3.50
4.00
0.50
"Damnation"
17.02
17.33
0.31
"Bring to Light"
0.42
0.72
0.30
"City of Brass"
17.41
17.68
0.27
I would like to get it so that the "Fiery Justice" in this example is just ignored.

with the use of rank() you can get the output ., Look into...
Query without null rows :
with cte as (Select
product_id,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE null END) today,
SUM(CASE WHEN rank = 2 THEN price_trend ELSE null END) yesterday,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) -
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) as diff
FROM (
SELECT
product_id,
price_trend,
date,
rank() OVER (PARTITION BY product_id ORDER BY date DESC) as rank
FROM tableName where price_trend>0 and date between current_date-5 and current_date-4) p
WHERE rank in (1,2)
GROUP BY product_id
) select * from cte where (case when today is null or yesterday is null then 'NULL' else 'VALID' end)!='NULL'
Query with null values :
Select
product_id,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) today,
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) yesterday,
SUM(CASE WHEN rank = 1 THEN price_trend ELSE 0 END) -
SUM(CASE WHEN rank = 2 THEN price_trend ELSE 0 END) as diff
FROM (
SELECT
product_id,
price_trend,
date,
rank() OVER (PARTITION BY product_id ORDER BY date DESC) as rank
FROM tableName where date between current_date-5 and current_date-4) p
WHERE rank in (1,2)
GROUP BY product_id
Change the condition :
where date between current_date-3 and current_date-2
OUTPUT :
product_id today yesterday diff
1 0.06 0.02 0.04
2 0.64 0.62 0.02
CREATE TABLE tableName
(
date date,
product_id int,
price_trend numeric(9,2)
);
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-21 ', '1 ', '0.02');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-22 ', '1 ', '0.06');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-23 ', '1 ', '0.10');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-24 ', '1 ', '0.13');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-25 ', '1 ', '0.18');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-26 ', '1 ', '0.30');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-21 ', '2 ', '0.62');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-22 ', '2 ', '0.64');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-23 ', '2 ', '0.69');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-24 ', '2 ', '0.78');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-25 ', '2 ', '0.88');
INSERT INTO tableName (date ,product_id ,price_trend) VALUES ('2022-08-26 ', '2 ', '0.90');

Related

Window Function To Calculate Closing Percentage

I am working with a query in PSQL and I am trying to use a window function to divide two other window function counts. This is what I currently have:
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
CASE
WHEN date_quoted IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
CASE WHEN edocs_signed_date IS NOT NULL THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
CASE
WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales
The line that I am trying to get to work is the 3rd Case:
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))/
COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))* 100.0 AS numeric)
ELSE NULL
END as month_closing
I am basically trying to divide the 2nd count window function by the 1st count window function and get a percentage for month_closing.
These are my current results:
"2020-08-01 00:00:00-04" 869272 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Jesus" 1 1 100.0
"2020-08-01 00:00:00-04" 875518 "2020-08-19 00:00:00" "2020-09-01 00:00:00" "Jim" 36 1 0.0
"2020-08-01 00:00:00-04" 876462 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Nick" 39 12 0.0
"2020-08-01 00:00:00-04" 873572 "2020-08-04 00:00:00" "2020-08-04 00:00:00" "Piero" 63 36 0.0
I am only getting either 0.00 or 1.00 in my last column where I am trying to calculate the closing percentage. How can I make this work to get a true percentage?
Thanks!
what is happening is that for ex using your data db engine first calculate division integer / integer ( ex 1/ 36 which result is 0 with data type of integer) then it does the multiply 0 * 100.0 ( integer * numeric which output data type is numeric but the result is 0.00
so either cast first count(*) to numeric
or multiply it by 1.0
or if you are calculating percentage multiply first count(*) by 100.00 first like so:
WITH month_cte as ( Select generate_series(date_trunc('month', current_date) - interval '12' month, date_trunc('month', current_date), interval '1' month) as month_year
)
select DISTINCT ON (month_year, q.rep_name) month_cte.*, q.*
FROM month_CTE
LEFT JOIN (
select *,
CASE
WHEN date_quoted IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted))
ELSE NULL
END as month_quotes,
CASE WHEN edocs_signed_date IS NOT NULL
THEN COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date))
ELSE NULL
END as month_sales,
CASE WHEN date_quoted IS NOT NULL And edocs_signed_date IS NOT NULL
THEN CAST(COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', edocs_signed_date)) * 100.0
/ COUNT(*) OVER (PARTITION BY rep_name, date_trunc('month', date_quoted)) AS numeric)
ELSE NULL
END as month_closing
FROM quote_report_view
ORDER BY rep_name, edocs_signed_date, date_quoted
) q
ON (date_trunc('month', q.date_quoted) = month_cte.month_year OR date_trunc('month', q.edocs_signed_date) = month_cte.month_year)
ORDER BY month_year, rep_name, month_quotes, month_sales

Generating series Postgres

I want to be able to generate groups of row by days, weeks, month or depending on the interval I set
Following this solution, it works when granularity is by month. But trying the interval of 1 week, no records are being returned.
This is the rows on my table
This is the current query I have for per month interval, which works perfectly.
SELECT *
FROM (
SELECT day::date
FROM generate_series(timestamp '2018-09-01'
, timestamp '2018-12-01'
, interval '1 month') day
) d
LEFT JOIN (
SELECT date_trunc('month', created_date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date >= date '2018-09-01'
AND created_date <= date '2018-12-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
ORDER BY day;
Result from this query
And this is the per week interval query. I will reduce the range to two months for brevity.
SELECT *
FROM (
SELECT day::date
FROM generate_series(timestamp '2018-09-01'
, timestamp '2018-11-01'
, interval '1 week') day
) d
LEFT JOIN (
SELECT date_trunc('week', created_date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date >= date '2018-09-01'
AND created_date <= date '2018-11-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
ORDER BY day;
Take note that I have records from October, but the result here doesn't show anything for October dates.
Any idea what I am missing here?
Results from your first query are not truncated to the begin of the week.
date_trunc('2018-09-01'::date, 'week')::date
is equal to
'2018-08-27'::date
so your join using day is not working
'2018-09-01'::date <> '2018-08-27'::date
Your query should look more like that:
SELECT *
FROM (
SELECT day::date
FROM generate_series(date_trunc('week',timestamp '2018-09-01') --series begin trunc
, timestamp '2018-11-01'
, interval '1 week') day
) d
LEFT JOIN (
SELECT date_trunc('week', created_date::date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date::date >= date '2018-09-01'
AND created_date::date <= date '2018-11-01'
-- AND ... more conditions
) t2 GROUP BY 1
) t USING (day)
WHERE day >= '2018-09-01' --to skip days from begining of the week to the begining of the series before trunc
ORDER BY day;

Query tuning postgres

I'm trying to reduce response time for below postgresql query from current 5 seconds to 1...attaching explain plan too for this query..please help...
(
SELECT
1 AS RowNumber
,'Total Countries' AS RowLabel
,COUNT(DISTINCT ITS.abc CountryTrading) AS Aggregation
FROM ObjectViews.abc InstnTradeSummary AS ITS
WHERE ITS.KeyInstn = 7402194
AND ITS.TradeDataMonthYearPublish >= date_trunc('month', current_date) + interval '-5 years'
AND ITS.TradeDataMonthYearPublish <= date_trunc('month', current_date)
AND ITS.abc CountryTrading IS NOT NULL
GROUP BY ITS.KeyInstn
UNION
SELECT
2 AS RowNumber
,'Total Shipments' AS RowLabel
,SUM(ITS.ShipmentCount) AS TotalShipments
FROM ObjectViews.abc InstnTradeSummary AS ITS
WHERE ITS.KeyInstn = 7402194
AND ITS.TradeDataMonthYearPublish >= date_trunc('month', current_date) + interval '-5 years'
AND ITS.TradeDataMonthYearPublish <= date_trunc('month', current_date)
GROUP BY ITS.KeyInstn
UNION
SELECT
3 AS RowNumber
,'Total Weight in kg' AS RowLabel
,SUM(COALESCE(ITS.ShipmentWeightAR, ITS.ShipmentWeightEst)) AS TotalShipmentWeight
FROM ObjectViews.abc InstnTradeSummary AS ITS
WHERE ITS.KeyInstn = 7402194
AND ITS.TradeDataMonthYearPublish >= date_trunc('month', current_date) + interval '-5 years'
AND ITS.TradeDataMonthYearPublish <= date_trunc('month', current_date)
GROUP BY ITS.KeyInstn
UNION
SELECT
4 AS RowNumber
,'Total Volume in TEU' AS RowLabel
,SUM(COALESCE(ITS.ShipmentVolumeAR, ITS.ShipmentVolumeEst)) AS TotalShipmentVolume
FROM ObjectViews.abc InstnTradeSummary AS ITS
WHERE ITS.KeyInstn = 7402194
AND ITS.TradeDataMonthYearPublish >= date_trunc('month', current_date) + interval '-5 years'
AND ITS.TradeDataMonthYearPublish <= date_trunc('month', current_date)
GROUP BY ITS.KeyInstn
) ORDER BY RowNumber
Below is explain plan for the query...
https://explain.depesz.com/s/xgC2
Read the table once, do your formatting after:
SELECT
v.row_number,
v.row_label,
CASE v.row_number
WHEN 1 THEN s.total_countries
WHEN 2 THEN s.total_shipments
WHEN 3 THEN s.total_shipment_weight
ELSE s.total_shipment_volume
END AS total
FROM (
VALUES
(1, 'Total Countries'),
(2, 'Total Shipments'),
(3, 'Total Weight in kg'),
(4, 'Total Volume in TEU')
) AS v(row_number, row_label)
LEFT JOIN (
SELECT
COUNT(DISTINCT ITS.abc CountryTrading) FILTER (WHERE ITS.abc CountryTrading IS NOT NULL) AS total_countries,
SUM(ITS.ShipmentCount) AS total_shipments,
SUM(COALESCE(ITS.ShipmentWeightAR, ITS.ShipmentWeightEst)) AS total_shipment_weight,
SUM(COALESCE(ITS.ShipmentVolumeAR, ITS.ShipmentVolumeEst)) AS total_shipment_volume
FROM ObjectViews.abc InstnTradeSummary AS ITS
WHERE ITS.KeyInstn = 7402194
AND ITS.TradeDataMonthYearPublish >= date_trunc('month', current_date) + interval '-5 years'
AND ITS.TradeDataMonthYearPublish <= date_trunc('month', current_date)
GROUP BY ITS.KeyInstn
) AS s ON TRUE
ORDER BY v.row_number

Find date sequence in PostgreSQL

I'm trying to find the maximum sequence of days by customer in my data. I want to understand what is the max sequence of days that specific customer made. If someone enter to my app in the 25/8/16 AND 26/08/16 AND 27/08/16 AND 01/09/16 AND 02/09/16 - The max sequence will be 3 days (25,26,27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table: custid | orderdate(timestemp)
For exmple:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using PostgreSQL 2014.
Thanks
Trying:
select custid, max(num_days) as longest
from (
select custid,rn, count (*) as num_days
from (
select custid, date(orderdate),
cast (row_number() over (partition by custid order by date(orderdate)) as varchar(5)) as rn
from table_
) x group by custid, CURRENT_DATE - INTERVAL rn|| ' day'
) y group by custid
Try:
SELECT custid, max( abc ) as max_sequence_of_days
FROM (
SELECT custid, yy, count(*) abc
FROM (
SELECT * ,
SUM( xx ) OVER (partition by custid order by orderdate ) yy
FROM (
select * ,
CASE WHEN
orderdate - lag( orderdate ) over (partition by custid order by orderdate )
<= 1
THEN 0 ELSE 1 END xx
from mytable
) x
) z
GROUP BY custid, yy
) q
GROUP BY custid
Demo: http://sqlfiddle.com/#!15/00422/11
===== EDIT ===========
Got "operator does not exist: interval <= integer"
This means that orderdate column is of type timestamp, not date.
In this case you need to use <= interval '1' day condition instead of <= 1:
Please see this link: https://www.postgresql.org/docs/9.0/static/functions-datetime.html to learn more about date arithmetic in PostgreSQL
Please see this demo:
http://sqlfiddle.com/#!15/7c2200/2
SELECT custid, max( abc ) as max_sequence_of_days
FROM (
SELECT custid, yy, count(*) abc
FROM (
SELECT * ,
SUM( xx ) OVER (partition by custid order by orderdate ) yy
FROM (
select * ,
CASE WHEN
orderdate - lag( orderdate ) over (partition by custid order by orderdate )
<= interval '1' day
THEN 0 ELSE 1 END xx
from mytable
) x
) z
GROUP BY custid, yy
) q
GROUP BY custid

Yearly total for items over multiple years

I am trying to find the total per year
For example
Start date End Date Total Value
1 07/01/14 01/01/15 $10,000
2 08/01/13 12/01/14 $10,000
3 03/01/13 05/01/15 $10,000
As you can see, Some items are over multiple years. Is there a way to find out what the total value is per year.
Solution should be:
item 3
2013- $3600
2014-$4800
2015-1600
Then a summation would be down for all three items to give a yearly total.
What I have so far:
I have a rolling summation code which is shown below.
case when
(
[begin date] >= dateadd(mm,0,DATEADD(mm,DATEDIFF(mm,0,getdate()),0))
and [end date] >= dateadd(mm,0,DATEADD(mm,DATEDIFF(mm,0,getdate()),0))
)
OR
(
[Begin Date] < dateadd(mm,0,DATEADD(mm,DATEDIFF(mm,0,getdate()),0))
and [End Date] >= dateadd(mm,0,DATEADD(mm,DATEDIFF(mm,0,getdate()),0))
)
then [Totalvalue]/nullif(DATEDIFF(mm,[begin date],[end date]),0)
else 0
end [Current Month]
I dono how you got that total values for item 3
but for item 3 i hope it should be
2013 = 3704
2014 = 4444
2015 = 1852
Dono how efficient this code is just have a try
CREATE TABLE #tblName
(
itemid INT,
startdate DATETIME,
endate DATETIME,
value int
)
INSERT INTO #tblName
VALUES (1,'2014/07/01','2015/01/01',10000),
(2,'2013/08/01','2014/12/01',10000),
(3,'2013/03/01','2015/05/01',10000)
DECLARE #mindate DATETIME,
#maxdate DATETIME
SELECT #mindate = Min(startdate),
#maxdate = Max(endate)
FROM #tblName
SELECT *
FROM #tblName;
WITH cte
AS (SELECT #mindate startdate
UNION ALL
SELECT Dateadd(mm, 1, startdate) startdate
FROM cte
WHERE startdate <= Dateadd(mm, -1, #maxdate))
SELECT a.value * ( ( convert(numeric(22,6),a.cnt) / convert(numeric(22,6),c.total) ) * 100 ) / 100,a.itemid,a.startdate
FROM (SELECT Avg(value) value,
Count(1) cnt,
itemid,
Year(a.startdate) startdate
FROM cte a
JOIN #tblName b
ON a.startdate BETWEEN b.startdate AND b.endate
GROUP BY itemid,
Year(a.startdate)) a
JOIN(SELECT Sum(cnt) total,
itemid
FROM (SELECT Avg(value) value,
Count(1) cnt,
itemid,
Year(a.startdate) startdate
FROM cte a
JOIN #tblName b
ON a.startdate BETWEEN b.startdate AND b.endate
GROUP BY itemid,
Year(a.startdate)) B
GROUP BY itemid) C
ON a.itemid = c.itemid
WHERE a.itemid = 3