How can I remove the null values and make it to 10 rows in Postgresql? - postgresql

I am new to Postgresql. I have a table called 'sales'.
create table sales
(
cust varchar(20),
prod varchar(20),
day integer,
month integer,
year integer,
state char(2),
quant integer
)
insert into sales values ('Bloom', 'Pepsi', 2, 12, 2001, 'NY', 4232);
insert into sales values ('Knuth', 'Bread', 23, 5, 2005, 'PA', 4167);
insert into sales values ('Emily', 'Pepsi', 22, 1, 2006, 'CT', 4404);
insert into sales values ('Emily', 'Fruits', 11, 1, 2000, 'NJ', 4369);
insert into sales values ('Helen', 'Milk', 7, 11, 2006, 'CT', 210);
insert into sales values ('Emily', 'Soap', 2, 4, 2002, 'CT', 2549);
something like this:
Now I want to find the “most favorable” month (when most amount of the product was
sold) and the “least favorable” month (when the least amount of the product was sold) for each product.
The result should be like this:
I entered
SELECT
prod product,
MAX(CASE WHEN rn2 = 1 THEN month END) MOST_FAV_MO,
MAX(CASE WHEN rn1 = 1 THEN month END) LEAST_FAV_MO
FROM (
SELECT
*,
ROW_NUMBER() OVER(PARTITION BY prod ORDER BY quant ) rn1,
ROW_NUMBER() OVER(PARTITION BY prod ORDER BY quant DESC) rn2
FROM sales
) x
WHERE rn1 = 1 or rn2 = 1
GROUP BY prod,quant;
Then there are null values for each product and there are 20 rows in total:
So how can I remove the null values in these rows and make the total number of rows to 10 (There are 10 distinct products in total)???

I would say that the GROUP BY clause should be
GROUP BY prod
Otherwise you get one line per different quant, which is not what you want.

Related

Oracle SQL return value from child table with minimum row number with values in specific list

I have a need to select all rows from a table (main table) and join to another table (child table). In the results set, I want to include one column from the child table, that is only the first row / line number with a column value in a specified list. If there is no match for the specified list, it should be (null)
Desired Result:
ORDER_NO
ORDER_DATE
ORDER CUST
ORDER_VALUE
ITEM
1
02/14/2022
12345
$1,000.00
APPLES
2
02/13/2022
67890
$5,000.00
(null)
3
02/12/2022
45678
$100.00
PEARS
Example:
Main Table: Order Table
Order Number (Handle)
Order Date,
Order Customer,
Order Value
ORDER_NO
ORDER_DATE
ORDER CUST
ORDER_VALUE
1
02/14/2022
12345
$1,000.00
2
02/13/2022
67890
$5,000.00
3
02/12/2022
45678
$100.00
Child Table: Order Details Tbl
Order Number (Handle)
Line Number = Order Line No
Ordered Item,
Ordered Qty
ORDER_NO
LINE_NO
ITEM
1
10
APPLES
1
20
ORANGES
1
30
LETTUCE
2
10
BROCCOLI
2
20
CAULIFLOWER
2
30
LETTUCE
3
10
KALE
3
20
RADISHES
3
30
PEARS
In this example, the returned column is essentially the first line of the order that is a fruit, not a vegetable. And if the order includes no matching fruit, null is returned.
What my code is thus far:
SELECT
MAIN.ORDER_NO,
MAIN.ORDER_DATE,
MAIN.ORDER_CUST,
MAIN.ORDER_VALUE,
B.ITEM
FROM
MAIN
LEFT JOIN
(
SELECT
CHILD.ORDER_NO,
CHILD.LINE_NO,
CHILD.ITEM
FROM
CHILD
WHERE
CHILD.ORDER_NO||'_'||LINE_NO IN
(
SELECT
CHILD.ORDER_NO||'_'||MIN(LINE_NO) AS ORDER_LINE_NO
FROM
CHILD
WHERE
CHILD.ITEM IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
GROUP BY
CHILD.ORDER_NO
)
) B ON MAIN.ORDER_NO = B.ORDER_NO
'''
This code is of course not working as desired, as table 'B' is including all results from CHILD.
From Oracle 12, you can use:
SELECT o.*,
d.item
FROM orders o
LEFT OUTER JOIN LATERAL(
SELECT *
FROM order_details d
WHERE o.order_no = d.order_no
AND item IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
ORDER BY line_no ASC
FETCH FIRST ROW ONLY
) d
ON (1 = 1)
In earlier versions you can use:
SELECT o.*,
d.item
FROM orders o
LEFT OUTER JOIN(
SELECT d.*,
ROW_NUMBER() OVER (PARTITION BY order_no ORDER BY line_no ASC)
AS rn
FROM order_details d
WHERE item IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
) d
ON (o.order_no = d.order_no AND rn = 1)
Which, for the sample data:
CREATE TABLE orders (ORDER_NO, ORDER_DATE, ORDER_CUST, ORDER_VALUE) AS
SELECT 1, DATE '2022-02-14', 12345, 1000.00 FROM DUAL UNION ALL
SELECT 2, DATE '2022-02-13', 67890, 5000.00 FROM DUAL UNION ALL
SELECT 3, DATE '2022-02-12', 45678, 100.00 FROM DUAL;
CREATE TABLE Order_Details (ORDER_NO, LINE_NO, ITEM) AS
SELECT 1, 10, 'APPLES' FROM DUAL UNION ALL
SELECT 1, 20, 'ORANGES' FROM DUAL UNION ALL
SELECT 1, 30, 'LETTUCE' FROM DUAL UNION ALL
SELECT 2, 10, 'BROCCOLI' FROM DUAL UNION ALL
SELECT 2, 20, 'CAULIFLOWER' FROM DUAL UNION ALL
SELECT 2, 30, 'LETTUCE' FROM DUAL UNION ALL
SELECT 3, 10, 'KALE' FROM DUAL UNION ALL
SELECT 3, 20, 'RADISHES' FROM DUAL UNION ALL
SELECT 3, 30, 'PEARS' FROM DUAL;
Both output:
ORDER_NO
ORDER_DATE
ORDER_CUST
ORDER_VALUE
ITEM
1
2022-02-14 00:00:00
12345
1000
APPLES
2
2022-02-13 00:00:00
67890
5000
null
3
2022-02-12 00:00:00
45678
100
PEARS
db<>fiddle here

Cohort Analysis with RedShift by Month

I am trying to build a cohort analysis for monthly retention but experiencing challenge getting the Month Number column right. The month number is supposed to return month(s) user transacted i.e 0 for registration month, 1 for the first month after registration month, 2 for the second month until the last month but currently, it returns negative month numbers in some cells.
It should be like this table:
cohort_month total_users month_number percentage
---------- ----------- -- ------------ ---------
January 100 0 40
January 341 1 90
January 115 2 90
February 103 0 73
February 100 1 40
March 90 0 90
Here is the SQL:
with cohort_items as (
select
extract(month from insert_date) as cohort_month,
msisdn as user_id
from mfscore.t_um_user_detail where extract(year from insert_date)=2020
order by 1, 2
),
user_activities as (
select
A.sender_msisdn,
extract(month from A.insert_date)-C.cohort_month as month_number
from mfscore.t_wm_transaction_logs A
left join cohort_items C ON A.sender_msisdn = C.user_id
where extract(year from A.insert_date)=2020
group by 1, 2
),
cohort_size as (
select cohort_month, count(1) as num_users
from cohort_items
group by 1
order by 1
),
B as (
select
C.cohort_month,
A.month_number,
count(1) as num_users
from user_activities A
left join cohort_items C ON A.sender_msisdn = C.user_id
group by 1, 2
)
select
B.cohort_month,
S.num_users as total_users,
B.month_number,
B.num_users * 100 / S.num_users as percentage
from B
left join cohort_size S ON B.cohort_month = S.cohort_month
where B.cohort_month IS NOT NULL
order by 1, 3
I think the RANK window function is the right solution. So the idea is to assigne a rank to months of user activities for each user, order by year and month.
Something like:
WITH activity_per_user AS (
SELECT
user_id,
event_date,
RANK() OVER (PARTITION BY user_id ORDER BY DATE_PART('year', event_date) , DATE_PART('month', event_date) ASC) AS month_number
FROM user_activities_table
)
RANK number starts from 1, so you may want to substract 1.
Then, you can group by user_id and month_number to get the number of interactions for each user per month from the subscription (adapt to your use case accordingly).
SELECT
user_id,
month_number,
COUNT(1) AS n_interactions
FROM activity_per_user
GROUP BY 1, 2
Here is the documentation:
https://docs.aws.amazon.com/redshift/latest/dg/r_WF_RANK.html

Capture first character of last group of 1s in a binary series

I have a series something like this:
Month J F M A M J J A S O N D
Status 1 0 0 1 0 1 0 0 1 1 1 1
Using t-SQL, I am trying to capture the month corresponding to the first 1 in the last group of 1s, i.e., September in this example.
Here is the code I'm using:
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL DROP TABLE #Temp1
;WITH PARTITIONED1 AS
(SELECT , t0.ID
, t0.Year_Month
, t0.Status
, LAST_VALUE(t0.Year_Month) OVER (PARTITION BY t0.ID ORDER BY t0.Year_Month) AS D_YM
, ROW_NUMBER() OVER (PARTITION BY t0.ID ORDER BY t0.Year_Month) AS rn1
FROM #Temp0 t0
However, this just returns the first occurence of a 1; January here.
I really can't figure this one out, so any help would be very much appreciated.
Carefull with
although the ordering is performed in a previous stage
The previous sorting does not guarantee the later processing!
Try something like this. It is a very simple approach where you rely on gapless IDs:
DECLARE #tbl TABLE(ID INT IDENTITY,Mnth VARCHAR(100),[Status] TINYINT);
INSERT INTO #tbl VALUES
('J',1)
,('F',0)
,('M',0)
,('A',1)
,('M',0)
,('J',1)
,('J',0)
,('A',0)
,('S',1)
,('O',1)
,('N',1)
,('D',1);
SELECT a.*
FROM #tbl AS a
WHERE a.ID=(SELECT MAX(b.ID)+1 FROM #tbl AS b WHERE b.[Status]=0)
this can also be used :
select top 1 Month from table t where Status=1
and not exists
(select id from table t1 where stat=0 and t1.id>t.id)
order by t.id
I might have overcomplicated this but not knowing the table structure I put the below together:
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL DROP TABLE #Temp1
CREATE TABLE #Temp1
(
Jan int,
Feb int,
Mar int,
Apr int,
May int,
June int,
July int ,
Aug int,
Sep int,
Oct int,
Nov int,
Dec int
)
insert into #temp1
select
1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1
IF OBJECT_ID('tempdb..#monthTranslate') IS NOT NULL DROP TABLE #monthTranslate
create table #monthTranslate
(
MonthValue varchar(50),
MonthInt int
)
insert into #monthTranslate
select 'Jan',1
union all select 'Feb',2
union all select 'Mar',3
union all select 'Apr',4
union all select 'May',5
union all select 'June',6
union all select 'July',7
union all select 'Aug',8
union all select 'Sep',9
union all select 'OCt',10
union all select 'Nov',11
union all select 'Dec',12
--find the max month w\ 0 and add 1... becareful on null, it might return January incorrectly. I'd check for that in a a case statement
select max(b.MonthInt)+1
from
(
select
MonthPassVal, months , t.MonthInt
from
(
select Jan, Feb, Mar, Apr, May, June, July, Aug, Sep, Oct, Nov, Dec
from #temp1
) as r
Unpivot
(
MonthPassVal for Months
in (Jan, Feb, Mar, Apr, May, June, July, Aug, Sep, Oct, Nov, Dec)
) as u
inner join #monthTranslate t
on t.MonthValue = months
) as b
where
MonthPassVal=0

Count number of unique purchase dates

I have a log of purchases made by customers. Sometimes a customer purchases multiple items during a given purchase, other times they only purchase a single item. What I want to do, on a line by line basis, is identify which purchase events have happened (i.e. not on an item by item basis, but on a checkout by checkout basis).
Each row of the source database contains the following fields
cust_id, purchase_date, sku
So a customer who purchases three items during a given transaction would look like this
1, 01/01/01, dog1
1, 01/01/01, cat1
1, 01/01/01, mouse1
1, 01/02/01, wolf1
1, 01/03/01, lion1
WHat I want out is
cust_id, purchase_date, sku, item_purchase_number_within_purchase, unique_purchase_date_across_dates
And that would look like
1, 01/01/01, dog1, 1, 1
1, 01/01/01, cat1, 2, 1
1, 01/01/01, mouse1, 3, 1
1, 01/02/01, wolf1, 1, 2
1, 01/03/01, lion1, 1, 3
In words, on the first date, three items where purchased arbitrarily identified as purchase numbers, 1, 2, and 3, on the second purchase date (Jan 2nd, 2001), only a single item was purchase, but this was the second purchasing event, and then on the third purchasing date (Jan 3, 2001) there was another single item purchased.
I'm trying to do this in oracle10g. I'm not sure how to describe what I'm accomplishing.
This is the sql I have so far
SELECT
cust_id, purchase_date, sku, ROW_NUMBER() OVER (PARTITION BY purchase_date ORDER BY sku)
FROM
[table]
Thanks
You seem to want dense_rank() rather than row_number() (or rank()) to avoid gaps. With your sample data in a CTE:
with t (cust_id, purchase_date, sku) as (
select 1, date '2001-01-01', 'dog1' from dual
union all select 1, date '2001-01-01', 'cat1' from dual
union all select 1, date '2001-01-01', 'mouse1' from dual
union all select 1, date '2001-01-02', 'wolf1' from dual
union all select 1, date '2001-01-03', 'lion1' from dual
)
select cust_id, purchase_date, sku,
dense_rank() over (partition by cust_id, purchase_date order by sku)
as item_within_purchase,
dense_rank() over (partition by cust_id order by purchase_date)
as purchase_event
from t;
CUST_ID PURCHASE_D SKU ITEM_WITHIN_PURCHASE PURCHASE_EVENT
---------- ---------- ------ -------------------- --------------
1 2001-01-01 cat1 1 1
1 2001-01-01 dog1 2 1
1 2001-01-01 mouse1 3 1
1 2001-01-02 wolf1 1 2
1 2001-01-03 lion1 1 3
The first extra column is partition by both customer and date, and ordered by SKU as you had; the second is only partitioned by customer, and ordered by date.

generate new column using other columns

I am stuck with generating a new column. The table has three columns(C_ID, C_rank, Date).
C_ID C_ Rank NewColumn(Cycle) Date
42 A 1 October 14, 2010
42 B 1 October 26, 2010
42 A 2 February 16, 2011
43 A 1 December 17, 2010
44 A 1 July 28, 2010
44 B 1 August 10, 2010
44 A 2 January 11, 2011
44 B 2 January 28, 2011
45 A 1 July 30, 2010
45 B 1 August 9, 2010
45 B 1 September 24, 2010
45 A 2 April 5, 2011
45 B 2 April 26, 2011
I want to generate one more column called Cycle in such a way that for each C_ID, it should generate the number start from one and increment the number from next C_rank = 'A' (a shown above).
I tried using row_number, but no luck.
Maybe some loop option till next C_Rank = 'A' works.
How can this be done?
You should be able to get this done using ROW_NUMBER() and PARTITION BY
;WITH YourDataCTE AS
(
SELECT
C_ID, C_Rank, Date,
ROW_NUMBER() OVER(PARTITION BY C_ID,C_Rank ORDER BY Date DESC) AS 'Cycle'
FROM
dbo.YourTable
)
SELECT *
FROM YourDataCTE
Does that do what you're looking for??
The PARTITION BY C_ID,C_Rank will cause the ROW_NUMBER to start at 1 again for each different value of C_ID,C_Rank - I didn't know what ORDER BY clause within a single partition (a single value of C_ID,C_Rank) you're looking for and just guessed it might be Date DESC (newest date first).
You could count the number of previous A's in a subquery:
select *
, (
select count(*)
from #YourTable yt2
where yt2.C_ID = yt1.C_ID
and yt2.C_Rank = 'A'
and yt2.Date <= yt1.Date
) as Cycle
from #YourTable yt1
order by
C_ID, Date
Example at ODATA.
Do a self join for all records with the same C_ID, a previous date, and a C_Rank='A' and count them.
select t1.C_ID, t1.C_Rank, count(t2.C_Rank) Cycle, t1.Date
from MyTable t1
left join MyTable t2 on t1.C_ID=t2.C_ID
and t2.Date<=t1.Date
and t2.C_Rank='A'
group by t1.C_ID, t1.C_Rank, t1.Date
order by t1.C_ID, t1.Date
Below code fulfill the requirement:
create table #Temp_Table
(
C_ID int
, C_Rank char(1)
, Date datetime
, NewColumn int
)
insert into #Temp_Table
(
C_ID
, C_Rank
, Date
)
select 42, ‘A’, ’10/14/2010′
union all
select 42, ‘B’, ’10/26/2010′
union all
select 42, ‘B’, ’10/14/2010′
union all
select 42, ‘C’, ’10/26/2010′
union all
select 42, ‘A’,’02/16/2011′
union all
select 43, ‘A’, ’12/17/2010′
union all
select 44, ‘A’, ’07/28/2010′
union all
select 44, ‘B’, ’08/10/2010′
union all
select 44, ‘A’, ’01/11/2011′
union all
select 44, ‘B’, ’01/28/2011′
union all
select 44, ‘C’, ’10/14/2010′
union all
select 44, ‘D’, ’10/26/2010′
Select ‘Original Data’ Comment
,*
from #Temp_Table
/*
This would be Actual Script to get the New ID based on information you provided
*/
Declare #Count int
,#C_ID int
,#C_Rank char(1)
,#total_Count int
,#Count_Partition int
,#Previous_ID int
Declare #Table Table (ID int IDENTITY(1,1), C_ID int, C_Rank char(1), Date datetime, NewColumn int )
Set #Count = 1
Set #Count_Partition = 0
insert into #Table
Select *
from #Temp_Table
Select #total_Count = ISNULL(MAX(ID),0)
from #Table
While #Count < = #total_Count
Begin
Select #C_ID = C_ID
,#C_Rank = C_Rank
From #Table
Where ID = #Count
If #Count = 1
Set #Previous_ID = #C_ID
If #Previous_ID != #C_ID
Set #Count_Partition = 1
Else If #C_Rank = 'A'
Set #Count_Partition = #Count_Partition + 1
update #Table
Set NewColumn = #Count_Partition
Where ID = #Count
Set #Previous_ID = #C_ID
Set #Count = #Count + 1
End
Select C_ID
, C_Rank
, [Date]
, NewColumn
from #Table
–Drop table #Temp_Table