Combine pairs from two columns with dense_rank - postgresql

I have pairs of values that I'd like to group with common ID.
Sample data FIDDLE
-- Sample fixture: each row is one (value_a, value_b) pair to be grouped.
create table sample_data (
    value_a integer,
    value_b integer
);

insert into sample_data (value_a, value_b)
values
    (100, 200),
    (400, 500),
    (400, 600),
    (800, 900),
    (800, 1500),
    (1000, 800);
So far I have tried this query, which works only when the common value is in value_a:
-- Rank every pair by its value_a, then list both members of the pair under
-- that rank. UNION (not UNION ALL) keeps a value that occurs in several
-- pairs of the same rank from being listed more than once.
with ranked_pairs as (
    select
        value_a,
        value_b,
        dense_rank() over (order by value_a) as group_id
    from sample_data
)
select value_a as value, group_id
from ranked_pairs
union
select value_b as value, group_id
from ranked_pairs
order by group_id, value
First two groups are fine, but I want last 3 rows from the table to be grouped together like this:
100, 1
200, 1
400, 2
500, 2
600, 2
800, 3
900, 3
1000, 3
1500, 3

OK, I got it working with the following query:
-- Assign one group_id per connected component: two values share a group
-- when they are linked by a chain of (value_a, value_b) pairs of any length.
-- Group ids are consecutive, starting at 1, ordered by the smallest value
-- in each component.
with recursive edges as (
    -- Treat every pair as an undirected edge by listing it in both directions.
    select value_a as src, value_b as dst from sample_data
    union
    select value_b as src, value_a as dst from sample_data
),
reachable as (
    -- Transitive closure of the edges. UNION (not UNION ALL) discards rows
    -- already produced, so the recursion terminates even on cyclic data.
    select src as id, dst as peer
    from edges
    union
    select r.id, e.dst
    from reachable as r
    inner join edges as e
        on e.src = r.peer
),
components as (
    -- Every value reaches itself (via any edge and back), so the minimum
    -- reachable value is the same for all members of a component.
    select id, min(peer) as root
    from reachable
    group by id
)
select
    dense_rank() over (order by root) as group_id,
    id
from components
order by group_id, id

Related

Find the first five occurrences of unique values and use them in aggregate functions

As the title says, I need to find the first five occurrences of each value in my table and then aggregate them.
Table structure:
user_id
booking_created_time
booking_paid_time
booking_price_amount
Code:
-- One row per distinct booking, numbered per user by creation time, shown
-- next to the user's total over their distinct booking amounts. Only users
-- whose total exceeds 25,000,000 are kept.
with bookings as (
    select distinct user_id, booking_created_time, booking_price_amount
    from fact_flight_sales
),
amounts as (
    select distinct user_id, booking_price_amount
    from fact_flight_sales
)
select
    b.user_id,
    row_number() over (
        partition by b.user_id
        order by b.booking_created_time
    ) as booking_sequence,
    cast(b.booking_created_time as date) as booking_created_date,
    b.booking_price_amount,
    sum(a.booking_price_amount) as total_booking_price_amount
from bookings as b
inner join amounts as a
    on a.user_id = b.user_id
group by b.user_id, b.booking_created_time, b.booking_price_amount
having sum(a.booking_price_amount) > 25000000
   and count(b.user_id) >= 1
order by total_booking_price_amount desc, booking_sequence asc;
The output that I have now looks like
Sample output:
user_id
booking_sequence
booking_created_date
booking_price_amount
total_booking_price_amount
sforlongf
1
2017-05-21
8257056
39826576
sforlongf
2
2017-09-19
8449307
39826576
sforlongf
3
2018-01-08
8677950
39826576
sforlongf
4
2018-09-01
4317539
39826576
sforlongf
5
2018-09-16
6196224
39826576
sforlongf
6
2018-12-16
3928500
39826576
smassy0
1
2017-04-09
9109669
33241207
smassy0
2
2017-06-11
2609767
33241207
smassy0
3
2018-03-31
9809016
33241207
smassy0
4
2018-11-02
7223492
33241207
smassy0
5
2018-11-06
4489263
33241207
As you can see, sforlongf has 6 occurrences. How do I limit its occurrences to 5 and have that limit also affect total_booking_price_amount?
I just can't get the table to work! Sorry about that! It works in preview :/
I managed to make it work by numbering each user's bookings with row_number() in the subqueries and keeping only the first five:
-- Keep only each user's first five bookings (by creation time) and total
-- the price over exactly those rows. Users whose five-booking total does
-- not exceed 25,000,000 are dropped.
with ranked_bookings as (
    -- GROUP BY collapses duplicate fact rows before numbering, so the rows
    -- that are listed and the rows that are summed are the same set.
    select
        user_id,
        booking_created_time,
        booking_price_amount,
        row_number() over (
            partition by user_id
            order by booking_created_time
        ) as booking_sequence
    from fact_flight_sales
    group by user_id, booking_created_time, booking_price_amount
),
first_five as (
    -- The window sum runs after the WHERE filter, so it covers at most
    -- five rows per user; no self-join is needed.
    select
        user_id,
        booking_sequence,
        booking_created_time::date as booking_created_date,
        booking_price_amount,
        sum(booking_price_amount) over (partition by user_id) as total_booking_price_amount
    from ranked_bookings
    where booking_sequence <= 5
)
select
    user_id,
    booking_sequence,
    booking_created_date,
    booking_price_amount,
    total_booking_price_amount
from first_five
where total_booking_price_amount > 25000000
order by total_booking_price_amount desc, booking_sequence asc;

Selecting the 1st and 10th Records Only

Have a table with 3 columns: ID, Signature, and Datetime, and it's grouped by Signature Having Count(*) > 9.
-- Every #Sigs row whose Signature occurs more than nine times,
-- preceded by the qualifying Signature itself.
select *
from (
    select grp.Signature
    from #Sigs as grp
    group by grp.Signature
    having count(*) > 9
) as b
inner join #Sigs as o
    on b.Signature = o.Signature
order by
    o.Signature desc,
    o.DateTime
I now want to select the 1st and 10th records only, per Signature. What determines rank is the Datetime descending. Thus, I would expect every Signature to have 2 rows.
Thanks,
I would go with a couple of common table expressions.
The first will select all records from the table as well as a count of records per signature, and the second one will select from the first where the record count > 9 and add row_number partitioned by signature - and then just select from that where the row_number is either 1 or 10:
-- counted: tag each row with the number of rows sharing its Signature.
-- ranked:  keep signatures with more than nine rows and number their rows
--          newest-first.
-- The final SELECT keeps rows 1 and 10 of each remaining signature.
WITH counted AS
(
    SELECT
        ID,
        Signature,
        Datetime,
        COUNT(*) OVER (PARTITION BY Signature) AS NumberOfRows
    FROM #Sigs
),
ranked AS
(
    SELECT
        ID,
        Signature,
        Datetime,
        ROW_NUMBER() OVER (PARTITION BY Signature ORDER BY DateTime DESC) AS Rn
    FROM counted
    WHERE NumberOfRows > 9
)
SELECT
    ID,
    Signature,
    Datetime
FROM ranked
WHERE Rn = 1 OR Rn = 10
ORDER BY Signature DESC
Because I don't know what your data looks like, this might need some adjustment.
The simplest way here, since you already know your sort order (DateTime DESC) and partitioning (Signature), is probably to assign row numbers and then select the rows you want.
-- Number each qualifying signature's rows newest-first, then keep rows 1
-- and 10 of every signature.
SELECT *
FROM
(
    SELECT o.Signature
          ,o.DateTime
          ,ROW_NUMBER() OVER (PARTITION BY o.Signature ORDER BY o.DateTime DESC) AS [Row]
    FROM (
        -- Signatures that occur more than nine times.
        SELECT s.Signature
        FROM #Sigs s
        GROUP BY s.Signature
        HAVING COUNT(*) > 9
    ) b
    JOIN #Sigs o
        ON o.Signature = b.Signature
    -- No ORDER BY here: SQL Server rejects ORDER BY inside a derived table
    -- unless TOP, OFFSET or FOR XML is also specified.
) AS numbered -- SQL Server requires an alias on every derived table
WHERE [Row] IN (1,10)
ORDER BY Signature DESC, [Row]

Selecting other columns not in count, group by

So I have a table as follows
product_id sender_id timestamp ...other columns...
1 2 1222
1 2 3423
1 2 1231
2 2 890
3 4 234
2 3 234234
I want to get rows where sender_id = 2, but I want to count and group by product_id and sort by timestamp descending. This means I need the following result
product_id sender_id timestamp count ...other columns...
1 2 3423 3
2 2 890 1
I tried the following query:
-- Attempt 1: sender_id and timestamp are selected but are neither listed in
-- GROUP BY nor wrapped in an aggregate.
-- "...other columns..." is a placeholder, not literal SQL.
SELECT product_id, sender_id, timestamp, count(product_id), ...other columns...
FROM table
WHERE sender_id = 2
GROUP BY product_id
But I get the following error Error in query: ERROR: column "table.sender_id" must appear in the GROUP BY clause or be used in an aggregate function
Seems like I cannot SELECT columns that are not in the GROUP BY. Another method which I found online was to join
-- Attempt 2: join each row to a per-product row count computed in a subquery.
-- "...other columns..." is a placeholder, not literal SQL.
SELECT product_id, sender_id, timestamp, count, ...other columns...
FROM table
JOIN (
-- Row count per product_id over the whole table (no sender_id filter here).
SELECT product_id, COUNT(product_id) AS count
FROM table
GROUP BY (product_id)
) table1 ON table.product_id = table1.product_id
WHERE sender_id = 2
GROUP BY product_id
But doing this simply lists all rows without grouping or counting. My guess is that the ON part simply extends table again.
Try grouping using product_id, sender_id
-- For sender 2: row count and latest timestamp per (product_id, sender_id),
-- newest group first.
select
    product_id,
    sender_id,
    count(product_id),
    max(timestamp) as maxtm
from t
where sender_id = 2
group by sender_id, product_id
order by max(timestamp) desc
If you want other columns too:
-- Return the full row holding each product's latest timestamp for sender 2,
-- together with that product's row count for the sender.
with latest as (
    select
        product_id,
        sender_id,
        count(product_id) as product_count,
        max(timestamp) as maxtm
    from t
    where sender_id = 2
    group by product_id, sender_id
)
select
    t.*,
    latest.product_count
from t
inner join latest
    on  latest.product_id = t.product_id
    and latest.sender_id = t.sender_id
    and latest.maxtm = t.timestamp
order by latest.maxtm desc
Just do a workout with your data:
-- Fixture reproducing the question's sample rows. The timestamp column is
-- named time_stamp here.
CREATE TABLE products (
    product_id INTEGER,
    sender_id  INTEGER,
    time_stamp INTEGER
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO products (product_id, sender_id, time_stamp) VALUES
(1,2,1222),
(1,2,3423),
(1,2,1231),
(2,2,890),
(3,4,234),
(2,3,234234);
-- For sender 2: per product, the comma-joined timestamps and the row count.
SELECT
    product_id,
    sender_id,
    string_agg(CAST(time_stamp AS text), ','),
    count(product_id)
FROM products
WHERE sender_id = 2
GROUP BY sender_id, product_id
Here the time_stamp values differ within each group, so you need to either apply an aggregate to that column or remove it from the SELECT list.
If you remove time_stamp in select statement then it would be very easy like below :
-- Row count per product for sender 2, with no timestamp column.
SELECT
    product_id,
    sender_id,
    count(product_id)
FROM products
WHERE sender_id = 2
GROUP BY sender_id, product_id

Use WHERE statement in OVER()

I'm trying to create a query which will give me a row number for every returned record. I can do that for all records present in the database. The problem is that I need the row numbers to be computed only over the rows matching a WHERE condition (WHERE posts.status = 'published').
My original query looks like that:
-- Number every post by descending score.
SELECT p.*,
       row_number() OVER (ORDER BY p.score DESC) AS position
FROM posts AS p
However, adding a where statement inside over() throws syntax error:
-- Invalid: this is the statement that raises the syntax error, because a
-- WHERE clause is placed inside OVER (...).
SELECT
posts.*,
row_number() over (
WHERE posts.status = 'published'
ORDER BY posts.score DESC
) as position
FROM posts
-- Filter first, then number: WHERE is applied before the window function
-- runs, so only published posts are ranked.
SELECT
    published.*,
    row_number() OVER (ORDER BY published.score DESC) AS position
FROM posts AS published
WHERE published.status = 'published'
Not quite sure what you are after. Maybe show an example of the expected output. Here is an example of one approach:
-- Four sample posts; only id 2 is published.
create table posts (
    id     int,
    score  int,
    status text
);

insert into posts (id, score, status)
values
    (1, 1, 'x'),
    (2, 2, 'published'),
    (3, 3, 'x'),
    (4, 4, 'x');
-- Number only the non-published posts by descending score. Published posts
-- stay in the result with a NULL position.
SELECT
    x.id,
    x.score,
    x.status,
    CASE WHEN x.status = 'published' THEN NULL ELSE x.position END AS position
FROM (
    SELECT
        posts.*,
        -- Overall rank minus the number of published posts at or above this
        -- row gives the rank among non-published posts. id breaks score
        -- ties, and the explicit ROWS frame makes the running count advance
        -- one row at a time (the default RANGE frame would count all tied
        -- rows at once).
        row_number() OVER (ORDER BY posts.score DESC, posts.id)
            - SUM(CASE WHEN status = 'published' THEN 1 ELSE 0 END)
                OVER (
                    ORDER BY posts.score DESC, posts.id
                    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
                ) AS position
    FROM posts
) x
-- Without ORDER BY the row order of the result is not guaranteed.
ORDER BY x.score DESC, x.id
Result:
4 4 x 1
3 3 x 2
2 2 published
1 1 x 3

T-SQL group by partition

I have the table below in SQL Server 2008. Please help me get the expected output.
Thanks.
-- Test fixture: Rn numbers the rows within each Category, starting at 1.
CREATE TABLE [dbo].[Test] (
    [Category]  [varchar](10) NULL,
    [Value]     [int]         NULL,
    [Weightage] [int]         NULL,
    [Rn]        [smallint]    NULL
) ON [PRIMARY]

insert into Test ([Category], [Value], [Weightage], [Rn])
values
    ('Cat1', 310, 674, 1),
    ('Cat1', 783, 318, 2),
    ('Cat1', 310, 96, 3),
    ('Cat1', 109, 917, 4),
    ('Cat2', 441, 397, 1),
    ('Cat2', 637, 725, 2),
    ('Cat2', 460, 742, 3),
    ('Cat2', 542, 583, 4),
    ('Cat2', 601, 162, 5),
    ('Cat2', 45, 719, 6),
    ('Cat2', 46, 305, 7),
    ('Cat3', 477, 286, 1),
    ('Cat3', 702, 484, 2),
    ('Cat3', 797, 836, 3),
    ('Cat3', 541, 890, 4),
    ('Cat3', 750, 962, 5),
    ('Cat3', 254, 407, 6),
    ('Cat3', 136, 585, 7),
    ('Cat3', 198, 477, 8),
    ('Cat4', 375, 198, 1),
    ('Cat4', 528, 351, 2),
    ('Cat4', 845, 380, 3),
    ('Cat4', 716, 131, 4),
    ('Cat4', 781, 919, 5)
For per category Average Weightage
-- Per category: average Value, and average Weightage over the rows with
-- Rn < 4. Reads from Test, the table created by the question's DDL.
SELECT
    Category,
    AVG(Value) AS AvgValue,
    -- NULLIF turns a zero row count into NULL, so a category with no
    -- Rn < 4 rows yields NULL instead of a divide-by-zero error.
    SUM(CASE WHEN RN<4 THEN Weightage ELSE 0 END)
        / NULLIF(SUM(CASE WHEN RN<4 THEN 1 ELSE 0 END), 0) AS AvgWeightage
FROM
    Test
GROUP BY
    Category
Average Weightage over the whole set
-- Per category: average Value, next to the single average Weightage taken
-- over all rows with Rn < 4 (the same figure on every output row).
-- Reads from Test, the table created by the question's DDL.
SELECT
    M.Category,
    AVG(M.Value) AS AvgValue,
    foo.AvgWeightage
FROM
    Test M
CROSS JOIN
    -- One-row subquery; the cross join attaches it to every Test row.
    (SELECT AVG(Weightage) AS AvgWeightage FROM Test WHERE Rn < 4) foo
GROUP BY
    M.Category, foo.AvgWeightage
Simple:)
-- Per category: mean Value, and mean Weightage restricted to RN < 4.
-- Rows with RN >= 4 contribute NULL, which AVG ignores.
SELECT
    Category,
    AVG(Value) AS AvgValue,
    AVG(CASE WHEN RN < 4 THEN Weightage ELSE NULL END) AS AvgWeightage
FROM Test
GROUP BY Category
Try this
-- Join the per-category average Value to the per-category average
-- Weightage of the rows with Rn < 4. Categories without such rows are
-- dropped by the inner join.
SELECT
    v.Category,
    v.AvgValue,
    w.Weight
FROM (
    SELECT Category, AVG(Value) AS AvgValue
    FROM Test
    GROUP BY Category
) AS v
INNER JOIN (
    SELECT Category, AVG(Weightage) AS Weight
    FROM Test
    WHERE Rn < 4
    GROUP BY Category
) AS w
    ON w.Category = v.Category