how to find three records with highest average? - tsql

I need to find the three ship countries with the highest average freight:
-- Average freight per ship country for orders shipped in 2007, highest first.
SELECT
    shipcountry,
    AVG(freight) AS avgfreight
FROM sales.orders
WHERE YEAR(shippeddate) = 2007
GROUP BY shipcountry
ORDER BY avgfreight DESC
I am not able to use the TOP clause to return only the top 3 averages. Any pointers would be highly appreciated.

Here's one way using a subquery:
-- Top three ship countries by average freight for orders shipped in 2007.
-- The derived table produces one average per country; the outer query
-- sorts those averages and keeps the three largest.
-- Note: TOP without WITH TIES picks arbitrarily among equal averages.
select top 3 shipcountry, avgfreight
from (
    select shipcountry, avg(freight) as avgfreight
    from sales.orders
    where year(shippeddate) = 2007
    group by shipcountry
) t
order by avgfreight desc

Using CTE
-- Per-country average freight for 2007 shipments is computed once in a CTE;
-- the final select keeps the three highest averages.
-- The leading semicolon terminates any preceding unterminated statement.
;WITH country_avg AS (
    SELECT
        shipcountry,
        AVG(freight) AS avgfreight
    FROM sales.orders
    WHERE YEAR(shippeddate) = 2007
    GROUP BY shipcountry
)
SELECT TOP (3)
    shipcountry,
    avgfreight
FROM country_avg
ORDER BY avgfreight DESC

Try
-- Cap result sets on this connection at three rows, then list the
-- per-country average freight for 2007 shipments, highest first.
SET ROWCOUNT 3

SELECT
    shipcountry,
    AVG(freight) AS avgfreight
FROM sales.orders
WHERE YEAR(shippeddate) = 2007
GROUP BY shipcountry
ORDER BY avgfreight DESC
This will limit the number of rows returned (for every query executed on that connection).
If you're re-using the connection (issuing more statements etc) you'll want to reset rowcount when you're done.
e.g.
-- Limit the result to three rows for this one query only.
SET ROWCOUNT 3

SELECT
    shipcountry,
    AVG(freight) AS avgfreight
FROM sales.orders
WHERE YEAR(shippeddate) = 2007
GROUP BY shipcountry
ORDER BY avgfreight DESC

-- 0 switches the row limit off again for later statements on the connection.
SET ROWCOUNT 0

-- TOP can be applied directly to the grouped query: the sort on the
-- aggregate happens before the first three rows are taken.
SELECT TOP (3)
    shipcountry,
    AVG(freight) AS avgfreight
FROM sales.orders
WHERE YEAR(shippeddate) = 2007
GROUP BY shipcountry
ORDER BY avgfreight DESC

try this:
-- Self-contained demo: top three ship countries by average freight in 2007.
-- A table variable has to be named with '@'; '#' names are temp tables and
-- are created with CREATE TABLE, not DECLARE.
-- The sample dates are fixed inside 2007 so the year filter below matches
-- regardless of when the script is run.
declare @sales_orders table
(
    shipcountry varchar(max),
    freight int,
    shippeddate datetime
)
insert into @sales_orders (shipcountry, freight, shippeddate) values
('India', 2000, '20070615'),
('India', 2100, '20070615'),
('India', 2500, '20070615'),
('SriLanka', 1000, '20070615'),
('SriLanka', 1500, '20070615'),
('SriLanka', 1200, '20070615'),
('China', 500, '20070615'),
('China', 1000, '20070615'),
('China', 900, '20070615'),
('USA', 100, '20070615'),
('USA', 200, '20070615'),
('USA', 600, '20070615')
-- freight is int, so AVG returns an int (fractions are truncated):
-- India 2200, SriLanka 1233, China 800, USA 300.
;with cte
as
(
    select shipcountry, AVG(freight) as avgfreight
    from @sales_orders
    where year(shippeddate) = 2007
    group by shipcountry
)
select top 3 avgfreight, shipcountry from cte order by avgfreight desc

Related

Combine pairs from two columns with dense_rank

I have pairs of values that I'd like to group with common ID.
Sample data FIDDLE
-- Each row is one pair of values that should end up in the same group.
CREATE TABLE sample_data (
    value_a integer,
    value_b integer
);

INSERT INTO sample_data (value_a, value_b)
VALUES
    (100, 200),
    (400, 500),
    (400, 600),
    (800, 900),
    (800, 1500),
    (1000, 800);
So far I have tried this, which works only when the common value is in value_a:
-- Rank every pair by its value_a and emit both members of the pair with
-- that rank as the group id. UNION removes duplicate (value, group_id) rows.
SELECT
    value_a AS value,
    DENSE_RANK() OVER (ORDER BY value_a) AS group_id
FROM sample_data
UNION
SELECT
    value_b AS value,
    DENSE_RANK() OVER (ORDER BY value_a) AS group_id
FROM sample_data
ORDER BY group_id, value
First two groups are fine, but I want last 3 rows from the table to be grouped together like this:
100, 1
200, 1
400, 2
500, 2
600, 2
800, 3
900, 3
1000, 3
1500, 3
OK, I got it working with a workaround, using this rather ugly query:
-- Step 1: number every pair by its value_a.
WITH ranked_pairs AS (
    SELECT
        sd.value_a,
        sd.value_b,
        DENSE_RANK() OVER (ORDER BY sd.value_a) AS group_id
    FROM sample_data AS sd
),
-- Step 2: give each pair the smallest group id found among the pairs that
-- share either value with it (the pair itself always qualifies).
-- Only directly linked pairs are considered; longer chains of links are not
-- followed.
linked_pairs AS (
    SELECT
        rp.value_a,
        rp.value_b,
        (
            SELECT other.group_id
            FROM ranked_pairs AS other
            WHERE rp.value_b = other.value_a
               OR rp.value_a = other.value_b
               OR rp.value_a = other.value_a
               OR rp.value_b = other.value_b
            ORDER BY other.group_id
            LIMIT 1
        ) AS group_id
    FROM ranked_pairs AS rp
)
-- Step 3: unpivot both columns into a single id column, de-duplicated.
SELECT group_id, value_a AS id FROM linked_pairs
UNION
SELECT group_id, value_b AS id FROM linked_pairs
ORDER BY group_id, id

SQL Debugging Help Needed

I am writing a query in Redshift to answer the question "Give the average lifetime spend of users who spent more on their first order than their second order." This is based off of an order_items table which has one row for every item ordered (so an order with 3 items would be represented in 3 rows). Here's a snapshot of the first 10 rows:
First 10 rows of order_items:
Here is my solution:
-- Average lifetime spend of users whose first order total exceeds their
-- second order total.
with
-- Total spend per user across all order_items rows.
cte1_lifetime as (
select oi.user_id, sum(oi.sale_price) as lifetime_spend
from order_items as oi
group by oi.user_id
),
-- One row per (user_id, order_id, created_at) with the summed sale_price and
-- a rank by created_at within the user. The rank is computed over the grouped
-- rows, not over individual item rows.
-- NOTE(review): because created_at is part of the GROUP BY, an order whose
-- item rows carry different created_at values would be split into several
-- rows here; confirm against the data.
cte2_order as (
select oi.user_id, oi.order_id, sum(oi.sale_price) as order_total, rank() over(partition by oi.user_id order by oi.created_at) as order_rank
from order_items as oi
group by oi.user_id, oi.order_id, oi.created_at
order by oi.user_id, oi.order_id
),
-- Rows ranked first for each user.
cte3_first_order as (
select user_id, order_id, order_total
from cte2_order
where order_rank=1
order by user_id, order_id
),
-- Rows ranked second for each user.
cte4_second_order as (
select user_id, order_id, order_total
from cte2_order
where order_rank=2
order by user_id, order_id
)
-- Keep users having a rank-1 row whose total is greater than a rank-2 row's
-- total, then average their lifetime spend.
select avg(cte1.lifetime_spend) as average_lifetime_spend
from cte1_lifetime as cte1
where exists (
select *
from cte3_first_order as cte3, cte4_second_order as cte4
where cte3.user_id=cte4.user_id
and cte1.user_id=cte3.user_id
and cte3.order_total > cte4.order_total)
And here is the answer key:
-- Answer key: average lifetime spend of users whose first order total
-- exceeds their second order total.
WITH
-- One row per order_items row (there is no GROUP BY): order_total repeats the
-- whole order's total on every item row, and "sequence" ranks the item rows
-- by created_at within the user.
-- NOTE(review): RANK gives tied rows the same rank and skips the following
-- ranks. If the item rows of one order share a created_at, a first order with
-- k items takes rank 1 k times and the next order starts at rank k + 1, so
-- sequence = 2 is only reached when the first order has a single item.
-- Verify against the data; this may explain differences from per-order ranking.
table1 AS
(SELECT user_id, order_id,
SUM(sale_price) OVER (PARTITION BY order_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as order_total,
RANK() OVER (PARTITION BY user_id ORDER BY created_at) AS "sequence"
FROM order_items)
,
-- Lifetime spend per user, restricted to users that have a sequence-1 row
-- with a larger order_total than one of their sequence-2 rows.
table2 AS
(SELECT user_id, SUM(sale_price) AS lifetime_spend
FROM order_items
WHERE EXISTS
(SELECT t1.user_id
FROM table1 t1, table1 t2
WHERE t1.user_id = t2.user_id AND t1.sequence = 1 AND t2.sequence = 2 AND t1.order_total>t2.order_total
AND t1.user_id = order_items.user_id)
GROUP BY 1
ORDER BY 1)
-- Average over the qualifying users.
SELECT AVG(lifetime_spend)
FROM table2
These answers yield slightly different results on the same data: an average lifetime spend of $215 vs. $220. I'd really like to understand why they differ, but so far I can't figure it out. Any ideas?

Find the first five occurrences of unique values and use them in aggregate functions

As the title says, I need to find the first five occurrences of each value in my table and then aggregate them.
Table structure:
user_id
booking_created_time
booking_paid_time
booking_price_amount
Code:
-- Lists every booking of a user with a per-user sequence number and the
-- user's total booking amount, for users whose total exceeds 25,000,000.
-- There is no limit on the number of bookings per user in this version.
select x.user_id, row_number() over(partition by x.user_id order by x.booking_created_time)
as booking_sequence, x.booking_created_time::date as booking_created_date, x.booking_price_amount,
sum(y.booking_price_amount) as total_booking_price_amount from
(
-- x: distinct (user, created time, price) combinations.
select user_id, booking_created_time, booking_price_amount from fact_flight_sales
group by user_id, booking_created_time, booking_price_amount
) as x
join
(
-- y: distinct (user, price) combinations. Two bookings of the same user with
-- the same price collapse into one row here, so they are summed only once.
select user_id, booking_price_amount
from fact_flight_sales group by user_id, booking_price_amount
) as y
-- Every x row is paired with all y rows of the same user, so the sum below is
-- the user's total repeated on each booking row.
on x.user_id = y.user_id
group by x.user_id, x.booking_created_time, x.booking_price_amount
-- count(x.user_id) >= 1 holds for every group the join produces; the
-- effective filter is the total threshold.
having count(x.user_id) >= 1 and sum(y.booking_price_amount) >25000000
order by total_booking_price_amount desc, booking_sequence asc;
The output that I have now looks like
Sample output:
user_id
booking_sequence
booking_created_date
booking_price_amount
total_booking_price_amount
sforlongf
1
2017-05-21
8257056
39826576
sforlongf
2
2017-09-19
8449307
39826576
sforlongf
3
2018-01-08
8677950
39826576
sforlongf
4
2018-09-01
4317539
39826576
sforlongf
5
2018-09-16
6196224
39826576
sforlongf
6
2018-12-16
3928500
39826576
smassy0
1
2017-04-09
9109669
33241207
smassy0
2
2017-06-11
2609767
33241207
smassy0
3
2018-03-31
9809016
33241207
smassy0
4
2018-11-02
7223492
33241207
smassy0
5
2018-11-06
4489263
33241207
As you can see, sforlongf has 6 occurrences. How do I limit its occurrences to 5 and have that affect the total_booking_price_amount?
I just can't get the table to work! Sorry about that! It works in preview :/
I managed to make it work by adding a row_number() to each subquery and limiting on it:
-- Number each user's bookings by creation time in both derived tables, keep
-- only numbers 1-5 on each side, then total the kept prices per user.
SELECT
    x.user_id,
    ROW_NUMBER() OVER (PARTITION BY x.user_id ORDER BY x.booking_created_time) AS booking_sequence,
    x.booking_created_time::date AS booking_created_date,
    x.booking_price_amount,
    SUM(y.booking_price_amount) AS total_booking_price_amount
FROM (
    -- x: distinct (user, created time, price) rows, numbered per user.
    SELECT
        user_id,
        booking_created_time,
        booking_price_amount,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY booking_created_time) AS booking_sequence
    FROM fact_flight_sales
    GROUP BY user_id, booking_created_time, booking_price_amount
) AS x
JOIN (
    -- y: every booking row with its per-user number, used for the total.
    SELECT
        user_id,
        booking_price_amount,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY booking_created_time) AS booking_sequence
    FROM fact_flight_sales
    ORDER BY booking_created_time
) AS y
    ON x.user_id = y.user_id
WHERE x.booking_sequence <= 5
  AND y.booking_sequence <= 5
GROUP BY x.user_id, x.booking_created_time, x.booking_price_amount
HAVING COUNT(x.user_id) >= 1
   AND SUM(y.booking_price_amount) > 25000000
ORDER BY total_booking_price_amount DESC, booking_sequence ASC;

PostgreSQL pass value into INNER JOIN

PostgreSQL 11
How can I pass the o.create_date value into the INNER JOIN subquery? I need the max ID up to o.create_date.
-- Quantity and total cost per order for store 13 / supplier 35, pricing each
-- line with the supplier price row selected by the lsp subquery.
SELECT o.id,
o.create_date date,
sum(oi.quantity) qty,
sum(oi.quantity * sp.price) total
FROM ax_order o
-- NOTE(review): this table name is spelled "ax_order_invenotry" here but
-- "ax_order_inventory" in the other queries of this thread; confirm the
-- actual name.
LEFT JOIN ax_order_invenotry oi on o.id = oi.order_id
LEFT JOIN ax_inventory i on i.id = oi.inventory_id
LEFT JOIN ax_suppliers s on s.id = o.supplier_id
INNER JOIN ax_supplier_price sp ON (sp.inventory_id = oi.inventory_id and sp.supplier_id = o.supplier_id)
INNER JOIN
(
-- Highest price id per inventory item up to the order date.
-- This derived table refers to o.create_date from the outer query, which a
-- plain (non-LATERAL) subquery in FROM is not allowed to do; that is the
-- problem being asked about.
SELECT inventory_id,
max(id) id
FROM ax_supplier_price
WHERE create_date <= o.create_date
GROUP BY inventory_id
) lsp ON (sp.id = lsp.id)
WHERE o.store_id = 13
AND o.supplier_id = 35
GROUP BY o.id, o.create_date
ORDER BY o.id
You could use the LATERAL join mechanism to make it work:
-- Mock tables so the example runs stand-alone.
WITH ax_order AS (
    SELECT *
    FROM (VALUES (1, '2000-1-1'::date, 1, 1)) as x(id, create_date, store_id, supplier_id)
), ax_order_inventory AS (
    SELECT *
    FROM (VALUES (1, 2, 4)) as x(order_id, inventory_id, quantity)
), ax_supplier_price AS (
    SELECT *
    FROM (VALUES (1, 2, 1, 10, '1999-12-31'::date)) as x(id, inventory_id, supplier_id, price, create_date)
)
-- Quantity and total per order, pricing each line with the supplier price
-- row that has the highest id among those created on or before the order date.
SELECT o.id,
       o.create_date date,
       sum(oi.quantity) qty,
       sum(oi.quantity * sp.price) total
FROM ax_order o
LEFT JOIN ax_order_inventory oi on o.id = oi.order_id
INNER JOIN ax_supplier_price sp ON (sp.inventory_id = oi.inventory_id and sp.supplier_id = o.supplier_id)
INNER JOIN LATERAL
(
    -- LATERAL lets this subquery read o.create_date from the outer row.
    -- The date filter has to test the candidate price row (lsp), not the
    -- already-joined sp row; otherwise max(lsp.id) would be taken over all
    -- price rows regardless of their date.
    SELECT inventory_id,
           max(lsp.id) id
    FROM ax_supplier_price lsp
    WHERE lsp.create_date <= o.create_date
    GROUP BY inventory_id
) lsp ON sp.id = lsp.id
GROUP BY o.id, o.create_date
ORDER BY o.id
I deleted some JOINs that were not strictly necessary and mocked your data as well as I could see. Note, however, that you could also use a WHERE clause to find it - which should be more efficient:
-- Mock tables so the example runs stand-alone.
WITH ax_order AS (
    SELECT *
    FROM (VALUES (1, '2000-1-1'::date, 1, 1)) as x(id, create_date, store_id, supplier_id)
),
ax_order_inventory AS (
    SELECT *
    FROM (VALUES (1, 2, 4)) as x(order_id, inventory_id, quantity)
),
ax_supplier_price AS (
    SELECT *
    FROM (VALUES (1, 2, 1, 10, '1999-12-31'::date)) as x(id, inventory_id, supplier_id, price, create_date)
)
-- Quantity and total per order, pricing each line with the supplier price
-- row that has the highest id among those created on or before the order date.
SELECT o.id,
       o.create_date date,
       sum(oi.quantity) qty,
       sum(oi.quantity * sp.price) total
FROM ax_order o
LEFT JOIN ax_order_inventory oi on o.id = oi.order_id
INNER JOIN ax_supplier_price sp
    ON (sp.inventory_id = oi.inventory_id and sp.supplier_id = o.supplier_id)
WHERE sp.id =
(
    -- NOTE: no GROUP BY necessary! The subquery is correlated on the
    -- inventory item, so it yields a single value per outer row.
    -- The date filter tests the candidate row (lsp); testing sp.create_date
    -- would not restrict which price rows max() is taken over.
    SELECT max(lsp.id) id
    FROM ax_supplier_price lsp
    WHERE lsp.create_date <= o.create_date
      AND lsp.inventory_id = sp.inventory_id
)
GROUP BY o.id, o.create_date
ORDER BY o.id

SQL Query correct way of doing a right outer join?

Having the following query as example:
-- t1: inline sample of (itemid, yearcreated) rows; t2: the list of years.
-- The right join keeps every year from t2 and attaches the t1 rows that
-- have the same year.
SELECT t1.itemid, t2.yearcreated
FROM (
    SELECT '100051' AS 'itemid', '2012' AS yearcreated
    UNION
    SELECT '100051' AS 'itemid', '2013' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2011' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2012' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2013' AS yearcreated
) t1
RIGHT OUTER JOIN (
    SELECT '2011' AS yearcreated
    UNION
    SELECT '2012'
    UNION
    SELECT '2013'
) t2
    ON t1.yearcreated = t2.yearcreated
ORDER BY t1.itemid, t2.yearcreated
It gives this result:
100051 2012
100051 2013
100052 2011
100052 2012
100052 2013
What do I need to change in order to get 1 row per year like this?
100051 2011(desired new row generated by correct outer join)
100051 2012
100051 2013
100052 2011
100052 2012
100052 2013
Take into account that the real query will have more columns that need a GROUP BY or a min() function to be shown.
Your explanation is somewhat unclear.
To get your desired results in this instance you can use a CROSS JOIN rather than a RIGHT JOIN
-- Pair every item with every year: the cross join produces all
-- (t1 row, year) combinations and DISTINCT collapses the repeats that arise
-- because an item appears in t1 once per year it already has.
SELECT DISTINCT t1.itemid, t2.yearcreated
FROM (
    SELECT '100051' AS 'itemid', '2012' AS yearcreated
    UNION
    SELECT '100051' AS 'itemid', '2013' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2011' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2012' AS yearcreated
    UNION
    SELECT '100052' AS 'itemid', '2013' AS yearcreated
) t1
CROSS JOIN (
    SELECT '2011' AS yearcreated
    UNION
    SELECT '2012'
    UNION
    SELECT '2013'
) t2
ORDER BY t1.itemid, t2.yearcreated