Oracle SQL return value from child table with minimum row number with values in specific list - oracle-sqldeveloper

I have a need to select all rows from a table (main table) and join to another table (child table). In the results set, I want to include one column from the child table, that is only the first row / line number with a column value in a specified list. If there is no match for the specified list, it should be (null)
Desired Result:
ORDER_NO
ORDER_DATE
ORDER CUST
ORDER_VALUE
ITEM
1
02/14/2022
12345
$1,000.00
APPLES
2
02/13/2022
67890
$5,000.00
(null)
3
02/12/2022
45678
$100.00
PEARS
Example:
Main Table: Order Table
Order Number (Handle)
Order Date,
Order Customer,
Order Value
ORDER_NO
ORDER_DATE
ORDER CUST
ORDER_VALUE
1
02/14/2022
12345
$1,000.00
2
02/13/2022
67890
$5,000.00
3
02/12/2022
45678
$100.00
Child Table: Order Details Tbl
Order Number (Handle)
Line Number = Order Line No
Ordered Item,
Ordered Qty
ORDER_NO
LINE_NO
ITEM
1
10
APPLES
1
20
ORANGES
1
30
LETTUCE
2
10
BROCCOLI
2
20
CAULIFLOWER
2
30
LETTUCE
3
10
KALE
3
20
RADISHES
3
30
PEARS
In this example, the returned column is essentially the first line of the order that is a fruit, not a vegetable. And if the order includes no matching fruit, null is returned.
What my code is thus far:
SELECT
MAIN.ORDER_NO,
MAIN.ORDER_DATE,
MAIN.ORDER_CUST,
MAIN.ORDER_VALUE,
B.ITEM
FROM
MAIN
LEFT JOIN
(
SELECT
CHILD.ORDER_NO,
CHILD.LINE_NO,
CHILD.ITEM
FROM
CHILD
WHERE
CHILD.ORDER_NO||'_'||LINE_NO IN
(
SELECT
CHILD.ORDER_NO||'_'||MIN(LINE_NO) AS ORDER_LINE_NO
FROM
CHILD
WHERE
CHILD.ITEM IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
GROUP BY
CHILD.ORDER_NO
)
) B ON MAIN.ORDER_NO = B.ORDER_NO
'''
This code is of course not working as desired, as table 'B' is including all results from CHILD.

From Oracle 12, you can use:
SELECT o.*,
d.item
FROM orders o
LEFT OUTER JOIN LATERAL(
SELECT *
FROM order_details d
WHERE o.order_no = d.order_no
AND item IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
ORDER BY line_no ASC
FETCH FIRST ROW ONLY
) d
ON (1 = 1)
In earlier versions you can use:
SELECT o.*,
d.item
FROM orders o
LEFT OUTER JOIN(
SELECT d.*,
ROW_NUMBER() OVER (PARTITION BY order_no ORDER BY line_no ASC)
AS rn
FROM order_details d
WHERE item IN ('APPLES','ORANGES','PEACHES','PEARS','GRAPES')
) d
ON (o.order_no = d.order_no AND rn = 1)
Which, for the sample data:
CREATE TABLE orders (ORDER_NO, ORDER_DATE, ORDER_CUST, ORDER_VALUE) AS
SELECT 1, DATE '2022-02-14', 12345, 1000.00 FROM DUAL UNION ALL
SELECT 2, DATE '2022-02-13', 67890, 5000.00 FROM DUAL UNION ALL
SELECT 3, DATE '2022-02-12', 45678, 100.00 FROM DUAL;
CREATE TABLE Order_Details (ORDER_NO, LINE_NO, ITEM) AS
SELECT 1, 10, 'APPLES' FROM DUAL UNION ALL
SELECT 1, 20, 'ORANGES' FROM DUAL UNION ALL
SELECT 1, 30, 'LETTUCE' FROM DUAL UNION ALL
SELECT 2, 10, 'BROCCOLI' FROM DUAL UNION ALL
SELECT 2, 20, 'CAULIFLOWER' FROM DUAL UNION ALL
SELECT 2, 30, 'LETTUCE' FROM DUAL UNION ALL
SELECT 3, 10, 'KALE' FROM DUAL UNION ALL
SELECT 3, 20, 'RADISHES' FROM DUAL UNION ALL
SELECT 3, 30, 'PEARS' FROM DUAL;
Both output:
ORDER_NO
ORDER_DATE
ORDER_CUST
ORDER_VALUE
ITEM
1
2022-02-14 00:00:00
12345
1000
APPLES
2
2022-02-13 00:00:00
67890
5000
null
3
2022-02-12 00:00:00
45678
100
PEARS
db<>fiddle here

Related

BigQuery SQL: Group rows with shared ID that occur within 7 days of each other, and return values from most recent occurrence

I have a table of datestamped events that I need to bundle into 7-day groups, starting with the earliest occurrence of each event_id.
The final output should return each bundle's start and end date and 'value' column of the most recent event from each bundle.
There is no predetermined start date, and the '7-day' windows are arbitrary, not 'week of the year'.
I've tried a ton of examples from other posts but none quite fit my needs or use things I'm not sure how to refactor for BigQuery
Sample Data;
Event_Id
Event_Date
Value
1
2022-01-01
010203
1
2022-01-02
040506
1
2022-01-03
070809
1
2022-01-20
101112
1
2022-01-23
131415
2
2022-01-02
161718
2
2022-01-08
192021
3
2022-02-12
212223
Expected output;
Event_Id
Start_Date
End_Date
Value
1
2022-01-01
2022-01-03
070809
1
2022-01-20
2022-01-23
131415
2
2022-01-02
2022-01-08
192021
3
2022-02-12
2022-02-12
212223
You might consider below.
CREATE TEMP FUNCTION cumsumbin(a ARRAY<INT64>) RETURNS INT64
LANGUAGE js AS """
bin = 0;
a.reduce((c, v) => {
if (c + Number(v) > 6) { bin += 1; return 0; }
else return c += Number(v);
}, 0);
return bin;
""";
WITH sample_data AS (
select 1 event_id, DATE '2022-01-01' event_date, '010203' value union all
select 1 event_id, '2022-01-02' event_date, '040506' value union all
select 1 event_id, '2022-01-03' event_date, '070809' value union all
select 1 event_id, '2022-01-20' event_date, '101112' value union all
select 1 event_id, '2022-01-23' event_date, '131415' value union all
select 2 event_id, '2022-01-02' event_date, '161718' value union all
select 2 event_id, '2022-01-08' event_date, '192021' value union all
select 3 event_id, '2022-02-12' event_date, '212223' value
),
binning AS (
SELECT *, cumsumbin(ARRAY_AGG(diff) OVER w1) bin
FROM (
SELECT *, DATE_DIFF(event_date, LAG(event_date) OVER w0, DAY) AS diff
FROM sample_data
WINDOW w0 AS (PARTITION BY event_id ORDER BY event_date)
) WINDOW w1 AS (PARTITION BY event_id ORDER BY event_date)
)
SELECT event_id,
MIN(event_date) start_date,
ARRAY_AGG(
STRUCT(event_date AS end_date, value) ORDER BY event_date DESC LIMIT 1
)[OFFSET(0)].*
FROM binning GROUP BY event_id, bin;

Count number of unique purchase dates

I have a log of purchases made by customers. Sometimes a customer purchases multiple items during a given purchase, other times they only purchase a single item. What I want to do, on a line by line basis, is identify which purchase events have happened (i.e. not on an item by item basis, but on a checkout by checkout basis).
Each row of the source database contains the following fields
cust_id, purchase_date, sku
So a customer who purchases three items during a given transaction would look like this
1, 01/01/01, dog1
1, 01/01/01, cat1
1, 01/01/01, mouse1
1, 01/02/01, wolf1
1, 01/03/01, lion1
WHat I want out is
cust_id, purchase_date, sku, item_purchase_number_within_purchase, unique_purchase_date_across_dates
And that would look like
1, 01/01/01, dog1, 1, 1
1, 01/01/01, cat1, 2, 1
1, 01/01/01, mouse1, 3, 1
1, 01/02/01, wolf1, 1, 2
1, 01/03/01, lion1, 1, 3
In words, on the first date, three items where purchased arbitrarily identified as purchase numbers, 1, 2, and 3, on the second purchase date (Jan 2nd, 2001), only a single item was purchase, but this was the second purchasing event, and then on the third purchasing date (Jan 3, 2001) there was another single item purchased.
I'm trying to do this in oracle10g. I'm not sure how to describe what I'm accomplishing.
This is the sql I have so far
SELECT
cust_id, purchase_date, sku, ROW_NUMBER() OVER (PARTITION BY purchase_date ORDER BY sku)
FROM
[table]
Thanks
You seem to want dense_rank() rather than row_number() (or rank()) to avoid gaps. With your sample data in a CTE:
with t (cust_id, purchase_date, sku) as (
select 1, date '2001-01-01', 'dog1' from dual
union all select 1, date '2001-01-01', 'cat1' from dual
union all select 1, date '2001-01-01', 'mouse1' from dual
union all select 1, date '2001-01-02', 'wolf1' from dual
union all select 1, date '2001-01-03', 'lion1' from dual
)
select cust_id, purchase_date, sku,
dense_rank() over (partition by cust_id, purchase_date order by sku)
as item_within_purchase,
dense_rank() over (partition by cust_id order by purchase_date)
as purchase_event
from t;
CUST_ID PURCHASE_D SKU ITEM_WITHIN_PURCHASE PURCHASE_EVENT
---------- ---------- ------ -------------------- --------------
1 2001-01-01 cat1 1 1
1 2001-01-01 dog1 2 1
1 2001-01-01 mouse1 3 1
1 2001-01-02 wolf1 1 2
1 2001-01-03 lion1 1 3
The first extra column is partition by both customer and date, and ordered by SKU as you had; the second is only partitioned by customer, and ordered by date.

Counting dates that fall between two dates in the same column

I have two tables and for each ID and Level combination in table1, I need to get a count of times matching ID appears in table2 in between sequential times for levels in table1.
So for example, for ID = 1 and Level=1 in table1, two Time entries from table2 for ID=1 fall between Time of Level=1 and Level=2 in table1, so result will be 2 in the result table.
table1:
ID Level Time
1 1 6/7/13 7:03
1 2 6/9/13 7:05
1 3 6/12/13 12:02
1 4 6/17/13 5:01
2 1 6/18/13 8:38
2 3 6/20/13 9:38
2 4 6/23/13 10:38
2 5 6/28/13 1:38
table2:
ID Time
1 6/7/13 11:51
1 6/7/13 14:15
1 6/9/13 16:39
1 6/9/13 19:03
2 6/20/13 11:02
2 6/20/13 15:50
Result would be
ID Level Count
1 1 2
1 2 2
1 3 0
1 4 0
2 1 0
2 3 2
2 4 0
2 5 0
select transformed_tab1.id, transformed_tab1.level, count(tab2.id)
from
(select tab1.id, tab1.level, tm, lead(tm) over (partition by id order by tm) as next_tm
from
(
select 1 as id, 1 as level, '2013-06-07 07:03'::timestamp as tm union
select 1 as id, 2 as level, '2013-06-09 07:05 '::timestamp as tm union
select 1 as id, 3 as level, '2013-06-12 12:02'::timestamp as tm union
select 1 as id, 4 as level, '2013-06-17 05:01'::timestamp as tm union
select 2 as id, 1 as level, '2013-06-18 08:38'::timestamp as tm union
select 2 as id, 3 as level, '2013-06-20 09:38'::timestamp as tm union
select 2 as id, 4 as level, '2013-06-23 10:38'::timestamp as tm union
select 2 as id, 5 as level, '2013-06-28 01:38'::timestamp as tm) tab1
) transformed_tab1
left join
(select 1 as id, '2013-06-07 11:51'::timestamp as tm union
select 1 as id, '2013-06-07 14:15'::timestamp as tm union
select 1 as id, '2013-06-09 16:39'::timestamp as tm union
select 1 as id, '2013-06-09 19:03'::timestamp as tm union
select 2 as id, '2013-06-20 11:02'::timestamp as tm union
select 2 as id, '2013-06-20 15:50'::timestamp as tm) tab2
on transformed_tab1.id=tab2.id and tab2.tm between transformed_tab1.tm and transformed_tab1.next_tm
group by transformed_tab1.id, transformed_tab1.level
order by transformed_tab1.id, transformed_tab1.level
;
SQL Fiddle
select t1.id, level, count(t2.id)
from
(
select id, level,
tsrange(
"time",
lead("time", 1, 'infinity') over(
partition by id order by level
),
'[)'
) as time_range
from t1
) t1
left join
t2 on t1.id = t2.id and t1.time_range #> t2."time"
group by t1.id, level
order by t1.id, level
The solution starts creating a range of timestamps using the lead window function. Notice the [) parameter to the tsrange constructor. It means to include the lower and exclude the upper bound.
Then it joins the two tables with the #> range operator. It means the range includes the element.
It is necessary to left join t1 to have the zero counts.

T-SQL: finding rows in a different table without joins

I have two tables, I'll call TableA and TableB
TableA:
StartNumber EndNumber Country
1 10 USA
11 20 USA
21 30 Canada
31 40 France
41 50 France
51 60 Germany
TableB:
SomeNumber
5
15
55
22
35
46
49
For each number in TableB, I want to find the corresponding row in TableA where the number is between the StartNumber and EndNumber and return the name of the country. I then want to group these results on the country column and return the number of times each country appears. So the results would look like this:
Country Occurrences
USA 2
Germany 1
Canada 1
France 3
Not sure how to do this.
Here the query.
Select A.Country, count(*) as Occurrences
from
tableA A
inner join
tableB B
on B.someNumber between a.startnumber and b.endnumber
group by A.country
This should do the trick (but does use a join):
declare #TableA table (StartNumber int, EndNumber int, Country varchar(16));
insert into #TableA (StartNumber, EndNumber, Country)
select 1, 10, 'USA' union
select 11, 20, 'USA' union
select 21, 30, 'Canada' union
select 31, 40, 'France' union
select 41, 50, 'France' union
select 51, 60, 'Germany';
declare #TableB table (SomeNumber int);
insert into #TableB (SomeNumber)
select 5 union
select 15 union
select 55 union
select 22 union
select 35 union
select 46 union
select 49;
select
a.Country, count(*) Occurrences
from
#TableA a inner join
#TableB b on b.SomeNumber between a.StartNumber and a.EndNumber
group by
a.Country;

t-sql group by category and get top n values

Imagine I have this table:
Month | Person | Value
----------------------
Jan | P1 | 1
Jan | P2 | 2
Jan | P3 | 3
Feb | P1 | 5
Feb | P2 | 4
Feb | P3 | 3
Feb | P4 | 2
...
How can I build a t-sql query to get the top 2 value rows and a third with the sum of others?
Something like this:
RESULT:
Month | Person | Value
----------------------
Jan | P3 | 3
Jan | P2 | 2
Jan | Others | 1 -(sum of the bottom value - in this case (Jan, P1, 1))
Feb | P1 | 5
Feb | P2 | 4
Feb | Others | 5 -(sum of the bottom values - in this case (Feb, P3, 3) and (Feb, P4, 2))
Thanks
In the assumption you are using SQL Server 2005 or higher, using a CTE would do the trick.
Attach a ROW_NUMBER to each row, starting with the highest value, resetting for each month.
SELECT the top 2 rows for each month from this query (rownumber <= 2)
UNION with the remaining rows (rownumber > 2)
SQL Statement
;WITH Months (Month, Person, Value) AS (
SELECT 'Jan', 'P1', 1 UNION ALL
SELECT 'Jan', 'P2', 2 UNION ALL
SELECT 'Jan', 'P3', 3 UNION ALL
SELECT 'Feb', 'P1', 5 UNION ALL
SELECT 'Feb', 'P2', 4 UNION ALL
SELECT 'Feb', 'P3', 3 UNION ALL
SELECT 'Feb', 'P4', 2
),
q AS (
SELECT Month
, Person
, Value
, RowNumber = ROW_NUMBER() OVER (PARTITION BY Month ORDER BY Value DESC)
FROM Months
)
SELECT Month
, Person
, Value
FROM (
SELECT Month
, Person
, Value
, RowNumber
FROM q
WHERE RowNumber <= 2
UNION ALL
SELECT Month
, Person = 'Others'
, SUM(Value)
, MAX(RowNumber)
FROM q
WHERE RowNumber > 2
GROUP BY
Month
) q
ORDER BY
Month DESC
, RowNumber
Kudo's go to Andriy for teaching me some new tricks.
;WITH atable (Month, Person, Value) AS (
SELECT 'Jan', 'P1', 1 UNION ALL
SELECT 'Jan', 'P2', 2 UNION ALL
SELECT 'Jan', 'P3', 3 UNION ALL
SELECT 'Feb', 'P1', 5 UNION ALL
SELECT 'Feb', 'P2', 4 UNION ALL
SELECT 'Feb', 'P3', 3 UNION ALL
SELECT 'Feb', 'P4', 2
),
numbered AS (
SELECT
Month, Person, Value,
rownum = ROW_NUMBER() OVER (PARTITION BY Month ORDER BY Value DESC)
FROM atable
),
grouped AS (
SELECT
Month, Person, Value,
Grp = CASE WHEN rownum < 3 THEN rownum ELSE 3 END
FROM numbered
)
SELECT
Month,
Person = CASE Grp WHEN 3 THEN 'Others' ELSE MAX(Person) END,
Value = SUM(Value)
FROM grouped
GROUP BY Month, Grp
ORDER BY Month DESC, Grp
WITH NTable AS
(
SELECT [Month],
Person,
Value,
ROW_NUMBER() OVER (PARTITION BY [Month] ORDER BY Value DESC)
AS Rownumber
FROM MyTable
)
SELECT t.[Month],
CASE Rownumber WHEN 1 THEN t.Person WHEN 2 THEN t.Person ELSE 'Others' END As Person,
SUM(t.Value) As [Sum]
FROM NTable t
GROUP BY t.[Month], CASE Rownumber WHEN 1 THEN t.Person WHEN 2 THEN t.Person ELSE 'Others' END
ORDER BY t.[Month]