I have data something like this:
ID 1 1 1 1 1 1 1 1 1 1 1 1
Month J F M A M J J A S O N D
Status 1 0 0 1 0 1 0 0 1 1 1 1
ID 2 2 2 2 2 2 2 2 2 2 2 2
Month J F M A M J J A S O N D
Status 1 0 1 0 1 0 1 0 1 0 1 1
ID 3 3 3 3 3 3 3 3 3 3 3 3
Month J F M A M J J A S O N D
Status 0 0 0 0 0 0 0 0 0 0 0 1
Using T-SQL, I am trying to capture, for each ID, the month corresponding to the first STATUS = 1 in the last consecutive group of 1s, i.e., September, November and December respectively in this example.
Here is the code I'm using:
-- Rebuild #Temp1: one row per #Temp0 row plus a window-derived STATUS column and a row number.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL DROP TABLE #Temp1
;WITH PARTITIONED1 AS
(SELECT t0.ID
, t0.Year_Month
-- NOTE(review): the value aliased as STATUS is computed from Year_Month, not from a Status column.
-- No window frame is specified; with the default frame LAST_VALUE usually returns the current
-- row's own value -- confirm against the LAST_VALUE documentation.
-- NOTE(review): both windows partition by Account_Number, while everything else here works per ID.
, LAST_VALUE(t0.Year_Month) OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS STATUS
, ROW_NUMBER() OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS rn1
FROM #Temp0 t0
)
SELECT *
INTO #Temp1
FROM PARTITIONED1 p1
-- NOTE(review): the alias in scope here is p1; t0 is only defined inside the CTE above.
ORDER BY t0.ID
, t0.Year_Month
-- Rebuild #Temp with the #Temp1 rows whose rn1 is one past the highest rn1 that has STATUS = 0.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
SELECT *
INTO #Temp
FROM #Temp1 t1
-- NOTE(review): the subquery is not correlated on ID, so MAX(b.rn1) is a single value taken
-- across every ID rather than one value per ID.
WHERE t1.rn1 = (SELECT MAX(b.rn1) + 1 FROM #Temp1 b WHERE b.STATUS = 0)
-- NOTE(review): SELECT * also returns STATUS, which is not listed in this GROUP BY -- confirm this is accepted.
GROUP BY t1.ID
, t1.Year_Month
, t1.rn1
However, this just returns the last instance where STATUS = 1 is achieved overall as the first 1 of the last group of 1s, in this case January.
I've tried using CASE statements and grouping in various combinations (hence the intermediate step reading the data into #Temp1), but have not been able to get results for all three IDs; is anyone able to assist?
Thanks in advance!
Assuming Ju for June and Jl for July:
--Sample Data
-- Every month needs its own code, otherwise it cannot be mapped to a month number:
--   J, F, M, A, My, Ju, Jl, Au, S, O, N, D
-- (Ju = June, Jl = July, My = May, Au = August), hence VARCHAR(2) rather than VARCHAR(1).
IF OBJECT_ID('tempdb..#Temp0') IS NOT NULL DROP TABLE #Temp0;
CREATE TABLE #Temp0 (ID INT, Year_Month VARCHAR(2), Status INT);
INSERT INTO #Temp0 (ID, Year_Month, Status)
VALUES (1,'J',1),(1,'F',0),(1,'M',0),(1,'A',1),(1,'My',0),(1,'Ju',1),(1,'Jl',0),(1,'Au',0),(1,'S',1),(1,'O',1),(1,'N',1),(1,'D',1),
       (2,'J',1),(2,'F',0),(2,'M',1),(2,'A',0),(2,'My',1),(2,'Ju',0),(2,'Jl',1),(2,'Au',0),(2,'S',1),(2,'O',0),(2,'N',1),(2,'D',1),
       (3,'J',0),(3,'F',0),(3,'M',0),(3,'A',0),(3,'My',0),(3,'Ju',0),(3,'Jl',0),(3,'Au',0),(3,'S',0),(3,'O',0),(3,'N',0),(3,'D',1);
--Query
-- Returns, per ID, the month that opens the last consecutive run of Status = 1.
-- IDs that never have Status = 1 produce no row.
WITH A
AS (
    -- Translate each month code into its calendar position so rows can be ordered.
    SELECT ID,
           Year_Month,
           Status,
           CASE Year_Month
               WHEN 'J'  THEN 1
               WHEN 'F'  THEN 2
               WHEN 'M'  THEN 3
               WHEN 'A'  THEN 4
               WHEN 'My' THEN 5
               WHEN 'Ju' THEN 6
               WHEN 'Jl' THEN 7
               WHEN 'Au' THEN 8
               WHEN 'S'  THEN 9
               WHEN 'O'  THEN 10
               WHEN 'N'  THEN 11
               WHEN 'D'  THEN 12
           END AS MonthNumber
    FROM #Temp0
),
StartingPoints
AS (
    -- Pair every row with the previous month's Status. The first month has no
    -- predecessor, so it is treated as following a 0; a run that starts in the
    -- first month is therefore still recognised as a group start.
    SELECT ID,
           Year_Month,
           MonthNumber,
           Status,
           LAG(Status, 1, 0) OVER (PARTITION BY ID ORDER BY MonthNumber) AS PrevStatus
    FROM A
),
MonthRanking
AS (
    -- Keep only rows that open a run of 1s (a 1 preceded by a 0) and number them
    -- from the latest backwards, so rownum = 1 is the start of the last run.
    SELECT ID,
           Year_Month,
           Status,
           MonthNumber,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY MonthNumber DESC) AS rownum
    FROM StartingPoints
    WHERE Status = 1
      AND PrevStatus = 0
)
SELECT ID,
       Year_Month,
       Status,
       MonthNumber,
       rownum
FROM MonthRanking
WHERE rownum = 1
ORDER BY ID;
Results:
If month names are recorded in full (e.g. June, July), then this would work as well:
-- Returns, per ID, the month that opens the last consecutive run of Status = 1.
-- Expects #Temp0.Year_Month to hold full month names; IDs with no Status = 1 produce no row.
WITH Numbered
AS (
    -- Derive the month number once by parsing a dummy date built from the month name.
    -- NOTE(review): the string-to-date conversion presumably depends on the session
    -- language matching the month names (e.g. English) -- confirm.
    SELECT ID,
           Year_Month,
           Status,
           MonthNumber = MONTH('01-' + Year_Month + '-2010')
    FROM #Temp0
),
StartingPoints
AS (
    -- Pair every row with the previous month's Status; the first month is treated
    -- as following a 0 so a run that starts there still counts as a group start.
    SELECT ID,
           Year_Month,
           Status,
           MonthNumber,
           LAG(Status, 1, 0) OVER (PARTITION BY ID ORDER BY MonthNumber) AS PrevStatus
    FROM Numbered
),
MonthRanking
AS (
    -- Rows that open a run of 1s, numbered from the latest backwards.
    SELECT ID,
           Year_Month,
           Status,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY MonthNumber DESC) AS rownum
    FROM StartingPoints
    WHERE Status = 1
      AND PrevStatus = 0
)
SELECT ID,
       Year_Month,
       Status,
       rownum
FROM MonthRanking
WHERE rownum = 1
ORDER BY ID;
Results:
And if we assume that the data is shaped as Iamdave assumes, then it is simply:
-- Returns, per ID, the Year_Month that opens the last consecutive run of Status = 1.
-- Expects #Temp0.Year_Month to sort chronologically (e.g. an integer such as 201509);
-- IDs with no Status = 1 produce no row.
WITH StartingPoints
AS (
    -- Pair every row with the previous period's Status; the first period is treated
    -- as following a 0 so a run that starts there still counts as a group start.
    SELECT ID,
           Year_Month,
           Status,
           LAG(Status, 1, 0) OVER (PARTITION BY ID ORDER BY Year_Month) AS PrevStatus
    FROM #Temp0
),
MonthRanking
AS (
    -- Rows that open a run of 1s, numbered from the latest backwards.
    SELECT ID,
           Year_Month,
           Status,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Year_Month DESC) AS rownum
    FROM StartingPoints
    WHERE Status = 1
      AND PrevStatus = 0
)
SELECT ID,
       Year_Month,
       Status,
       rownum
FROM MonthRanking
WHERE rownum = 1
ORDER BY ID;
Results:
You can do this with a couple derived tables that stack two window functions on top of one another (which can't be done in the same select). I have assumed that your data is slightly different to the table you have provided, based on the column names in your query. If they are not as I have them below, I strongly recommend having a look at how you store your data:
-- Sample data in a table variable: one row per ID and YearMonth (yyyymm) with a 0/1 status.
declare @t table(ID int, YearMonth int, StatusValue bit);
insert into @t (ID, YearMonth, StatusValue) values (1,201501,1),(1,201502,0),(1,201503,0),(1,201504,1),(1,201505,0),(1,201506,1),(1,201507,0),(1,201508,0),(1,201509,1),(1,201510,1),(1,201511,1),(1,201512,1),(2,201601,1),(2,201602,0),(2,201603,1),(2,201604,0),(2,201605,1),(2,201606,0),(2,201607,1),(2,201608,0),(2,201609,1),(2,201610,0),(2,201611,1),(2,201612,1),(3,201701,0),(3,201702,0),(3,201703,0),(3,201704,0),(3,201705,0),(3,201706,0),(3,201707,0),(3,201708,0),(3,201709,0),(3,201710,0),(3,201711,0),(3,201712,1);

-- Returns, per ID, the row that opens the last consecutive run of StatusValue = 1.
with c as
(
    -- c = 1 marks a row that starts a run of 1s: its own status is 1 and the previous
    -- month's status is 0. The first month has no previous row, so it defaults to 0;
    -- a run beginning in the very first month is then still flagged as a start.
    select ID
          ,YearMonth
          ,StatusValue
          ,case when StatusValue = 1
                 and lag(StatusValue,1,0) over (partition by ID
                                                order by YearMonth) = 0
                then 1
                else 0
           end as c
    from @t
), sc as
(
    -- Number the run starts from the latest backwards; sc = 1 is the last run's start.
    select ID
          ,YearMonth
          ,StatusValue
          ,row_number() over (partition by ID order by YearMonth desc) as sc
    from c
    where c = 1
)
select ID
      ,YearMonth
      ,StatusValue
from sc
where sc = 1
order by ID;
Output:
+----+-----------+-------------+
| ID | YearMonth | StatusValue |
+----+-----------+-------------+
| 1 | 201509 | 1 |
| 2 | 201611 | 1 |
| 3 | 201712 | 1 |
+----+-----------+-------------+
the title may not be very clear so let's consider this example (this is not my code, just taking this example to model my request)
I have a table that references itself (like a filesystem)
id | parent | name
----+----------+-------
1 | null | /
2 | 1 | home
3 | 2 | user
4 | 3 | bin
5 | 1 | usr
6 | 5 | local
Is it possible to make a sql request so if I choose :
1 I will get a table containing 2,3,4,5,6 (because this is the root) so matching :
/home
/home/user
/home/user/bin
/usr
etc...
2 I will get a table containing 3,4 so matching :
/home/user
/home/user/bin
and so on
Use a recursive common table expression. Always start from the root and collect the ids along each path in an array; the WHERE clause then uses that array to pick the paths that pass through a given id.
For id = 1:
-- Walk the tree from the root downwards, building each node's path string and the
-- list of ids on the way to it; then keep the nodes that have id 1 on that list.
with recursive tree(id, parent, name, ids) as (
        -- anchor: the root row (it has no parent); its id list holds only itself
        select root.id, root.parent, root.name, array[root.id]
        from my_table as root
        where root.parent is null
    union all
        -- step: attach each child to its already-visited parent
        select child.id,
               child.parent,
               concat(up.name, child.name, '/'),
               up.ids || child.id
        from tree as up
        join my_table as child on up.id = child.parent
)
select tree.id, tree.name
from tree
where tree.id <> 1
  and 1 = any(tree.ids)
id | name
----+-----------------------
2 | /home/
5 | /usr/
6 | /usr/local/
3 | /home/user/
4 | /home/user/bin/
(5 rows)
For id = 2:
-- Walk the tree from the root downwards, building each node's path string and the
-- list of ids on the way to it; then keep the nodes that have id 2 on that list.
with recursive tree(id, parent, name, ids) as (
        -- anchor: the root row (it has no parent); its id list holds only itself
        select root.id, root.parent, root.name, array[root.id]
        from my_table as root
        where root.parent is null
    union all
        -- step: attach each child to its already-visited parent
        select child.id,
               child.parent,
               concat(up.name, child.name, '/'),
               up.ids || child.id
        from tree as up
        join my_table as child on up.id = child.parent
)
select tree.id, tree.name
from tree
where tree.id <> 2
  and 2 = any(tree.ids)
id | name
----+-----------------------
3 | /home/user/
4 | /home/user/bin/
(2 rows)
Bidirectional query
The question is really interesting. The above query works well but is inefficient, as it traverses all tree nodes even when we are asking about a leaf. A more powerful solution is a bidirectional recursive query: the inner query walks from the given node up to the root, while the outer one walks from the node down to its descendants.
-- Lists the descendants of one node together with their full paths.
-- The node id appears twice below (both places are marked "parameter") and must match.
with recursive outer_query(id, parent, name) as (
-- inner_query climbs from the chosen node towards the root, prefixing each
-- ancestor's name and a '/' to the accumulated name; qid keeps the starting id.
with recursive inner_query(qid, id, parent, name) as (
select id, id, parent, name
from my_table
where id = 2 -- parameter
union all
select qid, t.id, t.parent, concat(t.name, '/', q.name)
from inner_query q
join my_table t on q.parent = t.id
)
-- Seed of outer_query: the inner row whose parent is null carries the full accumulated
-- name. right(name, -1) drops its first character; with a root named '/' (as in the
-- sample table) that presumably removes the doubled leading slash -- confirm for other data.
select qid, null::int, right(name, -1)
from inner_query
where parent is null
union all
-- Descend: append '/' and the child's name for every child of an already-found node.
select t.id, t.parent, concat(q.name, '/', t.name)
from outer_query q
join my_table t on q.id = t.parent
)
-- Drop the starting node itself so that only its descendants are returned.
select id, name
from outer_query
where id <> 2; -- parameter
I have a table named Stores with columns:
StoreCode NVARCHAR(10),
OldStoreCode NVARCHAR(10)
Here is a sample of my data:
| StoreCode | OldStoreCode |
|-----------|--------------|
| A | B |
| B | A |
| D | E |
| E | F |
| M | K |
| J | K |
| K | L |
|-----------|--------------|
I want to create clusters of related Stores. Related store means there is a one way relation between StoreCodes and OldStoreCodes.
Expected result table:
| StoreCode | ClusterId |
|-----------|-----------|
| A | 1 |
| B | 1 |
| D | 2 |
| E | 2 |
| F | 2 |
| M | 3 |
| K | 3 |
| J | 3 |
| L | 3 |
|-----------|-----------|
There is no maximum number of hops. There may be a StoreCode A which has an OldStoreCode B, which has an OldStoreCode C, which has an OldStoreCode D, etc.
How can I cluster stores like this?
Try it like this:
EDIT: With changes by OP taken from comment
-- Sample data in a table variable; ID is generated by IDENTITY in insert order.
DECLARE @tbl TABLE(ID INT IDENTITY, StoreCode VARCHAR(100),OldStoreCode VARCHAR(100));
INSERT INTO @tbl (StoreCode, OldStoreCode) VALUES
('A','B'),('B','A'),('D','E'),('E','F'),('M','K'),('J','K'),('K','L');

-- Clusters store codes: rows that end up with the same set of related codes share a cluster,
-- and the cluster is identified by the smallest row ID carrying that set.
-- NOTE(review): Related only links rows that directly share a code. Rows connected solely
-- through an intermediate row (e.g. A-B, B-C, C-D) get different code sets and therefore
-- different clusters, so longer chains are not merged by this query.
WITH Related AS
(
    -- For every row, collect all codes appearing on it or on any row that shares a code with it.
    SELECT DISTINCT t1.ID,Val
    FROM @tbl AS t1
    INNER JOIN @tbl AS t2 ON t1.StoreCode=t2.StoreCode
                          OR t1.OldStoreCode=t2.OldStoreCode
                          OR t1.OldStoreCode=t2.StoreCode
                          OR t1.StoreCode=t2.OldStoreCode
    CROSS APPLY(SELECT DISTINCT Val
                FROM
                (VALUES(t1.StoreCode),(t2.StoreCode),(t1.OldStoreCode),(t2.OldStoreCode)) AS A(Val)
               ) AS valsInCols
)
,ClusterKeys AS
(
    -- Concatenate each row's codes in sorted order into one string key.
    SELECT r1.ID
          ,(
            SELECT r2.Val AS [*]
            FROM Related AS r2
            WHERE r2.ID=r1.ID
            ORDER BY r2.Val
            FOR XML PATH('')
           ) AS ClusterKey
    FROM Related AS r1
    GROUP BY r1.ID
)
,ClusterIds AS
(
    -- One representative row ID per distinct key.
    SELECT ClusterKey
          ,MIN(ID) AS ID
    FROM ClusterKeys
    GROUP BY ClusterKey
)
SELECT r.ID
      ,r.Val
FROM ClusterIds c
INNER JOIN Related r ON c.ID = r.ID
ORDER BY r.ID
        ,r.Val;
The result
ID Val
1 A
1 B
3 D
3 E
3 F
5 J
5 K
5 L
5 M
This should do it:
SAMPLE DATA:
-- Recreate the #Temp1 fixture: one row per (StoreCode, OldStoreCode) link.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL
    DROP TABLE #Temp1;

CREATE TABLE #Temp1
(
    StoreCode    NVARCHAR(10),
    OldStoreCode NVARCHAR(10)
);

INSERT INTO #Temp1 (StoreCode, OldStoreCode)
VALUES ('A', 'B'),
       ('B', 'A'),
       ('D', 'E'),
       ('E', 'F'),
       ('M', 'K'),
       ('J', 'K'),
       ('K', 'L');
QUERY:
-- Assigns every store code in #Temp1 to a cluster of transitively related codes.
-- Two codes are in the same cluster when a chain of StoreCode/OldStoreCode links, of any
-- length and in either direction, connects them. Output: StoreCode, ClusterID (1, 2, 3, ...).
;WITH Nodes -- every distinct store code, whether it appears as new or old code
     AS (
     SELECT StoreCode
     FROM #Temp1
     WHERE StoreCode IS NOT NULL
     UNION
     SELECT OldStoreCode
     FROM #Temp1
     WHERE OldStoreCode IS NOT NULL),
     Edges -- each row links its two codes; store both directions so the walk is undirected
     AS (
     SELECT StoreCode AS FromCode
          , OldStoreCode AS ToCode
     FROM #Temp1
     WHERE StoreCode IS NOT NULL
       AND OldStoreCode IS NOT NULL
     UNION
     SELECT OldStoreCode
          , StoreCode
     FROM #Temp1
     WHERE StoreCode IS NOT NULL
       AND OldStoreCode IS NOT NULL),
     Walk -- all codes reachable from each starting code, any number of hops
     AS (
     SELECT StoreCode
          , StoreCode AS ReachedCode
            -- '|'-delimited list of codes already on this path; assumes codes never contain '|'
          , CAST(N'|' + StoreCode + N'|' AS NVARCHAR(MAX)) AS Visited
     FROM Nodes
     UNION ALL
     SELECT w.StoreCode
          , e.ToCode
          , CAST(w.Visited + e.ToCode + N'|' AS NVARCHAR(MAX))
     FROM Walk AS w
          INNER JOIN Edges AS e ON e.FromCode = w.ReachedCode
     -- never revisit a code on the same path: this is what stops cycles such as A <-> B
     WHERE CHARINDEX(N'|' + e.ToCode + N'|', w.Visited) = 0),
     ClusterKeys -- the smallest reachable code is the same for every member of a cluster
     AS (
     SELECT StoreCode
          , MIN(ReachedCode) AS ClusterKey
     FROM Walk
     GROUP BY StoreCode)
     SELECT StoreCode
          , ClusterID = DENSE_RANK() OVER(ORDER BY ClusterKey)
     FROM ClusterKeys
     ORDER BY StoreCode
            , ClusterID
     -- chains may be longer than the default recursion limit; the Visited guard guarantees termination
     OPTION (MAXRECURSION 0);
RESULTS: