Recursive CTE to get a Category and all its ancestors [duplicate] - tsql

Given a child id, I need to return a query containing all parents of that child as well as their parents till I get to the root parent.
For example, given this data:
ID / Parent ID
1 / 0
2 / 1
3 / 2
4 / 0
5 / 3
So if I passed in ID 5 I would like to get a query with the results:
ID / Parent ID
1 / 0
2 / 1
3 / 2
This table does not work with a hierarchyid type so I suspect that this will need to be done with a CTE, but have no clue how. If it can be done in an SQL query / proc, any help would be appreciated.
Thanks

This is more or less what you want:
-- CTE to prepare hierarchical result set
;WITH #results AS
(
SELECT id,
parentid
FROM [table]
WHERE id = #childId
UNION ALL
SELECT t.id,
t.parentid
FROM [table] t
INNER JOIN #results r ON r.parentid = t.id
)
SELECT *
FROM #results;
Reference:
CTE: Common Table Expression
Working example:
-- create table with self lookup (parent id)
CREATE TABLE #tmp (id INT, parentid INT);
-- insert some test data
INSERT INTO #tmp (id, parentid)
SELECT 1,0 UNION ALL SELECT 2,1 UNION ALL SELECT 3,2
UNION ALL SELECT 4,0 UNION ALL SELECT 5,3;
-- prepare the child item to look up
DECLARE #childId INT;
SET #childId = 5;
-- build the CTE
WITH #results AS
(
SELECT id,
parentid
FROM #tmp
WHERE id = #childId
UNION ALL
SELECT t.id,
t.parentid
FROM #tmp t
INNER JOIN #results r ON r.parentid = t.id
)
-- output the results
SELECT *
FROM #results
WHERE id != #childId
ORDER BY id;
-- cleanup
DROP TABLE #tmp;
Output:
1 | 0
2 | 1
3 | 2

Related

sql recursion: find tree given middle node

I need to get a tree of related nodes given a certain node, but not necessary top node. I've got a solution using two CTEs, since I am struggling to squeeze it all into one CTE :). Might somebody have a sleek solution to avoid using two CTEs? Here is some code that I was playing with:
DECLARE #temp AS TABLE (ID INT, ParentID INT)
INSERT INTO #temp
SELECT 1 ID, NULL AS ParentID
UNION ALL
SELECT 2, 1
UNION ALL
SELECT 3, 2
UNION ALL
SELECT 4, 3
UNION ALL
SELECT 5, 4
UNION ALL
SELECT 6, NULL
UNION ALL
SELECT 7, 6
UNION ALL
SELECT 8, 7
DECLARE #startNode INT = 4
;WITH TheTree (ID,ParentID)
AS (
SELECT ID, ParentID
FROM #temp
WHERE ID = #startNode
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
;WITH Up(ID,ParentID)
AS (
SELECT t.id, t.ParentID
FROM #temp t
WHERE t.ID = #startNode
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN Up c ON t.id = c.ParentID
)
--SELECT * FROM Up
,TheTree (ID,ParentID)
AS (
SELECT ID, ParentID
FROM Up
WHERE ParentID is null
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
thanks
Meh. This avoids using two CTEs, but the result is a brute force kludge that hardly qualifies as "sleek" as it won’t be efficient if your table is at all sizeable. It will:
Recursively build all possible hierarchies
As you build them, flag the target NodeId as you find it
Return only the targeted tree
I threw in column “TreeNumber” on the off-chance the TargetId appears in multiple hierarchies, or if you’d ever have multiple values to check in one pass. “Depth” was added to make the output a bit more legible.
A more complex solution like #John’s might do, and more and subtler tricks could be done with more detailed table sturctures.
DECLARE #startNode INT = 4
;WITH cteAllTrees (TreeNumber, Depth, ID, ParentID, ContainsTarget)
AS (
SELECT
row_number() over (order by ID) TreeNumber
,1
,ID
,ParentID
,case
when ID = #startNode then 1
else 0
end ContainsTarget
FROM #temp
WHERE ParentId is null
UNION ALL
SELECT
tr.TreeNumber
,tr.Depth + 1
,t.id
,t.ParentID
,case
when tr.ContainsTarget = 1 then 1
when t.ID = #startNode then 1
else 0
end ContainsTarget
FROM #temp t
INNER JOIN cteAllTrees tr
ON t.ParentID = tr.ID
)
SELECT
TreeNumber
,Depth
,ID
,ParentId
from cteAllTrees
where TreeNumber in (select TreeNumber from cteAllTrees where ContainsTarget = 1)
order by
TreeNumber
,Depth
,ID
Here is a technique where you can select the entire hierarchy, a specific node with all its children, and even a filtered list and how they roll.
Note: See the comments next to the DECLAREs
Declare #YourTable table (id int,pt int,name varchar(50))
Insert into #YourTable values
(1,null,'1'),(2,1,'2'),(3,1,'3'),(4,2,'4'),(5,2,'5'),(6,3,'6'),(7,null,'7'),(8,7,'8')
Declare #Top int = null --<< Sets top of Hier Try 2
Declare #Nest varchar(25) = '|-----' --<< Optional: Added for readability
Declare #Filter varchar(25) = '' --<< Empty for All or try 4,6
;with cteP as (
Select Seq = cast(1000+Row_Number() over (Order by name) as varchar(500))
,ID
,pt
,Lvl=1
,name
From #YourTable
Where IsNull(#Top,-1) = case when #Top is null then isnull(pt,-1) else ID end
Union All
Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.name)) as varchar(500))
,r.ID
,r.pt
,p.Lvl+1
,r.name
From #YourTable r
Join cteP p on r.pt = p.ID)
,cteR1 as (Select *,R1=Row_Number() over (Order By Seq) From cteP)
,cteR2 as (Select A.Seq,A.ID,R2=Max(B.R1) From cteR1 A Join cteR1 B on (B.Seq like A.Seq+'%') Group By A.Seq,A.ID )
Select Distinct
A.R1
,B.R2
,A.ID
,A.pt
,A.Lvl
,name = Replicate(#Nest,A.Lvl-1) + A.name
From cteR1 A
Join cteR2 B on A.ID=B.ID
Join (Select R1 From cteR1 where IIF(#Filter='',1,0)+CharIndex(concat(',',ID,','),concat(',',#Filter+','))>0) F on F.R1 between A.R1 and B.R2
Order By A.R1

postgres hierarchy - count of child levels and sort by date of children or grandchildren

I would like to know how to write a postgres subquery so that the following table example will output what I need.
id parent_id postdate
1   -1 2015-03-10
2     1 2015-03-11 (child level 1)
3     1 2015-03-12 (child level 1)
4     3 2015-03-13 (child level 2)
5    -1 2015-03-14
6    -1 2015-03-15
7     6 2015-03-16 (child level 1)
If I want to sort all the root ids by child level 1 with a count of children(s) from the parent, the output would be something like this
id count  date
6   2    2015-03-15
1   4    2015-03-10
5   1    2015-03-14
The output is sorted by postdate based on the root's child. The 'date' being outputted is the date of the root's postdate. Even though id#5 has a more recent postdate, the rootid#6's child (id#7) has the most recent postdate because it is being sorted by child's postdate. id#5 doesnt have any children so it just gets placed at the end, sorted by date. The 'count' is the number children(child level 1), grandchildren(child level 2) and itself (root). For instance, id #2,#3,#4 all belong to id#1 so for id#1, the count would be 4.
My current subquery thus far:
SELECT p1.id,count(p1.id),p1.postdate
FROM mytable p1
LEFT JOIN mytable c1 ON c1.parent_id = p1.id AND p1.parent_id = -1
LEFT JOIN mytable c2 ON c2.parent_id = c1.id AND p1.parent_id = -1
GROUP BY p1.id,c1.postdate,p1.postdate
ORDER by c1.postdate DESC,p1.postdate DESC
create table mytable ( id serial primary key, parent_id int references mytable, postdate date );
create index mytable_parent_id_idx on mytable (parent_id);
insert into mytable (id, parent_id, postdate) values (1, null, '2015-03-10');
insert into mytable (id, parent_id, postdate) values (2, 1, '2015-03-11');
insert into mytable (id, parent_id, postdate) values (3, 1, '2015-03-12');
insert into mytable (id, parent_id, postdate) values (4, 3, '2015-03-13');
insert into mytable (id, parent_id, postdate) values (5, null, '2015-03-14');
insert into mytable (id, parent_id, postdate) values (6, null, '2015-03-15');
insert into mytable (id, parent_id, postdate) values (7, 6, '2015-03-16');
with recursive recu as (
select id as parent, id as root, null::date as child_postdate
from mytable
where parent_id is null
union all
select r.parent, mytable.id, mytable.postdate
from recu r
join mytable
on parent_id = r.root
)
select m.id, c.cnt, m.postdate, c.max_child_date
from mytable m
join ( select parent, count(*) as cnt, max(child_postdate) as max_child_date
from recu
group by parent
) c on c.parent = m.id
order by c.max_child_date desc nulls last, m.postdate desc;
You'll need a recursive query to count the elements in the subtrees:
WITH RECURSIVE opa AS (
SELECT id AS par
, id AS moi
FROM the_tree
WHERE parent_id IS NULL
UNION ALL
SELECT o.par AS par
, t.id AS moi
FROM opa o
JOIN the_tree t ON t.parent_id = o.moi
)
SELECT t.id
, c.cnt
, t.postdate
FROM the_tree t
JOIN ( SELECT par, COUNT(*) AS cnt
FROM opa o
GROUP BY par
) c ON c.par = t.id
ORDER BY t.id
;
UPDATE (it appears the OP also wants the maxdate per tree)
-- The same, but also select the postdate
-- --------------------------------------
WITH RECURSIVE opa AS (
SELECT id AS par
, id AS moi
, postdate AS postdate
FROM the_tree
WHERE parent_id IS NULL
UNION ALL
SELECT o.par AS par
, t.id AS moi
-- , GREATEST(o.postdate,t.postdate) AS postdate
, t.postdate AS postdate
FROM opa o
JOIN the_tree t ON t.parent_id = o.moi
)
SELECT t.id
, c.cnt
, t.postdate
, c.maxdate
FROM the_tree t
JOIN ( SELECT par, COUNT(*) AS cnt
, MAX(o.postdate) AS maxdate -- and obtain the max()
FROM opa o
GROUP BY par
) c ON c.par = t.id
ORDER BY c.maxdate, t.id
;
After looking at everyone's code, I created the subquery I needed. I can use PHP to vary the 'case when' code depending on the user's sort selection. For instance, the code below will sort the root nodes based on child level 1's postdate.
with recursive cte as (
select id as parent, id as root, null::timestamp as child_postdate,0 as depth
from mytable
where parent_id = -1
union all
select r.parent, mytable.id, mytable.postdate,depth+1
from cte r
join mytable
on parent_id = r.root
)
select m.id, c.cnt, m.postdate
from ssf.dtb_021 m
join ( select parent, count(*) as cnt, max(child_postdate) as max_child_date,depth
from cte
group by parent,depth
) c on c.parent = m.id
order by
case
when depth=2 then 1
when depth=1 then 2
else 0
end DESC,
c.max_child_date desc nulls last, m.postdate desc;
select
p.id,
(1+c.n) as parent_post_plus_number_of_subposts,
p.postdate
from
table as p
inner join
(
select
parent_id, count(*) as n, max(postdate) as _postdate
from table
group by parent_id
) as c
on p.id = c.parent_id
where p.parent_id = -1
order by c._postdate desc

Find exact FK matches

Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 1
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
drop table #temp_sID_wordCount
drop table #temp_count_wordID_sID
drop table #temp3_wordID_sID_forThatCount
drop table #temp3_sID1_sID2
drop table #temp3_sID1_sID2_keep
create table #temp_sID_wordCount (sID int primary key, ccount int not null)
create table #temp_count_wordID_sID (ccount int not null, wordID int not null, sID int not null, primary key (ccount, wordID, sID))
create table #temp3_wordID_sID_forThatCount (wordID int not null, sID int not null, primary key(wordID, sID))
create table #temp3_sID1_sID2_keep (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
create table #temp3_sID1_sID2 (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
insert into #temp_sID_wordCount
select sID, count(*) as ccount
FROM [FTSindexWordOnce] with (nolock)
group by sID
order by sID;
select count(*) from #temp_sID_wordCount where ccount <= 100; -- 701,966
truncate table #temp_count_wordID_sID
insert into #temp_count_wordID_sID
select #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID
from #temp_sID_wordCount
join [FTSindexWordOnce] with (nolock)
on [FTSindexWordOnce].sID = #temp_sID_wordCount.sID
and ccount >= 1 and ccount <= 10
order by #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID;
select count(*) from #temp_sID_wordCount; -- 34,860,090
truncate table #temp3_sID1_sID2_keep
declare cur cursor for
select top 10 ccount from #temp_count_wordID_sID group by ccount order by ccount
open cur
declare #count int, #sIDcur int
fetch next from cur into #count
while (##FETCH_STATUS = 0)
begin
--print (#count)
--select count(*), #count from #temp_sID_wordCount where #temp_sID_wordCount.ccount = #count
truncate table #temp3_wordID_sID_forThatCount
truncate table #temp3_sID1_sID2
-- wordID and sID for that unique word count
-- they can only be exact if they have the same word count
insert into #temp3_wordID_sID_forThatCount
select #temp_count_wordID_sID.wordID
, #temp_count_wordID_sID.sID
from #temp_count_wordID_sID
where #temp_count_wordID_sID.ccount = #count
order by #temp_count_wordID_sID.wordID, #temp_count_wordID_sID.sID
-- select count(*) from #temp3_wordID_sID_forThatCount
-- this has some duplicates
-- sID1 is the group
insert into #temp3_sID1_sID2
select w1.sID, w2.sID
from #temp3_wordID_sID_forThatCount as w1 with (nolock)
join #temp3_wordID_sID_forThatCount as w2 with (nolock)
on w1.wordID = w2.wordID
and w1.sID <= w2.sID
group by w1.sID, w2.sID
having count(*) = #count
order by w1.sID, w2.sID
-- get rid of the goups of 1
delete #temp3_sID1_sID2
where sID1 in (select sID1 from #temp3_sID1_sID2 group by sID1 having count(*) = 1)
-- get rid of the double dips
delete #temp3_sID1_sID2
where #temp3_sID1_sID2.sID1 in
(select distinct s1del.sID1 -- these are the double dips
from #temp3_sID1_sID2 as s1base with (nolock)
join #temp3_sID1_sID2 as s1del with (nolock)
on s1del.sID1 > s1base.sID1
and s1Del.sID1 = s1base.sID2)
insert into #temp3_sID1_sID2_keep
select #temp3_sID1_sID2.sID1
, #temp3_sID1_sID2.sID2
from #temp3_sID1_sID2 with (nolock)
order by #temp3_sID1_sID2.sID1, #temp3_sID1_sID2.sID2
fetch next from cur into #count
end
close cur
deallocate cur
select *
FROM #temp3_sID1_sID2_keep with (nolock)
order by 1,2
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
;with tmp1 as (select sID, cnt = count(wordID) from [Table] group by sID)
select s1.sID, s2.sID
from tmp1 s1
cross join tmp1 s2
cross apply (
select count(1)
from [Table] d1
join [Table] d2 on d2.wordID = d1.wordID
where d1.sID = s1.sID and d2.sID = s2.sID
) c(cnt)
where s1.cnt = s2.cnt
and s1.sID > s2.sID
and s1.cnt = c.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
;with a as (
select distinct sID from [Table]
)
select sID,
hashbytes('sha1', (
select cast(wordID as varchar(10)) + '|'
from [Table]
where sID = a.sID
order by wordID
for xml path('')))
from a
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.

Comparing data between rows within a table

I have following table structure
table 1
ID SOURCE_ID NAME
1 1 A
2 1 B
3 2 B
4 2 C
5 2 A
i need to pick those names which are common across all SOURCE_ID , hence i expect names A and B as they are present in both the SOURCE_ID 1,2.
The following query gives me the expected output:
SELECT DISTINCT NAME
FROM TABLE1 A, TABLE1 B
WHERE A.NAME = B.NAME AND A.SOURCE_ID != B.SOURCE_ID
Now when the data in table changes to include a new record ID 6
table 1
ID SOURCE_ID NAME
1 1 A
2 1 B
3 2 B
4 2 C
5 2 A
6 3 A
The name that is common in all three SOURCE_ID(1,2,3) IS A.
My query fails to return the correct output as new records are entered.
Please provide me a query that works correctly when new records are inserted.
Have a look at something like
DECLARE #Table TABLE(
SOURCE_ID INT,
NAME VARCHAR(20)
)
INSERT INTO #Table SELECT 1,'A'
INSERT INTO #Table SELECT 1,'B'
INSERT INTO #Table SELECT 2,'B'
INSERT INTO #Table SELECT 2,'C'
INSERT INTO #Table SELECT 2,'A'
--INSERT INTO #Table SELECT 3,'A'
;WITH DistinctCount AS (
SELECT NAME,
COUNT(DISTINCT SOURCE_ID) Cnt
FROM #Table
GROUP BY NAME
)
SELECT *
FROM DistinctCount
WHERE Cnt = (SELECT COUNT(DISTINCT SOURCE_ID) FROM #Table)
With the 6th insert commented out, should return A and B, with it included, should return A

Returning the parent/ child relationship on a self-joining table

I need to be able to return a list of all children given a parent Id at all levels using SQL.
The table looks something like this:
ID ParentId Name
---------------------------------------
1 null Root
2 1 Child of Root
3 2 Child of Child of Root
Give an Id of '1', how would I return the entire list...? There is no limitation on the depth of the nesting either...
Thanks,
Kieron
To get all children for a given #ParentId stored in that manner you could use a recursive CTE.
declare #ParentId int
--set #ParentId = 1
;WITH T AS
(
select 1 AS ID,null AS ParentId, 'Root' as [Name] union all
select 2,1,'Child of Root' union all
select 3,2,'Child of Child of Root'
),
cte AS
(
SELECT ID, ParentId, Name
FROM T
WHERE ParentId = #ParentId OR (ParentId IS NULL AND #ParentId IS NULL)
UNION ALL
SELECT T.ID, T.ParentId, T.Name
FROM T
JOIN cte c ON c.ID = T.ParentId
)
SELECT ID, ParentId, Name
FROM cte
OPTION (MAXRECURSION 0)