SQL Server flatten hierarchy table - tsql

I have this data in a recursive table, and I want to flatten this hierarchy information. I have accomplished this by doing a query but I was wondering if there is a better way to get the same result.
Data that I have:
-- Self-referencing hierarchy of geographic elements.
-- IdParentElement holds the IdElement of the parent row (NULL for a root);
-- IdLevel is the depth of the row (the sample data uses 1, 2 and 3).
CREATE TABLE [dbo].[ElementosGeograficos_](
    [IdElement] [int] IDENTITY(1,1) NOT NULL,
    [Name] [varchar](50) NOT NULL,
    [IdParentElement] [int] NULL,
    [IdLevel] [int] NOT NULL,
    CONSTRAINT [PK_ElementosGeograficos_]
        PRIMARY KEY CLUSTERED ([IdElement] ASC)
)   -- closes the column/constraint list, which the original left open
GO
-- Sample data: two countries, four departments, four cities.
-- One statement per row so the IDENTITY values are assigned in this exact
-- order; the parent ids below (1..6) rely on that.
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Colombia', NULL, 1)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Venezuela', NULL, 1)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Cundinamarca', 1, 2)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Antioquia', 1, 2)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Valle', 1, 2)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Distrito Capital', 2, 2)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Bogota', 3, 3)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Medellin', 4, 3)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Cali', 5, 3)
INSERT INTO ElementosGeograficos_ (Name, IdParentElement, IdLevel) VALUES ('Caracaas', 6, 3)
Query result that I need:
- NUM 1 2 3
- 1 Colombia Null Null
- 2 Venezuela Null Null
- 3 Colombia Cundinamarca Null
- 4 Colombia Antioquia Null
- 5 Colombia Valle Null
- 6 Venezuela Distrito Capital Null
- 7 Colombia Cundinamarca Bogota
- 8 Colombia Antioquia Medellin
- 9 Colombia Valle Cali
- 10 Venezuela Distrito Capital Caracaas
Query that I have:
-- Flattens the three-level hierarchy into one row per element with
-- Pais / Departamento / Ciudad columns.
-- Column names follow the posted table definition
-- (IdElement, Name, IdParentElement, IdLevel); the original query used
-- names that do not exist in that table.
-- EG2 is always the parent of EG1 and EG3 the grandparent; each branch
-- maps them to the right output column for its level.
-- The branches are disjoint by IdLevel and each row carries the primary
-- key, so UNION ALL returns the same rows as UNION without the dedup step.

-- Level 1: the element itself is the country.
SELECT
    EG1.IdElement,
    EG1.Name AS [Pais],
    EG2.Name AS [Departamento],
    EG3.Name AS [Ciudad]
FROM
    ElementosGeograficos_ EG1
LEFT JOIN
    ElementosGeograficos_ EG2 ON EG2.IdElement = EG1.IdParentElement
LEFT JOIN
    ElementosGeograficos_ EG3 ON EG3.IdElement = EG2.IdParentElement
WHERE
    EG1.IdLevel = 1
UNION ALL
-- Level 2: the parent is the country, the element is the department.
SELECT
    EG1.IdElement,
    EG2.Name AS [Pais],
    EG1.Name AS [Departamento],
    EG3.Name AS [Ciudad]
FROM
    ElementosGeograficos_ EG1
LEFT JOIN
    ElementosGeograficos_ EG2 ON EG2.IdElement = EG1.IdParentElement
LEFT JOIN
    ElementosGeograficos_ EG3 ON EG3.IdElement = EG2.IdParentElement
WHERE
    EG1.IdLevel = 2
UNION ALL
-- Level 3: grandparent is the country, parent the department,
-- the element itself the city.
SELECT
    EG1.IdElement,
    EG3.Name AS [Pais],
    EG2.Name AS [Departamento],
    EG1.Name AS [Ciudad]
FROM
    ElementosGeograficos_ EG1
LEFT JOIN
    ElementosGeograficos_ EG2 ON EG2.IdElement = EG1.IdParentElement
LEFT JOIN
    ElementosGeograficos_ EG3 ON EG3.IdElement = EG2.IdParentElement
WHERE
    EG1.IdLevel = 3

Since you seem to have two known levels you can just use a self-join, coalesce and a case statement
-- One row per element, resolved against its parent and grandparent.
-- [1] is the top-most ancestor found (or the element itself for a root),
-- [2] the next level down, [3] the element itself when it sits two levels
-- below its top ancestor. Levels that do not apply come back as NULL.
SELECT
    elem.IdElement AS num,
    COALESCE(grandparent.Name, parent.Name, elem.Name) AS [1],
    CASE
        WHEN grandparent.Name IS NOT NULL THEN parent.Name
        WHEN parent.Name IS NOT NULL THEN elem.Name
    END AS [2],
    CASE
        WHEN grandparent.Name IS NOT NULL AND parent.Name IS NOT NULL THEN elem.Name
    END AS [3]
FROM
    ElementosGeograficos_ AS elem
    LEFT JOIN ElementosGeograficos_ AS parent
        ON parent.IdElement = elem.IdParentElement
    LEFT JOIN ElementosGeograficos_ AS grandparent
        ON grandparent.IdElement = parent.IdParentElement
DEMO

Related

Flatten recursive data

I have a table glclassifications (Exact software) that looks like this:
-- Hierarchical classification table: each row points at its parent row
-- through Parent, which stores the ID of another record.
-- All columns are nullable nvarchar(500) and no key is declared.
-- NOTE(review): the flattening script further down also reads a
-- bip_office column that is not part of this definition — confirm the
-- real table layout.
CREATE TABLE [glclassifications](
[ID] [nvarchar](500) NULL, --(contains uniqueidentifier)
[Code] [nvarchar](500) NULL,
[Description] [nvarchar](500) NULL,
[Parent] [nvarchar](500) NULL --(contains uniqueidentifier)
)
It holds classifications that are hierarchical (hence the Parent column that refers to the ID of another record.)
I need this table to be 'flattened' so that I have
Parent_ID,
Parent_code,
Parent_description,
--child1
Child1_ID,
Child1_code,
Child1_description,
--child2
Child2_ID....
-- up until including child9
and so on.
My predecessor left me with this query, which violates a lot of the design principles that we agreed upon. One of those is not using (temp) tables. I would like to simplify this script and preferably get the output as a view and not as a table. A catch is that the resulting view needs to deliver columns for 8 children, even if there are fewer in the table...
/* drop duplicates in glclassifications */
-- Numbers the rows that share an ID (in arbitrary order) and deletes every
-- row except the one numbered 1, leaving one row per ID.
WITH ranked AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY (SELECT 0)) AS copy_no
    FROM [dbo].[glclassifications]
)
DELETE FROM ranked
WHERE copy_no > 1;
/* drop tmp table */
DROP TABLE IF EXISTS #H;
-- Work table: one row per classification, later widened with one
-- Code_n / Name_n pair per hierarchy level.
CREATE TABLE #H (
    Code varchar(100),
    Description varchar(250),
    ID varchar(100),
    Division int
)
/* declare table */
-- Adds Code_1..Code_9 and Name_1..Name_9 to #H via dynamic SQL.
-- Local variables use the @ sigil; a leading # denotes a temp table in
-- T-SQL and is not valid for variables.
DECLARE @n INT = 1
DECLARE @Sql varchar(50)
WHILE @n < 10 BEGIN
    DECLARE @col_name varchar(10) = CONCAT('Code_' , @n)
    -- QUOTENAME brackets the generated identifier before it is concatenated
    SET @Sql = 'ALTER TABLE #H ADD ' + QUOTENAME(@col_name) + ' varchar(20) NULL'
    EXEC (@Sql)
    SET @col_name = CONCAT('Name_' , @n)
    SET @Sql = 'ALTER TABLE #H ADD ' + QUOTENAME(@col_name) + ' varchar(250) NULL'
    EXEC (@Sql)
    SET @n = @n + 1
END
-- Fills #H one hierarchy level at a time. Each step joins the source table
-- to the rows already in #H on Parent = ID, copies the parent's
-- Code_n/Name_n columns and appends the new row's own Code/Description as
-- the next level. Statement order matters: every step depends on the rows
-- inserted by the previous one.
-- NOTE(review): "e.ID not in (SELECT DISTINCT ID from #H)" matches nothing
-- if #H ever contains a NULL ID; both ID columns are nullable.
/* insert level 1 */
-- Roots are the rows whose Parent is the empty string.
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1)
SELECT
Code,Description,ID,bip_office,Code,Description
FROM
[dbo].[glclassifications]
WHERE Parent = ''
/* insert level 2 */
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2)
SELECT
e.Code,e.Description,e.Id,e.bip_office,Code_1,Name_1,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL ;
/* insert level 3 */
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2,Code_3, Name_3)
SELECT
e.Code,e.Description,e.Id,e.bip_office, Code_1,Name_1,Code_2, Name_2,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL ;
/* insert level 4 */
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2,Code_3, Name_3, Code_4, Name_4)
SELECT
e.Code,e.Description,e.Id,e.bip_office, Code_1,Name_1,Code_2, Name_2,Code_3, Name_3,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL ;
/* insert level 5 */
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2,Code_3, Name_3, Code_4, Name_4, Code_5, Name_5)
SELECT
e.Code,e.Description,e.Id,e.bip_office,Code_1,Name_1,Code_2, Name_2,Code_3, Name_3,Code_4, Name_4,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL ;
/* insert level 6 */
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2,Code_3, Name_3, Code_4, Name_4, Code_5, Name_5,Code_6, Name_6)
SELECT
e.Code,e.Description,e.Id,e.bip_office,Code_1,Name_1,Code_2, Name_2,Code_3, Name_3,Code_4, Name_4,Code_5, Name_5,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL ;
/* insert level 7 */
-- Adds the rows whose parent is already in #H and that are not in #H yet,
-- copying the parent's level 1-6 columns and writing the row's own
-- Code/Description as level 7.
-- The target list pairs Code_7 with Name_7; the original wrote the
-- description into Name_8, leaving Name_7 empty for these rows.
INSERT INTO #H (Code,[Description],ID,Division,Code_1, Name_1, Code_2, Name_2,Code_3, Name_3, Code_4, Name_4, Code_5, Name_5,Code_6, Name_6,Code_7, Name_7)
SELECT
e.Code,e.Description,e.Id,e.bip_office,Code_1,Name_1,Code_2, Name_2,Code_3, Name_3,Code_4, Name_4,Code_5, Name_5,Code_6, Name_6,e.Code,e.Description
FROM
[dbo].[glclassifications] e
JOIN #H h on h.ID = e.Parent
WHERE e.ID not in (SELECT DISTINCT ID from #H ) AND Parent IS NOT NULL;
/* store to table */
-- Rebuilds the permanent result table from scratch with the full
-- contents of the #H work table.
DROP TABLE IF EXISTS [dbo].[gb-hierarchie];
SELECT src.*
INTO [dbo].[gb-hierarchie]
FROM #H AS src
I did this
-- Walks the classification tree top-down: the anchor picks the rows with an
-- empty Parent, the recursive member adds each row whose Parent is the ID
-- of a row already collected.
WITH cte_org AS (
    SELECT root.ID, root.Code, root.Description, root.Parent
    FROM [dbo].[glclassifications] AS root
    WHERE LEN(root.Parent) = 0
    UNION ALL
    SELECT child.ID, child.Code, child.Description, child.Parent
    FROM [dbo].[glclassifications] AS child
    INNER JOIN cte_org AS found
        ON found.ID = child.Parent
)
SELECT ID, Code, Description, Parent
FROM cte_org;
but how to continue?
With thanks to Bill Jetzer: simply joining the tables on itself 8 times was the easiest way to get these results

sql recursion: find tree given middle node

I need to get a tree of related nodes given a certain node, but not necessary top node. I've got a solution using two CTEs, since I am struggling to squeeze it all into one CTE :). Might somebody have a sleek solution to avoid using two CTEs? Here is some code that I was playing with:
-- Sample data: two chains, 1 <- 2 <- 3 <- 4 <- 5 and 6 <- 7 <- 8.
-- Table variables and scalar variables take the @ sigil (a leading #
-- would denote a temp table and is not valid in DECLARE).
DECLARE @temp AS TABLE (ID INT, ParentID INT)
INSERT INTO @temp
SELECT 1 ID, NULL AS ParentID
UNION ALL
SELECT 2, 1
UNION ALL
SELECT 3, 2
UNION ALL
SELECT 4, 3
UNION ALL
SELECT 5, 4
UNION ALL
SELECT 6, NULL
UNION ALL
SELECT 7, 6
UNION ALL
SELECT 8, 7
DECLARE @startNode INT = 4

-- Query 1: the start node and everything below it.
;WITH TheTree (ID,ParentID)
AS (
    SELECT ID, ParentID
    FROM @temp
    WHERE ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree

-- Query 2: climb from the start node to its root (Up), then descend from
-- that root to collect the whole tree the start node belongs to.
;WITH Up(ID,ParentID)
AS (
    SELECT t.id, t.ParentID
    FROM @temp t
    WHERE t.ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN Up c ON t.id = c.ParentID
)
--SELECT * FROM Up
,TheTree (ID,ParentID)
AS (
    -- the only row of Up without a parent is the root of the tree
    SELECT ID, ParentID
    FROM Up
    WHERE ParentID is null
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
thanks
Meh. This avoids using two CTEs, but the result is a brute force kludge that hardly qualifies as "sleek" as it won’t be efficient if your table is at all sizeable. It will:
Recursively build all possible hierarchies
As you build them, flag the target NodeId as you find it
Return only the targeted tree
I threw in column “TreeNumber” on the off-chance the TargetId appears in multiple hierarchies, or if you’d ever have multiple values to check in one pass. “Depth” was added to make the output a bit more legible.
A more complex solution like @John’s might do, and more and subtler tricks could be done with more detailed table structures.
-- Builds every tree from its root, carrying a flag that turns to 1 once the
-- target node has been reached on the path, then returns all rows of the
-- tree(s) in which the flag was set.
-- Expects the @temp table variable (ID, ParentID) to be declared in the
-- same batch. Variables use the @ sigil.
DECLARE @startNode INT = 4
;WITH cteAllTrees (TreeNumber, Depth, ID, ParentID, ContainsTarget)
AS (
    -- anchor: one numbered tree per root row
    SELECT
        row_number() over (order by ID) TreeNumber
        ,1
        ,ID
        ,ParentID
        ,case
            when ID = @startNode then 1
            else 0
        end ContainsTarget
    FROM @temp
    WHERE ParentId is null
    UNION ALL
    -- recursion: children inherit the tree number; the flag sticks once set
    SELECT
        tr.TreeNumber
        ,tr.Depth + 1
        ,t.id
        ,t.ParentID
        ,case
            when tr.ContainsTarget = 1 then 1
            when t.ID = @startNode then 1
            else 0
        end ContainsTarget
    FROM @temp t
    INNER JOIN cteAllTrees tr
        ON t.ParentID = tr.ID
)
SELECT
    TreeNumber
    ,Depth
    ,ID
    ,ParentId
from cteAllTrees
where TreeNumber in (select TreeNumber from cteAllTrees where ContainsTarget = 1)
order by
    TreeNumber
    ,Depth
    ,ID
Here is a technique where you can select the entire hierarchy, a specific node with all its children, and even a filtered list and how they roll.
Note: See the comments next to the DECLAREs
-- Hierarchy listing with range keys: every node gets R1 (its position in
-- the path-ordered listing) and R2 (the largest R1 among the rows whose
-- path starts with its own), so a node's subtree is R1..R2.
-- Variables and the table variable use the @ sigil (a leading # is only
-- valid for temp tables).
Declare @YourTable table (id int,pt int,name varchar(50))
Insert into @YourTable values
(1,null,'1'),(2,1,'2'),(3,1,'3'),(4,2,'4'),(5,2,'5'),(6,3,'6'),(7,null,'7'),(8,7,'8')
Declare @Top int = null --<< Sets top of Hier Try 2
Declare @Nest varchar(25) = '|-----' --<< Optional: Added for readability
Declare @Filter varchar(25) = '' --<< Empty for All or try 4,6
;with cteP as (
    -- anchor: all roots when @Top is null, otherwise just the node @Top
    Select Seq = cast(1000+Row_Number() over (Order by name) as varchar(500))
        ,ID
        ,pt
        ,Lvl=1
        ,name
    From @YourTable
    Where IsNull(@Top,-1) = case when @Top is null then isnull(pt,-1) else ID end
    Union All
    -- recursion: append the child's sibling number to the parent's path
    Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.name)) as varchar(500))
        ,r.ID
        ,r.pt
        ,p.Lvl+1
        ,r.name
    From @YourTable r
    Join cteP p on r.pt = p.ID)
,cteR1 as (Select *,R1=Row_Number() over (Order By Seq) From cteP)
,cteR2 as (Select A.Seq,A.ID,R2=Max(B.R1) From cteR1 A Join cteR1 B on (B.Seq like A.Seq+'%') Group By A.Seq,A.ID )
Select Distinct
    A.R1
    ,B.R2
    ,A.ID
    ,A.pt
    ,A.Lvl
    ,name = Replicate(@Nest,A.Lvl-1) + A.name
From cteR1 A
Join cteR2 B on A.ID=B.ID
-- F keeps every row when @Filter is empty, otherwise the rows whose ID is in
-- the comma-separated list; a node is returned when its R1..R2 range covers
-- one of those rows
Join (Select R1 From cteR1 where IIF(@Filter='',1,0)+CharIndex(concat(',',ID,','),concat(',',@Filter+','))>0) F on F.R1 between A.R1 and B.R2
Order By A.R1

postgres hierarchy - count of child levels and sort by date of children or grandchildren

I would like to know how to write a postgres subquery so that the following table example will output what I need.
id parent_id postdate
1   -1 2015-03-10
2     1 2015-03-11 (child level 1)
3     1 2015-03-12 (child level 1)
4     3 2015-03-13 (child level 2)
5    -1 2015-03-14
6    -1 2015-03-15
7     6 2015-03-16 (child level 1)
If I want to sort all the root ids by child level 1 with a count of children(s) from the parent, the output would be something like this
id count  date
6   2    2015-03-15
1   4    2015-03-10
5   1    2015-03-14
The output is sorted by postdate based on the root's child. The 'date' being outputted is the date of the root's postdate. Even though id#5 has a more recent postdate, the rootid#6's child (id#7) has the most recent postdate because it is being sorted by child's postdate. id#5 doesnt have any children so it just gets placed at the end, sorted by date. The 'count' is the number children(child level 1), grandchildren(child level 2) and itself (root). For instance, id #2,#3,#4 all belong to id#1 so for id#1, the count would be 4.
My current subquery thus far:
-- Asker's attempt: joins each row to its children (c1) and grandchildren
-- (c2), but only for rows whose parent_id is -1, then counts joined rows.
-- Because c1.postdate is part of the GROUP BY, a row comes back per
-- distinct child postdate rather than one row per p1.id.
-- Rows that are not roots are not filtered out; their joins just find
-- nothing, since the root test sits in the ON clauses.
SELECT p1.id,count(p1.id),p1.postdate
FROM mytable p1
LEFT JOIN mytable c1 ON c1.parent_id = p1.id AND p1.parent_id = -1
LEFT JOIN mytable c2 ON c2.parent_id = c1.id AND p1.parent_id = -1
GROUP BY p1.id,c1.postdate,p1.postdate
ORDER by c1.postdate DESC,p1.postdate DESC
-- Fixture for the examples: roots have a NULL parent_id here (the question
-- text uses -1 instead), and parent_id references mytable itself.
CREATE TABLE mytable (
    id serial PRIMARY KEY,
    parent_id int REFERENCES mytable,
    postdate date
);
CREATE INDEX mytable_parent_id_idx ON mytable (parent_id);
-- Parents appear before their children, as in the original row-by-row load.
INSERT INTO mytable (id, parent_id, postdate) VALUES
    (1, NULL, '2015-03-10'),
    (2, 1, '2015-03-11'),
    (3, 1, '2015-03-12'),
    (4, 3, '2015-03-13'),
    (5, NULL, '2015-03-14'),
    (6, NULL, '2015-03-15'),
    (7, 6, '2015-03-16');
-- For every root: the size of its subtree (root included) and the latest
-- postdate among its descendants. The root contributes a NULL date, so a
-- root without descendants sorts last.
with recursive subtree as (
    -- anchor: each root starts its own subtree
    select
        top_row.id as parent,
        top_row.id as root,
        null::date as child_postdate
    from mytable as top_row
    where top_row.parent_id is null
    union all
    -- step: attach the children of the node reached last ("root" column)
    select
        walked.parent,
        child.id,
        child.postdate
    from subtree as walked
    join mytable as child
        on child.parent_id = walked.root
),
per_root as (
    select
        parent,
        count(*) as cnt,
        max(child_postdate) as max_child_date
    from subtree
    group by parent
)
select
    m.id,
    c.cnt,
    m.postdate,
    c.max_child_date
from mytable as m
join per_root as c
    on c.parent = m.id
order by
    c.max_child_date desc nulls last,
    m.postdate desc;
You'll need a recursive query to count the elements in the subtrees:
-- Counts the nodes in each root's subtree (the root itself included).
-- "top_id" stays fixed while "node_id" walks down the tree.
WITH RECURSIVE descent AS (
    SELECT
        r.id AS top_id,
        r.id AS node_id
    FROM the_tree AS r
    WHERE r.parent_id IS NULL
    UNION ALL
    SELECT
        d.top_id,
        kid.id
    FROM descent AS d
    JOIN the_tree AS kid
        ON kid.parent_id = d.node_id
),
sizes AS (
    SELECT
        top_id,
        COUNT(*) AS cnt
    FROM descent
    GROUP BY top_id
)
SELECT
    t.id,
    s.cnt,
    t.postdate
FROM the_tree AS t
JOIN sizes AS s
    ON s.top_id = t.id
ORDER BY t.id
;
UPDATE (it appears the OP also wants the maxdate per tree)
-- The same, but also select the postdate
-- --------------------------------------
-- Each row of opa carries the root id (par), the current node (moi) and
-- that node's own postdate; the outer aggregate then takes the row count
-- and the latest postdate per root. The root's own postdate takes part in
-- the MAX because the anchor row carries it.
-- NOTE(review): the final ORDER BY is ascending on maxdate — confirm that
-- this is the wanted direction.
WITH RECURSIVE opa AS (
SELECT id AS par
, id AS moi
, postdate AS postdate
FROM the_tree
WHERE parent_id IS NULL
UNION ALL
SELECT o.par AS par
, t.id AS moi
-- , GREATEST(o.postdate,t.postdate) AS postdate
, t.postdate AS postdate
FROM opa o
JOIN the_tree t ON t.parent_id = o.moi
)
SELECT t.id
, c.cnt
, t.postdate
, c.maxdate
FROM the_tree t
JOIN ( SELECT par, COUNT(*) AS cnt
, MAX(o.postdate) AS maxdate -- and obtain the max()
FROM opa o
GROUP BY par
) c ON c.par = t.id
ORDER BY c.maxdate, t.id
;
After looking at everyone's code, I created the subquery I needed. I can use PHP to vary the 'case when' code depending on the user's sort selection. For instance, the code below will sort the root nodes based on child level 1's postdate.
-- Asker's final version: walks down from the roots (parent_id = -1) while
-- tracking depth, then aggregates per (parent, depth), so a root appears
-- once for every depth present in its subtree.
-- The CASE in ORDER BY ranks depth 2 as 1, depth 1 as 2 and anything else
-- as 0; with DESC the depth-1 groups come first.
-- NOTE(review): the CTE reads mytable while the outer query reads
-- ssf.dtb_021 — presumably the same table under its real name; confirm.
with recursive cte as (
select id as parent, id as root, null::timestamp as child_postdate,0 as depth
from mytable
where parent_id = -1
union all
select r.parent, mytable.id, mytable.postdate,depth+1
from cte r
join mytable
on parent_id = r.root
)
select m.id, c.cnt, m.postdate
from ssf.dtb_021 m
join ( select parent, count(*) as cnt, max(child_postdate) as max_child_date,depth
from cte
group by parent,depth
) c on c.parent = m.id
order by
case
when depth=2 then 1
when depth=1 then 2
else 0
end DESC,
c.max_child_date desc nulls last, m.postdate desc;
-- Non-recursive variant: for each root (parent_id = -1) returns 1 + the
-- number of its direct children, ordered by the latest direct-child
-- postdate.
-- Limitations visible in the query: grandchildren are not counted (the
-- aggregate groups on parent_id only) and roots without children are
-- dropped by the inner join.
-- The placeholder name "table" is a reserved word and cannot be used as an
-- unquoted table name; mytable (the name used in the question) replaces it.
select
    p.id,
    (1 + c.n) as parent_post_plus_number_of_subposts,
    p.postdate
from
    mytable as p
inner join
    (
        select
            parent_id, count(*) as n, max(postdate) as _postdate
        from mytable
        group by parent_id
    ) as c
    on p.id = c.parent_id
where p.parent_id = -1
order by c._postdate desc

PostgreSQL grouping

I would like to group values according to values in other columns.
This is an example:
I would like to get the output:
{{-30,-50,20},{-20,30,60},{-30,NULL or other value, 20}}
I managed to arrive to:
-- One array of val per m_id for t_id = 1. Missing (m_id, s_id) pairs simply
-- produce a shorter array.
select array_agg(val)
from my_table
where t_id = 1
group by m_id;
{{-30,-50,20},{-20,30,60},{-30,20}}
What would be the best approach?
-- Fixture: values keyed by (t_id, m_id, s_id). The combination
-- (1, 3, 2) is deliberately absent.
CREATE TABLE my_table (
    t_id int,
    m_id int,
    s_id int,
    val int
);
INSERT INTO my_table (t_id, m_id, s_id, val)
VALUES
    (1, 1, 1, -30),
    (1, 1, 2, -50),
    (1, 1, 3, 20),
    (1, 2, 1, -20),
    (1, 2, 2, 30),
    (1, 2, 3, 60),
    (1, 3, 1, -30),
    (1, 3, 3, 20);
-- Builds the full grid of (t_id, m_id) x s_id and outer-joins the real
-- rows onto it, so a missing combination yields a NULL element in the array
-- instead of being skipped.
select array_agg(t.val order by s_id)
from
    (
        select distinct t_id, m_id
        from my_table
    ) as a
    cross join
    (
        select distinct s_id
        from my_table
    ) as b
    left join my_table as t using (t_id, m_id, s_id)
where t_id = 1
group by m_id
order by m_id
;
array_agg
---------------
{-30,-50,20}
{-20,30,60}
{-30,NULL,20}

Using CTE instead of Cursor

I have the following table structure.
I just want to update SubId to all the rows where it is null and where the RawLineNumber is ascending by 1 and also the SeqNumber ascending by 1.
RawlineNumber Claimid SubId SeqNumber
1 6000 A100 1
2 6000 NULL 2
3 6000 NULL 3
10 6000 A200 1
11 6000 NULL 2
25 6000 A300 1
26 6000 NULL 2
27 6000 NULL 3
I want to update
SubId of RawLineNumber 2 and 3 with A100,
SubId of RawLineNumber 11 with A200,
SubId of RawLineNumber 26 and 27 with A300.
I have a cursor which does the job but can I have a CTE to take care of it ?
-- Fills each NULL subid with the subid of the closest preceding row (by
-- rawLineNumber) that has one. Rows with no such predecessor are left
-- untouched because CROSS APPLY drops them.
UPDATE target
SET subid = prev.subid
FROM mytable AS target
CROSS APPLY
(
    SELECT TOP (1) src.subid
    FROM mytable AS src
    WHERE src.subid IS NOT NULL
      AND src.rawLineNumber < target.rawLineNumber
    ORDER BY src.rawLineNumber DESC
) AS prev
WHERE target.subid IS NULL
Since a recursive solution was requested, I decided to write one. It also works for gaps in SeqNumber and RawlineNumber.
-- Recursive fill: start from each row with SeqNumber = 1 and a SubId, then
-- step to the row whose Rawlinenumber and SeqNumber are both one higher,
-- carrying the SubId along.
-- @t is a table variable; the @ sigil is required (a leading # would
-- denote a temp table and is not valid in DECLARE).
declare @t table (RawlineNumber int, Claimid int, SubId varchar(5), SeqNumber int)
insert @t values(1, 6000, 'A100', 1)
insert @t values(2, 6000, NULL, 2)
insert @t values(3, 6000, NULL, 3)
insert @t values(10, 6000, 'A200', 1)
insert @t values(11, 6000, NULL, 2)
insert @t values(25, 6000, 'A300', 1)
insert @t values(26, 6000, NULL, 2)
insert @t values(27, 6000, NULL, 3)
;with cte as
(
    -- anchor: the first row of every group
    select Rawlinenumber, SeqNumber, SubId
    from @t where SubId is not null and SeqNumber = 1
    union all
    -- step: the directly following row, if it still lacks a SubId
    select t.Rawlinenumber, t.SeqNumber, c.SubId
    from cte c
    join
    @t t
    on c.Rawlinenumber + 1 = t.Rawlinenumber
    and c.SeqNumber + 1 = t.SeqNumber
    where t.SubId is null and t.SeqNumber > 1
)
update t
set SubId = c.SubId
from @t t join cte c
on c.Rawlinenumber = t.Rawlinenumber
where t.SeqNumber > 1
select * from @t
A not-so simple SQL script should achieve what you want:
-- Fills every NULL subid with the subid of the last seqnumber = 1 row that
-- precedes the current row.
-- Fixes over the original:
--   * the outer subselect is now closed before the final WHERE;
--   * the column is spelled seqnumber throughout (was seq_number once);
--   * the inner MAX is bounded by the current row (t1), not by t2 — bounded
--     by t2 it always returned t2 itself, so the outer subselect matched
--     every earlier seqnumber = 1 row instead of only the latest one.
update my_table t1 set t1.subid =
    (select t2.subid from my_table t2
     where t2.rawlinenumber < t1.rawlinenumber
       and t2.seqnumber = 1
       and t2.rawlinenumber = (
           -- last group start at or before the current line
           select max(t3.rawlinenumber)
           from my_table t3
           where t3.seqnumber = 1
             and t3.rawlinenumber <= t1.rawlinenumber))
where t1.subid is null;
The inner subselect (T3) gives us the last row having seqnumber = 1 before the current line,
the outer subselect gives us the SubID for this row (using windowing functions would be more efficient, but since you didn't mention a specific RDBMS, I stick with this :-) )