postgres hierarchy - count of child levels and sort by date of children or grandchildren - postgresql

I would like to know how to write a postgres subquery so that the following table example will output what I need.
id parent_id postdate
1   -1 2015-03-10
2     1 2015-03-11 (child level 1)
3     1 2015-03-12 (child level 1)
4     3 2015-03-13 (child level 2)
5    -1 2015-03-14
6    -1 2015-03-15
7     6 2015-03-16 (child level 1)
If I want to sort all the root ids by the postdate of their level-1 children, together with a count of each root's children, the output would be something like this:
id count  date
6   2    2015-03-15
1   4    2015-03-10
5   1    2015-03-14
The output is sorted by postdate based on the root's children. The 'date' being output is the root's own postdate. Even though id#5 has a more recent postdate than id#1, it is root id#6's child (id#7) that has the most recent postdate, and the sort is done on the children's postdate. id#5 doesn't have any children, so it just gets placed at the end, sorted by date. The 'count' is the number of children (child level 1), grandchildren (child level 2) and the root itself. For instance, ids #2, #3 and #4 all belong to id#1, so for id#1 the count would be 4.
My current subquery thus far:
-- Asker's attempt: self-join every row to its children (c1) and grandchildren (c2).
-- The parent_id = -1 test lives in the ON clauses, so non-root rows are still
-- returned, just without any joined children.
SELECT
    p1.id,
    COUNT(p1.id),
    p1.postdate
FROM mytable AS p1
LEFT JOIN mytable AS c1
    ON p1.parent_id = -1
   AND c1.parent_id = p1.id
LEFT JOIN mytable AS c2
    ON p1.parent_id = -1
   AND c2.parent_id = c1.id
-- grouping on c1.postdate produces one output row per distinct child date
GROUP BY
    p1.id,
    c1.postdate,
    p1.postdate
ORDER BY
    c1.postdate DESC,
    p1.postdate DESC

-- Sample schema: a root has parent_id NULL; every other row points at its parent.
CREATE TABLE mytable (
    id        serial PRIMARY KEY,
    parent_id int REFERENCES mytable,
    postdate  date
);

-- Supports the parent -> children lookups done by the recursive query.
CREATE INDEX mytable_parent_id_idx ON mytable (parent_id);

-- The seven rows from the question, with NULL instead of -1 for the roots.
INSERT INTO mytable (id, parent_id, postdate)
VALUES
    (1, NULL, '2015-03-10'),
    (2, 1,    '2015-03-11'),
    (3, 1,    '2015-03-12'),
    (4, 3,    '2015-03-13'),
    (5, NULL, '2015-03-14'),
    (6, NULL, '2015-03-15'),
    (7, 6,    '2015-03-16');
-- Walk each tree downwards from its root, tagging every visited node with the
-- root it belongs to.
WITH RECURSIVE subtree AS (
    -- anchor: a root is a member of its own tree but contributes no child date
    SELECT
        id         AS root_id,
        id         AS node_id,
        NULL::date AS child_postdate
    FROM mytable
    WHERE parent_id IS NULL

    UNION ALL

    -- step: attach the children of the node reached in the previous iteration
    SELECT
        s.root_id,
        child.id,
        child.postdate
    FROM subtree AS s
    INNER JOIN mytable AS child
        ON child.parent_id = s.node_id
),
-- One row per root: tree size (root included) and latest descendant postdate.
per_root AS (
    SELECT
        root_id,
        COUNT(*)            AS cnt,
        MAX(child_postdate) AS max_child_date
    FROM subtree
    GROUP BY root_id
)
SELECT
    m.id,
    pr.cnt,
    m.postdate,
    pr.max_child_date
FROM mytable AS m
INNER JOIN per_root AS pr
    ON pr.root_id = m.id
-- roots without descendants have a NULL max_child_date and therefore sort last
ORDER BY
    pr.max_child_date DESC NULLS LAST,
    m.postdate DESC;

You'll need a recursive query to count the elements in the subtrees:
-- Pair every node with the root of the tree it lives in, then count per root.
WITH RECURSIVE tree_member AS (
    -- anchor: each root is the first member of its own tree
    SELECT
        id AS root_id,
        id AS node_id
    FROM the_tree
    WHERE parent_id IS NULL

    UNION ALL

    -- step: descend one level, carrying the root id along
    SELECT
        tm.root_id,
        child.id
    FROM tree_member AS tm
    INNER JOIN the_tree AS child
        ON child.parent_id = tm.node_id
),
tree_size AS (
    SELECT
        root_id,
        COUNT(*) AS cnt   -- the root itself is included
    FROM tree_member
    GROUP BY root_id
)
SELECT
    t.id,
    ts.cnt,
    t.postdate
FROM the_tree AS t
INNER JOIN tree_size AS ts
    ON ts.root_id = t.id
ORDER BY t.id
;
UPDATE (it appears the OP also wants the maxdate per tree)
-- The same, but also return the latest postdate found in each tree and sort
-- the roots the way the question asks for: by the most recent postdate among
-- a root's descendants, newest first, with childless roots at the end.
-- -------------------------------------------------------------------------
WITH RECURSIVE opa AS (
    -- anchor: every root starts its own tree (par = root id, moi = current node)
    SELECT id       AS par
         , id       AS moi
         , postdate AS postdate
    FROM the_tree
    WHERE parent_id IS NULL
    UNION ALL
    -- step: add the children of the current node, keeping the root id in par
    SELECT o.par      AS par
         , t.id       AS moi
         , t.postdate AS postdate
    FROM opa o
    JOIN the_tree t ON t.parent_id = o.moi
)
SELECT t.id
     , c.cnt
     , t.postdate
     , c.maxdate
FROM the_tree t
JOIN ( SELECT par
            , COUNT(*)        AS cnt       -- tree size, root included
            , MAX(o.postdate) AS maxdate   -- latest postdate in the whole tree
              -- latest postdate among descendants only; NULL when the root has none
            , MAX(CASE WHEN o.moi <> o.par THEN o.postdate END) AS max_child_date
       FROM opa o
       GROUP BY par
     ) c ON c.par = t.id
ORDER BY c.max_child_date DESC NULLS LAST
       , t.postdate DESC
;

After looking at everyone's code, I created the subquery I needed. I can use PHP to vary the 'case when' code depending on the user's sort selection. For instance, the code below will sort the root nodes based on child level 1's postdate.
-- Walk each tree from its root (parent_id = -1), recording for every node the
-- root it belongs to and how many levels below that root it sits.
WITH RECURSIVE cte AS (
    -- anchor: the root itself, depth 0, contributing no child date
    SELECT
        id              AS parent,
        id              AS root,
        NULL::timestamp AS child_postdate,
        0               AS depth
    FROM mytable
    WHERE parent_id = -1

    UNION ALL

    -- step: children of the node reached in the previous iteration
    SELECT
        r.parent,
        mytable.id,
        mytable.postdate,
        r.depth + 1
    FROM cte AS r
    INNER JOIN mytable
        ON mytable.parent_id = r.root
)
SELECT
    m.id,
    c.cnt,
    m.postdate
-- read the same table the CTE walks (the original mixed mytable and ssf.dtb_021)
FROM mytable AS m
INNER JOIN (
    -- one row per (root, depth): node count and latest postdate at that level
    SELECT
        parent,
        COUNT(*)            AS cnt,
        MAX(child_postdate) AS max_child_date,
        depth
    FROM cte
    GROUP BY parent, depth
) AS c
    ON c.parent = m.id
ORDER BY
    -- sort priority per level; the application swaps these WHEN arms to change
    -- which child level drives the ordering
    CASE
        WHEN c.depth = 2 THEN 1
        WHEN c.depth = 1 THEN 2
        ELSE 0
    END DESC,
    c.max_child_date DESC NULLS LAST,
    m.postdate DESC;

-- Root posts with "1 + number of direct children", newest child activity first.
-- NOTE: this counts level-1 children only; grandchildren need a recursive query.
SELECT
    p.id,
    1 + COALESCE(c.n, 0) AS parent_post_plus_number_of_subposts,
    p.postdate
-- "table" is a reserved word and cannot be used as a bare table name, so the
-- question's table name is used instead
FROM mytable AS p
-- LEFT JOIN keeps roots that have no children (they count as 1)
LEFT JOIN (
    SELECT
        parent_id,
        COUNT(*)      AS n,
        MAX(postdate) AS _postdate
    FROM mytable
    GROUP BY parent_id
) AS c
    ON p.id = c.parent_id
WHERE p.parent_id = -1
-- childless roots have no _postdate: list them last, newest root first
ORDER BY
    c._postdate DESC NULLS LAST,
    p.postdate DESC

Related

recursive query to replicate/imitate dense_rank

-- Sample data for the ranking question. The salaries 90000 and 87000 each
-- appear twice, which is what separates rank / dense_rank / row_number.
BEGIN;

CREATE TEMP TABLE teacher (
    name   text,
    salary numeric
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO teacher (name, salary)
VALUES
    ('b1', 90000),
    ('f1', 87000),
    ('a',  65000),
    ('b',  90000),
    ('c',  40000),
    ('d',  95000),
    ('e',  60000),
    ('f',  87000);

COMMIT;
query
-- Emulate a ranking with recursion: start from the highest salary, then keep
-- fetching the single next-lower salary row and bump the counter by one.
WITH RECURSIVE cte AS (
    (
        -- anchor: one row carrying the highest salary
        SELECT name, salary, 1 AS rn
        FROM teacher
        ORDER BY salary DESC
        LIMIT 1
    )

    UNION ALL

    SELECT nxt.*
    FROM cte AS prev
    CROSS JOIN LATERAL (
        -- strictly lower than the previous salary, so tied rows are skipped
        SELECT t.name, t.salary, prev.rn + 1
        FROM teacher AS t
        WHERE t.salary < prev.salary
        ORDER BY t.salary DESC
        LIMIT 1
    ) AS nxt
)
SELECT name, salary, rn
FROM cte
ORDER BY salary DESC;
If all salaries are distinct, the query above can imitate rank/row_number.
I am wondering how to use a recursive query to replicate/imitate dense_rank.
related post: https://dba.stackexchange.com/questions/286627/get-top-two-rows-per-group-efficiently

Recursive CTE and multiple inserts in joined table

I'm trying to copy the nodes of a hierarchical tree and to apply the changes to a joined table. I found parts of the answer in other questions, such as "Postgresql copy data within the tree table" for the tree copy (in my case I only copy the children and not the root) and "PostgreSQL - Insert data into multiple tables simultaneously" for inserting data into several tables at once, but I can't manage to combine them.
I would like to:
Generate the new nodes id from the fields table
Insert the new field ids in the data_versions table
Insert the new nodes in the fields table with the data_id from the data_versions table
Note: there is a circular reference between the fields and the data_versions tables.
See below the schema:
Here is a working query, but without the insert in the data_versions table. It is only a shallow copy (keeping the same data_id) while I would like a deep copy:
-- Shallow copy: clone every descendant of :source_field_id under
-- :target_field_id, re-using the original data_id values.
WITH RECURSIVE src_tree AS (
    -- anchor: direct children of the source field, each given a fresh id
    SELECT fields.*, nextval('fields_id_seq') AS new_id
    FROM fields
    WHERE fields.parent_id = :source_field_id

    UNION ALL

    -- step: children of the rows collected so far
    SELECT fields.*, nextval('fields_id_seq') AS new_id
    FROM src_tree
    INNER JOIN fields
        ON src_tree.id = fields.parent_id
),
created_data AS (
    -- map each copied row's old parent to that parent's new id; top-level
    -- copies find no parent inside the set and get NULL
    SELECT
        node.new_id,
        node.name,
        node.field_type,
        node.data_id,
        parent.new_id AS new_parent_id
    FROM src_tree AS node
    LEFT JOIN src_tree AS parent
        ON node.parent_id = parent.id
)
INSERT INTO fields (id, name, parent_id, field_type, data_id)
SELECT
    new_id,
    name,
    COALESCE(new_parent_id, :target_field_id),  -- top-level copies hang off the target
    field_type,
    data_id
FROM created_data
RETURNING id, name, parent_id, field_type, data_id;
And here is the draft query I'm working on for inserting data into the data_versions table. It fails with the error "WITH clause containing a data-modifying statement must be at the top level":
-- Draft of the deep copy. Kept as posted: this is the statement that raises
-- "WITH clause containing a data-modifying statement must be at the top level".
WITH created_data AS (
-- cloned_fields: the rows to copy, each paired with a freshly generated id
WITH cloned_fields AS (
WITH RECURSIVE cte AS (
SELECT *, nextval('fields_id_seq') new_id FROM fields WHERE parent_id = :source_field_id
UNION ALL
SELECT fields.*, nextval('fields_id_seq') new_id FROM cte JOIN fields ON cte.id = fields.parent_id
)
SELECT C1.new_id, C1.name, C1.field_type, C1.data_id, C2.new_id new_parent_id
FROM cte C1 LEFT JOIN cte C2 ON C1.parent_id = C2.id
),
-- cloned_data is a data-modifying CTE (INSERT ... RETURNING) nested inside
-- created_data rather than attached to the outermost statement
cloned_data AS (
INSERT INTO data_versions (value, author, field_id)
SELECT d.value, d.author, c.new_id
FROM cloned_fields c
INNER JOIN data_versions d ON c.data_id = d.id
-- only one column, data_id, is exposed to the rest of the statement
RETURNING id data_id
)
SELECT cloned_fields.new_id, cloned_fields.name, cloned_fields.field_type, cloned_fields.new_parent_id, cloned_data.data_id
FROM cloned_fields
-- NOTE(review): this references cloned_data.id, which is not in the RETURNING list above
INNER JOIN cloned_data ON cloned_fields.data_id = cloned_data.id
)
INSERT INTO fields (id, name, parent_id, field_type, data_id)
SELECT new_id, name, COALESCE(new_parent_id, :target_field_id), field_type, data_id FROM created_data
-- NOTE(review): "value" is not among the inserted columns; presumably not a column of fields -- confirm
RETURNING id, name, parent_id, field_type, data_id, value data;
In case other people run into the same issue: I came up with this solution some months later. The trick was to move the data-modifying CTEs to the top level, as suggested by the error message. A CTE can always reference the CTEs declared before it:
-- Deep copy: clone every descendant of :source_field_id under :target_field_id
-- and give each clone its own copy of the referenced data_versions row.
-- Both INSERTs are CTEs attached directly to the outermost statement.
WITH RECURSIVE src_tree AS (
    -- anchor: direct children of the source field, each given a fresh id
    SELECT fields.*, nextval('fields_id_seq') AS new_id
    FROM fields
    WHERE fields.parent_id = :source_field_id

    UNION ALL

    -- step: children of the rows collected so far
    SELECT fields.*, nextval('fields_id_seq') AS new_id
    FROM src_tree
    INNER JOIN fields
        ON src_tree.id = fields.parent_id
),
-- Old row -> new id, plus the new id of its parent (NULL for top-level copies).
new_fields_ids AS (
    SELECT
        node.new_id,
        node.name,
        node.field_type,
        node.data_id,
        parent.new_id AS new_parent_id
    FROM src_tree AS node
    LEFT JOIN src_tree AS parent
        ON node.parent_id = parent.id
),
-- Copy each referenced data_versions row, pointing it at the new field id.
cloned_data AS (
    INSERT INTO data_versions (value, author, field_id)
    SELECT
        src.value,
        src.author,
        nf.new_id
    FROM new_fields_ids AS nf
    INNER JOIN data_versions AS src
        ON nf.data_id = src.id
    RETURNING id AS data_id, field_id, value
),
-- Pair every new field id with the data_versions row just created for it.
created_data AS (
    SELECT
        nf.new_id,
        nf.name,
        nf.field_type,
        nf.new_parent_id,
        cd.data_id
    FROM new_fields_ids AS nf
    INNER JOIN cloned_data AS cd
        ON nf.new_id = cd.field_id
),
-- Insert the cloned fields, now referencing their own data rows.
cloned_fields AS (
    INSERT INTO fields (id, name, parent_id, field_type, data_id)
    SELECT
        new_id,
        name,
        COALESCE(new_parent_id, :target_field_id),  -- top-level copies hang off the target
        field_type,
        data_id
    FROM created_data
    RETURNING id, name, parent_id, field_type, data_id
)
SELECT
    cf.id,
    cf.name,
    cf.parent_id,
    cf.field_type,
    cf.data_id,
    cd.value AS data
FROM cloned_fields AS cf
INNER JOIN cloned_data AS cd
    ON cf.id = cd.field_id;

sql recursion: find tree given middle node

I need to get the tree of related nodes given a certain node, which is not necessarily the top node. I've got a solution using two CTEs, since I am struggling to squeeze it all into one CTE :). Does anybody have a sleek solution that avoids using two CTEs? Here is some code that I was playing with:
-- Sample hierarchy in a table variable: two chains, 1>2>3>4>5 and 6>7>8.
-- (Variables use the @ sigil; the posted text had it garbled to #.)
DECLARE @temp AS TABLE (ID INT, ParentID INT)

INSERT INTO @temp (ID, ParentID)
SELECT 1 ID, NULL AS ParentID
UNION ALL
SELECT 2, 1
UNION ALL
SELECT 3, 2
UNION ALL
SELECT 4, 3
UNION ALL
SELECT 5, 4
UNION ALL
SELECT 6, NULL
UNION ALL
SELECT 7, 6
UNION ALL
SELECT 8, 7

-- The node whose whole tree should be returned.
DECLARE @startNode INT = 4

-- First try: starts at @startNode and walks down only, so it returns the
-- start node and its descendants but none of its ancestors.
;WITH TheTree (ID, ParentID)
AS (
    SELECT ID, ParentID
    FROM @temp
    WHERE ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree

-- Two-CTE solution: climb from @startNode to its root, then walk the whole
-- tree down from that root.
;WITH Up (ID, ParentID)
AS (
    -- start node, then each successive parent
    SELECT t.id, t.ParentID
    FROM @temp t
    WHERE t.ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN Up c ON t.id = c.ParentID
)
,TheTree (ID, ParentID)
AS (
    -- the row of Up without a parent is the root of the start node's tree
    SELECT ID, ParentID
    FROM Up
    WHERE ParentID IS NULL
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
thanks
Meh. This avoids using two CTEs, but the result is a brute force kludge that hardly qualifies as "sleek" as it won’t be efficient if your table is at all sizeable. It will:
Recursively build all possible hierarchies
As you build them, flag the target NodeId as you find it
Return only the targeted tree
I threw in column “TreeNumber” on the off-chance the TargetId appears in multiple hierarchies, or if you’d ever have multiple values to check in one pass. “Depth” was added to make the output a bit more legible.
A more complex solution like @John's might do, and more and subtler tricks could be done with more detailed table structures.
-- Single-CTE brute force: build every tree from its root, flag the tree that
-- contains @startNode, then return only that tree.
-- Expects the table variable @temp (ID, ParentID) from the question to be
-- declared in the same batch.
DECLARE @startNode INT = 4

;WITH cteAllTrees (TreeNumber, Depth, ID, ParentID, ContainsTarget)
AS (
    -- anchor: every root starts a numbered tree at depth 1
    SELECT
        row_number() over (order by ID) TreeNumber
        ,1
        ,ID
        ,ParentID
        ,case
            when ID = @startNode then 1
            else 0
         end ContainsTarget
    FROM @temp
    WHERE ParentId is null
    UNION ALL
    -- step: add children; the flag is set on the target row and carried down
    -- to everything below it
    SELECT
        tr.TreeNumber
        ,tr.Depth + 1
        ,t.id
        ,t.ParentID
        ,case
            when tr.ContainsTarget = 1 then 1
            when t.ID = @startNode then 1
            else 0
         end ContainsTarget
    FROM @temp t
    INNER JOIN cteAllTrees tr
        ON t.ParentID = tr.ID
)
SELECT
    TreeNumber
    ,Depth
    ,ID
    ,ParentId
from cteAllTrees
-- keep every row of any tree in which at least one row carries the flag
where TreeNumber in (select TreeNumber from cteAllTrees where ContainsTarget = 1)
order by
    TreeNumber
    ,Depth
    ,ID
Here is a technique where you can select the entire hierarchy, a specific node with all its children, and even a filtered list and how they roll.
Note: See the comments next to the DECLAREs
-- Sample hierarchy in a table variable: id, parent id (pt) and a display name.
-- (Variables use the @ sigil; the posted text had it garbled to #.)
Declare @YourTable table (id int,pt int,name varchar(50))
Insert into @YourTable (id,pt,name) values
(1,null,'1'),(2,1,'2'),(3,1,'3'),(4,2,'4'),(5,2,'5'),(6,3,'6'),(7,null,'7'),(8,7,'8')

Declare @Top    int         = null      --<< Sets top of Hier Try 2
Declare @Nest   varchar(25) = '|-----'  --<< Optional: Added for readability
Declare @Filter varchar(25) = ''        --<< Empty for All or try 4,6

;with cteP as (
      -- anchor: the rows with no parent when @Top is null, otherwise the row
      -- whose ID equals @Top. Seq is a sortable path key (1001, 1002, ...).
      Select Seq  = cast(1000+Row_Number() over (Order by name) as varchar(500))
            ,ID
            ,pt
            ,Lvl=1
            ,name
      From   @YourTable
      Where  IsNull(@Top,-1) = case when @Top is null then isnull(pt,-1) else ID end
      Union  All
      -- step: children extend the parent's Seq with their own ordinal, so a
      -- node's Seq is always prefixed by the Seq of each of its ancestors
      Select Seq  = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.name)) as varchar(500))
            ,r.ID
            ,r.pt
            ,p.Lvl+1
            ,r.name
      From   @YourTable r
      Join   cteP p on r.pt = p.ID)
     -- R1: position of each node when the rows are sorted by Seq
     ,cteR1 as (Select *,R1=Row_Number() over (Order By Seq) From cteP)
     -- R2: highest R1 among the rows whose Seq starts with this node's Seq
     ,cteR2 as (Select A.Seq,A.ID,R2=Max(B.R1) From cteR1 A Join cteR1 B on (B.Seq like A.Seq+'%') Group By A.Seq,A.ID )
Select Distinct
       A.R1
      ,B.R2
      ,A.ID
      ,A.pt
      ,A.Lvl
      ,name = Replicate(@Nest,A.Lvl-1) + A.name
 From cteR1 A
 Join cteR2 B on A.ID=B.ID
 -- F: R1 of every row when @Filter is empty, otherwise only of the IDs in the
 -- comma-separated list; a node is kept when some F.R1 lies in its R1..R2 range
 Join (Select R1 From cteR1 where IIF(@Filter='',1,0)+CharIndex(concat(',',ID,','),concat(',',@Filter+','))>0) F on F.R1 between A.R1 and B.R2
 Order By A.R1

SQL Server : group by with corresponding row values

I need to write a T-SQL group by query for a table with multiple dates and seq columns:
-- Recreate the sample temp table. The existence check keeps the script
-- re-runnable: an unconditional DROP fails when #temp does not exist yet.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp

CREATE TABLE #temp(
    id  char(1),
    dt  DateTime,
    seq int)

-- One row per statement (the question targets SQL Server 2005), each with an
-- explicit column list. Id A repeats one dt with two seq values; id B has the
-- same dt on both rows.
INSERT INTO #temp (id, dt, seq) VALUES ('A','2015-03-31 10:00:00',1)
INSERT INTO #temp (id, dt, seq) VALUES ('A','2015-08-31 10:00:00',2)
INSERT INTO #temp (id, dt, seq) VALUES ('A','2015-03-31 10:00:00',5)
INSERT INTO #temp (id, dt, seq) VALUES ('B','2015-09-01 10:00:00',1)
INSERT INTO #temp (id, dt, seq) VALUES ('B','2015-09-01 10:00:00',2)
I want the results to contain only the items A and B, each with its latest date and the corresponding seq number, like:
id MaxDate CorrespondentSeq
A 2015-08-31 10:00:00.000 2
B 2015-09-01 10:00:00.000 2
I am trying with (the obviously wrong!):
-- Asker's attempt: the two MAX() calls are evaluated independently of each
-- other, so the seq returned need not come from the row holding the latest dt.
SELECT
    id,
    MAX(dt)  AS MaxDate,
    MAX(seq) AS CorrespondentSeq
FROM #temp
GROUP BY id
which returns:
id MaxDate CorrespondentSeq
A 2015-08-31 10:00:00.000 5 <-- 5 is wrong
B 2015-09-01 10:00:00.000 2
How can I achieve that?
EDIT
The dt datetime column has duplicated values (exactly same date!)
I am using SQL Server 2005
You can use a ranking subselect to get only the highest ranked entries for an id:
-- Rank each id's rows by latest dt, then highest seq, and keep the top rank.
SELECT
    ranked.id,
    ranked.dt,
    ranked.seq
FROM (
    SELECT
        id,
        dt,
        seq,
        RANK() OVER (
            PARTITION BY id
            ORDER BY dt DESC, seq DESC
        ) AS r
    FROM #temp
) AS ranked
WHERE ranked.r = 1;
-- Number each id's rows from latest dt / highest seq downwards and keep row 1.
SELECT
    numbered.ID,
    numbered.DT,
    numbered.SEQ
FROM (
    SELECT
        ID,
        DT,
        SEQ,
        ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY dt DESC, seq DESC
        ) AS rn
    FROM temp
) AS numbered
WHERE numbered.rn = 1;
Demo : http://www.sqlfiddle.com/#!3/3e3d5/5
With trial and errors maybe I have found a solution, but I'm not completely sure this is correct:
-- Find each id's latest dt, join back to the rows carrying that dt, and take
-- the highest seq among them.
SELECT
    latest.id,
    rows_at_max.dt,
    MAX(rows_at_max.seq)
FROM (
    SELECT
        id,
        MAX(dt) AS maxDt
    FROM #temp
    GROUP BY id
) AS latest
INNER JOIN #temp AS rows_at_max
    ON  latest.id    = rows_at_max.id
    AND latest.maxDt = rows_at_max.dt
GROUP BY
    latest.id,
    rows_at_max.dt
-- Keep the rows whose dt equals the latest dt recorded for the same id.
SELECT
    t.id,
    t.dt,
    t.seq
FROM #temp AS t
WHERE t.dt = (
    SELECT MAX(x.dt)
    FROM #temp AS x
    WHERE x.id = t.id
)
If there are duplicate rows, then you also need to specify what the query processor should use to determine which of the duplicates to return. Say you want the lowest value of seq,
Then you could write:
-- Latest dt per id; among rows sharing that dt, keep the one with the lowest seq.
SELECT
    t.id,
    t.dt,
    t.seq
FROM #temp AS t
WHERE t.dt = (
          SELECT MAX(x.dt)
          FROM #temp AS x
          WHERE x.id = t.id
      )
  AND t.seq = (
          SELECT MIN(y.seq)
          FROM #temp AS y
          WHERE y.id = t.id
            AND y.dt = t.dt
      )

getting distinct rows based on two column values

I am trying to get distinct rows from a temporary table and output them to an aspx page. I am trying to use the value of one column and get the last entry made into that column.
I have been trying to use an inner join and max(). However, I have been unsuccessful.
Here is the code I have been trying to do it with.
-- Asker's script, with the extraction damage repaired: table variables use
-- the @ sigil, and the split tokens are rejoined. The logic is unchanged.

-- Working set: one row per project / contact / diary-entry combination.
Declare @TempTable table (
    viewIcon     nvarchar(10),
    tenderType   nvarchar(20),
    diaryIcon    int,
    customerName nvarchar(100),
    projectName  nvarchar(100),
    diaryEntry   nvarchar(max),
    diaryDate    nvarchar(20),
    pid          nvarchar(20)
)

insert into @TempTable(
    viewIcon,
    tenderType,
    diaryIcon,
    customerName,
    projectName,
    diaryEntry ,
    diaryDate ,
    pid
)
select p.viewicon,
       p.[Tender Type],
       1 diaryicon,
       c.[Customer Name],
       co.[Last Project],
       d.Action,
       co.[Diary Date],
       p.PID
From Projects2 p Inner Join
     -- latest [date of next call] per project; maxdate itself is not used further down
     (select distinct Pno, max(convert(date,[date of next call],103)) maxdate from ProjectDiary group by Pno
     ) td on p.PID = td.Pno
Inner Join contacts3 co on co.[Customer Number] = p.[Customer Number]
Inner Join Customers3 c on p.[Customer Number] = c.[Customer Number]
Inner Join ProjectDiary d on td.Pno = d.Pno
-- contacts whose diary date lies between 120 and 60 days before now
Where CONVERT(Date, co.[Diary Date], 103) BETWEEN GETDATE()-120 AND GETDATE()-60

-- Latest diary date per project.
DECLARE @contactsTable TABLE
    (pid nvarchar(200),
     diaryDate date)
insert into @contactsTable (pid, diaryDate)
select distinct pid as pid, MAX(CONVERT(DATE, diaryDate, 103)) as diaryDate from @TempTable t group by pid

-- Project ids present in the working set.
DECLARE @tempContacts TABLE
    (pid nvarchar(200))
insert into @tempContacts(pid)
select pid from @contactsTable

-- Single latest diary date across the whole working set.
DECLARE @tempDiaryDate TABLE (diaryDate date)
insert into @tempDiaryDate(diaryDate)
select distinct MAX(CONVERT(DATE, diaryDate, 103)) from @TempTable

-- Final result. NOTE(review): tt is joined on customerName only, so its
-- per-customer max date is used just for ordering and does not restrict which
-- rows of a customer are returned.
select t.* from @TempTable t inner join (select distinct customerName, MAX(CONVERT(DATE, diaryDate, 103)) AS diaryDate from @TempTable group by customerName) tt on tt.customerName=t.customerName
where t.pid not in
      (select Pno from ProjectDiary where convert(date,[Date Of Next Call],103) > GETDATE())
and t.viewIcon <> '098'
and t.viewIcon <> '163'
and t.viewIcon <> '119'
and t.pid in (select distinct pid from @tempContacts)
and CONVERT(DATE, t.diaryDate, 103) in (select distinct CONVERT(DATE, diaryDate, 103) from @tempDiaryDate)
order by CONVERT(DATE, tt.diaryDate, 103)
I am trying to get all the distinct customerName's using the max date to determine which record it uses.
Use a subquery. Without going through your entire sql statement, the general idea is:
-- Template: keep each customer's row(s) carrying that customer's latest date.
-- [Stuff] and "table" are placeholders for the real column list and table name.
SELECT [Stuff]
FROM table t
WHERE date = (
    SELECT MAX(Date)
    FROM table
    WHERE customer = t.customer
)