Best way to repeat list of values for `IN` clauses - postgresql

I need to use the same list of values in several IN clauses and I tried doing that with a WITH statement, but can't get it to work correctly.
Here's an example query:
-- Parent rows referenced by child 119896 (first/second/third_id), plus the
-- parent rows named in the was_* columns of those referenced rows.
-- The three-way UNION ALL over child is intentionally written out four times:
-- that repetition is exactly what the question asks how to avoid.
SELECT *
FROM parent
WHERE id IN (
        SELECT first_id FROM child WHERE id = 119896
        UNION ALL
        SELECT second_id FROM child WHERE id = 119896
        UNION ALL
        SELECT third_id FROM child WHERE id = 119896
    )
    OR id IN (
        SELECT was_first_id
        FROM parent
        WHERE id IN (
            SELECT first_id FROM child WHERE id = 119896
            UNION ALL
            SELECT second_id FROM child WHERE id = 119896
            UNION ALL
            SELECT third_id FROM child WHERE id = 119896
        )
    )
    OR id IN (
        SELECT was_second_id
        FROM parent
        WHERE id IN (
            SELECT first_id FROM child WHERE id = 119896
            UNION ALL
            SELECT second_id FROM child WHERE id = 119896
            UNION ALL
            SELECT third_id FROM child WHERE id = 119896
        )
    )
    OR id IN (
        SELECT was_third_id
        FROM parent
        WHERE id IN (
            SELECT first_id FROM child WHERE id = 119896
            UNION ALL
            SELECT second_id FROM child WHERE id = 119896
            UNION ALL
            SELECT third_id FROM child WHERE id = 119896
        )
    );
I was hoping to make it so that the 3 queries that are combined in the UNION ALL could be defined in a WITH and then re-used to simplify the query, and it would be nice if it improved performance as well.
Is there a good way to do this?

I would suggest
-- Parent rows referenced by child 119896, plus the parent rows named in the
-- was_* columns of those referenced rows.
--
-- Every column is qualified on purpose: an unqualified "id" inside a subquery
-- binds to the subquery's own table (child.id, or the inner parent.id), not to
-- the outer parent row, so the membership test would never look at the row
-- being filtered.
-- EXISTS replaces the scalar boolean subquery so that several matching inner
-- rows cannot raise "more than one row returned by a subquery used as an
-- expression".
SELECT p.*
FROM parent AS p
WHERE EXISTS (
        -- p is one of the three ids stored on the child row
        SELECT 1
        FROM child AS c
        WHERE c.id = 119896
          AND p.id IN (c.first_id, c.second_id, c.third_id)
    )
    OR EXISTS (
        -- p is named in a was_* column of a parent row that the child references
        SELECT 1
        FROM parent AS referenced
        INNER JOIN child AS c
            ON referenced.id IN (c.first_id, c.second_id, c.third_id)
        WHERE c.id = 119896
          AND p.id IN (
              referenced.was_first_id,
              referenced.was_second_id,
              referenced.was_third_id
          )
    );
(I don't know about performance - as per comments, it's really bad)
An alternative, using the CTEs you suggested, would be
-- child_ids: the three ids stored on child 119896, one per row.
-- all_ids:   those ids plus the was_* ids of the parent rows they point to;
--            UNION (not UNION ALL) removes duplicates, so the final join
--            cannot multiply parent rows.
WITH child_ids AS (
    SELECT UNNEST(ARRAY[first_id, second_id, third_id]) AS id
    FROM child
    WHERE id = 119896
),
all_ids AS (
    SELECT UNNEST(ARRAY[was_first_id, was_second_id, was_third_id]) AS id
    FROM parent
    INNER JOIN child_ids USING (id)  -- same as: WHERE id IN (SELECT id FROM child_ids)
    UNION
    SELECT id FROM child_ids
)
SELECT *
FROM parent
INNER JOIN all_ids USING (id)  -- same as: WHERE id IN (SELECT id FROM all_ids)

Based on the second solution by @Bergi, this works as well:
-- in_child:  the three ids stored on child 119896.
-- in_parent: those ids plus the was_* ids of the parent rows they point to.
WITH in_child (id) AS (
    SELECT first_id FROM child WHERE id = 119896
    UNION ALL
    SELECT second_id FROM child WHERE id = 119896
    UNION ALL
    SELECT third_id FROM child WHERE id = 119896
),
in_parent (id) AS (
    SELECT id FROM in_child
    UNION ALL
    SELECT was_first_id FROM parent WHERE id IN (SELECT id FROM in_child)
    UNION ALL
    SELECT was_second_id FROM parent WHERE id IN (SELECT id FROM in_child)
    UNION ALL
    SELECT was_third_id FROM parent WHERE id IN (SELECT id FROM in_child)
)
SELECT *
FROM parent
WHERE id IN (SELECT id FROM in_parent);

Related

Recursive CTE and multiple inserts in joined table

I'm trying to copy the nodes of a hierarchical tree and apply the changes to a joined table. I found parts of the answer in other questions, such as "Postgresql copy data within the tree table" for the tree copy (in my case I only copy the children and not the root) and "PostgreSQL - Insert data into multiple tables simultaneously" for inserting data into several tables at once, but I can't manage to combine them.
I would like to:
Generate the new nodes id from the fields table
Insert the new field ids in the data_versions table
Insert the new nodes in the fields table with the data_id from the data_versions table
Note: there is a circular reference between the fields and the data_versions tables.
See below the schema:
Here is a working query, but without the insert in the data_versions table. It is only a shallow copy (keeping the same data_id) while I would like a deep copy:
-- Shallow copy of the subtree below :source_field_id, re-attached under
-- :target_field_id. Each copied node gets a fresh id from fields_id_seq but
-- keeps the data_id of the node it was copied from.
WITH RECURSIVE cte AS (
    -- direct children of the source node
    SELECT *, nextval('fields_id_seq') AS new_id
    FROM fields
    WHERE parent_id = :source_field_id
    UNION ALL
    -- their descendants, level by level
    SELECT fields.*, nextval('fields_id_seq') AS new_id
    FROM cte
    INNER JOIN fields ON cte.id = fields.parent_id
),
created_data AS (
    -- pair every copied node with the new id of its copied parent (NULL for
    -- the top level of the copy, whose parent was not copied)
    SELECT
        node.new_id,
        node.name,
        node.field_type,
        node.data_id,
        parent_node.new_id AS new_parent_id
    FROM cte AS node
    LEFT JOIN cte AS parent_node ON node.parent_id = parent_node.id
)
INSERT INTO fields (id, name, parent_id, field_type, data_id)
SELECT
    new_id,
    name,
    COALESCE(new_parent_id, :target_field_id),
    field_type,
    data_id
FROM created_data
RETURNING id, name, parent_id, field_type, data_id;
And here is the draft query I'm working on for also inserting into the data_versions table. It fails with the error "WITH clause containing a data-modifying statement must be at the top level":
-- Draft of the deep copy. This statement does NOT run; it is kept as written to
-- show the failing shape (data-modifying CTE nested below the top-level WITH).
WITH created_data AS (
WITH cloned_fields AS (
-- Walk the subtree below :source_field_id, reserving a new id for every node.
WITH RECURSIVE cte AS (
SELECT *, nextval('fields_id_seq') new_id FROM fields WHERE parent_id = :source_field_id
UNION ALL
SELECT fields.*, nextval('fields_id_seq') new_id FROM cte JOIN fields ON cte.id = fields.parent_id
)
SELECT C1.new_id, C1.name, C1.field_type, C1.data_id, C2.new_id new_parent_id
FROM cte C1 LEFT JOIN cte C2 ON C1.parent_id = C2.id
),
-- This INSERT sits inside created_data's own WITH list rather than at the top
-- level of the statement, which is what triggers the error
-- "WITH clause containing a data-modifying statement must be at the top level".
cloned_data AS (
INSERT INTO data_versions (value, author, field_id)
SELECT d.value, d.author, c.new_id
FROM cloned_fields c
INNER JOIN data_versions d ON c.data_id = d.id
RETURNING id data_id
)
SELECT cloned_fields.new_id, cloned_fields.name, cloned_fields.field_type, cloned_fields.new_parent_id, cloned_data.data_id
FROM cloned_fields
-- NOTE(review): cloned_data only returns data_id (see RETURNING above), so
-- cloned_data.id does not look resolvable here - confirm.
INNER JOIN cloned_data ON cloned_fields.data_id = cloned_data.id
)
INSERT INTO fields (id, name, parent_id, field_type, data_id)
SELECT new_id, name, COALESCE(new_parent_id, :target_field_id), field_type, data_id FROM created_data
-- NOTE(review): "value" is not among the columns inserted into fields; presumably
-- it is not a column of fields at all - confirm.
RETURNING id, name, parent_id, field_type, data_id, value data;
In case other people encounter the same issue, here is the solution I came up with some months later. The trick was to move the data-modifying CTEs to the top level, as suggested by the error message. A CTE can always reference the CTEs declared before it:
-- Deep copy of the subtree below :source_field_id, re-attached under
-- :target_field_id. Both INSERTs are top-level CTEs; later CTEs read the rows
-- produced by earlier ones through their RETURNING lists.
WITH new_fields_ids AS (
    -- Walk the subtree, reserving a fresh id from fields_id_seq for every node,
    -- then pair each node with the new id of its copied parent.
    WITH RECURSIVE cte AS (
        SELECT *, nextval('fields_id_seq') AS new_id
        FROM fields
        WHERE parent_id = :source_field_id
        UNION ALL
        SELECT fields.*, nextval('fields_id_seq') AS new_id
        FROM cte
        INNER JOIN fields ON cte.id = fields.parent_id
    )
    SELECT
        node.new_id,
        node.name,
        node.field_type,
        node.data_id,
        parent_node.new_id AS new_parent_id
    FROM cte AS node
    LEFT JOIN cte AS parent_node ON node.parent_id = parent_node.id
),
cloned_data AS (
    -- Copy the data_versions row of every source node, pointing it at the new field id.
    INSERT INTO data_versions (value, author, field_id)
    SELECT src.value, src.author, nf.new_id
    FROM new_fields_ids AS nf
    INNER JOIN data_versions AS src ON nf.data_id = src.id
    RETURNING id AS data_id, field_id, value
),
created_data AS (
    -- Attach the freshly created data_versions id to each new field.
    SELECT
        nf.new_id,
        nf.name,
        nf.field_type,
        nf.new_parent_id,
        cd.data_id
    FROM new_fields_ids AS nf
    INNER JOIN cloned_data AS cd ON nf.new_id = cd.field_id
),
cloned_fields AS (
    -- Top level of the copy has no copied parent, so it hangs under :target_field_id.
    INSERT INTO fields (id, name, parent_id, field_type, data_id)
    SELECT
        new_id,
        name,
        COALESCE(new_parent_id, :target_field_id),
        field_type,
        data_id
    FROM created_data
    RETURNING id, name, parent_id, field_type, data_id
)
SELECT
    f.id,
    f.name,
    f.parent_id,
    f.field_type,
    f.data_id,
    d.value AS data
FROM cloned_fields AS f
INNER JOIN cloned_data AS d ON f.id = d.field_id;

SQL | Parent Child relationship in same table

I have a table that holds a parent/child key relationship within the same table. I need to find the parent rows that don't have any children (for example, row 1 has no children) and, for parents that do have children, only the most recent child (for example, row 17 has 3 children, i.e. 10, 13 and 14, and we need to fetch only the most recent one, which is 10).
First, you need the part that selects the main (top-level) parents:
-- Top-level ("main parent") rows only: those whose parent_id is 0.
SELECT p.*
FROM MyTable p
WHERE p.parent_id = 0
Next you need to find most recent direct child:
-- Each main parent together with its most recent direct child
-- (most recent = highest child id within the same parent_id).
--
-- The rank is computed in a derived table because window functions are not
-- allowed in a WHERE clause, and parent_id is qualified because both sides of
-- the self-join expose a column with that name.
-- Note: child.* also carries the helper column child_rank.
SELECT parent.*, child.*
FROM MyTable AS parent
INNER JOIN (
    SELECT
        c.*,
        RANK() OVER (PARTITION BY c.parent_id ORDER BY c.id DESC) AS child_rank
    FROM MyTable AS c
) AS child
    ON child.parent_id = parent.id
WHERE parent.parent_id = 0
  AND child.child_rank = 1
And add no-child ones:
-- Each main parent with its most recent direct child, or with NULL child
-- columns when it has no children at all.
--
-- The rank is computed in a derived table because window functions are not
-- allowed in a WHERE clause. The rank filter lives in the ON clause so the
-- LEFT JOIN still keeps parents that have no matching child row.
-- parent_id is qualified because both sides of the self-join expose it.
-- Note: child.* also carries the helper column child_rank.
SELECT parent.*, child.*
FROM MyTable AS parent
LEFT JOIN (
    SELECT
        c.*,
        RANK() OVER (PARTITION BY c.parent_id ORDER BY c.id DESC) AS child_rank
    FROM MyTable AS c
) AS child
    ON child.parent_id = parent.id
   AND child.child_rank = 1
WHERE parent.parent_id = 0
Something like this should work, but may not be the most efficient way of doing it:
-- Branch 1: top-level rows that have no children at all.
-- Without the parent_id filter this branch would also return every leaf child.
-- assumes top-level rows carry parent_id = 0 - adjust if your data marks them
-- differently (e.g. NULL).
select {desired fields}
from your_table
where parent_id = 0
  and not exists
  (
      select 1 from your_table B
      where B.parent_id = your_table.id
  )
UNION
-- Branch 2: for every parent that has children, only its most recent child,
-- i.e. the row with the highest id among the rows sharing its parent_id.
-- The subquery is correlated on parent_id (siblings); correlating on
-- your_table.id would compare a row with its own children and never match.
select {desired fields}
from your_table
where parent_id <> 0
  and id =
  (
      select max(B.id)
      from your_table B
      where B.parent_id = your_table.parent_id
  )
Hmmm . . . You seem to want one row per parent. Either the most recent child or the parent if there are no children. You can actually do this with window functions:
-- One row per family: the most recent child, or the parent itself when it has
-- no children.
--   * coalesce(nullif(parent_id, 0), id) is the family key: a child's
--     parent_id, or the row's own id when parent_id is 0 (a top-level row).
--   * ordering by parent_id desc puts children (parent_id > 0) ahead of the
--     parent (parent_id = 0); id desc then picks the highest child id.
select t.*
from (select t.*,
             row_number() over (partition by coalesce(nullif(parent_id, 0), id)
                                order by parent_id desc, id desc
                               ) as seqnum
      from t
     ) t
where seqnum = 1;

Recursive hierarchical PGSQL request with uuid goes into a loop

Hello there,
I'd like to get hierarchical parent/child data, meaning that if I select the id of the parent, all its children will be in the result.
Their relations are defined by the parent UUID
I have in a DB the following data :
-- Sample data: a single chain of five rows.
-- The first row is the root and lists itself as its own parent.
-- Each statement is terminated so the block can be run as one script.
INSERT INTO dummy_table (Id, parent) VALUES ('0171a28a-578a-49b5-86d5-ff0df54c8e96', '0171a28a-578a-49b5-86d5-ff0df54c8e96');
INSERT INTO dummy_table (Id, parent) VALUES ('0171a28a-5809-4708-9fc9-aeb91c16e560', '0171a28a-578a-49b5-86d5-ff0df54c8e96');
INSERT INTO dummy_table (Id, parent) VALUES ('0171a28a-580b-4de9-b3fa-35f13df27dd5', '0171a28a-5809-4708-9fc9-aeb91c16e560');
INSERT INTO dummy_table (Id, parent) VALUES ('0171a28a-580c-4e6b-8d17-0cc18af24b25', '0171a28a-580b-4de9-b3fa-35f13df27dd5');
INSERT INTO dummy_table (Id, parent) VALUES ('0171a28a-580d-47ee-aa15-92c6727e657e', '0171a28a-580c-4e6b-8d17-0cc18af24b25');
And my request is the following :
-- Failing query from the question, kept as written: it never terminates.
WITH RECURSIVE cte AS (
-- Anchor: the row with the given id. In the sample data this row's parent is its own id.
SELECT id, parent FROM dummy_table WHERE id = '0171a28a-578a-49b5-86d5-ff0df54c8e96'
UNION ALL
-- Recursive step: joins each found row to the row whose id equals its parent.
-- For a row that is its own parent this selects the same row again on every
-- iteration, and UNION ALL keeps the duplicates, so the recursion never ends.
-- NOTE(review): this direction follows parent links upward; the stated goal is
-- to list the children - confirm the intended direction.
SELECT dt.id, dt.parent FROM dummy_table dt INNER JOIN cte ON cte.parent = dt.id
)
SELECT * FROM cte;
The problem I have is that it loops, and I cannot figure out why.
Any help, please?
Thank you all.
That query goes into a loop because the first row in your data references itself as its parent and so the recursion never stops. Add a check to avoid self-reference and you should be fine
-- Returns the starting row and all of its descendants.
WITH RECURSIVE cte AS (
    -- Anchor: the row whose subtree is wanted.
    SELECT id, parent
    FROM dummy_table
    WHERE id = '0171a28a-578a-49b5-86d5-ff0df54c8e96'
    UNION ALL
    -- Recursive step: children of the rows found so far.
    SELECT dt.id, dt.parent
    FROM dummy_table dt
    INNER JOIN cte
        ON dt.parent = cte.id    -- walk downward, from a found row to its children
       AND dt.id <> dt.parent    -- skip a row that lists itself as its own parent,
                                 -- otherwise the root would be re-selected forever
)
SELECT * FROM cte;
-- Only self-references are guarded against; a longer cycle in the data
-- (a -> b -> a) would still recurse without end.
Another option is setting parent as null in the rows where parent is equal to id

postgres hierarchy - count of child levels and sort by date of children or grandchildren

I would like to know how to write a postgres subquery so that the following table example will output what I need.
id parent_id postdate
1   -1 2015-03-10
2     1 2015-03-11 (child level 1)
3     1 2015-03-12 (child level 1)
4     3 2015-03-13 (child level 2)
5    -1 2015-03-14
6    -1 2015-03-15
7     6 2015-03-16 (child level 1)
If I want to sort all the root ids by child level 1 with a count of children(s) from the parent, the output would be something like this
id count  date
6   2    2015-03-15
1   4    2015-03-10
5   1    2015-03-14
The output is sorted by postdate based on the root's child. The 'date' being output is the root's own postdate. Even though id#5 has a more recent postdate than id#1, root id#6's child (id#7) has the most recent postdate, so id#6 comes first because the rows are sorted by the child's postdate. id#5 doesn't have any children, so it just gets placed at the end, sorted by date. The 'count' is the number of children (child level 1) and grandchildren (child level 2), plus the root itself. For instance, id#2, #3 and #4 all belong to id#1, so for id#1 the count would be 4.
My current subquery thus far:
-- Asker's attempt: join each post to its children and grandchildren
-- (the child joins only apply when the post is a root, parent_id = -1),
-- then count joined rows per (post, child postdate).
SELECT
    post.id,
    COUNT(post.id),
    post.postdate
FROM mytable AS post
LEFT JOIN mytable AS kid
    ON kid.parent_id = post.id
    AND post.parent_id = -1
LEFT JOIN mytable AS grandkid
    ON grandkid.parent_id = kid.id
    AND post.parent_id = -1
GROUP BY
    post.id,
    kid.postdate,
    post.postdate
ORDER BY
    kid.postdate DESC,
    post.postdate DESC
-- Fixture: self-referencing post table; root posts have parent_id NULL.
create table mytable (
    id        serial primary key,
    parent_id int references mytable,
    postdate  date
);

create index mytable_parent_id_idx on mytable (parent_id);

-- Seven sample posts: roots 1, 5 and 6; 2 and 3 under 1; 4 under 3; 7 under 6.
insert into mytable (id, parent_id, postdate)
values
    (1, null, '2015-03-10'),
    (2, 1,    '2015-03-11'),
    (3, 1,    '2015-03-12'),
    (4, 3,    '2015-03-13'),
    (5, null, '2015-03-14'),
    (6, null, '2015-03-15'),
    (7, 6,    '2015-03-16');
-- recu: one row per (root post, node in its subtree), the root included.
--       child_postdate is NULL for the root's own row, so it never wins max().
-- c:    per root, the subtree size and the latest descendant postdate.
with recursive recu as (
    select
        top_post.id as parent,
        top_post.id as root,
        null::date  as child_postdate
    from mytable as top_post
    where top_post.parent_id is null
    union all
    select
        r.parent,
        reply.id,
        reply.postdate
    from recu as r
    inner join mytable as reply
        on reply.parent_id = r.root
),
c as (
    select
        parent,
        count(*)            as cnt,
        max(child_postdate) as max_child_date
    from recu
    group by parent
)
select
    m.id,
    c.cnt,
    m.postdate,
    c.max_child_date
from mytable as m
inner join c
    on c.parent = m.id
order by
    c.max_child_date desc nulls last,
    m.postdate desc;
You'll need a recursive query to count the elements in the subtrees:
-- opa: one row per (root, node in that root's subtree), the root included.
--      par is the root id, moi is the node itself.
WITH RECURSIVE opa AS (
    SELECT
        root_node.id AS par,
        root_node.id AS moi
    FROM the_tree AS root_node
    WHERE root_node.parent_id IS NULL
    UNION ALL
    SELECT
        o.par AS par,
        node.id AS moi
    FROM opa AS o
    INNER JOIN the_tree AS node
        ON node.parent_id = o.moi
)
-- Subtree size per root, listed in id order.
SELECT
    t.id,
    c.cnt,
    t.postdate
FROM the_tree AS t
INNER JOIN (
    SELECT
        o.par,
        COUNT(*) AS cnt
    FROM opa AS o
    GROUP BY o.par
) AS c
    ON c.par = t.id
ORDER BY t.id
;
UPDATE (it appears the OP also wants the maxdate per tree)
-- The same, but also select the postdate
-- --------------------------------------
-- opa: one row per (root, node in that root's subtree), the root included,
--      carrying each node's own postdate.
WITH RECURSIVE opa AS (
    SELECT
        root_node.id       AS par,
        root_node.id       AS moi,
        root_node.postdate AS postdate
    FROM the_tree AS root_node
    WHERE root_node.parent_id IS NULL
    UNION ALL
    SELECT
        o.par         AS par,
        node.id       AS moi,
        -- each node keeps its own date; the maximum is taken in the aggregate
        -- below rather than carried along with GREATEST() during the recursion
        node.postdate AS postdate
    FROM opa AS o
    INNER JOIN the_tree AS node
        ON node.parent_id = o.moi
)
-- Per root: subtree size and the latest postdate anywhere in the subtree
-- (the root's own postdate counts too).
SELECT
    t.id,
    c.cnt,
    t.postdate,
    c.maxdate
FROM the_tree AS t
INNER JOIN (
    SELECT
        o.par,
        COUNT(*)        AS cnt,
        MAX(o.postdate) AS maxdate
    FROM opa AS o
    GROUP BY o.par
) AS c
    ON c.par = t.id
ORDER BY c.maxdate, t.id
;
After looking at everyone's code, I created the subquery I needed. I can use PHP to vary the 'case when' code depending on the user's sort selection. For instance, the code below will sort the root nodes based on child level 1's postdate.
-- One row per root post: id, size of its whole subtree (root included) and the
-- root's own postdate, sorted by the latest postdate among its level-1
-- children. Roots without level-1 children sort last, newest first.
--
-- Changes from the earlier draft:
--   * the outer query reads mytable, the same table the CTE walks (the draft
--     joined the CTE against a differently named table);
--   * rows are grouped by root only. Grouping by (root, depth) produced one
--     output row per depth level, each with a per-depth count instead of the
--     subtree total.
with recursive cte as (
    -- anchor: every root (parent_id = -1) at depth 0; its own date is not a
    -- child date, hence NULL
    select id as parent, id as root, null::timestamp as child_postdate, 0 as depth
    from mytable
    where parent_id = -1
    union all
    -- descendants, remembering how far below the root they are
    select r.parent, t.id, t.postdate, r.depth + 1
    from cte r
    join mytable t
        on t.parent_id = r.root
)
select m.id, c.cnt, m.postdate
from mytable m
join (
    select
        parent,
        count(*) as cnt,
        -- sort key: change the depth here to sort by another level
        -- (e.g. depth = 2 for grandchildren)
        max(child_postdate) filter (where depth = 1) as max_child_date
    from cte
    group by parent
) c on c.parent = m.id
order by c.max_child_date desc nulls last, m.postdate desc;
-- Root posts (parent_id = -1) with 1 + the number of their DIRECT children,
-- newest child first. Grandchildren are not counted; that needs a recursive
-- query.
--
-- The table is named mytable because TABLE is a reserved word and cannot be
-- used as a bare table name. The join is a LEFT JOIN so that roots without any
-- children are kept (count 1) instead of being dropped; they sort last,
-- newest first.
select
    p.id,
    (1 + coalesce(c.n, 0)) as parent_post_plus_number_of_subposts,
    p.postdate
from mytable as p
left join (
    select
        parent_id,
        count(*)      as n,
        max(postdate) as _postdate
    from mytable
    group by parent_id
) as c
    on p.id = c.parent_id
where p.parent_id = -1
order by c._postdate desc nulls last, p.postdate desc

Returning the parent/ child relationship on a self-joining table

I need to be able to return a list of all children given a parent Id at all levels using SQL.
The table looks something like this:
ID ParentId Name
---------------------------------------
1 null Root
2 1 Child of Root
3 2 Child of Child of Root
Given an Id of '1', how would I return the entire list? There is no limit on the depth of the nesting either.
Thanks,
Kieron
To get all children for a given @ParentId stored in that manner you could use a recursive CTE.
-- T-SQL: list every descendant of @ParentId, at any depth.
-- Variables use the @ sigil; # denotes a temporary table and is not valid in
-- DECLARE.
-- While @ParentId is left unset (NULL) the anchor below starts from the rows
-- whose ParentId IS NULL, so the whole tree is returned.
declare @ParentId int
-- Uncomment to list the descendants of row 1 instead:
--set @ParentId = 1
;WITH T AS
(
    -- inline sample data matching the table in the question
    select 1 AS ID, null AS ParentId, 'Root' as [Name] union all
    select 2, 1, 'Child of Root' union all
    select 3, 2, 'Child of Child of Root'
),
cte AS
(
    -- anchor: direct children of @ParentId (or the root rows when it is NULL)
    SELECT ID, ParentId, Name
    FROM T
    WHERE ParentId = @ParentId OR (ParentId IS NULL AND @ParentId IS NULL)
    UNION ALL
    -- recursive step: children of the rows found so far
    SELECT T.ID, T.ParentId, T.Name
    FROM T
    JOIN cte c ON c.ID = T.ParentId
)
SELECT ID, ParentId, Name
FROM cte
-- 0 lifts SQL Server's default limit of 100 recursion levels
OPTION (MAXRECURSION 0)