How to get array of descendants from adjacency lists? - postgresql

I have the following table and data
-- Adjacency list: each row is one edge from parent a to child b.
CREATE TABLE relationships (
    a TEXT,
    b TEXT
);

-- Every node of the graph, including node '10', which has no edges at all.
CREATE TABLE nodes (
    n TEXT
);

INSERT INTO relationships (a, b)
VALUES
    ('1', '2'), ('1', '3'), ('1', '4'), ('1', '5'),
    ('2', '6'), ('2', '7'), ('2', '8'),
    ('3', '9');

INSERT INTO nodes (n)
VALUES
    ('1'), ('2'), ('3'), ('4'), ('5'),
    ('6'), ('7'), ('8'), ('9'), ('10');
I want to output
n | children
1 | ['2', '3', '4', '5', '6', '7', '8', '9']
2 | ['6', '7', '8', '9']
3 | ['9']
4 | []
5 | []
6 | []
7 | []
8 | []
9 | []
10 | []
I am trying to use WITH RECURSIVE but am stuck on how to pass a parameter into the CTE
-- Asker's non-working attempt, kept as posted; the recursive member is elided ("...").
WITH RECURSIVE traverse(n) AS (
SELECT *
FROM relationships
WHERE a = n --- not sure how to pass data to here
UNION ALL
...
)
-- NOTE(review): a second WITH directly after the first is not a valid continuation;
-- further CTEs are normally comma-separated inside a single WITH clause.
WITH basic_cte AS (
SELECT a1.n as n,
(SELECT COALESCE(json_agg(temp), '[]')
FROM (
-- traverse is declared above as a CTE but is invoked here like a function,
-- and a1 (nodes) only has column n, not a.
(SELECT * FROM traverse(a1.a))
) as temp
) as children
FROM nodes as a1
)
SELECT *
FROM basic_cte;

To get a list of the children of all nodes, you need a left join to the nodes table
-- Builds every (root, descendant) pair by walking the adjacency list downwards,
-- then aggregates the descendants of each node into one array.
-- Nodes without descendants get an empty array instead of NULL.
-- Note: with UNION ALL the recursion does not terminate if the edges contain a cycle.
with recursive rels as (
    -- anchor: every direct edge, remembering its parent as the root
    select a, b, a as root
    from relationships
    union all
    -- step: extend each known path by one edge, keeping the original root
    select c.a, c.b, r.root
    from relationships c
    join rels r on r.b = c.a
)
select
    n.n,
    coalesce(
        -- the filter drops the NULL produced by the left join for childless nodes;
        -- ordering inside the aggregate makes the array content deterministic
        array_agg(r.b order by r.b) filter (where r.b is not null),
        '{}'::text[]
    ) as children
from nodes n
left join rels r on r.root = n.n
group by n.n
order by n.n;

Note: This ignores nodes that have no children. You can add a left join like in @a_horse_with_no_name's answer to get that functionality.
You can't really pass a parameter into the CTE unless you veer off into stored procedures and whatnot. The CTE is a single table that needs to contain all the rows you might want to use from it.
Assuming a fairly nice graph (no duplicate edges, no cycles), code like the following ought to do what you're looking for.
The base case for the recursive query gets all level-1 descendants (the children) for all nodes which could possibly be parents.
The recursive step walks through the 2nd level, 3rd level, etc down the tree.
Once we have all parent-descendant tuples we can aggregate the data as desired.
-- Collects every (parent, descendant) pair, then folds each parent's
-- descendants into a single array. Parents only: nodes without any
-- outgoing edge do not appear in the result.
WITH RECURSIVE descendants (parent, child) AS (
    -- Seed: each edge gives a parent and one first-level descendant.
    SELECT a, b
    FROM relationships
    UNION
    -- Step: a child of a known descendant is a descendant of the same parent.
    SELECT known.parent, edge.b
    FROM relationships AS edge
    INNER JOIN descendants AS known
        ON edge.a = known.child
)
SELECT
    parent AS n,
    array_agg(child) AS children
FROM descendants
GROUP BY parent

Related

Postgres Group by intersection array

I have a table like this
-- Inline sample data: each id is paired with an array of item codes.
SELECT
    t.id,
    t.items
FROM (
    VALUES
        ('1', ARRAY['A', 'B']),
        ('2', ARRAY['A', 'B', 'C']),
        ('3', ARRAY['E', 'F']),
        ('4', ARRAY['G'])
) AS t (id, items)
Two records belong to the same group if they have at least one item in common.
For example, #1 and #2 belong to the same group because they both have A and B. #3 and #4 each form a different group.
So my desired output would be
ID
items
group_alias
1
{A,B}
{A,B}
2
{A,B,C}
{A,B}
3
{E,F}
{E,F}
4
{G}
{G}
The group_alias field is a new field that tells me that records #1 and #2 belong to the same group.
Having
-- Sample table: one row per record, items stored as an array of single characters.
CREATE TABLE temp1
(
    id    int    PRIMARY KEY,
    items char[] NOT NULL
);

-- Explicit column list and integer ids, so the insert does not depend on
-- column order or on implicit string-to-int conversion.
INSERT INTO temp1 (id, items) VALUES
    (1, ARRAY['A', 'B']),
    (2, ARRAY['A', 'B', 'C']),
    (3, ARRAY['E', 'F']),
    (4, ARRAY['G']);

-- Index the array column to speed up queries.
CREATE INDEX idx_items ON temp1 USING GIN (items);
Then
-- For each row, use as group alias the items of the smallest *other* row that
-- overlaps with it and has fewer items; a row with no such match is its own alias.
select
    t1.id,
    t1.items,
    coalesce(
        (
            select t2.items
            from temp1 t2
            where t2.items && t1.items
              and t1.id != t2.id
              and array_length(t2.items, 1) < array_length(t1.items, 1)
            -- minimum common set first; id breaks ties so the pick is deterministic
            order by array_length(t2.items, 1), t2.id
            limit 1
        ),
        t1.items  -- trivial solution: no smaller overlapping row exists
    ) as group_alias
from temp1 t1;
https://www.db-fiddle.com/f/46ydeE5ZXCJDk4Rw3cu4jt/10
This query returns all group alias of an item. For example item no. 5 has group alias {E} and {A,B}. The performance is maybe better if you create a temporary table for the items instead of creating them dynamically like you mentioned in one comment. Temporary tables are automatically dropped at the end of a session. You can create
indexes on temporary tables, too, which can speed up the query.
-- Session-local sample table; row 5 overlaps with both the {A,B} rows and the {E,F} row.
CREATE TEMP TABLE temp
(
    id    int    PRIMARY KEY,
    items char[] NOT NULL
);

-- Explicit column list and integer ids, so the insert does not depend on
-- column order or on implicit string-to-int conversion.
INSERT INTO temp (id, items) VALUES
    (1, ARRAY['A', 'B']),
    (2, ARRAY['A', 'B', 'C']),
    (3, ARRAY['E', 'F']),
    (4, ARRAY['G']),
    (5, ARRAY['A', 'B', 'E']);
The query:
-- Lists every group alias of every row: for each pair of different, overlapping
-- rows the alias is the set of items they share. Rows that overlap with no
-- other row fall back to their own items.
SELECT DISTINCT
    base.id,
    base.items,
    COALESCE(shared.match, base.items) AS group_alias
FROM temp AS base
LEFT JOIN (
    SELECT
        lhs.id,
        common.match
    FROM temp AS lhs
    CROSS JOIN temp AS rhs
    CROSS JOIN LATERAL (
        -- items present in both arrays of the current pair
        SELECT array_agg(l_item) AS match
        FROM unnest(lhs.items) AS l_item
        INNER JOIN unnest(rhs.items) AS r_item
            ON l_item = r_item
    ) AS common
    WHERE lhs.id != rhs.id
      AND lhs.items && rhs.items
) AS shared
    ON shared.id = base.id
ORDER BY base.id;
And the result:
id | items | group_alias
----+---------+-------------
1 | {A,B} | {A,B}
2 | {A,B,C} | {A,B}
3 | {E,F} | {E}
4 | {G} | {G}
5 | {A,B,E} | {A,B}
5 | {A,B,E} | {E}

Combine IN and LIKE function in DB2

Is there a way to combine the IN and LIKE predicates in DB2? For example, I would like to exclude users whose userid is A, B or C, and also users whose userid starts with X or Y (i.e. matches X% or Y%). I tried the query below, but it did not work:
-- Asker's original attempt (does not work): each LIKE lacks its left-hand operand, and the conditions are combined with OR.
select * from table where userid not in ('A','B','C') or (not like 'X%' or not like 'Y%')
Use 'AND' instead of 'OR'
-- A row is excluded as soon as ANY one of the three conditions matches,
-- so the kept rows are those for which none of them holds.
select *
from table
where not (
       userid in ('A','B','C')
    or userid like 'X%'
    or userid like 'Y%'
)
You may use all the constants used in IN in LIKE:
-- Treats exact values and wildcard patterns uniformly: every entry of the
-- pattern list is applied with LIKE, and a user is kept only when no entry matches.
with
sample_users (userid) as
(
    values 'A', 'AA', 'XX', 'YY', 'ZZ'
)
, excluded_patterns (userid) as
(
    values 'A', 'B', 'C', 'X%', 'Y%'
)
select *
from sample_users u
where not exists
(
    select 1
    from excluded_patterns p
    where u.userid like p.userid
);
The result is:
|USERID|
|------|
|AA |
|ZZ |

Subtracting values by id and a previous counter

Here is a Snippet of my data :-
customers order_id order_date order_counter
1 a 1/1/2018 1
1 b 1/4/2018 2
1 c 3/8/2018 3
1 d 4/9/2019 4
I'm trying to get the average number of days between orders for each customer. For the snippet above the average should be 32.66 days: there are 3, 63 and 32 days between consecutive orders, which are summed and then divided by 3.
My data has Customers that may have more than 100+ orders .
You could use LAG function:
-- Average difference between each order date and the previous one, per customer.
-- The first order of a customer has no predecessor (d is NULL) and is skipped,
-- so customers with a single order do not appear in the result.
SELECT
    gaps.customers,
    AVG(gaps.d)
FROM (
    SELECT
        customers,
        order_date
            - LAG(order_date) OVER (PARTITION BY customers ORDER BY order_counter) AS d
    FROM t
) AS gaps
WHERE gaps.d IS NOT NULL
GROUP BY gaps.customers;
db<>fiddle demo
With a self join, group by customer and get the average difference:
-- Pairs every order with the same customer's order whose counter is exactly one
-- higher, then averages the date differences of those pairs per customer.
select
    earlier.customers,
    round(avg(later.order_date - earlier.order_date), 2) as averagedays
from tablename as earlier
inner join tablename as later
    on later.customers = earlier.customers
   and later.order_counter = earlier.order_counter + 1
group by earlier.customers
See the demo.
Results:
| customers | averagedays |
| --------- | ----------- |
| 1 | 32.67 |
Please check below query.
I tried to insert data of two customers so that we can check that average for every customer is coming correct.
DB Fiddle Example: https://www.db-fiddle.com/
-- Sample orders for two customers, so the per-customer average can be checked.
CREATE TABLE test (
    customers     INTEGER,
    order_id      VARCHAR(1),
    order_date    DATE,
    order_counter INTEGER
);

-- Values are written in the type of their column (integers unquoted, dates as
-- DATE literals) instead of relying on implicit conversion from strings.
INSERT INTO test
    (customers, order_id, order_date, order_counter)
VALUES
    (1, 'a', DATE '2018-01-01', 1),
    (1, 'b', DATE '2018-01-04', 2),
    (1, 'c', DATE '2018-03-08', 3),
    (1, 'd', DATE '2018-04-09', 4),
    (2, 'a', DATE '2018-01-01', 1),
    (2, 'b', DATE '2018-01-06', 2),
    (2, 'c', DATE '2018-03-12', 3),
    (2, 'd', DATE '2018-04-15', 4);

-- kept from the original script; presumably for clients running without autocommit
commit;
-- Step 1: attach to every order the date of the same customer's next order.
WITH upcoming AS (
    SELECT
        customers,
        order_date,
        LEAD(order_date) OVER (PARTITION BY customers ORDER BY order_date) AS next_order_date
    FROM test
),
-- Step 2: keep only orders that have a successor and compute the gap to it.
gaps AS (
    SELECT
        customers,
        order_date,
        next_order_date - order_date AS next_order_diff
    FROM upcoming
    WHERE next_order_date IS NOT NULL
)
-- Step 3: average the gaps per customer.
SELECT
    customers,
    ROUND(AVG(next_order_diff), 2) AS average
FROM gaps
GROUP BY customers
ORDER BY customers
;
Another option. I would myself prefer the answer from @forpas, except that it depends on order_counter increasing by exactly one from order to order (what happens when an order is deleted?). The following accounts for that by actually counting the number of order pairs. It also accounts for customers who have placed only one order, returning NULL as their average.
-- Average days between consecutive orders, computed as
-- (sum of gaps) / (number of order pairs) per customer.
select
    customers,
    -- n is 0 for a customer with a single order; NULLIF makes the NULL result
    -- explicit instead of relying on the (also NULL) sum to mask the division by zero
    round(sum(nd)::numeric / nullif(n, 0), 2) as avg_days_to_order
from (
    select
        customers,
        -- gap to the previous order; NULL for a customer's first order
        order_date
            - lag(order_date) over (partition by customers order by order_counter) as nd,
        -- number of consecutive order pairs = number of orders - 1
        count(*) over (partition by customers) - 1 as n
    from test
) d
group by customers, n
order by customers;

Interleaving array_agg in postgres

I have a postgres query in which I want to interleave my array_agg statements :
-- Asker's original query: the parenthesised select-list entry builds ONE row value
-- that holds three separate arrays (questions, question ids, answers) per user.
SELECT client_user_id,
(array_agg(question), array_agg(client_intake_question_id), array_agg(answer)) as answer
FROM client_intake_answer
LEFT OUTER JOIN client_intake_question
-- NOTE(review): this matches the question id against client_user_id; presumably it
-- should match client_intake_question_id - confirm against the schema.
ON client_intake_question.id = client_user_id
GROUP BY client_user_id
Gives me the following:
5 | ("{""Have you ever received counselling?"",""Have you ever received counselling or mental health support in the past?""}","{1,2}","{yes,no}")
I would like the results to be:
5 | ("{""Have you ever received counselling?", 1, "yes"",""Have you ever received counselling or mental health support in the past?", 2, "no""}"
How do I do this?
I've set up a small example similar to yours:
-- Minimal reproduction: two questions, two users, one answer per user and question.
create table answers(user_id int, question_id int, answer varchar(20));
create table questions(question_id int, question varchar(20));

insert into questions (question_id, question) values
(1, 'question 1'),
(2, 'question 2');

insert into answers (user_id, question_id, answer) values
(1, 1, 'yes'),
(1, 2, 'no'),
(2, 1, 'no'),
(2, 2, 'yes');

-- One array element per answered question, formatted as "question,id,answer".
-- ORDER BY inside array_agg fixes the element order and the outer ORDER BY fixes
-- the row order; without them neither is guaranteed.
select
    answers.user_id,
    array_agg(
        concat(questions.question, ',', questions.question_id::text, ',', answers.answer)
        order by questions.question_id
    )
from questions
inner join answers
    on questions.question_id = answers.question_id
group by answers.user_id
order by answers.user_id;
user_id | array_agg
------: | :-------------------------------------
1 | {"question 1,1,yes","question 2,2,no"}
2 | {"question 1,1,no","question 2,2,yes"}
dbfiddle here
To interleave or splice together multiple array_agg's you can do the following:
-- Produces one "[id,question,answer]" string per answer row and collects
-- those strings into a single array per user.
SELECT
    client_user_id,
    array_agg(
        -- every field is separated by a comma (the id and the question text
        -- previously ran together); || yields NULL if any operand is NULL
        '[' || client_intake_question_id || ',' || question || ',' || answer || ']'
    ) AS answer
FROM client_intake_answer
-- join condition kept exactly as in the question
LEFT OUTER JOIN client_intake_question
    ON client_intake_question.id = client_user_id
GROUP BY client_user_id

TSQL: How to return two rows if Column = Null

I have to build a procedure that returns a table at the end, which contains a list of fields where specific substances were applied. I need to return one row for each field and the applied substance.
This works great for all fields where something was actually applied, but I also need to display the same number of rows for those fields where nothing was applied.
At the moment I get a table like this:
Field 1 | Substance 1 | 12345 kg
Field 1 | Substance 2 | 23423 kg
Field 2 | Substance 1 | 23236 kg
Field 2 | Substance 2 | 12312 kg
Field 3 | NULL | NULL
I know that I could swap the NULL value with at least one Substance by making a Case-Condition, but I need two rows (one for Substance 1 and one for Substance 2) containing the names of each substance.
Is there any way to achieve this?
Or maybe you have something like this:
-- Normalised sample schema: fields, substances, and which substance was applied
-- to which field in what quantity. Field 3 deliberately has no applications.
-- All tables are created in dbo, the schema the inserts and foreign keys refer to.
CREATE TABLE dbo.Fields (
    FieldID INT PRIMARY KEY,
    FieldName VARCHAR(50) NOT NULL UNIQUE
);

INSERT INTO dbo.Fields (FieldID, FieldName) VALUES
(1, 'Field 1'),
(2, 'Field 2'),
(3, 'Field 3');

CREATE TABLE dbo.Substances (
    SubstanceID INT PRIMARY KEY,
    Substance VARCHAR(50) NOT NULL UNIQUE
);

INSERT INTO dbo.Substances (SubstanceID, Substance) VALUES
(1, 'Substance 1'),
(2, 'Substance 2');

CREATE TABLE dbo.AppliedSubstances (
    FieldID INT NOT NULL REFERENCES dbo.Fields,
    SubstanceID INT NOT NULL REFERENCES dbo.Substances,
    Quantity INT NOT NULL
);

INSERT INTO dbo.AppliedSubstances (FieldID, SubstanceID, Quantity) VALUES
(1, 1, 12345),
(1, 2, 23423),
(2, 1, 23236),
(2, 2, 12312);
Then you can use the following query:
-- Part 1: field/substance combinations that were actually applied, with their quantity.
SELECT fld.FieldName, sub.Substance, app.Quantity
FROM dbo.Fields AS fld
INNER JOIN dbo.AppliedSubstances AS app
    ON app.FieldID = fld.FieldID
INNER JOIN dbo.Substances AS sub
    ON sub.SubstanceID = app.SubstanceID
UNION ALL
-- Part 2: every field/substance combination that has no application row,
-- reported with a NULL quantity.
SELECT fld.FieldName, sub.Substance, NULL AS Quantity
FROM dbo.Fields AS fld
CROSS JOIN dbo.Substances AS sub
WHERE NOT EXISTS (
    SELECT 1
    FROM dbo.AppliedSubstances AS app
    WHERE app.SubstanceID = sub.SubstanceID
      AND app.FieldID = fld.FieldID
)
Or a shorter stranger version (with a different meaning if you have some substances that were applied only for some fields):
-- Every field is kept. A field with applications joins only to the substances
-- applied to it; a field without any (app.SubstanceID is NULL) joins to all substances.
SELECT fld.FieldName, sub.Substance, app.Quantity
FROM dbo.Fields AS fld
LEFT JOIN dbo.AppliedSubstances AS app
    ON app.FieldID = fld.FieldID
INNER JOIN dbo.Substances AS sub
    ON sub.SubstanceID = COALESCE(app.SubstanceID, sub.SubstanceID)
I'm not sure if I understand your question correctly, but try this:
-- Flat sample data shaped like the asker's current output: one row per
-- field/substance, plus a single all-NULL row for a field with no applications.
-- The table is created in dbo, the schema every later statement refers to.
CREATE TABLE dbo.SourceData (
    FieldName VARCHAR(50),
    Substance VARCHAR(50),
    Quantity INT
);

INSERT INTO dbo.SourceData (FieldName, Substance, Quantity) VALUES
('Field 1', 'Substance 1', 12345),
('Field 1', 'Substance 2', 23423),
('Field 2', 'Substance 1', 23236),
('Field 2', 'Substance 2', 12312),
('Field 3', NULL, NULL);

-- Part 1: rows that already name a substance are returned unchanged.
SELECT FieldName, Substance, Quantity
FROM dbo.SourceData
WHERE Substance IS NOT NULL
UNION ALL
-- Part 2: each row without a substance is expanded into one row per distinct
-- substance found anywhere in the data, with a NULL quantity.
SELECT s1.FieldName, x.Substance, NULL AS Quantity
FROM dbo.SourceData s1
CROSS JOIN (
    SELECT DISTINCT s2.Substance
    FROM dbo.SourceData s2
    WHERE s2.Substance IS NOT NULL
) x
WHERE s1.Substance IS NULL;