Converting relational tree to nested json document in postgres - postgresql

In PostgreSQL I have a relational data model that represents the hierarchy within an organization.
-- Organization hierarchy: every employee points at his/her supervisor
CREATE TABLE employee (
    id            integer     PRIMARY KEY,
    name          varchar(40) NOT NULL,
    -- self-reference to employee; nullable
    supervisor_id integer     REFERENCES employee
);
Only the CEO has supervisor_id=NULL. Every other employee has a supervisor identified by supervisor_id.
I would like to export the data as a single nested json document
{
"id": 1,
"name": "Name of company's CEO",
"supervises": [
{
"id": 2,
"name": "Name of 1st EC member",
"supervises": [ ... nested employees ... ]
},
{
"id": 3,
"name": "Name of 2nd EC member",
"supervises": [ ... nested employees ... ]
}
...
]
}
I followed the example from https://www.postgresqltutorial.com/postgresql-recursive-query/ but it only helps me to identify all the employees top down along the reporting line using the WITH RECURSIVE clause.
I know that I need to start the aggregation with the employees that have the highest depth in the tree (not just leaf nodes) and aggregate them bottom up but I did not manage to write a query that does the job.
Thanks for your help!

At first glance your question sounds quite simple to solve with a recursive query, but it is not: several employees may share the same supervisor, so an aggregate function is required to build the corresponding json array, and aggregate functions are not allowed in the recursive term of a recursive query ...
First step - We build the json objects for every employee and the corresponding array of supervised employees when he/she is a supervisor :
-- One row per employee (s) together with the employees (e) he/she directly supervises.
--   father        : id of the employee
--   children_list : ids of the direct reports ordered by id, NULL when there are none
--   json_tree     : json object of the employee; 'supervises' holds the child ids as
--                   placeholders ([] for an employee without direct reports)
SELECT s.id AS father
     -- FILTER removes the NULL produced by the LEFT JOIN for employees without reports;
     -- the aggregate then yields NULL, so no comparison against array[NULL] is needed
     , array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL) AS children_list
     , jsonb_build_object
       ( 'id', s.id
       , 'name', s.name
       -- same ordered aggregate as children_list so array positions match; empty array instead of [null]
       , 'supervises', COALESCE(array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL), '{}' :: integer[])
       ) AS json_tree
  FROM employee AS s
  LEFT JOIN employee AS e
    ON s.id = e.supervisor_id
 GROUP BY s.id, s.name
Second step - Using a recursive query, we go through the tree of employees in a top-down approach, we assign a rank to every employee and build the corresponding jsonpath that will be used in the next step :
WITH RECURSIVE elt AS
(
    -- One row per employee with the ordered ids of his/her direct reports
    -- (children_list is NULL and 'supervises' is [] when there are none)
    SELECT s.id AS father
         , array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL) AS children_list
         , jsonb_build_object
           ( 'id', s.id
           , 'name', s.name
           , 'supervises', COALESCE(array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL), '{}' :: integer[])
           ) AS json_tree
      FROM employee AS s
      LEFT JOIN employee AS e
        ON s.id = e.supervisor_id
     GROUP BY s.id, s.name
), list (father, json_tree, children_list, rank, path) AS
(
    -- Anchor: employees that appear in nobody's children_list (the top of the tree),
    -- with rank 1 and an empty json path
    SELECT c.father, c.json_tree, c.children_list, 1, '{}' :: text[]
      FROM elt AS c
      LEFT JOIN elt AS f
        ON c.father = ANY (f.children_list)
     WHERE f.father IS NULL
    UNION ALL
    -- Recursive term: descend to the direct reports of the rows found so far.
    -- The path of a child is the parent's path followed by 'supervises' and the
    -- zero-based position of the child in the parent's children_list.
    SELECT c.father
         , c.json_tree
         , c.children_list
         , f.rank + 1
         , f.path || array['supervises',(array_position(f.children_list, c.father)-1) :: text]
      FROM list AS f
     INNER JOIN elt AS c
        ON c.father = ANY (f.children_list)
)
SELECT father, json_tree, children_list, rank, path
  FROM list
 ORDER BY rank DESC
Third step - We create an aggregate version of the jsonb_set function in order to build the final jsonb document while iterating over the previous resulting list
-- State transition function used by the jsonb_set_agg aggregate declared below.
--   x       : accumulated jsonb state
--   y       : first argument of the aggregate; not referenced in the body
--   p, e, b : forwarded to the 4-argument jsonb_set call in the body
-- While the state is NULL (the aggregate declares no INITCOND) the result is e;
-- otherwise e is set at path p inside the current state.
CREATE OR REPLACE FUNCTION jsonb_set(x jsonb, y jsonb, p text[], e jsonb, b boolean)
RETURNS jsonb LANGUAGE sql AS $$
SELECT CASE WHEN x IS NULL THEN e ELSE jsonb_set(x, p, e, b) END ; $$ ;
-- Aggregate folding its input rows into a single jsonb value through the 5-argument jsonb_set above
CREATE OR REPLACE AGGREGATE jsonb_set_agg(x jsonb, p text[], e jsonb, b boolean)
( STYPE = jsonb, SFUNC = jsonb_set) ;
Final query
WITH RECURSIVE elt AS
(
    -- One row per employee with the ordered ids of his/her direct reports
    -- (children_list is NULL and 'supervises' is [] when there are none)
    SELECT s.id AS father
         , array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL) AS children_list
         , jsonb_build_object
           ( 'id', s.id
           , 'name', s.name
           , 'supervises', COALESCE(array_agg(e.id ORDER BY e.id) FILTER (WHERE e.id IS NOT NULL), '{}' :: integer[])
           ) AS json_tree
      FROM employee AS s
      LEFT JOIN employee AS e
        ON s.id = e.supervisor_id
     GROUP BY s.id, s.name
), list (father, json_tree, children_list, rank, path) AS
(
    -- Anchor: employees that appear in nobody's children_list (the top of the tree),
    -- with rank 1 and an empty json path
    SELECT c.father, c.json_tree, c.children_list, 1, '{}' :: text[]
      FROM elt AS c
      LEFT JOIN elt AS f
        ON c.father = ANY (f.children_list)
     WHERE f.father IS NULL
    UNION ALL
    -- Recursive term: descend to the direct reports of the rows found so far.
    -- The path of a child is the parent's path followed by 'supervises' and the
    -- zero-based position of the child in the parent's children_list.
    SELECT c.father
         , c.json_tree
         , c.children_list
         , f.rank + 1
         , f.path || array['supervises',(array_position(f.children_list, c.father)-1) :: text]
      FROM list AS f
     INNER JOIN elt AS c
        ON c.father = ANY (f.children_list)
)
-- Fold the rows top-down (rank ascending) so that a parent object is already in the
-- document when the placeholder id of one of its children is replaced by the child object
SELECT jsonb_set_agg(NULL :: jsonb, path, json_tree, true ORDER BY rank ASC)
  FROM list
test result in dbfiddle

Related

How to perfom adequate operations on JSONB PostgresSQL

Lets say I have
sequelize.query('SELECT associations FROM users WHERE id = :id')
associations is a JSONB ARRAY column
the output look like so
[
{
"role": 2,
"shop_id": 1,
"admin_id": 1,
"manager_id": null
}
]
I'd like to loop through the array and search for those associations using those ids
I'd like to perfom that whole thing in the same query.
I have a role table, shop table, users table
Progress
all the columns are coming out as null
If associations is a column of type jsonb[], then use unnest(associations) in order to expand the first level of elements.
Then you can try something like this assuming that all the id are of type integer :
// Expands the jsonb[] column "associations" into one row per element (j) and looks up
// the role, shop, admin and manager referenced by each element.
// A template literal is used so the statement can span several lines and the SQL
// single quotes need no escaping; :id is a named placeholder left for the caller to bind.
sequelize.query(`
  SELECT *
  FROM users AS u
  CROSS JOIN LATERAL unnest(u.associations) AS j
  LEFT JOIN role AS r
    ON (j->>'role') :: integer = r.id
  LEFT JOIN shop AS s
    ON (j->>'shop_id') :: integer = s.id
  LEFT JOIN users AS a
    ON (j->>'admin_id') :: integer = a.id
  LEFT JOIN users AS m
    ON (j->>'manager_id') :: integer = m.id
  -- qualified: a bare "id" is ambiguous between u, r, s, a and m
  WHERE u.id = :id`
)

Converting select statement directly into json array

From here, and here I have figured out that if I want to aggregate a set of related rows into an array of objects I have to use this syntax:
(select to_json(C) from ( /* subquery */ ) C)
So, if I have three tables: user, creature and their junction table user_creature:
And I want to retrieve each user, and each creature that belongs to this user, I would have to do something like this:
-- Asker's query: one json document per user, with the user's creatures nested
-- under the "creatures" key through a scalar subquery in the select list.
select to_json(T)
from (
select "user".id as user_id,
(select to_json(C) -- !!! There it is
from (
select name, height
from creature
inner join "user_creature" uc on creature.id = "uc".creature_id
inner join "user" u on "uc".user_id = u.id
-- NOTE(review): the unqualified user_id below is looked up inside this subquery first
-- (uc exposes a user_id column), so it presumably does not refer to the outer
-- "user".id alias - verify before reusing this pattern
where u.id = user_id
) C) as "creatures" -- !!! There it is
from "user"
) T;
This query successfully retrieves a list of users and their related creatures:
Is there a way to drop select and from keywords from the query, so that I can write my query like this:
-- NOTE: hypothetical syntax the asker would like to write; it is not valid as written,
-- because a subquery used as a function argument needs its own pair of parentheses.
select to_json(T)
from (
select "user".id as user_id,
to_json( -- !!! Calling to_json directly on select statement
select name, height
from creature
inner join "user_creature" uc on creature.id = "uc".creature_id
inner join "user" u on "uc".user_id = u.id
where u.id = user_id
) as "creatures"
from "user"
) T;
It is possible to use a subquery as the argument to to_json, but not practical:
You need to wrap the subquery in a grouping parenthesis: to_json( (SELECT … FROM …) )
The subquery must return exactly one row (but that's normal)
The subquery must return exactly one column. This is a bit harder - you can return a record, but if you build it dynamically (e.g. from a selection of columns, you can hardly control the field names)
(See a demo here).
Instead, use json_build_object if you want to write a single SELECT query only:
-- One json object per user. "creatures" is filled by a scalar subquery, so this
-- form only works while a user owns at most one creature.
SELECT
    json_build_object(
        'user_id',   usr.id,
        'creatures', (
            SELECT json_build_object('name', cr.name, 'height', cr.height)
            FROM "user_creature" AS link
            INNER JOIN creature AS cr
                ON cr.id = link.creature_id
            WHERE link.user_id = usr.id
        )
    )
FROM "user" AS usr;
And, if you want to be able to retrieve multiple rows use SELECT json_agg(json_build_object(…)) FROM … or ARRAY(SELECT json_build_object(…) FROM …):
-- One json object per user; "creatures" is a json array with one object per creature
-- owned by the user.
SELECT json_build_object(
    'user_id', u.id,
    'creatures', (
        -- json_agg yields NULL when the user owns no creature; fall back to an empty
        -- array so that "creatures" is always a json array
        SELECT COALESCE(
            json_agg(json_build_object(
                'name', c.name,
                'height', c.height
            )),
            '[]' :: json
        )
        FROM creature c
        INNER JOIN "user_creature" uc ON c.id = uc.creature_id
        WHERE uc.user_id = u.id
    )
)
FROM "user" u;

Handle Null in jsonb_array_elements

I have 2 tables a and b
Table a
id | name | code
VARCHAR VARCHAR jsonb
1 xyz [14, 15, 16 ]
2 abc [null]
3 def [null]
Table b
id | name | code
1 xyz [16, 15, 14 ]
2 abc [null]
I want to figure out where the code does not match for same id and name. I sort code column in b b/c i know it same but sorted differently
-- Asker's query: pairs rows of a with rows of b on id, name and code, then keeps
-- the rows that found no partner on the other side.
SELECT a.id,
a.name,
a.code,
c.id,
c.name,
c.code
FROM a
-- c: table b with the elements of every code array re-aggregated in sorted order
FULL OUTER JOIN ( SELECT id,
name,
jsonb_agg(code ORDER BY code) AS code
FROM (
-- one row per distinct (id, name, array element)
-- NOTE(review): presumably rows of b whose code is SQL NULL yield no element
-- row here and therefore never reach c - verify against the data
SELECT id,
name,
jsonb_array_elements(code) AS code
FROM b
GROUP BY id,
name,
jsonb_array_elements(code)
) t
GROUP BY id,
name
) c
ON a.id = c.id
AND a.name = c.name
AND COALESCE (a.code, '[]'::jsonb) = COALESCE (c.code, '[]'::jsonb)
-- only rows missing on one side of the full join remain
WHERE (a.id IS NULL OR c.id IS NULL)
My answer in this case should only return id = 3 b/c its not in b table but my query is returning id = 2 as well b/c i am not handling the null case well enough in the inner subquery
How can i handle the null use case in the inner subquery?
demo:db<>fiddle
The <@ operator checks if all elements of the left array occur in the right one. The @> operator does it the other way round. So using both you can ensure that both arrays contain the same elements:
-- a contains every element of b AND a is contained in b
a.code @> b.code AND a.code <@ b.code
Nevertheless, this also accepts arrays in which one side contains duplicates, so [42,42] will be treated the same as [42]. If you want to avoid this, you should compare the array lengths as well:
AND jsonb_array_length(a.code) = jsonb_array_length(b.code)
Furthermore you might check if both values are NULL. This case has to be checked separately:
a.code IS NULL and b.code IS NULL
A little bit shorter form is using the COALESCE function:
COALESCE(a.code, b.code) IS NULL
So the whole query could look like this:
-- Pairs rows of a and b that share id and name and whose code arrays hold the same
-- elements regardless of order; rows without such a partner come out with the other
-- side's columns set to NULL.
SELECT
    *
FROM a
FULL OUTER JOIN b
    ON a.id = b.id AND a.name = b.name
    AND (
        COALESCE(a.code, b.code) IS NULL -- both null
        -- mutual containment: every element of one array occurs in the other
        OR (a.code @> b.code AND a.code <@ b.code
            AND jsonb_array_length(a.code) = jsonb_array_length(b.code) -- avoid accepting duplicates
        )
    )
After that you are able to filter the NULL values in the WHERE clause

PostgreSql unable to create view due to "duplicate column"

I am trying to create a country_name, and country cid pair between each country that are neighbours:
Here's the schema:
-- Countries, keyed by cid
CREATE TABLE country (
    cid        INTEGER     PRIMARY KEY,
    cname      VARCHAR(20) NOT NULL,
    height     INTEGER     NOT NULL,
    population INTEGER     NOT NULL
);

-- One row per (country, neighbor) pair; both columns reference country.cid
CREATE TABLE neighbour (
    country  INTEGER REFERENCES country (cid) ON DELETE RESTRICT,
    neighbor INTEGER REFERENCES country (cid) ON DELETE RESTRICT,
    length   INTEGER NOT NULL,
    PRIMARY KEY (country, neighbor)
);
My query:
-- NOTE: this is the failing version from the question. c1.cid and c2.cid end up with
-- the same output column name, hence: column "cid" specified more than once.
create view neighbour_pair as (
select c1.cid, c1.cname, c2.cid, c2.cname
from neighbour n join country c1 on c1.cid = n.country
join country c2 on n.neighbor = c2.cid);
I am getting error code 42701 which means that there is a duplicate column.
The actual error message I am getting is:
ERROR: column "cid" specified more than once
********** Error **********
ERROR: column "cid" specified more than once
SQL state: 42701
I am unsure how to go around the error problem since I WANT the pair of neighbour countries with the country name and their cid.
Nevermind. I edited the first line of the query and changed the column names
-- Each country paired with each of its neighbours; every output column has its own name
CREATE VIEW neighbour_pair AS
SELECT
    home.cid    AS c1cid,
    home.cname  AS c1name,
    other.cid   AS c2cid,
    other.cname AS c2name
FROM neighbour AS n
INNER JOIN country AS home
    ON home.cid = n.country
INNER JOIN country AS other
    ON n.neighbor = other.cid;
I ran into a similar issue recently. I had a query like:
-- NOTE: failing version. Both scalar subqueries are unaliased, which is reported as:
-- column "count" specified more than once.
CREATE VIEW pairs AS
SELECT p.id, p.name,
(SELECT count(id) from results
where winner = p.id),
(SELECT count(id) from results
where winner = p.id OR loser = p.id)
FROM players p LEFT JOIN matches m ON p.id = m.id
GROUP BY 1,2;
The error was telling me: ERROR: column "count" specified more than once. The query WAS working via psycopg2, however when I brought it into a .sql file for testing the error arose.
I realized I just needed to alias the 2 count subqueries:
-- Per player: number of results won and number of results played (won or lost).
-- Each counting subquery carries its own alias so the view's column names are unique.
CREATE VIEW pairs AS
SELECT
    p.id,
    p.name,
    (SELECT count(id) FROM results WHERE winner = p.id) AS wins,
    (SELECT count(id) FROM results WHERE winner = p.id OR loser = p.id) AS matches
FROM players AS p
LEFT JOIN matches AS m
    ON p.id = m.id
GROUP BY p.id, p.name;
You can use alias with AS:
For example your view could be as follows:
-- Pairs of neighbouring countries. The columns coming from the second country are
-- aliased so that every view column has a distinct name (both cid and cname would
-- otherwise be specified twice).
create view neighbour_pair as
(
select c1.cid
, c1.cname
, c2.cid AS cid_c2
, c2.cname AS cname_c2
from neighbour n
join country c1 on c1.cid = n.country
join country c2 on n.neighbor = c2.cid
);

Using row_to_json() with nested joins

I'm trying to map the results of a query to JSON using the row_to_json() function that was added in PostgreSQL 9.2.
I'm having trouble figuring out the best way to represent joined rows as nested objects (1:1 relations)
Here's what I've tried (setup code: tables, sample data, followed by query):
-- some test tables to start out with:
create table role_duties (
id serial primary key,
name varchar
);
create table user_roles (
id serial primary key,
name varchar,
description varchar,
duty_id int, foreign key (duty_id) references role_duties(id)
);
create table users (
id serial primary key,
name varchar,
email varchar,
user_role_id int, foreign key (user_role_id) references user_roles(id)
);
DO $$
DECLARE duty_id int;
DECLARE role_id int;
begin
insert into role_duties (name) values ('Script Execution') returning id into duty_id;
insert into user_roles (name, description, duty_id) values ('admin', 'Administrative duties in the system', duty_id) returning id into role_id;
insert into users (name, email, user_role_id) values ('Dan', 'someemail#gmail.com', role_id);
END$$;
The query itself:
-- Serializes every user; the role and the role's duty are nested through anonymous
-- ROW() values placed in the user_role column
select row_to_json(usr_row)
from (
    select
        u.*,
        ROW(ur.*::user_roles, ROW(d.*::role_duties)) as user_role
    from users as u
    inner join user_roles as ur
        on ur.id = u.user_role_id
    inner join role_duties as d
        on d.id = ur.duty_id
) as usr_row;
I found if I used ROW(), I could separate the resulting fields out into a child object, but it seems limited to a single level. I can't insert more AS XXX statements, as I think I should need in this case.
I am afforded column names, because I cast to the appropriate record type, for example with ::user_roles, in the case of that table's results.
Here's what that query returns:
{
"id":1,
"name":"Dan",
"email":"someemail#gmail.com",
"user_role_id":1,
"user_role":{
"f1":{
"id":1,
"name":"admin",
"description":"Administrative duties in the system",
"duty_id":1
},
"f2":{
"f1":{
"id":1,
"name":"Script Execution"
}
}
}
}
What I want to do is generate JSON for joins (again 1:1 is fine) in a way where I can add joins, and have them represented as child objects of the parents they join to, i.e. like the following:
{
"id":1,
"name":"Dan",
"email":"someemail#gmail.com",
"user_role_id":1,
"user_role":{
"id":1,
"name":"admin",
"description":"Administrative duties in the system",
"duty_id":1,
"duty":{
"id":1,
"name":"Script Execution"
}
}
}
Update: In PostgreSQL 9.4 this improves a lot with the introduction of to_json, json_build_object, json_object and json_build_array, though it's verbose due to the need to name all the fields explicitly:
-- User -> role -> duty as nested json objects; every key is spelled out explicitly
SELECT json_build_object(
    'id',           usr.id,
    'name',         usr.name,
    'email',        usr.email,
    'user_role_id', usr.user_role_id,
    'user_role',    json_build_object(
        'id',          rol.id,
        'name',        rol.name,
        'description', rol.description,
        'duty_id',     rol.duty_id,
        'duty',        json_build_object(
            'id',   dty.id,
            'name', dty.name
        )
    )
)
FROM users AS usr
INNER JOIN user_roles AS rol
    ON rol.id = usr.user_role_id
INNER JOIN role_duties AS dty
    ON dty.id = rol.duty_id;
For older versions, read on.
It isn't limited to a single row, it's just a bit painful. You can't alias composite rowtypes using AS, so you need to use an aliased subquery expression or CTE to achieve the effect:
-- The whole row of the inner join (urd) becomes the user_role column; the column
-- alias list on urd provides the field names, with the duty row exposed as "duty"
select row_to_json(usr_row)
from (
    select
        u.*,
        urd AS user_role
    from users u
    inner join (
        select ur.*, d
        from user_roles ur
        inner join role_duties d
            on d.id = ur.duty_id
    ) urd(id,name,description,duty_id,duty)
        on urd.id = u.user_role_id
) usr_row;
produces, via http://jsonprettyprint.com/:
{
"id": 1,
"name": "Dan",
"email": "someemail#gmail.com",
"user_role_id": 1,
"user_role": {
"id": 1,
"name": "admin",
"description": "Administrative duties in the system",
"duty_id": 1,
"duty": {
"id": 1,
"name": "Script Execution"
}
}
}
You will want to use array_to_json(array_agg(...)) when you have a 1:many relationship, btw.
The above query should ideally be able to be written as:
-- NOTE: illustrative only - this statement is not accepted, because the ROW
-- constructor does not take AS column aliases.
select row_to_json(
ROW(u.*, ROW(ur.*, d AS duty) AS user_role)
)
from users u
inner join user_roles ur on ur.id = u.user_role_id
inner join role_duties d on d.id = ur.duty_id;
... but PostgreSQL's ROW constructor doesn't accept AS column aliases. Sadly.
Thankfully, they optimize out the same. Compare the plans:
The nested subquery version; vs
The latter nested ROW constructor version with the aliases removed so it executes
Because CTEs are optimisation fences, rephrasing the nested subquery version to use chained CTEs (WITH expressions) may not perform as well, and won't result in the same plan. In this case you're kind of stuck with ugly nested subqueries until we get some improvements to row_to_json or a way to override the column names in a ROW constructor more directly.
Anyway, in general, the principle is that where you want to create a json object with columns a, b, c, and you wish you could just write the illegal syntax:
ROW(a, b, c) AS outername(name1, name2, name3)
you can instead use scalar subqueries returning row-typed values:
(SELECT x FROM (SELECT a AS name1, b AS name2, c AS name3) x) AS outername
Or:
(SELECT x FROM (SELECT a, b, c) AS x(name1, name2, name3)) AS outername
Additionally, keep in mind that you can compose json values without additional quoting, e.g. if you put the output of a json_agg within a row_to_json, the inner json_agg result won't get quoted as a string, it'll be incorporated directly as json.
e.g. in the arbitrary example:
-- Builds {"k1":..,"k2":..,"k3":[..]} where k3 is itself a json_agg result, showing
-- that nested json is embedded as json rather than quoted as text
SELECT row_to_json(
    (
        SELECT obj
        FROM (
            SELECT
                1 AS k1,
                2 AS k2,
                (
                    SELECT json_agg( (SELECT pair FROM (SELECT 1 AS a, 2 AS b) pair) )
                    FROM generate_series(1,2)
                ) AS k3
        ) obj
    ),
    true
);
the output is:
{"k1":1,
"k2":2,
"k3":[{"a":1,"b":2},
{"a":1,"b":2}]}
Note that the json_agg product, [{"a":1,"b":2}, {"a":1,"b":2}], hasn't been escaped again, as text would be.
This means you can compose json operations to construct rows, you don't always have to create hugely complex PostgreSQL composite types then call row_to_json on the output.
I am adding this solution because the accepted response does not cover N:N relationships, i.e. collections of collections of objects.
If you have N:N relationships, the WITH clause is your friend.
In my example, I would like to build a tree view of the following hierarchy.
A Requirement - Has - TestSuites
A Test Suite - Contains - TestCases.
The following query represents the joins.
-- Flat view of the hierarchy: one row per (requirement, test suite, test case).
-- No aggregation is done here; the row multiplication of the two N:N links is what
-- the following WITH query folds into nested json.
SELECT r.id as reqId , r.description as reqDesc ,
s.id as suiteId , s."Name" as suiteName,
tc.id as tcId , tc."Title" as testCaseTitle
from "Requirement" r
inner join "Has" h on r.id = h.requirementid
inner join "TestSuite" s on s.id = h.testsuiteid
inner join "Contains" c on c.testsuiteid = s.id
inner join "TestCase" tc on tc.id = c.testcaseid;
Since you can not do multiple aggregations, you need to use "WITH".
-- Builds the nested json bottom-up, one aggregation level per CTE:
-- test cases -> per-suite array -> per-requirement array -> single array of requirements.
with testcases as (
    -- one row per (test suite, test case) link
    select c.testsuiteid, ts."Name", tc.id, tc."Title"
    from "TestSuite" ts
    inner join "Contains" c on c.testsuiteid = ts.id
    inner join "TestCase" tc on tc.id = c.testcaseid
),
suitesJson as (
    -- per test suite: json array of its test cases
    select testcases.testsuiteid,
           json_agg(
               json_build_object('tc_id', testcases.id, 'tc_title', testcases."Title")
           ) as suiteJson
    from testcases
    group by testcases.testsuiteid, testcases."Name"
),
allSuites as (
    -- per requirement: json array of its test suites, each carrying its test cases
    select has.requirementid,
           json_agg(
               json_build_object('ts_id', suitesJson.testsuiteid, 'name', s."Name", 'test_cases', suitesJson.suiteJson)
           ) as suites
    from suitesJson
    inner join "TestSuite" s on s.id = suitesJson.testsuiteid
    inner join "Has" has on has.testsuiteid = s.id
    group by has.requirementid
),
allRequirements as (
    -- single row: json array of all requirements with their nested suites
    select json_agg(
               json_build_object('req_id', r.id, 'req_description', r.description, 'test_suites', allSuites.suites)
           ) as suites
    from allSuites
    inner join "Requirement" r on r.id = allSuites.requirementid
)
select suites from allRequirements
What it does is build the JSON object from small collections of items, aggregating them in each WITH clause.
Result:
[
{
"req_id": 1,
"req_description": "<character varying>",
"test_suites": [
{
"ts_id": 1,
"name": "TestSuite",
"test_cases": [
{
"tc_id": 1,
"tc_title": "TestCase"
},
{
"tc_id": 2,
"tc_title": "TestCase2"
}
]
},
{
"ts_id": 2,
"name": "TestSuite",
"test_cases": [
{
"tc_id": 2,
"tc_title": "TestCase2"
}
]
}
]
},
{
"req_id": 2,
"req_description": "<character varying> 2 ",
"test_suites": [
{
"ts_id": 2,
"name": "TestSuite",
"test_cases": [
{
"tc_id": 2,
"tc_title": "TestCase2"
}
]
}
]
}
]
My suggestion for maintainability over the long term is to use a VIEW to build the coarse version of your query, and then use a function as below:
-- Returns all rows of prominence.users as one json array, one json object per row.
-- Rows are cast to the prominence.users row type before being serialized.
CREATE OR REPLACE FUNCTION fnc_query_prominence_users( )
RETURNS json
LANGUAGE plpgsql
SECURITY INVOKER
AS $body$
DECLARE
    v_users_json json;
BEGIN
    SELECT array_to_json(
               array_agg(
                   row_to_json(CAST(ROW(users.*) AS prominence.users))
               )
           )
      INTO v_users_json
      FROM prominence.users;

    RETURN v_users_json;
END;
$body$;
In this case, the object prominence.users is a view. Since I selected users.*, I will not have to update this function if I need to update the view to include more fields in a user record.