Flattening Postgres nested JSONB column - postgresql

I'm looking to see how to flatten data nested in a JSONB column.
As an example, say we have the table users with user_id(int) and siblings(JSONB)
With rows like:
id | JSONB
---------------------
1 | {"brother": {"first_name":"Sam", "last_name":"Smith"}, "sister": {"first_name":"Sally", "last_name":"Smith"}
2 | {"sister": {"first_name":"Jill"}}
I'm looking for a query that will return a response like:
id | sibling | first_name | last_name
-------------------------------------
1 | "brother" | "Sam" | "Smith"
1 | "sister" | "Sally" | "Smith"
2 | "sister" | "Jill" | null

I develop to this use it in psql.
To check code I create small view t1:
CREATE VIEW t1 AS (
SELECT 1 AS id, '{"brother": {"first_name":"Sam", "last_name":"Smith"}, "sister": {"first_name":"Sally", "last_name":"Smith"}}'::jsonb AS jsonb
UNION SELECT 2, '{"sister": {"first_name":"Jill", "last_name":"Johnson"}}'
UNION SELECT 3, '{"sister": {"first_name":"Jill", "x_name":"Johnson"}}'
);
The first task is to found list of possible key:
WITH fields AS (
SELECT DISTINCT jff.key
FROM t1,
jsonb_each(jsonb) AS jf,
jsonb_each(jf.value) AS jff
)
SELECT * FROM fields;
The result is:
key
------------
first_name
last_name
x_name
The next step is generate queries:
SELECT 'SELECT id, jf.key as sibling, ' || (
WITH fields AS (
SELECT DISTINCT jff.key
FROM t1,
jsonb_each(jsonb) AS jf,
jsonb_each(jf.value) AS jff
)
SELECT string_agg('jf.value->>''' || key || ''' as "' || key || '"', ',' ORDER BY key)
FROM fields
)
|| ' FROM t1, jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;' AS cmd;
It returns:
cmd
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SELECT id, jf.key as sibling,jf.value->>'first_name' as "first_name",jf.value->>'last_name' as "last_name",jf.value->>'x_name' as "x_name" FROM t1, jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;
(1 row)
To set result as psql variable I use gset:
\gset
After that you can call query:
:cmd
id | sibling | first_name | last_name | x_name
----+---------+------------+-----------+---------
1 | brother | Sam | Smith |
1 | sister | Sally | Smith |
2 | sister | Jill | Johnson |
3 | sister | Jill | | Johnson
(4 rows)
To run it from external languages you can create postgres function than return SQL command:
CREATE OR REPLACE FUNCTION build_query(IN tname text, OUT cmd text) AS $sql$
BEGIN
EXECUTE $cmd$
SELECT 'SELECT id, jf.key as sibling, ' || (
WITH fields AS (
SELECT DISTINCT jff.key
FROM t1,
jsonb_each(jsonb) AS jf,
jsonb_each(jf.value) AS jff
)
SELECT string_agg('jf.value->>''' || key || ''' as "' || key || '"', ',' ORDER BY key)
FROM fields
)
|| ' FROM $cmd$ || quote_ident(tname) || $cmd$ , jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;'$cmd$ INTO cmd;
RETURN;
END;
$sql$ LANGUAGE plpgsql;
SELECT * FROM build_query('t1');
cmd
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SELECT id, jf.key as sibling, jf.value->>'first_name' as "first_name",jf.value->>'last_name' as "last_name",jf.value->>'x_name' as "x_name" FROM t1 , jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;
(1 row)

Related

Aggregating distinct values from JSONB arrays combined with SQL group by

I am trying to aggregate distinct values from JSONB arrays in a SQL GROUP BY statement:
One dataset has many cfiles and a cfile only ever has one dataset
SELECT * FROM cfiles;
id | dataset_id | property_values (jsonb)
----+------------+-----------------------------------------------
1 | 1 | {"Sample Names": ["SampA", "SampB", "SampC"]}
2 | 1 | {"Sample Names": ["SampA", "SampB", "SampD"]}
3 | 1 | {"Sample Names": ["SampE"]}
4 | 2 | {"Sample Names": ["SampA", "SampF"]}
5 | 2 | {"Sample Names": ["SampG"]}
This query works and returns the correct result I want but it's a mess.
SELECT distinct(datasets.id) as dataset_id,
ARRAY_TO_STRING(
ARRAY(
SELECT DISTINCT * FROM unnest(
STRING_TO_ARRAY(
STRING_AGG(
DISTINCT REPLACE(
REPLACE(
REPLACE(
REPLACE(
cfiles.property_values ->> 'Sample Names', '",' || chr(32) || '"', ';'
), '[' , ''
), '"' , ''
), ']' , ''
), ';'
), ';'
)
) ORDER BY 1 ASC
), '; '
) as sample_names
FROM datasets
JOIN cfiles ON cfiles.dataset_id=datasets.id
GROUP BY datasets.id
dataset_id | sample_names
------------+-----------------------------------
1 | SampA; SampB; SampC; SampD; SampE
2 | SampA; SampF; SampG
Is there a better way to write this query without all the string manipulation?
I tired jsonb_array_elements but it gave me the error subquery uses ungrouped column "cfiles.property_values" from outer query. So then I added cfiles.property_values to the GROUP BY but it no longer grouped just by the dataset_id
Not the result I want:
SELECT DISTINCT datasets.id as dataset_id,
ARRAY_TO_STRING(
ARRAY(
SELECT DISTINCT * FROM jsonb_array_elements(
cfiles.property_values -> 'Sample Names'
) ORDER BY 1 ASC
), '; '
) as sample_names
FROM datasets
JOIN cfiles ON cfiles.dataset_id=datasets.id
GROUP BY datasets.id, cfiles.property_values
dataset_id | sample_names
------------+---------------------------
1 | "SampA"; "SampB"; "SampC"
1 | "SampA"; "SampB"; "SampD"
1 | "SampE"
2 | "SampA"; "SampF"
2 | "SampG"
SQL for creating demo
CREATE TABLE datasets (
id INT PRIMARY KEY
);
CREATE TABLE cfiles (
id INT PRIMARY KEY,
dataset_id INT,
property_values JSONB,
FOREIGN KEY (dataset_id) REFERENCES datasets(id)
);
INSERT INTO datasets values (1),(2);
INSERT INTO cfiles values
(1,1,'{"Sample Names":["SampA", "SampB", "SampC"]}'),
(2,1,'{"Sample Names":["SampA", "SampB", "SampD"]}'),
(3,1,'{"Sample Names":["SampE"]}');
INSERT INTO cfiles values
(4,2,'{"Sample Names":["SampA", "SampF"]}'),
(5,2,'{"Sample Names":["SampG"]}');
jsonb_array_elements is a set returning function and should be used in the FROM clause. Using it in the SELECT list makes things unnecessarily complicated:
select c.dataset_id, string_agg(distinct n.name, '; ' order by n.name)
from cfiles c
cross join jsonb_array_elements_text(c.property_values -> 'Sample Names') as n(name)
group by c.dataset_id
order by c.dataset_id;
Online example

PostgreSQL recursive parent/child query

I'm having some trouble working out the PostgreSQL documentation for recursive queries, and wonder if anyone might be able to offer a suggestion for the following.
Here's the data:
Table "public.subjects"
Column | Type | Collation | Nullable | Default
-------------------+-----------------------------+-----------+----------+--------------------------------------
id | bigint | | not null | nextval('subjects_id_seq'::regclass)
name | character varying | | |
Table "public.subject_associations"
Column | Type | Collation | Nullable | Default
------------+-----------------------------+-----------+----------+--------------------------------------------------
id | bigint | | not null | nextval('subject_associations_id_seq'::regclass)
parent_id | integer | | |
child_id | integer | | |
Here, a "subject" may have many parents and many children. Of course, at the top level a subject has no parents and at the bottom no children. For example:
parent_id | child_id
------------+------------
2 | 3
1 | 4
1 | 3
4 | 8
4 | 5
5 | 6
6 | 7
What I'm looking for is starting with a child_id to get all the ancestors, and with a parent_id, all the descendants. Therefore:
parent_id 1 -> children 3, 4, 5, 6, 7, 8
parent_id 2 -> children 3
child_id 3 -> parents 1, 2
child_id 4 -> parents 1
child_id 7 -> parents 6, 5, 4, 1
Though there seem to be a lot of examples of similar things about I'm having trouble making sense of them, so any suggestions I can try out would be welcome.
To get all children for subject 1, you can use
WITH RECURSIVE c AS (
SELECT 1 AS id
UNION ALL
SELECT sa.child_id
FROM subject_associations AS sa
JOIN c ON c.id = sa. parent_id
)
SELECT id FROM c;
CREATE OR REPLACE FUNCTION func_finddescendants(start_id integer)
RETURNS SETOF subject_associations
AS $$
DECLARE
BEGIN
RETURN QUERY
WITH RECURSIVE t
AS
(
SELECT *
FROM subject_associations sa
WHERE sa.id = start_id
UNION ALL
SELECT next.*
FROM t prev
JOIN subject_associations next ON (next.parentid = prev.id)
)
SELECT * FROM t;
END;
$$ LANGUAGE PLPGSQL;
Try this
--- Table
-- DROP SEQUENCE public.data_id_seq;
CREATE SEQUENCE "data_id_seq"
INCREMENT 1
MINVALUE 1
MAXVALUE 9223372036854775807
START 1
CACHE 1;
ALTER TABLE public.data_id_seq
OWNER TO postgres;
CREATE TABLE public.data
(
id integer NOT NULL DEFAULT nextval('data_id_seq'::regclass),
name character varying(50) NOT NULL,
label character varying(50) NOT NULL,
parent_id integer NOT NULL,
CONSTRAINT data_pkey PRIMARY KEY (id),
CONSTRAINT data_name_parent_id_unique UNIQUE (name, parent_id)
)
WITH (
OIDS=FALSE
);
INSERT INTO public.data(id, name, label, parent_id) VALUES (1,'animal','Animal',0);
INSERT INTO public.data(id, name, label, parent_id) VALUES (5,'birds','Birds',1);
INSERT INTO public.data(id, name, label, parent_id) VALUES (6,'fish','Fish',1);
INSERT INTO public.data(id, name, label, parent_id) VALUES (7,'parrot','Parrot',5);
INSERT INTO public.data(id, name, label, parent_id) VALUES (8,'barb','Barb',6);
--- Function
CREATE OR REPLACE FUNCTION public.get_all_children_of_parent(use_parent integer) RETURNS integer[] AS
$BODY$
DECLARE
process_parents INT4[] := ARRAY[ use_parent ];
children INT4[] := '{}';
new_children INT4[];
BEGIN
WHILE ( array_upper( process_parents, 1 ) IS NOT NULL ) LOOP
new_children := ARRAY( SELECT id FROM data WHERE parent_id = ANY( process_parents ) AND id <> ALL( children ) );
children := children || new_children;
process_parents := new_children;
END LOOP;
RETURN children;
END;
$BODY$
LANGUAGE plpgsql VOLATILE COST 100;
ALTER FUNCTION public.get_all_children_of_parent(integer) OWNER TO postgres
--- Test
SELECT * FROM data WHERE id = any(get_all_children_of_parent(1))
SELECT * FROM data WHERE id = any(get_all_children_of_parent(5))
SELECT * FROM data WHERE id = any(get_all_children_of_parent(6))

Postgres find max column value from multiple tables

I have list of tables that have specific column names like
SELECT table_name
FROM information_schema.columns
WHERE column_name = 'column1'
I need to find the max value of column1 for each tables. I expect result like the following
|--------|--------------|
| Table | Max column1 |
|--------|--------------|
| Table1 | 100 |
| Table2 | 200 |
| ... | ... |
|--------|--------------|
How can I construct a query?
You can use a variation of the row count for all tables approach:
select t.table_name,
(xpath('/row/max/text()', xmax))[1]::text::int
from (
SELECT table_name, data_type,
query_to_xml(format('select max(%I) from %I.%I', column_name, table_schema, table_name), true, true, '') as xmax
FROM information_schema.columns
WHERE column_name = 'column1'
and table_schema = 'public'
) as t;
query_to_xml() runs a select max(..) from .. for each column returned from the query. The result of that is something like:
<row xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<max>42</max>
</row>
The xpath() function is then used to extract the value from the XML. The derived table (sub-query) is not really needed, but makes the xpath() expression more readable (in my opinion).
You may create a generic function that returns a TABLE type by constructing a UNION ALL query from information_schema.columns
CREATE OR REPLACE FUNCTION public.get_max(TEXT )
RETURNS TABLE(t_table_name TEXT, t_max text )
LANGUAGE plpgsql
AS $BODY$
DECLARE
p_colname TEXT := $1;
v_sql_statement TEXT;
BEGIN
SELECT STRING_AGG( 'SELECT '''||table_name||''','||' MAX('
|| column_name||'::text'
|| ') FROM '
|| table_name
,' UNION ALL ' ) INTO v_sql_statement
FROM information_schema.columns
WHERE column_name = p_colname
--and table_schema = 'public';
IF v_sql_statement IS NOT NULL THEN
RETURN QUERY EXECUTE v_sql_statement;
END IF;
END
$BODY$;
Execute and get the results like this.
knayak=# select * FROM get_max('id');
t_table_name | t_max
--------------+-------
f | 2
t2 | 1
friends | id4
person | 2
customer |
employe |
diary | 4
jsontable | 6
atable |
t_json | 2
ingredients | 1
test | 2
accts |
mytable | 30
(14 rows)

sql to display all names in 1 row (1 string)

ad_org table with column id & name
ad_org
ad_org_id | name
----------------------------------+-----------
357947E87C284935AD1D783CF6F099A1 | Spain
43D590B4814049C6B85C6545E8264E37 | Main
5EFF95EB540740A3B10510D9814EFAD5 | USA
2878085215E54C73A04D394BFD170733 | India
22669845D93A49A98932CE29AE02E0FD | Honkong
how to get output of all names(in 1 string) in this way from the above database
Spain | Main | USA | India | Honkong
in 1 select statement.
Use string_agg.
SELECT string_agg("name", ' | ') FROM thetable;
For older PostgreSQL, you must use array_agg and array_to_string:
SELECT array_to_string( array_agg("name"), ' | ') FROM thetable;
If you want a particular order, put it in the aggregate, e.g for alphabetical:
SELECT string_agg("name", ' | ' ORDER BY "name") FROM thetable;
use below code
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX)
select #cols = STUFF((SELECT ',' + QUOTENAME(ColumnName)
from yourtable
group by ColumnName, id
order by id
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT ' + #cols + ' from
(
select value, ColumnName
from yourtable
) x
pivot
(
max(value)
for ColumnName in (' + #cols + ')
) p '
execute(#query)
Click here for Demo
got it by searching..
Equivalent to PostgreSQL array() / array_to_string() functions in Oracle 9i
select array_to_string(array(select name from ad_org), '|') as names;

How to select with the length of the field / untrim using select

I have this table that I would like to select values from. The thing is that I would like to get the exact amount of length in the output of my select as I have in my table.
For example:
CREATE TABLE myschema.test_table
(
id serial NOT NULL,
otyp CHARACTER VARYING(3),
fname CHARACTER VARYING(8),
age integer
) WITH(OIDS=FALSE);
And let's say that this table contain
id | otyp | fname | age
----+------+--------+-----
1 | aa | gustav | 20
(1 row)
SELECT id || otyp || fname || age FROM myschema.test_table;
This would give me this result: 1aagustav20
I want the output to be 1aa gustav 20
Any help would appreciated!
Just do
SELECT id || otyp || ' ' || fname || ' ' || age FROM myschema.test_table;
I solved the problem!
SELECT id
|| RPAD(otyp, 3, ' ')
|| RPAD(fname, 8, ' ')
|| age
FROM
myschema.test_table;