Aggregating distinct values from JSONB arrays combined with SQL group by - postgresql

I am trying to aggregate distinct values from JSONB arrays in a SQL GROUP BY statement:
One dataset has many cfiles and a cfile only ever has one dataset
SELECT * FROM cfiles;
id | dataset_id | property_values (jsonb)
----+------------+-----------------------------------------------
1 | 1 | {"Sample Names": ["SampA", "SampB", "SampC"]}
2 | 1 | {"Sample Names": ["SampA", "SampB", "SampD"]}
3 | 1 | {"Sample Names": ["SampE"]}
4 | 2 | {"Sample Names": ["SampA", "SampF"]}
5 | 2 | {"Sample Names": ["SampG"]}
This query works and returns the correct result I want but it's a mess.
SELECT distinct(datasets.id) as dataset_id,
ARRAY_TO_STRING(
ARRAY(
SELECT DISTINCT * FROM unnest(
STRING_TO_ARRAY(
STRING_AGG(
DISTINCT REPLACE(
REPLACE(
REPLACE(
REPLACE(
cfiles.property_values ->> 'Sample Names', '",' || chr(32) || '"', ';'
), '[' , ''
), '"' , ''
), ']' , ''
), ';'
), ';'
)
) ORDER BY 1 ASC
), '; '
) as sample_names
FROM datasets
JOIN cfiles ON cfiles.dataset_id=datasets.id
GROUP BY datasets.id
dataset_id | sample_names
------------+-----------------------------------
1 | SampA; SampB; SampC; SampD; SampE
2 | SampA; SampF; SampG
Is there a better way to write this query without all the string manipulation?
I tried jsonb_array_elements but it gave me the error subquery uses ungrouped column "cfiles.property_values" from outer query. So then I added cfiles.property_values to the GROUP BY, but then it no longer grouped by just the dataset_id.
Not the result I want:
SELECT DISTINCT datasets.id as dataset_id,
ARRAY_TO_STRING(
ARRAY(
SELECT DISTINCT * FROM jsonb_array_elements(
cfiles.property_values -> 'Sample Names'
) ORDER BY 1 ASC
), '; '
) as sample_names
FROM datasets
JOIN cfiles ON cfiles.dataset_id=datasets.id
GROUP BY datasets.id, cfiles.property_values
dataset_id | sample_names
------------+---------------------------
1 | "SampA"; "SampB"; "SampC"
1 | "SampA"; "SampB"; "SampD"
1 | "SampE"
2 | "SampA"; "SampF"
2 | "SampG"
SQL for creating demo
CREATE TABLE datasets (
id INT PRIMARY KEY
);
CREATE TABLE cfiles (
id INT PRIMARY KEY,
dataset_id INT,
property_values JSONB,
FOREIGN KEY (dataset_id) REFERENCES datasets(id)
);
INSERT INTO datasets values (1),(2);
INSERT INTO cfiles values
(1,1,'{"Sample Names":["SampA", "SampB", "SampC"]}'),
(2,1,'{"Sample Names":["SampA", "SampB", "SampD"]}'),
(3,1,'{"Sample Names":["SampE"]}');
INSERT INTO cfiles values
(4,2,'{"Sample Names":["SampA", "SampF"]}'),
(5,2,'{"Sample Names":["SampG"]}');

jsonb_array_elements is a set returning function and should be used in the FROM clause. Using it in the SELECT list makes things unnecessarily complicated:
select c.dataset_id, string_agg(distinct n.name, '; ' order by n.name)
from cfiles c
cross join jsonb_array_elements_text(c.property_values -> 'Sample Names') as n(name)
group by c.dataset_id
order by c.dataset_id;
Online example

Related

Concatenate the result of a query into a variable in PostgreSQL

Is it possible to concatenate the result of a query into a variable in postgresql?
Something like this in MSSQL:
-- T-SQL: accumulate every FromTable.Name into one variable, separated by ' UNION ALL '.
-- Local variables take the @ sigil; COALESCE suppresses the separator before the first row,
-- when the variable is still NULL.
DECLARE @Names_tmp NVARCHAR(max);
select @Names_tmp =
COALESCE(@Names_tmp + ' UNION ALL ', '') +
FromTable.Name
from FromTable
FromTable structure:
Key Name Other Columns ...
1 name_1 asd
2 name_2 asd
3 name_3 asd
PRINT CAST(@Names_tmp AS NTEXT)
result:
name_1 UNION ALL name_2 UNION ALL name_3
I see no need to use plpgsql for this matter. PostgreSQL aggregate functions should do it:
CREATE TEMPORARY TABLE t (id INT, name TEXT, asd TEXT);
INSERT INTO t VALUES (1,'name_1','asd'),
(2,'name_2','asd'),
(3,'name_3','asd');
SELECT ARRAY_TO_STRING(ARRAY_AGG(name),' UNION ALL ') FROM t;
SELECT STRING_AGG(name, ' UNION ALL ') FROM t;
Result:
------------------------------------------
name_1 UNION ALL name_2 UNION ALL name_3
(1 Zeile)
Use STRING_AGG
[SQL Fiddle][1]
Query 1:
select string_agg(name,' UNION ALL ') as res from t
Results:
| res |
|------------------------------------------|
| name_1 UNION ALL name_2 UNION ALL name_3 |

Flattening Postgres nested JSONB column

I'm looking to see how to flatten data nested in a JSONB column.
As an example, say we have the table users with user_id(int) and siblings(JSONB)
With rows like:
id | JSONB
---------------------
1 | {"brother": {"first_name":"Sam", "last_name":"Smith"}, "sister": {"first_name":"Sally", "last_name":"Smith"}}
2 | {"sister": {"first_name":"Jill"}}
I'm looking for a query that will return a response like:
id | sibling | first_name | last_name
-------------------------------------
1 | "brother" | "Sam" | "Smith"
1 | "sister" | "Sally" | "Smith"
2 | "sister" | "Jill" | null
I developed this solution for use in psql.
To test the code, I created a small view t1:
CREATE VIEW t1 AS (
SELECT 1 AS id, '{"brother": {"first_name":"Sam", "last_name":"Smith"}, "sister": {"first_name":"Sally", "last_name":"Smith"}}'::jsonb AS jsonb
UNION SELECT 2, '{"sister": {"first_name":"Jill", "last_name":"Johnson"}}'
UNION SELECT 3, '{"sister": {"first_name":"Jill", "x_name":"Johnson"}}'
);
The first task is to find the list of possible keys:
WITH fields AS (
SELECT DISTINCT jff.key
FROM t1,
jsonb_each(jsonb) AS jf,
jsonb_each(jf.value) AS jff
)
SELECT * FROM fields;
The result is:
key
------------
first_name
last_name
x_name
The next step is to generate the query:
SELECT 'SELECT id, jf.key as sibling, ' || (
WITH fields AS (
SELECT DISTINCT jff.key
FROM t1,
jsonb_each(jsonb) AS jf,
jsonb_each(jf.value) AS jff
)
SELECT string_agg('jf.value->>''' || key || ''' as "' || key || '"', ',' ORDER BY key)
FROM fields
)
|| ' FROM t1, jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;' AS cmd;
It returns:
cmd
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SELECT id, jf.key as sibling,jf.value->>'first_name' as "first_name",jf.value->>'last_name' as "last_name",jf.value->>'x_name' as "x_name" FROM t1, jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;
(1 row)
To set result as psql variable I use gset:
\gset
After that you can call query:
:cmd
id | sibling | first_name | last_name | x_name
----+---------+------------+-----------+---------
1 | brother | Sam | Smith |
1 | sister | Sally | Smith |
2 | sister | Jill | Johnson |
3 | sister | Jill | | Johnson
(4 rows)
To run it from external languages, you can create a Postgres function that returns the SQL command:
-- build_query(tname): returns in cmd the text of a SELECT statement that flattens the
-- nested JSONB column "jsonb" of table/view tname into one output column per inner key.
--   tname  name of a table or view that has columns "id" and "jsonb"
--   cmd    generated SQL text; NULL when no inner keys are found
-- The inner keys are discovered from tname itself (not from a hard-coded relation),
-- so the generated column list always matches the table the query will read.
CREATE OR REPLACE FUNCTION build_query(IN tname text, OUT cmd text) AS $sql$
BEGIN
    EXECUTE format(
        $cmd$
        SELECT 'SELECT id, jf.key as sibling, '
            || string_agg(
                   -- quote_literal() escapes the key inside ->> '...';
                   -- doubling embedded double quotes keeps the alias a valid identifier.
                   'jf.value->>' || quote_literal(f.key)
                       || ' as "' || replace(f.key, '"', '""') || '"',
                   ',' ORDER BY f.key
               )
            || ' FROM ' || %L || ' , jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;'
        FROM (
            -- distinct set of second-level keys present anywhere in the table
            SELECT DISTINCT jff.key
            FROM %I AS src
            CROSS JOIN LATERAL jsonb_each(src.jsonb) AS jf
            CROSS JOIN LATERAL jsonb_each(jf.value) AS jff
        ) AS f
        $cmd$,
        quote_ident(tname),  -- %L: identifier text embedded in the generated command
        tname                -- %I: relation scanned to discover the keys
    ) INTO cmd;
    RETURN;
END;
$sql$ LANGUAGE plpgsql;
SELECT * FROM build_query('t1');
cmd
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SELECT id, jf.key as sibling, jf.value->>'first_name' as "first_name",jf.value->>'last_name' as "last_name",jf.value->>'x_name' as "x_name" FROM t1 , jsonb_each(jsonb) AS jf ORDER BY 1, 2, 3;
(1 row)

How to compare two identicals tables data of each column in postgres?

I want to compare all column values of two tables. The two tables are identical in structure: they have the same columns and the same primary key. Can anyone suggest a query that compares two such tables in Postgres?
The query should return the column name and the differing values from the two tables, like this:
pkey | column_name | table1_value | table2_value
123 | bonus | 1 | 0
To get all different rows you can use:
select *
from table_1 t1
join table_2 t2 on t1.pkey = t2.pkey
where t1 is distinct from t2;
This will only compare rows that exist in both tables. If you also want to find those that are missing in one of them, use a full outer join:
-- Rows that differ between table_1 and table_2, plus rows present in only one of them.
-- A full outer join keeps unmatched rows from both sides; the unmatched side's pkey is NULL.
select coalesce(t1.pkey, t2.pkey) as pkey,
       case
         when t1.pkey is null then 'Missing in table_1'
         when t2.pkey is null then 'Missing in table_2'
         else 'At least one column is different'
       end as status,
       *
from table_1 t1
  full outer join table_2 t2 on t1.pkey = t2.pkey
where (t1 is distinct from t2)
   or (t1.pkey is null)
   or (t2.pkey is null);
If you install the hstore extension, you can view the differences as a key/value map:
-- Same comparison as a key/value map (requires the hstore extension):
-- hstore(a) - hstore(b) keeps only the column/value pairs of a that are not in b.
select coalesce(t1.pkey, t2.pkey) as pkey,
       case
         when t1.pkey is null then 'Missing in table_1'
         when t2.pkey is null then 'Missing in table_2'
         else 'At least one column is different'
       end as status,
       hstore(t1) - hstore(t2) as values_in_table_1,
       hstore(t2) - hstore(t1) as values_in_table_2
from table_1 t1
  full outer join table_2 t2 on t1.pkey = t2.pkey
where (t1 is distinct from t2)
   or (t1.pkey is null)
   or (t2.pkey is null);
Using this sample data:
create table table_1 (pkey integer primary key, col_1 text, col_2 int);
insert into table_1 (pkey, col_1, col_2)
values (1, 'a', 1), (2, 'b', 2), (3, 'c', 3), (5, 'e', 42);
create table table_2 (pkey integer primary key, col_1 text, col_2 int);
insert into table_2 (pkey, col_1, col_2)
values (1,'a', 1), (2, 'x', 2), (3, 'c', 33), (4, 'd', 52);
A possible result would be:
pkey | status | values_in_table_1 | values_in_table_2
-----+----------------------------------+-------------------+------------------
2 | At least one column is different | "col_1"=>"b" | "col_1"=>"x"
3 | At least one column is different | "col_2"=>"3" | "col_2"=>"33"
4 | Missing in table_1 | |
5 | Missing in table_2 | |
Example data:
create table test1(pkey serial primary key, str text, val int);
insert into test1 (str, val) values ('a', 1), ('b', 2), ('c', 3);
create table test2(pkey serial primary key, str text, val int);
insert into test2 (str, val) values ('a', 1), ('x', 2), ('c', 33);
This simple query gives a complete information on differences of two tables (including rows missing in one of them):
(select 1 t, * from test1
except
select 1 t, * from test2)
union all
(select 2 t, * from test2
except
select 2 t, * from test1)
order by pkey, t;
t | pkey | str | val
---+------+-----+-----
1 | 2 | b | 2
2 | 2 | x | 2
1 | 3 | c | 3
2 | 3 | c | 33
(4 rows)
In Postgres 9.5+ you can transpose the result to the expected format using jsonb functions:
select pkey, key as column, val[1] as value_1, val[2] as value_2
from (
select pkey, key, array_agg(value order by t) val
from (
select t, pkey, key, value
from (
(select 1 t, * from test1
except
select 1 t, * from test2)
union all
(select 2 t, * from test2
except
select 2 t, * from test1)
) s,
lateral jsonb_each_text(to_jsonb(s))
group by 1, 2, 3, 4
) s
group by 1, 2
) s
where key <> 't' and val[1] <> val[2]
order by pkey;
pkey | column | value_1 | value_2
------+--------+---------+---------
2 | str | b | x
3 | val | 3 | 33
(2 rows)
I tried all of the above answers. Thanks, guys, for your help. But after googling I found a simple query.
-- Template: replace <common_column_list> with the columns shared by both tables.
SELECT <common_column_list> FROM table1
EXCEPT
SELECT <common_column_list> FROM table2;
It shows every row of table1 that has no identical row in table2, i.e. where at least one column value differs.
Not very nice but fun and it works :o)
Just replace public.mytable1 and public.mytable2 by correct tables and
update the " where table_schema='public' and table_name='mytable1'"
select * from (
select pkey,column_name,t1.col_value table1_value,t2.col_value table2_value from (
select pkey,generate_subscripts(t,1) ordinal_position,unnest(t) col_value from (
select pkey,
(
replace(regexp_replace( -- null fields
'{'||substring(a::character varying,'^.(.*).$') ||'}' -- {} instead of ()
,'([\{,])([,\}])','\1null\2','g'),',,',',null,')
)::TEXT[] t
from public.mytable1 a
) a) t1
left join (
select pkey,generate_subscripts(t,1) ordinal_position,unnest(t) col_value from (
select pkey,
(
replace(regexp_replace( -- null fields
'{'||substring(a::character varying,'^.(.*).$') ||'}' -- {} instead of ()
,'([\{,])([,\}])','\1null\2','g'),',,',',null,')
)::TEXT[] t
from public.mytable2 a
) a) t2 using (pkey,ordinal_position)
join (select * from information_schema.columns where table_schema='public' and table_name='mytable1') c using (ordinal_position)
) final where COALESCE(table1_value,'')!=COALESCE(table2_value,'')

How to get all combinations of comma separated values in a T-SQL query

I have table with three columns: column 2 and 3 contains comma-separated values.
-col1----col2---col3--
| 1 | 1,2,3 | 4,5 |
----------------------
What is the most efficient way to get a table of three columns that contains all the combinations of values of these three columns, like this:
1 | 1 | 4
1 | 2 | 4
1 | 3 | 4
1 | 1 | 5
1 | 2 | 5
1 | 3 | 5
Using query and nodes:
-- Table variable (note the @ sigil) holding the sample row with comma-separated columns.
DECLARE @t TABLE (col1 VARCHAR(100), col2 VARCHAR(100), col3 VARCHAR(100))
INSERT INTO @t (col1, col2, col3) VALUES ('1', '1,2,3', '4,5')

-- Rewrite each CSV value as an XML fragment (<x>1</x><x>2</x>...) so that nodes()
-- can shred it into one row per element.
;WITH cte AS
(
    SELECT
        col1 = CAST('<x>' + REPLACE(col1, ',','</x><x>') + '</x>' AS XML),
        col2 = CAST('<x>' + REPLACE(col2, ',','</x><x>') + '</x>' AS XML),
        col3 = CAST('<x>' + REPLACE(col3, ',','</x><x>') + '</x>' AS XML)
    FROM @t
)
-- Chaining the three CROSS APPLYs yields every combination of the split values.
SELECT
    col1.n.query('.[1]').value('.', 'int'),
    col2.n.query('.[1]').value('.', 'int'),
    col3.n.query('.[1]').value('.', 'int')
FROM
    cte
    CROSS APPLY col1.nodes('x') AS col1(n)
    CROSS APPLY col2.nodes('x') AS col2(n)
    CROSS APPLY col3.nodes('x') AS col3(n)
SQL Fiddle
Try this:
-- Source row with comma-separated columns (table variables use the @ sigil).
DECLARE @T1 TABLE (COL1 VARCHAR(25), COL2 VARCHAR(25), COL3 VARCHAR(25))
INSERT INTO @T1 (COL1,COL2,COL3)
VALUES ('1','1,2,3','4,5')

-- One table variable per column, each receiving the distinct split values.
DECLARE @COL1 TABLE (VAL1 VARCHAR(25))
DECLARE @COL2 TABLE (VAL2 VARCHAR(25))
DECLARE @COL3 TABLE (VAL3 VARCHAR(25))

-- Split COL1: wrap each CSV item in <M>...</M> and shred the XML with nodes().
INSERT INTO @COL1 (VAL1)
SELECT DISTINCT Split.a.value('.', 'VARCHAR(max)') AS String
FROM (SELECT CAST ('<M>' + REPLACE(CAST(COL1 AS VARCHAR), ',', '</M><M>') + '</M>' AS XML) AS String
      FROM @T1) AS A
CROSS APPLY String.nodes ('/M') AS Split(a)

-- Split COL2 the same way.
INSERT INTO @COL2 (VAL2)
SELECT DISTINCT Split.a.value('.', 'VARCHAR(max)') AS String
FROM (SELECT CAST ('<M>' + REPLACE(CAST(COL2 AS VARCHAR), ',', '</M><M>') + '</M>' AS XML) AS String
      FROM @T1) AS A
CROSS APPLY String.nodes ('/M') AS Split(a)

-- Split COL3 the same way.
INSERT INTO @COL3 (VAL3)
SELECT DISTINCT Split.a.value('.', 'VARCHAR(max)') AS String
FROM (SELECT CAST ('<M>' + REPLACE(CAST(COL3 AS VARCHAR), ',', '</M><M>') + '</M>' AS XML) AS String
      FROM @T1) AS A
CROSS APPLY String.nodes ('/M') AS Split(a)

-- The three value sets are independent, so a plain cross join produces all combinations.
SELECT *
FROM @COL1
CROSS JOIN @COL2
CROSS JOIN @COL3
ORDER BY VAL1,VAL2,VAL3

sql to display all names in 1 row (1 string)

ad_org table with column id & name
ad_org
ad_org_id | name
----------------------------------+-----------
357947E87C284935AD1D783CF6F099A1 | Spain
43D590B4814049C6B85C6545E8264E37 | Main
5EFF95EB540740A3B10510D9814EFAD5 | USA
2878085215E54C73A04D394BFD170733 | India
22669845D93A49A98932CE29AE02E0FD | Honkong
how to get output of all names(in 1 string) in this way from the above database
Spain | Main | USA | India | Honkong
in 1 select statement.
Use string_agg.
SELECT string_agg("name", ' | ') FROM thetable;
For older PostgreSQL, you must use array_agg and array_to_string:
SELECT array_to_string( array_agg("name"), ' | ') FROM thetable;
If you want a particular order, put it in the aggregate, e.g for alphabetical:
SELECT string_agg("name", ' | ' ORDER BY "name") FROM thetable;
use below code
-- T-SQL dynamic pivot: local variables take the @ sigil.
DECLARE @cols AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX)

-- Build a comma-separated, bracket-quoted list of the pivot column names;
-- STUFF(..., 1, 1, '') strips the leading comma produced by FOR XML PATH.
select @cols = STUFF((SELECT ',' + QUOTENAME(ColumnName)
from yourtable
group by ColumnName, id
order by id
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')

-- Splice the column list into the PIVOT statement and run it.
set @query = 'SELECT ' + @cols + ' from
(
select value, ColumnName
from yourtable
) x
pivot
(
max(value)
for ColumnName in (' + @cols + ')
) p '
execute(@query)
Click here for Demo
got it by searching..
Equivalent to PostgreSQL array() / array_to_string() functions in Oracle 9i
select array_to_string(array(select name from ad_org), '|') as names;