Oracle SQL Listagg remove duplicates with case statement conditions - group-by

I am trying to show repeated column values as a comma-separated list by using LISTAGG, but I am getting the error "not a single-group group function" (ORA-00937). I hope I can get some help.
Below is the DDL script with insert statements and data:
-- Remove any earlier copies of the demo tables so the script can be re-run.
DROP TABLE dept CASCADE CONSTRAINTS;
DROP TABLE myrole CASCADE CONSTRAINTS;
DROP TABLE person CASCADE CONSTRAINTS;
DROP TABLE person_role CASCADE CONSTRAINTS;

-- Department lookup table.
CREATE TABLE dept (
    id   INTEGER NOT NULL,
    dept VARCHAR2(50 CHAR)
);

-- Seed rows (explicit ids).
INSERT INTO dept (id, dept) VALUES (1, 'Operations');
INSERT INTO dept (id, dept) VALUES (2, 'Research');
INSERT INTO dept (id, dept) VALUES (3, 'Accounts');
INSERT INTO dept (id, dept) VALUES (4, 'Sales');

-- The primary key is added once the seed rows are in place.
ALTER TABLE dept ADD CONSTRAINT dept_pk PRIMARY KEY (id);
-- Role lookup table.
CREATE TABLE myrole (
    id   INTEGER NOT NULL,
    role VARCHAR2(50 CHAR)
);

-- Seed rows (explicit ids).
INSERT INTO myrole (id, role) VALUES (1, 'JJJ');
INSERT INTO myrole (id, role) VALUES (2, 'Auth');
INSERT INTO myrole (id, role) VALUES (3, 'AAA');
INSERT INTO myrole (id, role) VALUES (4, 'MMM');
INSERT INTO myrole (id, role) VALUES (5, 'KKK');
INSERT INTO myrole (id, role) VALUES (6, 'BBB');

-- The primary key is added once the seed rows are in place.
ALTER TABLE myrole ADD CONSTRAINT myrole_pk PRIMARY KEY (id);
-- Person lookup table.
CREATE TABLE person (
    id     INTEGER NOT NULL,
    person VARCHAR2(50 CHAR)
);

-- Seed rows (explicit ids).
INSERT INTO person (id, person) VALUES (1, 'John');
INSERT INTO person (id, person) VALUES (2, 'Scott');
INSERT INTO person (id, person) VALUES (3, 'Ruth');
INSERT INTO person (id, person) VALUES (4, 'Smith');
INSERT INTO person (id, person) VALUES (5, 'Frank');
INSERT INTO person (id, person) VALUES (6, 'Martin');
INSERT INTO person (id, person) VALUES (7, 'Blake');

-- The primary key is added once the seed rows are in place.
ALTER TABLE person ADD CONSTRAINT person_pk PRIMARY KEY (id);
-- Link table: which person holds which role, optionally tied to a department.
CREATE TABLE person_role (
    id        INTEGER NOT NULL,
    person_id INTEGER NOT NULL,
    role_id   INTEGER NOT NULL,
    dept_id   INTEGER            -- nullable
);

-- Seed rows. Rows 12 and 13 carry the same person/role/dept combination.
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (1, 1, 1, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (2, 2, 2, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (3, 2, 4, 1);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (4, 2, 4, 2);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (5, 3, 1, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (6, 3, 5, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (7, 4, 3, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (8, 5, 6, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (9, 6, 6, 3);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (10, 6, 6, 2);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (11, 6, 2, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (12, 7, 6, 4);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (13, 7, 6, 4);

-- The primary key is added once the seed rows are in place.
ALTER TABLE person_role ADD CONSTRAINT person_role_pk PRIMARY KEY (id);
-- Referential integrity for the person/role link table.
-- person_role stores the role reference in role_id (it has no myrole_id
-- column), so the foreign key to myrole must be declared on role_id.
ALTER TABLE person_role
    ADD CONSTRAINT person_role_myrole_fk FOREIGN KEY (role_id)
    REFERENCES myrole (id);
ALTER TABLE person_role
    ADD CONSTRAINT person_role_person_fk FOREIGN KEY (person_id)
    REFERENCES person (id);
-- Surrogate-key generation: each table gets a sequence plus a BEFORE INSERT
-- row trigger that fills id only when the caller did not supply one.
--
-- Each sequence starts just above the highest id already loaded by the seed
-- INSERTs in this script (dept 1-4, myrole 1-6, person 1-7, person_role 1-13).
-- Starting at 1 would make the first trigger-generated id collide with an
-- existing row and fail on the primary key.
CREATE SEQUENCE dept_seq START WITH 5 NOCACHE;
CREATE OR REPLACE TRIGGER dept_tr BEFORE
    INSERT ON dept
    FOR EACH ROW
    WHEN ( new.id IS NULL )
BEGIN
    :new.id := dept_seq.nextval;
END;
/
CREATE SEQUENCE myrole_seq START WITH 7 NOCACHE;
CREATE OR REPLACE TRIGGER myrole_tr BEFORE
    INSERT ON myrole
    FOR EACH ROW
    WHEN ( new.id IS NULL )
BEGIN
    :new.id := myrole_seq.nextval;
END;
/
CREATE SEQUENCE person_seq START WITH 8 NOCACHE;
CREATE OR REPLACE TRIGGER person_tr BEFORE
    INSERT ON person
    FOR EACH ROW
    WHEN ( new.id IS NULL )
BEGIN
    :new.id := person_seq.nextval;
END;
/
CREATE SEQUENCE person_role_seq START WITH 14 NOCACHE;
CREATE OR REPLACE TRIGGER person_role_tr BEFORE
    INSERT ON person_role
    FOR EACH ROW
    WHEN ( new.id IS NULL )
BEGIN
    :new.id := person_role_seq.nextval;
END;
/
By using the query below that @Koen Lostrie provided, and adding the columns I need, I get the output shown:
-- One row per person/role assignment, with its department (if any) and an
-- access flag derived from the person's lowest role_type.
SELECT
    p.person,
    r.role AS myrole,
    d.dept,
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END AS myaccess
FROM person_role pr
INNER JOIN person p
    ON p.id = pr.person_id
INNER JOIN myrole r
    ON r.id = pr.role_id
INNER JOIN (
    -- Per person: 0 if any role is 'Auth', else 1 if any is 'AAA'/'BBB', else 2.
    SELECT
        p.id,
        MIN(
            CASE
                WHEN r.role = 'Auth' THEN 0
                WHEN r.role IN ('AAA', 'BBB') THEN 1
                ELSE 2
            END
        ) AS role_type
    FROM person_role pr
    INNER JOIN person p
        ON p.id = pr.person_id
    INNER JOIN myrole r
        ON r.id = pr.role_id
    GROUP BY p.id
) rl
    ON rl.id = pr.person_id
LEFT JOIN dept d
    ON d.id = pr.dept_id
Output from query:
+--------+--------+------------+----------+
| PERSON | MYROLE | DEPT | MYACCESS |
+--------+--------+------------+----------+
| John | JJJ | | |
| Scott | Auth | | Remove |
| Scott | MMM | Operations | |
| Scott | MMM | Research | |
| Ruth | JJJ | | |
| Ruth | KKK | | |
| Smith | AAA | | Add |
| Frank | BBB | | Add |
| Martin | AAA | Accounts | |
| Martin | AAA | Research | |
| Martin | Auth | | Remove |
| Blake | BBB | Sales | |
| Blake | BBB | Sales | Add |
+--------+--------+------------+----------+
Now I want to show the DEPT column values as a comma-separated list for each PERSON and MYROLE combination. The expected output is shown below:
+--------+--------+---------------------+----------+
| PERSON | MYROLE | DEPT | MYACCESS |
+--------+--------+---------------------+----------+
| John | JJJ | | |
| Scott | Auth | | Remove |
| Scott | MMM | Operations,Research | |
| Ruth | JJJ | | |
| Ruth | KKK | | |
| Smith | AAA | | Add |
| Frank | BBB | | Add |
| Martin | AAA | Accounts,Research | |
| Martin | Auth | | Remove |
| Blake | BBB | Sales | Add |
+--------+--------+---------------------+----------+
I added LISTAGG to the existing query, but I am getting an error:
-- Failing attempt, kept as posted: LISTAGG is an aggregate, but the query has
-- no GROUP BY covering the non-aggregated select-list items (p.person, r.role
-- and the CASE expression), which is what triggers the reported error.
SELECT p.person, r.role as myrole,
listagg(d.dept, ', ') within group (order by d.dept) as dept,
CASE
WHEN rl.role_type = 1 AND r.role IN ('AAA','BBB') THEN 'Add'
WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
END as myaccess
FROM person_role pr
JOIN person p ON p.id = pr.person_id
JOIN myrole r ON r.id = pr.role_id
JOIN (
-- Per person: lowest role_type across all of that person's roles.
SELECT p.id, MIN(CASE WHEN r.ROLE = 'Auth' THEN 0 WHEN r.ROLE in ('AAA','BBB') THEN 1 ELSE 2 END) as role_type
FROM person_role pr
JOIN person p ON p.id = pr.person_id
JOIN myrole r ON r.id = pr.role_id
GROUP BY p.id
) rl ON rl.id = pr.person_id
left join dept d on d.id = pr.dept_id
I am getting the "not a single-group group function" error and am not sure how to fix it. I appreciate any help.
Thanks,
Richa

LISTAGG is an aggregate function. If you apply it to a column, then you need to specify in the query what columns you're grouping by. Typically that is all the columns that don't have an aggregate function.
I didn't test since there is no sample data for the dept table nor the person_roles table but this is probably the issue
-- One row per person and role, with that role's departments collapsed into a
-- single comma-separated list, plus the access flag derived from the
-- person's lowest role_type.
SELECT
    p.person,
    r.role AS myrole,
    LISTAGG(d.dept, ', ') WITHIN GROUP (ORDER BY d.dept) AS dept_list,
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END AS myaccess
FROM (
    -- person_role may hold the same person/role/dept assignment more than
    -- once; collapse those rows first so LISTAGG does not repeat a department
    -- (e.g. 'Sales, Sales'). Works on Oracle versions without LISTAGG(DISTINCT).
    SELECT DISTINCT
        person_id,
        role_id,
        dept_id
    FROM person_role
) pr
INNER JOIN person p
    ON p.id = pr.person_id
INNER JOIN myrole r
    ON r.id = pr.role_id
INNER JOIN (
    -- Per person: 0 if any role is 'Auth', else 1 if any is 'AAA'/'BBB', else 2.
    SELECT
        p.id,
        MIN(
            CASE
                WHEN r.role = 'Auth' THEN 0
                WHEN r.role IN ('AAA', 'BBB') THEN 1
                ELSE 2
            END
        ) AS role_type
    FROM person_role pr
    INNER JOIN person p
        ON p.id = pr.person_id
    INNER JOIN myrole r
        ON r.id = pr.role_id
    GROUP BY p.id
) rl
    ON rl.id = pr.person_id
LEFT JOIN dept d
    ON d.id = pr.dept_id
GROUP BY
    p.id,       -- keeps two different people who share a name in separate rows
    p.person,
    r.role,
    -- Every non-aggregated select-list expression must be repeated here.
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END
ORDER BY p.person

Related

Retrieving the most recent change/s for given user start and end period dates

I would really appreciate any help with this query.
I have 2 tables:
Audit - captures one or more changes made in table x
pStatus - Reference table
Here are the table scripts and sample data.
-- Audit trail: one row per status change recorded for a property.
-- LastChanged is kept as text in 'YYYY-MM-DD hh:mm:ss.mmm' form, as posted.
CREATE TABLE Audit (
    AID INTEGER NOT NULL PRIMARY KEY
    ,PropertyID INTEGER NOT NULL
    ,StatusID INTEGER NOT NULL
    ,LastChangedBy VARCHAR(20) NOT NULL
    ,LastChanged VARCHAR(23) NOT NULL
);  -- terminator added so the following INSERT starts a new statement
-- Sample audit rows: three changes for property 65530, two for 65531, one for 65532.
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44363, 65532, 2, 'Agent009', '2021-02-18 12:17:17.280');
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44362, 65531, 7, 'Agent009', '2021-02-18 12:17:17.280');
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44361, 65530, 5, 'Agent007', '2021-02-28 11:10:43.587');
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44360, 65530, 3, 'Agent009', '2021-02-18 11:37:43.390');
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44359, 65530, 3, 'Agent009', '2021-02-18 11:37:29.967');
INSERT INTO Audit (AID, PropertyID, StatusID, LastChangedBy, LastChanged)
VALUES (44358, 65531, 2, 'Agent009', '2021-02-18 11:05:07.527');
-- Status reference table; Audit.StatusID is joined to PStatus.ID.
CREATE TABLE PStatus (
    ID            INTEGER     NOT NULL PRIMARY KEY,
    StatusName    VARCHAR(20) NOT NULL,
    LastChangedBy VARCHAR(20) NOT NULL,
    LastChanged   VARCHAR(23) NOT NULL
);

-- Reference rows.
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (1, 'REJECTED', 'dbo', '2013-05-28 17:02:42.977');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (2, 'NEW PROP', 'dbo', '2013-05-28 17:02:42.977');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (3, 'ACTIVE PROP', 'dbo', '2013-10-15 12:41:14.280');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (4, 'MONITOR', 'dbo', '2013-10-15 12:41:14.280');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (5, 'DEAl AGR', 'dbo', '2013-10-15 12:41:14.280');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (6, 'CONTRACTS EXCH', 'dbo', '2013-10-15 12:41:14.280');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (7, 'COMPLETED', 'dbo', '2013-10-15 12:41:14.280');
INSERT INTO PStatus (ID, StatusName, LastChangedBy, LastChanged)
VALUES (8, 'ABORTED', 'dbo', '2013-10-15 12:41:14.280');
Demo on DB Fiddle: to create tables and output
This is what I'm trying to achieve.
Final output:
+---------+---------------+----------------+-----------+-----------------------------+
| PRD Ref | Latest_Status | Opening_Status | lastchangedby | lastchanged |
+---------+---------------+----------------+---------------+-------------------------+
| 65530 | DEAl AGR | ACTIVE PROP | Agent007 | 2021-02-28 11:10:43.587 |
+---------+---------------+----------------+---------------+-------------------------+
| 65531 | COMPLETED | NEW PROP | Agent009 | 2021-02-18 12:17:17.280 |
+---------+---------------+----------------+---------------+-------------------------+
| 65532 | NEW PROP | *null* | Agent009 | 2021-02-18 12:17:17.280 |
+---------+---------------+----------------+---------------+-------------------------+
Here is what I have tried.
-- Attempt as posted: pivots the first and last audit row of each property
-- into opening/latest columns.
select pa.propertyid as [PRD Ref],
max(case when pa.rn_desc = 1 then s.statusname end) last_status,
max(case when pa.rn_asc = 1 then s.statusname end) opening_status,
max(case when pa.rn_desc = 1 then pa.lastchangedby end) lastchangedby,
max(case when pa.rn_desc = 1 then pa.lastchanged end) lastchanged
from (
-- Row numbers are assigned over ALL audit rows of a property, i.e. before
-- the date filter in the outer WHERE is applied.
select pa.*,
row_number() over(partition by propertyid order by lastchanged) rn_asc,
row_number() over(partition by propertyid order by lastchanged desc) rn_desc
from audit pa
) pa
inner join pstatus s on s.id = pa.statusid
where 1 in (rn_asc, rn_desc)
-- This filter runs after numbering: a property whose rn_desc = 1 row lies
-- outside the range (65530, changed 2021-02-28) loses that row, so its
-- "latest" columns come back NULL.
and pa.LastChanged BETWEEN '2021-01-20' AND '2021-02-20'
group by pa.propertyid
+---------+---------------+----------------+-----------+-----------------------------+
| PRD Ref | Latest_Status | Opening_Status | lastchangedby | lastchanged |
+---------+---------------+----------------+-----------+-----------------------------+
| 65530 | *null * | ACTIVE PROP | *null* | *null* |
+---------+---------------+----------------+-----------+-----------------------------+
| 65531 | COMPLETED | NEW PROP | Agent009 | 2021-02-18 12:17:17.280 |
+---------+---------------+----------------+-----------+-----------------------------+
| 65532 | NEW PROP | *null* | Agent009 | 2021-02-18 12:17:17.280 |
+---------+---------------+----------------+-----------+-----------------------------+
Please check whether the following T-SQL code helps you.
-- Reporting window. T-SQL local variables must be prefixed with @
-- (a leading # denotes a temporary object and is not valid in DECLARE).
DECLARE @StartDate DATETIME = '2021-02-15 00:00:00.000';
DECLARE @EndDate DATETIME = '2021-02-28 23:30:00.000';

WITH CTE_Audit AS (
    -- Per property: total number of audit rows and the most recent change
    -- timestamp. Note this CTE is not restricted to the reporting window.
    SELECT
        pa.PropertyID,
        COUNT(pa.PropertyID) AS TotalAuditRowCnt,
        MAX(pa.LastChanged) AS LastChanged
    FROM [Audit] pa WITH (NOLOCK)
    GROUP BY pa.PropertyID
),
CTE_SecondMostRecent AS (
    -- Status of the second most recent change inside the window; reported
    -- as the opening status.
    SELECT
        os.PropertyID,
        os.opening_status
    FROM (
        SELECT
            pa.PropertyID,
            ps.StatusName AS opening_status,
            RANK() OVER (PARTITION BY pa.PropertyID ORDER BY pa.LastChanged DESC) AS rnk
        FROM [Audit] pa WITH (NOLOCK)
        INNER JOIN PStatus ps WITH (NOLOCK)
            ON pa.StatusID = ps.ID
        WHERE pa.LastChanged BETWEEN @StartDate AND @EndDate
    ) os
    WHERE os.rnk = 2 /* Get Second Most Recent Records */
),
CTE_FirstMostRecent AS (
    -- Status, author and timestamp of the most recent change inside the window.
    SELECT
        ls.PropertyID,
        ls.last_status,
        ls.LastChangedBy,
        ls.LastChanged
    FROM (
        SELECT
            pa.PropertyID,
            ps.StatusName AS last_status,
            pa.LastChangedBy,
            pa.LastChanged,
            RANK() OVER (PARTITION BY pa.PropertyID ORDER BY pa.LastChanged DESC) AS rnk
        FROM [Audit] pa WITH (NOLOCK)
        INNER JOIN PStatus ps WITH (NOLOCK)
            ON pa.StatusID = ps.ID
        WHERE pa.LastChanged BETWEEN @StartDate AND @EndDate
    ) ls
    WHERE ls.rnk = 1 /* Get First Most Recent Records */
)
SELECT
    a.PropertyID AS [PRD Ref]
    , c.last_status AS [Latest_Status]
    -- A property with a single audit row has no earlier status to report;
    -- the literal text '*null*' is emitted in that case.
    , CASE WHEN a.TotalAuditRowCnt > 1 THEN b.opening_status ELSE '*null*' END AS [Opening_Status]
    , c.LastChangedBy AS [lastchangedby]
    , c.LastChanged AS [lastchanged]
FROM CTE_Audit a
LEFT JOIN CTE_SecondMostRecent b
    ON a.PropertyID = b.PropertyID
LEFT JOIN CTE_FirstMostRecent c
    ON a.PropertyID = c.PropertyID
ORDER BY
    a.LastChanged DESC

Join on multiple tables using distinct on

-- Employee master table.
CREATE TABLE emp (
    emp_id     SERIAL PRIMARY KEY,
    emp_no     INTEGER,
    emp_ref_no CHARACTER VARYING(15),
    emp_class  CHARACTER VARYING(15)
);

-- Class assignments per employee, timestamped by created_at.
CREATE TABLE emp_detail (
    emp_detail_id SERIAL PRIMARY KEY,
    emp_id        INTEGER,
    class_no      INTEGER,
    created_at    TIMESTAMP WITHOUT TIME ZONE,
    CONSTRAINT con_fk FOREIGN KEY (emp_id) REFERENCES emp (emp_id)
);

-- Versioned class records; col1 holds a JSONB payload.
CREATE TABLE class_detail (
    class_id   SERIAL PRIMARY KEY,
    emp_id     INTEGER,
    class_no   INTEGER,
    col1       JSONB,
    created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT now(),
    CONSTRAINT cd_fk FOREIGN KEY (emp_id) REFERENCES emp (emp_id)
);
-- Three employees; emp_id comes from the serial default.
INSERT INTO emp (emp_no, emp_ref_no, emp_class) VALUES ('548251', '2QcW', 'abc');
INSERT INTO emp (emp_no, emp_ref_no, emp_class) VALUES ('548251', '2FQx', 'abc');
INSERT INTO emp (emp_no, emp_ref_no, emp_class) VALUES ('548251', '2yz', 'abc');

-- Class assignments per employee.
INSERT INTO emp_detail (emp_id, class_no, created_at) VALUES (1, 2, '2018-05-04 11:00:00');
INSERT INTO emp_detail (emp_id, class_no, created_at) VALUES (1, 1, '2018-04-04 11:00:00');
INSERT INTO emp_detail (emp_id, class_no, created_at) VALUES (2, 1, '2018-05-10 11:00:00');
INSERT INTO emp_detail (emp_id, class_no, created_at) VALUES (2, 2, '2018-02-01 11:00:00');
INSERT INTO emp_detail (emp_id, class_no, created_at) VALUES (3, 2, '2018-02-01 11:00:00');

-- Two versions each of class 1 and class 2, all recorded under emp_id 1.
INSERT INTO class_detail (emp_id, class_no, col1, created_at)
VALUES (1, 1, '{"Name":"Nik"}', '2018-02-01 10:00:00');
INSERT INTO class_detail (emp_id, class_no, col1, created_at)
VALUES (1, 1, '{"Name":"Nik Anderson"}', '2018-03-01 10:00:00');
INSERT INTO class_detail (emp_id, class_no, col1, created_at)
VALUES (1, 2, '{"Name":"James Anderson TST"}', '2018-03-15 10:00:00');
INSERT INTO class_detail (emp_id, class_no, col1, created_at)
VALUES (1, 2, '{"Name":"Tim Paine ST"}', '2018-04-01 10:00:00');
I want to display the corresponding emp_id, emp_no, emp_ref_no and class_no (the latest one from the emp_detail table, based on created_at) along with all the columns of the class_detail table. The class_detail columns should show the latest record for that class_no.
The expected output which I would like to see is something like below :-
emp id | emp_no | emp_ref_no | class_no | class_id | class.col1 | class.created_at | class.created_by
1 | 548251 | 2QcW | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
2 | 548251 | 2FQx | 1 | 2 |{"Name":"Nik Anderson"}|2018-03-01 10:00:00| NULL
3 | 548251 | 2yz | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
As I stated in the comments: It is exactly the same thing as in Inner join using distinct on. You simply have to add another join and another ORDER BY group (cd.created_at DESC)
demo:db<>fiddle
-- One row per employee: DISTINCT ON keeps the first row of each emp_id group
-- under the ORDER BY, i.e. the newest emp_detail row paired with the newest
-- class_detail row for that class_no.
SELECT DISTINCT ON (ed.emp_id)
    e.emp_id,
    e.emp_no,
    e.emp_ref_no,
    ed.class_no,
    cd.*
FROM emp_detail ed
INNER JOIN emp e
    ON e.emp_id = ed.emp_id
INNER JOIN class_detail cd
    ON ed.class_no = cd.class_no
ORDER BY
    ed.emp_id,
    ed.created_at DESC,
    cd.created_at DESC
Note: I am not sure what the emp_id column in class_detail is for. It seems not well designed (this is also because it is always 1 in your example.) You should check whether you really need it.

PostgreSQL querying through schemas

I want a query that lists all customers whose status is "active". This query would return a list of customers who are marked as active. My problem is that I am lost on querying tables that reference other tables. Here is my schema.
-- Customers.
CREATE TABLE Customer (
    ID BIGSERIAL PRIMARY KEY NOT NULL,
    fNAME TEXT NOT NULL,
    lNAME TEXT NOT NULL,
    create_date DATE NOT NULL DEFAULT NOW()
);

-- Status history: one row per status a customer has been given.
CREATE TABLE CustomerStatus (
    recordID BIGSERIAL NOT NULL,
    -- Foreign key to Customer.ID. Declared BIGINT rather than BIGSERIAL:
    -- a serial type would attach its own sequence default to this column,
    -- silently generating customer ids unrelated to any real customer when
    -- ID is omitted. Callers must always supply the referenced ID.
    ID BIGINT NOT NULL REFERENCES Customer (ID),
    status TEXT NOT NULL,
    create_date DATE NOT NULL DEFAULT NOW()
);

-- Sample customers (ids come from the serial default) and their status history.
INSERT INTO Customer (fNAME, lNAME) VALUES ('MARK', 'JOHNSON'), ('ERICK', 'DAWN'), ('MAY', 'ERICKSON'), ('JESS', 'MARTIN');
INSERT INTO CustomerStatus (ID, status) VALUES (1, 'pending'), (1, 'active');
INSERT INTO CustomerStatus (ID, status) VALUES (2, 'pending'), (2, 'active'), (2, 'cancelled');
INSERT INTO CustomerStatus (ID, status) VALUES (3, 'pending'), (3, 'active');
INSERT INTO CustomerStatus (ID, status) VALUES (4, 'pending');
I took the liberty of assuming that recordID is a serial, so the highest recordID per customer is that customer's latest status, and produced this query:
t=# with a as (
select *, max(recordid) over (partition by cs.id)
from Customer c
join CustomerStatus cs on cs.id = c.id
)
select *
from a
where recordid=max and status = 'active';
id | fname | lname | create_date | recordid | id | status | create_date | max
----+-------+----------+-------------+----------+----+--------+-------------+-----
1 | MARK | JOHNSON | 2017-04-27 | 2 | 1 | active | 2017-04-27 | 2
3 | MAY | ERICKSON | 2017-04-27 | 7 | 3 | active | 2017-04-27 | 7
(2 rows)
Time: 0.450 ms

Postgres insert trigger fills id

I have a BEFORE trigger which should fill record's root ID which, of course, would point to rootmost entry. I.e:
id | parent_id | root_id
-------------------------
a | null | a
a.1 | a | a
a.1.1 | a.1 | a
b | null | b
If entry's parent_id is null, it would point to record itself.
Question is - inside BEFORE INSERT trigger, if parent_id is null, can I or should I fetch next sequence value, fill id and root_id in order to avoid filling root_id in AFTER trigger?
According to your own definition:
if entry's parent_id is null, it would point to record itself
then you have to do:
-- Body of the BEFORE INSERT row trigger that fills new.root_id.
IF new.parent_id IS NULL THEN
    -- A root entry points to itself.
    new.root_id := new.id;
ELSE
    -- Walk up the parent chain until the entry whose parent_id is NULL.
    WITH RECURSIVE p (parent_id, level) AS (
        -- Base case: seed from NEW directly. In a BEFORE INSERT trigger the
        -- row being inserted is not in t yet, so looking it up by new.id
        -- would find nothing and leave root_id NULL.
        SELECT new.parent_id, 0
        UNION ALL
        -- Step: move to the parent's own parent; the row whose parent_id is
        -- NULL is the root and ends the walk.
        SELECT t.parent_id, p.level + 1
        FROM t
        INNER JOIN p
            ON t.id = p.parent_id
        WHERE t.parent_id IS NOT NULL
    )
    -- The deepest level reached holds the id of the rootmost entry.
    SELECT p.parent_id
    INTO new.root_id
    FROM p
    ORDER BY p.level DESC
    LIMIT 1;
END IF;
RETURN new;

How to compare two identicals tables data of each column in postgres?

I want to compare all column values of two tables. The two tables are identical in structure: they have the same columns and the same primary key. Can anyone suggest a query that compares two such tables in Postgres?
The query should return the column name and the differing values from the two tables, like this:
pkey | column_name | table1_value | table2_value
123 | bonus | 1 | 0
To get all different rows you can use:
-- Key-matched row pairs whose whole-row values differ in at least one column.
SELECT *
FROM table_1 AS t1
INNER JOIN table_2 AS t2
    ON t1.pkey = t2.pkey
WHERE t1 IS DISTINCT FROM t2;
This will only compare rows that exist in both tables. If you also want to find those that are missing in one of them, use a full outer join:
-- Rows that differ between the two tables, plus rows present in only one of
-- them; status says which of the three cases applies.
select coalesce(t1.pkey, t2.pkey) as pkey,
       case
           when t1.pkey is null then 'Missing in table_1'
           when t2.pkey is null then 'Missing in table_2'
           else 'At least one column is different'
       end as status,
       *
from table_1 t1
-- FULL OUTER JOIN keeps unmatched rows from both sides.
full outer join table_2 t2 on t1.pkey = t2.pkey
where (t1 is distinct from t2)
   or (t1.pkey is null)
   or (t2.pkey is null);
If you install the hstore extension, you can view the differences as a key/value map:
-- Same comparison, but showing only the differing columns as key/value maps.
-- Requires the hstore extension.
select coalesce(t1.pkey, t2.pkey) as pkey,
       case
           when t1.pkey is null then 'Missing in table_1'
           when t2.pkey is null then 'Missing in table_2'
           else 'At least one column is different'
       end as status,
       -- hstore subtraction keeps the pairs of the left operand that are not
       -- matched in the right operand.
       hstore(t1) - hstore(t2) as values_in_table_1,
       hstore(t2) - hstore(t1) as values_in_table_2
from table_1 t1
-- FULL OUTER JOIN keeps unmatched rows from both sides.
full outer join table_2 t2 on t1.pkey = t2.pkey
where (t1 is distinct from t2)
   or (t1.pkey is null)
   or (t2.pkey is null);
Using this sample data:
-- Sample data: pkey 1 is identical, 2 and 3 differ, 4 and 5 exist on one side only.
CREATE TABLE table_1 (
    pkey  INTEGER PRIMARY KEY,
    col_1 TEXT,
    col_2 INT
);
INSERT INTO table_1 (pkey, col_1, col_2)
VALUES
    (1, 'a', 1),
    (2, 'b', 2),
    (3, 'c', 3),
    (5, 'e', 42);

CREATE TABLE table_2 (
    pkey  INTEGER PRIMARY KEY,
    col_1 TEXT,
    col_2 INT
);
INSERT INTO table_2 (pkey, col_1, col_2)
VALUES
    (1, 'a', 1),
    (2, 'x', 2),
    (3, 'c', 33),
    (4, 'd', 52);
A possible result would be:
pkey | status | values_in_table_1 | values_in_table_2
-----+----------------------------------+-------------------+------------------
2 | At least one column is different | "col_1"=>"b" | "col_1"=>"x"
3 | At least one column is different | "col_2"=>"3" | "col_2"=>"33"
4 | Missing in table_1 | |
5 | Missing in table_2 | |
Example data:
-- Sample data: pkey comes from the serial default; rows 2 and 3 differ.
CREATE TABLE test1 (
    pkey SERIAL PRIMARY KEY,
    str  TEXT,
    val  INT
);
INSERT INTO test1 (str, val)
VALUES
    ('a', 1),
    ('b', 2),
    ('c', 3);

CREATE TABLE test2 (
    pkey SERIAL PRIMARY KEY,
    str  TEXT,
    val  INT
);
INSERT INTO test2 (str, val)
VALUES
    ('a', 1),
    ('x', 2),
    ('c', 33);
This simple query gives a complete information on differences of two tables (including rows missing in one of them):
-- Symmetric difference: rows of test1 with no identical row in test2 (t = 1),
-- followed by rows of test2 with no identical row in test1 (t = 2).
(
    SELECT 1 AS t, * FROM test1
    EXCEPT
    SELECT 1 AS t, * FROM test2
)
UNION ALL
(
    SELECT 2 AS t, * FROM test2
    EXCEPT
    SELECT 2 AS t, * FROM test1
)
ORDER BY pkey, t;
t | pkey | str | val
---+------+-----+-----
1 | 2 | b | 2
2 | 2 | x | 2
1 | 3 | c | 3
2 | 3 | c | 33
(4 rows)
In Postgres 9.5+ you can transpose the result to the expected format using jsonb functions:
-- Transposes the symmetric difference of test1/test2 into one row per
-- (pkey, column) with both values side by side.
select pkey, key as column, val[1] as value_1, val[2] as value_2
from (
-- Collect the values of each (pkey, column) into an array ordered by the
-- source tag t, so the t = 1 value precedes the t = 2 value when both exist.
select pkey, key, array_agg(value order by t) val
from (
-- Expand every differing row into (column name, text value) pairs.
select t, pkey, key, value
from (
-- Rows only in test1 (t = 1) and rows only in test2 (t = 2).
(select 1 t, * from test1
except
select 1 t, * from test2)
union all
(select 2 t, * from test2
except
select 2 t, * from test1)
) s,
lateral jsonb_each_text(to_jsonb(s))
group by 1, 2, 3, 4
) s
group by 1, 2
) s
-- Drop the tag column itself and columns whose two values are equal.
-- NOTE(review): <> is not NULL-safe, so pairs with a NULL or missing value
-- are filtered out as well - confirm that is intended.
where key <> 't' and val[1] <> val[2]
order by pkey;
pkey | column | value_1 | value_2
------+--------+---------+---------
2 | str | b | x
3 | val | 3 | 33
(2 rows)
I tried all of the above answers. Thanks, guys, for your help. But after googling I found a simple query.
SELECT <common_column_list> from table1
EXCEPT
SELECT <common_column_list> from table2.
It shows all the row of table1 if any table1 column value is different from table2 column value.
Not very nice but fun and it works :o)
Just replace public.mytable1 and public.mytable2 by correct tables and
update the " where table_schema='public' and table_name='mytable1'"
-- Column-by-column diff of public.mytable1 vs public.mytable2, matched on pkey.
-- Each row is cast to its text form, rewritten as an array literal, and
-- unnested so values can be compared position by position.
-- NOTE(review): the text rewriting presumably breaks on values containing
-- commas, quotes or braces - verify against the real data.
select * from (
select pkey,column_name,t1.col_value table1_value,t2.col_value table2_value from (
-- One output row per (pkey, column position) of mytable1.
select pkey,generate_subscripts(t,1) ordinal_position,unnest(t) col_value from (
select pkey,
(
replace(regexp_replace( -- null fields
'{'||substring(a::character varying,'^.(.*).$') ||'}' -- {} instead of ()
,'([\{,])([,\}])','\1null\2','g'),',,',',null,')
)::TEXT[] t
from public.mytable1 a
) a) t1
-- Same expansion for mytable2; the left join keeps mytable1 rows without a match.
left join (
select pkey,generate_subscripts(t,1) ordinal_position,unnest(t) col_value from (
select pkey,
(
replace(regexp_replace( -- null fields
'{'||substring(a::character varying,'^.(.*).$') ||'}' -- {} instead of ()
,'([\{,])([,\}])','\1null\2','g'),',,',',null,')
)::TEXT[] t
from public.mytable2 a
) a) t2 using (pkey,ordinal_position)
-- Map the array position back to a column name via the catalog.
join (select * from information_schema.columns where table_schema='public' and table_name='mytable1') c using (ordinal_position)
-- Keep only positions whose values differ; NULL is treated as ''.
) final where COALESCE(table1_value,'')!=COALESCE(table2_value,'')