db2 Recursive with min and max records - db2

We would need from this table :
-- Sample data: each row says that OLD_GID was replaced by NEW_GID.
DROP TABLE relations;
CREATE TABLE relations (OLD_GID VARCHAR(60), NEW_GID VARCHAR(60));
INSERT INTO relations (OLD_GID, NEW_GID) VALUES
    ('GID5',  'GID4'),
    ('GID4',  'GID3'),
    ('GID2',  'GID1'),
    ('GID3',  'GID2'),
    ('GID10', 'GID11'),
    ('GID20', 'GID21'),
    ('GID30', 'GID32'),
    ('GID31', 'GID32'),
    ('GID40', 'GID42'),
    ('GID41', 'GID42'),
    ('GID42', 'GID43');
and end up with a table like this:
OLD_GID NEW_GID
------------------------------------------------------------
GID5 GID1
GID4 GID1
GID3 GID1
GID2 GID1
GID40 GID43
GID41 GID43
GID42 GID43
GID10 GID11
GID20 GID21
GID30 GID32
GID31 GID32
meaning
if I have :
Gid1 father is Gid2
Gid2 father is Gid3
then the output would be
Gid1 with father of all that is Gid3
Gid2 with father of all that is Gid3
I started to write this query, but I'm stuck:
-- Asker's attempt: walks each chain forward from its first row.
-- The recursive step emits CHILD's own (OLD_GID, NEW_GID) pair unchanged, so the
-- result reproduces the rows of relations instead of carrying the final NEW_GID.
WITH RPL (OLD_GID, NEW_GID) AS
-- Anchor: rows whose OLD_GID never appears as a NEW_GID (start of a chain).
( SELECT ROOT.OLD_GID, ROOT.NEW_GID
FROM relations ROOT
WHERE ROOT.OLD_GID in ( select OLD_GID from relations where OLD_GID not in ( select NEW_GID from relations ))
UNION ALL
-- Recursive step: follow PARENT.NEW_GID to the row that has it as OLD_GID.
SELECT CHILD.OLD_GID, CHILD.NEW_GID
FROM RPL PARENT, relations CHILD
WHERE PARENT.NEW_GID = CHILD.OLD_GID
)
select -- row_number() over(order by 1) as genkeycol , OLD_GID, NEW_GID
OLD_GID, NEW_GID
from RPL
Thank You -

Try this:
-- Map every OLD_GID to the last NEW_GID of its chain by walking each chain
-- backwards from its terminal row.
WITH T (OLD_GID, NEW_GID) AS
(
    -- Anchor: rows whose NEW_GID is never replaced again (end of a chain).
    SELECT LAST_STEP.OLD_GID, LAST_STEP.NEW_GID
    FROM RELATIONS LAST_STEP
    WHERE NOT EXISTS (
        SELECT 1
        FROM RELATIONS NEXT_STEP
        WHERE NEXT_STEP.OLD_GID = LAST_STEP.NEW_GID
    )
    UNION ALL
    -- Recursive step: prepend the row that points at the current OLD_GID while
    -- keeping the terminal NEW_GID. The comma join is deliberate: Db2 restricts
    -- explicit JOIN ... ON inside a recursive common table expression (SQL0345N).
    SELECT PREV_STEP.OLD_GID, T.NEW_GID
    FROM T, RELATIONS PREV_STEP
    WHERE PREV_STEP.NEW_GID = T.OLD_GID
)
SELECT OLD_GID, NEW_GID
FROM T
ORDER BY NEW_GID, OLD_GID
The result is:
OLD_GID | NEW_GID
--------+--------
GID2    | GID1
GID3    | GID1
GID4    | GID1
GID5    | GID1
GID10   | GID11
GID20   | GID21
GID30   | GID32
GID31   | GID32
GID40   | GID43
GID41   | GID43
GID42   | GID43

Related

Postgresql Select rows where column::text = array::text[]

This is similar to Postgresql Select rows where column = array question
-- Sample schema: classes.students holds student ids as a text array.
CREATE TABLE students (id int, name text);
INSERT INTO students (id, name) VALUES
    (1, 'AA'),
    (2, 'BB'),
    (3, 'CC'),
    (4, 'DD');

CREATE TABLE classes (name text, students text[]);
INSERT INTO classes (name, students) VALUES
    ('CL-1',  '{2,4}'),
    ('YL-2',  '{2,1,4}'),
    ('CL-3',  '{2,3}'),
    ('BL-33', '{2}'),
    ('CL-5',  '{1,3,4}'),
    ('CL-6',  '{4}');
How can I get the names of the students in each class?
-- Asker's sketch of the desired result, not a runnable query: it refers to
-- student / class / st.names / cl.student, whereas the DDL above defines
-- students / classes with columns name and students.
select cl.name,
(select st.names
from student st
where st.id in cl.student) as student_names -- exp: AA,BB,CC
from class cl;
You can join the tables and re-aggregate the names that correspond to the IDs in your array:
-- One row per class with its students' names as a comma-separated list.
-- students.id is int while classes.students is text[], hence the cast.
SELECT
    c.name AS class_name,
    -- ORDER BY inside the aggregate makes the order of names deterministic;
    -- without it the order within each list is unspecified.
    string_agg(s.name, ',' ORDER BY s.name) AS student_names
FROM classes AS c
INNER JOIN students AS s
    -- Qualified as c.students so the array column is not mistaken for the table.
    ON s.id::text = ANY (c.students)
GROUP BY c.name;
-- class_name | student_names
--------------+---------------
-- CL-5 | AA,CC,DD
-- YL-2 | AA,BB,DD
-- CL-6 | DD
-- BL-33 | BB
-- CL-1 | BB,DD
-- CL-3 | BB,CC
If you don't want to group by a ton of columns in classes, you can initially retrieve these lists in a CTE, then join that to classes:
-- Build the per-class name lists once, then join them back to classes so the
-- outer query can return every classes column without a long GROUP BY.
WITH student_name_lists AS (
    SELECT
        c.name AS class_name,
        -- ORDER BY inside the aggregate makes the order of names deterministic.
        string_agg(s.name, ',' ORDER BY s.name) AS student_names
    FROM classes AS c
    INNER JOIN students AS s
        -- Qualified as c.students so the array column is not mistaken for the table.
        ON s.id::text = ANY (c.students)
    GROUP BY c.name
)
SELECT
    c.*,
    sn.student_names
FROM classes AS c
INNER JOIN student_name_lists AS sn
    ON c.name = sn.class_name;
online demo

How to find in a many to many relation all the identical values in a column and join the table with other three tables?

I have a many to many relation with three columns, (owner_id,property_id,ownership_perc) and for this table applies (many owners have many properties).
So I would like to find all the owner_id who has many properties (property_id) and connect them with other three tables (Table 1,3,4) in order to get further information for the requested result.
All the tables that I'm using are
Table 1: owner (id_owner,name)
Table 2: owner_property (owner_id,property_id,ownership_perc)
Table 3: property(id_property,building_id)
Table 4: building(id_building,address,region)
So, when I'm trying it like this, the query runs but it returns empty.
-- Asker's attempt. The grouping key is (owner_id, address, region, name), so
-- count(owner_id) counts rows per owner-and-building combination, not per owner.
-- A group passes the HAVING only when one owner has several properties that
-- share the same address and region.
SELECT address,region,name
FROM owner_property
JOIN property ON owner_property.property_id = property.id_property
JOIN owner ON owner.id_owner = owner_property.owner_id
JOIN building ON property.building_id=building.id_building
GROUP BY owner_id,address,region,name
HAVING count(owner_id) > 1
ORDER BY owner_id;
Only when I'm trying the code below, it returns the owner_id who has many properties (see image below) but without joining it with the other three tables:
-- All owner_property rows that belong to owners appearing more than once.
SELECT op.*
FROM owner_property AS op
INNER JOIN (
    -- Owners with more than one owner_property row.
    SELECT owner_id
    FROM owner_property
    GROUP BY owner_id
    HAVING COUNT(owner_id) > 1
) AS multi_owner
    ON multi_owner.owner_id = op.owner_id
ORDER BY
    op.owner_id,
    op.property_id;
So, is there any suggestion on what I'm doing wrong when I'm joining the tables? Thank you!
This query:
-- One row per owner that has more than one owner_property row with a
-- non-NULL property_id (COUNT(column) skips NULLs).
SELECT owner_id
FROM owner_property
GROUP BY owner_id
HAVING COUNT(property_id) > 1
returns all the owner_ids with more than 1 property_ids.
If there is a case of duplicates in the combination of owner_id and property_id then instead of COUNT(property_id) use COUNT(DISTINCT property_id) in the HAVING clause.
So join it to the other tables:
-- Address, region and owner name for every property of an owner that has
-- more than one property.
SELECT
    bld.address,
    bld.region,
    own.name
FROM (
    -- Owners with more than one property.
    SELECT owner_id
    FROM owner_property
    GROUP BY owner_id
    HAVING COUNT(property_id) > 1
) AS multi
INNER JOIN owner_property AS op
    ON op.owner_id = multi.owner_id
INNER JOIN property AS prop
    ON op.property_id = prop.id_property
INNER JOIN owner AS own
    ON own.id_owner = op.owner_id
INNER JOIN building AS bld
    ON prop.building_id = bld.id_building
ORDER BY
    op.owner_id,
    op.property_id;
Always qualify the column names with the table name/alias.
You can try to use a correlated subquery that counts the ownerships with EXISTS in the WHERE clause.
-- Same result via a correlated count: keep an ownership row only when its
-- owner has more than one row in owner_property.
SELECT
    bld.address,
    bld.region,
    own.name
FROM owner_property AS op
INNER JOIN owner AS own
    ON own.id_owner = op.owner_id
INNER JOIN property AS prop
    ON prop.id_property = op.property_id
INNER JOIN building AS bld
    ON bld.id_building = prop.building_id
WHERE (
    SELECT COUNT(*)
    FROM owner_property AS same_owner
    WHERE same_owner.owner_id = op.owner_id
) > 1;

Joining two one-to-many tables duplicates records

I have 3 tables, Transaction, Transaction_Items and Transaction_History.
Where the Transaction is the parent table, while Transaction_Items and Transaction_History are the children tables, with one to many relationship.
When i try to join those tables together, if i have 2+ Transaction_History records, or 2+ Transaction_Items i get duplicated or triplicated record results.
This is the SQL query I'm currently using. It works, but what worries me is that if I have to join another one-to-many table in the future, it will duplicate the results again.
I found a workaround for this, but i was just wondering if there is a better and cleaner way to do this ?
The results should be a PostgreSQL JSON array which will contain the Transaction_Items and Transaction_History
-- Asker's current query: one row per transaction of store 21 (type 'Pawn_Loan',
-- made on or after 2018-10-08) with its items and selected history rows as JSON.
SELECT
TR.id AS transaction_id,
TR.transaction_number,
-- TR.status is selected twice (once unaliased, once AS status).
TR.status,
TR.status AS status,
to_json(TR_INV.list),
-- NOTE(review): both counts are taken after the one-to-many join to THR, and
-- TR_INV holds one pre-aggregated row per transaction, so item_cnt probably
-- does not equal the number of items - confirm.
COUNT(TR_INV) item_cnt,
COUNT(THR) tr_cnt,
json_agg(THR)
FROM transaction_transaction AS TR
-- Items are collapsed to one array per transaction_id before joining, so this
-- join cannot multiply the TR rows.
LEFT JOIN (
SELECT
array_agg(t) list, -- this is a workaround method
t.transaction_id
FROM (
SELECT
TR_INV.transaction_id transaction_id,
IT.id,
IT.stock_number,
CAT.key category_key,
ITP.description description,
ITP.serial_number serial_number,
ITP.color color,
ITP.manufacturer manufacturer,
ITP.inventory_model inventory_model,
ITP.average_cost average_cost,
ITP.location_in_store location_in_store,
ITP.firearm_caliber firearm_caliber,
ITP.federal_firearm_number federal_firearm_number,
ITP.sold_price sold_price
FROM transaction_transaction_item TR_INV
LEFT JOIN inventory_item IT ON IT.id = TR_INV.item_id
LEFT JOIN inventory_itemprofile ITP ON ITP.id = IT.current_profile_id
LEFT JOIN inventory_category CAT ON CAT.id = ITP.category_id
-- CAT_T is joined but none of its columns are selected.
LEFT JOIN inventory_categorytype CAT_T ON CAT_T.id = CAT.category_type_id
) t
GROUP BY t.transaction_id
) TR_INV ON TR_INV.transaction_id = TR.id
-- History stays one-to-many; the audit-code filter sits in ON so transactions
-- without matching history rows are still returned.
LEFT JOIN transaction_transactionhistory THR ON THR.transaction_id = TR.id
AND (THR.audit_code_id = 44 OR THR.audit_code_id = 27 OR THR.audit_code_id = 28)
WHERE TR.store_id = 21
AND TR.transaction_type = 'Pawn_Loan' AND TR.date_made >= '2018-10-08'
GROUP BY TR.id, TR_INV.list
What you want to do can be achieved by not using joins, as shown below.
Your actual tables have many columns that I don't know and that don't matter here, so I created the simplest possible versions of them for demonstration.
-- Minimal demo schema: one parent table with two independent one-to-many children.
CREATE TABLE transactions (
    tid  serial PRIMARY KEY,
    name varchar(40) NOT NULL
);

CREATE TABLE transaction_histories (
    hid     serial PRIMARY KEY,
    tid     integer REFERENCES transactions(tid),
    history varchar(40) NOT NULL
);

CREATE TABLE transaction_items (
    iid  serial PRIMARY KEY,
    tid  integer REFERENCES transactions(tid),
    item varchar(40) NOT NULL
);

INSERT INTO transactions (tid, name) VALUES (1, 'transaction');
INSERT INTO transaction_histories (tid, history) VALUES
    (1, 'history1'),
    (1, 'history2');
INSERT INTO transaction_items (tid, item) VALUES
    (1, 'item1'),
    (1, 'item2');

-- Each child table is summarised by its own correlated subquery, so the two
-- one-to-many relations never meet in a join and cannot multiply each other.
SELECT
    t.*,
    (SELECT count(*)       FROM transaction_histories AS hist WHERE hist.tid = t.tid) AS h_count,
    (SELECT json_agg(hist) FROM transaction_histories AS hist WHERE hist.tid = t.tid) AS h,
    (SELECT count(*)       FROM transaction_items AS itm WHERE itm.tid = t.tid) AS i_count,
    (SELECT json_agg(itm)  FROM transaction_items AS itm WHERE itm.tid = t.tid) AS i
FROM transactions AS t;

How can I SUM distinct records in a Postgres database where there are duplicate records?

Imagine a table that looks like this:
The SQL to get this data was just SELECT *
The first column is "row_id" the second is "id" - which is the order ID and the third is "total" - which is the revenue.
I'm not sure why there are duplicate rows in the database, but when I do a SUM(total), it's including the second entry in the database, even though the order ID is the same, which is causing my numbers to be larger than if I select distinct(id), total - export to excel and then sum the values manually.
So my question is - how can I SUM on just the distinct order IDs so that I get the same revenue as if I exported to excel every distinct order ID row?
Thanks in advance!
Easy - just divide by the count:
-- Per-order total with duplicates collapsed: the summed total divided by the
-- number of copies. Only correct when every duplicate row of an id carries the
-- same total (the result is the per-id average).
SELECT
    id,
    -- Dividing by COUNT(id) raised "division by zero" for the NULL-id group,
    -- where COUNT(id) is 0. Counting total and guarding with NULLIF yields NULL
    -- when a group has no totals at all.
    SUM(total) / NULLIF(COUNT(total), 0) AS total
FROM orders
GROUP BY id
See live demo.
Also handles any level of duplication, eg triplicates etc.
You can try something like this (with your example):
Table
-- Sample data: every order id appears twice under different row_ids.
CREATE TABLE test (
    row_id int,
    id     int,
    total  decimal(15,2)
);

INSERT INTO test (row_id, id, total) VALUES
    (6395,  1509, 112),
    (22986, 1509, 112),
    (1393,  3284, 40.37),
    (24360, 3284, 40.37);
Query
-- Total per id over de-duplicated (id, total) pairs, plus the row_ids that
-- carried each id.
WITH distinct_records AS (
    SELECT DISTINCT
        id,
        total
    FROM test
),
totals_per_id AS (
    SELECT
        id,
        SUM(total) AS actual_total
    FROM distinct_records
    GROUP BY id
)
SELECT
    src.id,
    tp.actual_total,
    ARRAY_AGG(src.row_id) AS row_ids
FROM test AS src
INNER JOIN totals_per_id AS tp
    ON src.id = tp.id
GROUP BY
    src.id,
    tp.actual_total
Result
| id | actual_total | row_ids |
|------|--------------|------------|
| 1509 | 112 | 6395,22986 |
| 3284 | 40.37 | 1393,24360 |
Explanation
We do not know why orders and totals appear more than once with different row_ids. So, using a common table expression (CTE, the with ... clause), we first get the distinct id and total pairs.
Under the CTE, we use this distinct data to do totaling. We join ID in the original table with the aggregation over distinct values. Then we comma-separate row_ids so that the information looks cleaner.
SQLFiddle example
http://sqlfiddle.com/#!15/72639/3
Create custom aggregate:
-- State-transition function for dist_sum.
--   $1: running sum so far
--   $2: the de-duplication key; never read by the body, it exists only so the
--       key can take part in DISTINCT at the call site
--   $3: the value to add
-- Returns $1 unchanged when $3 is NULL, otherwise COALESCE($1, 0) + $3.
CREATE OR REPLACE FUNCTION sum_func (
double precision, pg_catalog.anyelement, double precision
)
RETURNS double precision AS
$body$
SELECT case when $3 is not null then COALESCE($1, 0) + $3 else $1 end
$body$
LANGUAGE 'sql';
-- Two-argument aggregate dist_sum(key, value) built on sum_func.
-- No INITCOND is given, so the state starts as NULL and the result is NULL
-- when no non-NULL value is aggregated.
-- NOTE(review): the state is double precision, so exact decimal totals are
-- summed as floating point - confirm that is acceptable for money values.
CREATE AGGREGATE dist_sum (
pg_catalog."any",
double precision)
(
SFUNC = sum_func,
STYPE = float8
);
And then calc distinct sum like:
-- DISTINCT applies to the (id, total) pair, so each distinct pair is fed to
-- the aggregate once.
select dist_sum(distinct id, total)
from orders
SQLFiddle
You can use DISTINCT in your aggregate functions:
-- Per id, add up each distinct total value once.
SELECT
    id,
    SUM(DISTINCT total)
FROM orders
GROUP BY id
Documentation here: https://www.postgresql.org/docs/9.6/static/sql-expressions.html#SYNTAX-AGGREGATES
If we can trust that the total for one order is really a single row, we can eliminate the duplicates in a sub-query by selecting the MAX of the PK id column. An example:
-- Sample data: order 1 is stored three times, order 2 twice.
CREATE TABLE test2 (id int, order_id int, total int);
INSERT INTO test2 (id, order_id, total) VALUES
    (1, 1, 50),
    (2, 1, 50),
    (5, 1, 50),
    (3, 2, 100),
    (4, 2, 100);

-- Keep only the row with the highest id per order, then total per order.
SELECT
    order_id,
    SUM(total)
FROM test2 AS t
INNER JOIN (
    SELECT MAX(id) AS id
    FROM test2
    GROUP BY order_id
) AS latest
    ON t.id = latest.id
GROUP BY order_id
sql fiddle
In difficult cases:
-- Per id, fold the rows into a JSON object keyed by row_id (a repeated key
-- keeps a single value), then add up the remaining values. This only collapses
-- rows that repeat the same row_id, e.g. rows multiplied by a join.
SELECT
    id,
    (
        -- Cast to numeric, not int4: a decimal total such as 40.37 is not
        -- valid input for an integer cast and would raise an error.
        SELECT SUM(value::numeric)
        FROM jsonb_each_text(jsonb_object_agg(row_id, total))
    ) AS total
FROM orders
GROUP BY id
I would suggest just use a sub-Query:
-- DISTINCT ON ("id") keeps one row per id; with no ORDER BY in the subquery,
-- which of the duplicate rows is kept is unspecified.
-- "Database"."Schema"."Table" looks like a placeholder for the real table name.
SELECT "a"."id", SUM("a"."total")
FROM (SELECT DISTINCT ON ("id") * FROM "Database"."Schema"."Table") AS "a"
GROUP BY "a"."id"
The Above will give you the total of each id
Use below if you want the full total of each duplicate removed:
-- Grand total over one row per id; with no ORDER BY in the subquery, which of
-- the duplicate rows is kept is unspecified.
SELECT SUM("a"."total")
FROM (SELECT DISTINCT ON ("id") * FROM "Database"."Schema"."Table") AS "a"
Using subselect (http://sqlfiddle.com/#!7/cef1c/51):
-- Grand total over de-duplicated (id, total) pairs.
SELECT SUM(distinct_orders.total)
FROM (
    SELECT DISTINCT
        id,
        total
    FROM orders
) AS distinct_orders  -- alias is mandatory for a FROM subquery in PostgreSQL before v16
Using CTE (http://sqlfiddle.com/#!7/cef1c/53):
-- Grand total over de-duplicated (id, total) pairs, written with a CTE.
with distinct_records as (
select distinct id, total from orders
)
select sum(total) from distinct_records;

Order by objects relation (PostgreSQL)

Have 2 tables for example:
In 1st: object & parent columns
object | parent
-------+---------
object1| null
object2| object1
object3| null
2nd has: object & reference columns
object | reference
-------+---------
object1| null
object2| null
object3| object1
I need to query the tables so that the result is ordered as follows: the parent first, then its child(ren), then the objects that have a reference to the parent.
object1
object2
object3
Is it possible to do in one SQL query or need to sort manually in an array? Seems it is a classical task, probably solution already exists somewhere?
Is this what you're looking for?
-- Testbed: oparen holds object -> parent links, oref holds object -> ref links.
CREATE TABLE oparen (object varchar(10), parent varchar(10));
CREATE TABLE oref (object varchar(10), ref varchar(10));
INSERT INTO oparen VALUES
('object1',null),('object2','object1'),
('object3',null),('object4','object2');
INSERT INTO oref VALUES
('object1',null),('object2',null),('object3','object1'),
('object5','object6'),('object6','object1'),('object7','object4');
-- hier gives rank 1 to every object used as a parent, rank 2 to every object
-- that has a parent and rank 3 to every object that has a ref. UNION only
-- removes identical (obj, rank) rows, so one object can carry several ranks
-- and then appears more than once in the final result.
WITH hier AS (
SELECT parent AS obj, 1 AS rank FROM oparen
WHERE parent IS NOT NULL
UNION
SELECT object, 2 FROM oparen
WHERE parent IS NOT NULL
UNION
SELECT object, 3 FROM oref
WHERE ref IS NOT NULL),
-- Every object named in either table, de-duplicated.
allobj AS (
SELECT object AS obj FROM oparen
UNION
SELECT object FROM oref)
-- Objects that received no rank in hier fall back to rank 4.
SELECT a.obj, coalesce(h.rank, 4) AS rank
FROM allobj a LEFT JOIN hier h ON a.obj = h.obj
ORDER BY coalesce(h.rank, 4), a.obj;
EDIT: After the improved example in the answer below, the following query should do the trick:
-- parents: rank 1 for every value used as a parent (one row per child, so a
-- parent of several children is listed several times).
WITH parents AS (
SELECT parent AS obj, 1 AS rank FROM oparen
WHERE parent IS NOT NULL
),
-- family: the parents plus, at rank 2, children that are not parents themselves.
family AS (
SELECT * FROM parents
UNION ALL
SELECT object, 2 FROM oparen op
WHERE parent IS NOT NULL
AND NOT EXISTS (SELECT obj FROM parents WHERE obj = op.object)
),
-- hier: family plus every oref row with a ref, ranked two below the referenced
-- family member, or 5 when the ref is not in family.
hier AS (
SELECT * FROM family
UNION ALL
SELECT object AS obj, coalesce(f.rank + 2, 5) AS rank
FROM oref LEFT JOIN family f ON oref.ref = f.obj
WHERE ref IS NOT NULL
),
-- Every object named in either table, de-duplicated.
allobj AS (
SELECT object AS obj FROM oparen
UNION
SELECT object FROM oref)
-- Objects missing from hier get a NULL rank, which PostgreSQL sorts last in
-- ascending order by default.
SELECT a.obj, h.rank AS rank
FROM allobj a LEFT JOIN hier h ON a.obj = h.obj
ORDER BY h.rank, a.obj;
Testbed creation in the top is updated according to the new requirements.
I inserted following data:
-- Commenter's test data for the query above.
INSERT INTO oparen (object, parent) VALUES
    ('object1', NULL),
    ('object2', 'object1'),
    ('object3', NULL),
    ('object4', 'object2');
INSERT INTO oref (object, ref) VALUES
    ('object1', NULL),
    ('object2', NULL),
    ('object3', 'object1'),
    ('object5', 'object6'),
    ('object6', 'object1');
Order is incorrect and object2 listed twice. DISTINCT on obj breaks the order also. Should go 6 then 5.
No, does not work: checked for another data and simplified to use and only by oref table content:
-- Test data restricted to oref; object4 -> object5 -> object6 -> object1 forms
-- a chain several levels deep.
INSERT INTO oref VALUES
('object1',null),('object2',null),('object3','object1'),
('object5','object6'),('object6','object1'),('object7','object4'), ('object4','object5');
-- family: rank 1 for every object without a ref.
WITH family AS (
SELECT object AS obj, 1 AS rank FROM oref
WHERE ref IS NULL
),
-- hier: family plus every object with a ref. family ranks are always 1, so an
-- object gets rank 3 when its ref is a rank-1 object and 5 otherwise. Only one
-- level is resolved; deeper chains are not followed.
hier AS (
SELECT * FROM family
UNION ALL
SELECT object AS obj, coalesce(f.rank + 2, 5) AS rank
FROM oref LEFT JOIN family f ON oref.ref = f.obj
WHERE ref IS NOT NULL
),
allobj AS (
SELECT object AS obj FROM oref)
SELECT a.obj, h.rank AS rank
FROM allobj a
LEFT JOIN hier h ON a.obj = h.obj
ORDER BY h.rank, a.obj;
I think recursive queries are needed here. I will write one and post it here.
Following recursive query works:
-- Rank every object in oref by its depth in the reference chain: objects
-- without a ref get rank 1, and an object that references a ranked object gets
-- that rank + 1. An object reachable at several depths is reported once, with
-- its deepest rank.
-- NOTE: a reference cycle reachable from a rank-1 object makes the recursion
-- run without end, because rank grows on every pass and UNION never sees a
-- repeated row.
WITH RECURSIVE tables(object, rank) AS (
    -- Anchor: objects that reference nothing.
    SELECT o.object, 1 AS rank
    FROM oref AS o
    WHERE o.ref IS NULL
    UNION
    -- Recursive step. UNION already discards repeated (object, rank) rows, so
    -- the inner DISTINCTs were redundant. The former "rank <= t.rank" predicate
    -- compared t.rank with itself and was always true, so it is dropped.
    SELECT o.object, t.rank + 1 AS rank
    FROM oref AS o
    INNER JOIN tables AS t
        ON o.ref = t.object
)
-- Deepest rank per object. Grouping replaces the correlated MAX lookup through
-- a second CTE that was only a copy of tables.
SELECT
    object,
    MAX(rank) AS rank
FROM tables
GROUP BY object
-- object is a tie-break so rows of equal rank come back in a stable order.
ORDER BY rank, object;
Any comments, questions, objections, propositions? ;)