How do I simplify this complex query? - postgresql

The relevant table definitions are:
-- One row per message sent by a user in a guild.
-- (The trailing comma after the last column was a syntax error and is removed.)
CREATE TABLE message
(
    id         BIGINT PRIMARY KEY,
    user_id    BIGINT NOT NULL,
    guild_id   BIGINT NOT NULL,
    content    TEXT NOT NULL,
    created_at TIMESTAMP NOT NULL
);
-- Known users; only the identifier is stored here.
CREATE TABLE d_user (
    id BIGINT PRIMARY KEY
);
-- One row per voice-chat session of a user in a guild.
-- NOTE(review): the unit of "duration" is not stated here (presumably seconds) - confirm.
CREATE TABLE vcsession (
    id          BIGINT    PRIMARY KEY,
    user_id     BIGINT    NOT NULL,
    guild_id    BIGINT    NOT NULL,
    duration    INT       NOT NULL,
    began_at    TIMESTAMP NOT NULL,
    last_active TIMESTAMP NOT NULL
);
The expected result set of this query should consist of a row for each user in the guild provided with columns for:
user_id: The user id
message_count: The number of messages sent by each user within an interval defined by two datetimes (this should be 0 if no messages were sent)
voice_time: Sum of each voice session's duration last active within an interval defined by two datetimes (this should be 0 if no voice sessions were active)
active_days: Days in which the user either sent a message or had an active voice session (this should be 0 if user wasn't active in the time interval provided)
This is the query I wrote:
-- Per-user activity summary for one guild over the half-open window [$2, $3).
--   $1 = guild id, $2 = window start (inclusive), $3 = window end (exclusive)
-- Returns one row per d_user row with zero-filled counters.
-- NOTE(review): d_user carries no guild membership, so every user is returned,
-- not only members of guild $1 - confirm that this is intended.
with message_stats as (
    -- number of messages each user sent in the guild within the window
    select
        user_id,
        count(*) as message_count
    from message
    where guild_id = $1
      and created_at >= $2
      and created_at < $3
    group by user_id
),
voice_stats as (
    -- total duration of the voice sessions last active within the window
    select
        user_id,
        sum(duration) as voice_time
    from vcsession
    where guild_id = $1
      and last_active >= $2
      and last_active < $3
    group by user_id
),
activity_days as (
    -- One row per (user, day offset from $2) on which the user was active.
    -- UNION (not UNION ALL) is deliberate: it removes duplicates both within
    -- and across the two sources, so a day with messages and voice counts once.
    -- Fix: both branches now filter on guild_id; previously activity in other
    -- guilds was counted towards active_days.
    select
        user_id,
        (cast(extract(epoch from created_at) as int) - cast(extract(epoch from $2) as int)) / 86400 as day_offset
    from message
    where guild_id = $1
      and created_at >= $2
      and created_at < $3
    union
    select
        user_id,
        (cast(extract(epoch from last_active) as int) - cast(extract(epoch from $2) as int)) / 86400 as day_offset
    from vcsession
    where guild_id = $1
      and last_active >= $2
      and last_active < $3
),
day_counts as (
    select
        user_id,
        count(*) as active_days
    from activity_days
    group by user_id
)
select
    u.id as user_id,
    coalesce(m.message_count, 0) as message_count,
    coalesce(v.voice_time, 0) as voice_time,
    coalesce(d.active_days, 0) as active_days
from d_user as u
left join message_stats as m on m.user_id = u.id
left join voice_stats as v on v.user_id = u.id
left join day_counts as d on d.user_id = u.id
And this is what the result set looks like:
|user_id |message_count |voice_time |active_days |
|--------------------|--------------------|--------------------|--------------------|
|1 |752 |694 |1 |
|2 |12 |543 |2 |
|3 |323 |7163 |4 |
|4 |56 |870 |3 |

It looks reasonably readable to me.
Maybe you could pull the two subselects in the first FROM clause into the main query:
SELECT ...
FROM ((SELECT ...) AS messages
LEFT JOIN
(SELECT ...) AS vcsessions
) AS ...
LEFT JOIN ...
could become
SELECT ...
FROM (SELECT ...) AS messages
LEFT JOIN
(SELECT ...) AS vcsessions
LEFT JOIN ...

Related

Oracle SQL Listagg remove duplicates with case statement conditions

I am trying to show repeated column values as a comma-separated list using LISTAGG, but I am getting the error "ORA-00937: not a single-group group function". I hope I can get some help.
Below is the DDL script with insert statements and data:
-- Remove any previous copies of the demo tables so the script can be re-run.
-- CASCADE CONSTRAINTS also drops foreign keys in other tables that reference them.
DROP TABLE dept CASCADE CONSTRAINTS;
DROP TABLE myrole CASCADE CONSTRAINTS;
DROP TABLE person CASCADE CONSTRAINTS;
DROP TABLE person_role CASCADE CONSTRAINTS;
-- Department lookup table and its seed rows; the primary key is added after loading.
CREATE TABLE dept (
    id   INTEGER NOT NULL,
    dept VARCHAR2(50 CHAR)
);

INSERT INTO dept (id, dept) VALUES (1, 'Operations');
INSERT INTO dept (id, dept) VALUES (2, 'Research');
INSERT INTO dept (id, dept) VALUES (3, 'Accounts');
INSERT INTO dept (id, dept) VALUES (4, 'Sales');

ALTER TABLE dept ADD CONSTRAINT dept_pk PRIMARY KEY (id);
-- Role lookup table and its seed rows; the primary key is added after loading.
CREATE TABLE myrole (
    id   INTEGER NOT NULL,
    role VARCHAR2(50 CHAR)
);

INSERT INTO myrole (id, role) VALUES (1, 'JJJ');
INSERT INTO myrole (id, role) VALUES (2, 'Auth');
INSERT INTO myrole (id, role) VALUES (3, 'AAA');
INSERT INTO myrole (id, role) VALUES (4, 'MMM');
INSERT INTO myrole (id, role) VALUES (5, 'KKK');
INSERT INTO myrole (id, role) VALUES (6, 'BBB');

ALTER TABLE myrole ADD CONSTRAINT myrole_pk PRIMARY KEY (id);
-- Person table and its seed rows; the primary key is added after loading.
CREATE TABLE person (
    id     INTEGER NOT NULL,
    person VARCHAR2(50 CHAR)
);

INSERT INTO person (id, person) VALUES (1, 'John');
INSERT INTO person (id, person) VALUES (2, 'Scott');
INSERT INTO person (id, person) VALUES (3, 'Ruth');
INSERT INTO person (id, person) VALUES (4, 'Smith');
INSERT INTO person (id, person) VALUES (5, 'Frank');
INSERT INTO person (id, person) VALUES (6, 'Martin');
INSERT INTO person (id, person) VALUES (7, 'Blake');

ALTER TABLE person ADD CONSTRAINT person_pk PRIMARY KEY (id);
-- Assignment of a role to a person, optionally scoped to a department
-- (dept_id is nullable). Rows 12 and 13 are intentional duplicates of the same
-- person/role/department combination.
CREATE TABLE person_role (
    id        INTEGER NOT NULL,
    person_id INTEGER NOT NULL,
    role_id   INTEGER NOT NULL,
    dept_id   INTEGER
);

INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (1, 1, 1, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (2, 2, 2, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (3, 2, 4, 1);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (4, 2, 4, 2);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (5, 3, 1, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (6, 3, 5, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (7, 4, 3, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (8, 5, 6, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (9, 6, 6, 3);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (10, 6, 6, 2);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (11, 6, 2, NULL);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (12, 7, 6, 4);
INSERT INTO person_role (id, person_id, role_id, dept_id) VALUES (13, 7, 6, 4);

ALTER TABLE person_role ADD CONSTRAINT person_role_pk PRIMARY KEY (id);
-- person_role.role_id must reference an existing role.
-- (The constraint previously named a non-existent column, myrole_id.)
ALTER TABLE person_role
    ADD CONSTRAINT person_role_myrole_fk FOREIGN KEY ( role_id )
        REFERENCES myrole ( id );
-- person_role.person_id must reference an existing person.
ALTER TABLE person_role
    ADD CONSTRAINT person_role_person_fk
        FOREIGN KEY (person_id) REFERENCES person (id);
-- Sequence supplying dept.id when an INSERT omits it.
-- Starts at 5 because ids 1-4 are already used by the seed rows in this script;
-- starting at 1 would make the first generated id violate dept_pk.
CREATE SEQUENCE dept_seq START WITH 5 NOCACHE;
-- Fills in dept.id from dept_seq only when the inserted row has no id.
CREATE OR REPLACE TRIGGER dept_tr BEFORE
INSERT ON dept
FOR EACH ROW
WHEN ( new.id IS NULL )
BEGIN
:new.id := dept_seq.nextval;
END;
/
-- Sequence supplying myrole.id when an INSERT omits it.
-- Starts at 7 because ids 1-6 are already used by the seed rows in this script;
-- starting at 1 would make the first generated id violate myrole_pk.
CREATE SEQUENCE myrole_seq START WITH 7 NOCACHE;
-- Fills in myrole.id from myrole_seq only when the inserted row has no id.
CREATE OR REPLACE TRIGGER myrole_tr BEFORE
INSERT ON myrole
FOR EACH ROW
WHEN ( new.id IS NULL )
BEGIN
:new.id := myrole_seq.nextval;
END;
/
-- Sequence supplying person.id when an INSERT omits it.
-- Starts at 8 because ids 1-7 are already used by the seed rows in this script;
-- starting at 1 would make the first generated id violate person_pk.
CREATE SEQUENCE person_seq START WITH 8 NOCACHE;
-- Fills in person.id from person_seq only when the inserted row has no id.
CREATE OR REPLACE TRIGGER person_tr BEFORE
INSERT ON person
FOR EACH ROW
WHEN ( new.id IS NULL )
BEGIN
:new.id := person_seq.nextval;
END;
/
-- Sequence supplying person_role.id when an INSERT omits it.
-- Starts at 14 because ids 1-13 are already used by the seed rows in this script;
-- starting at 1 would make the first generated id violate person_role_pk.
CREATE SEQUENCE person_role_seq START WITH 14 NOCACHE;
-- Fills in person_role.id from person_role_seq only when the inserted row has no id.
CREATE OR REPLACE TRIGGER person_role_tr BEFORE
INSERT ON person_role
FOR EACH ROW
WHEN ( new.id IS NULL )
BEGIN
:new.id := person_role_seq.nextval;
END;
/
Using the query below, which @Koen Lostrie provided, and adding the columns I need, I get the following output:
-- One row per person/role assignment with its department (if any) and an
-- access label. role_type is the lowest rank among all of a person's roles:
-- 0 = has 'Auth', 1 = has 'AAA' or 'BBB' (and no 'Auth'), 2 = anything else.
SELECT
    p.person,
    r.role AS myrole,
    d.dept,
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END AS myaccess
FROM person_role pr
INNER JOIN person p
    ON p.id = pr.person_id
INNER JOIN myrole r
    ON r.id = pr.role_id
INNER JOIN (
    SELECT
        p2.id,
        MIN(
            CASE
                WHEN r2.role = 'Auth' THEN 0
                WHEN r2.role IN ('AAA', 'BBB') THEN 1
                ELSE 2
            END
        ) AS role_type
    FROM person_role pr2
    INNER JOIN person p2
        ON p2.id = pr2.person_id
    INNER JOIN myrole r2
        ON r2.id = pr2.role_id
    GROUP BY p2.id
) rl
    ON rl.id = pr.person_id
LEFT JOIN dept d
    ON d.id = pr.dept_id
Output from query:
+--------+--------+------------+----------+
| PERSON | MYROLE | DEPT | MYACCESS |
+--------+--------+------------+----------+
| John | JJJ | | |
| Scott | Auth | | Remove |
| Scott | MMM | Operations | |
| Scott | MMM | Research | |
| Ruth | JJJ | | |
| Ruth | KKK | | |
| Smith | AAA | | Add |
| Frank | BBB | | Add |
| Martin | AAA | Accounts | |
| Martin | AAA | Research | |
| Martin | Auth | | Remove |
| Blake | BBB | Sales | |
| Blake | BBB | Sales | Add |
+--------+--------+------------+----------+
Now I want to show DEPT column values comma separated based on PERSON and MYROLE columns and the output expected is shown below:
+--------+--------+---------------------+----------+
| PERSON | MYROLE | DEPT | MYACCESS |
+--------+--------+---------------------+----------+
| John | JJJ | | |
| Scott | Auth | | Remove |
| Scott | MMM | Operations,Research | |
| Ruth | JJJ | | |
| Ruth | KKK | | |
| Smith | AAA | | Add |
| Frank | BBB | | Add |
| Martin | AAA | Accounts,Research | |
| Martin | Auth | | Remove |
| Blake | BBB | Sales | Add |
+--------+--------+---------------------+----------+
I added listagg to existing query but getting error
-- Failing attempt, kept as posted: LISTAGG is an aggregate, but the statement
-- has no GROUP BY for the non-aggregated columns (p.person, r.role, the CASE
-- expression), which is what triggers the "not a single-group group function" error.
SELECT p.person, r.role as myrole,
-- aggregate over the joined departments, ordered alphabetically
listagg(d.dept, ', ') within group (order by d.dept) as dept,
CASE
WHEN rl.role_type = 1 AND r.role IN ('AAA','BBB') THEN 'Add'
WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
END as myaccess
FROM person_role pr
JOIN person p ON p.id = pr.person_id
JOIN myrole r ON r.id = pr.role_id
JOIN (
SELECT p.id, MIN(CASE WHEN r.ROLE = 'Auth' THEN 0 WHEN r.ROLE in ('AAA','BBB') THEN 1 ELSE 2 END) as role_type
FROM person_role pr
JOIN person p ON p.id = pr.person_id
JOIN myrole r ON r.id = pr.role_id
GROUP BY p.id
) rl ON rl.id = pr.person_id
left join dept d on d.id = pr.dept_id
I get the "not a single-group group function" error and am not sure how to fix it. I would appreciate any help.
Thanks,
Richa
LISTAGG is an aggregate function. If you apply it to a column, then you need to specify in the query what columns you're grouping by. Typically that is all the columns that don't have an aggregate function.
I didn't test since there is no sample data for the dept table nor the person_roles table but this is probably the issue
-- One row per person/role with the person's departments for that role as a
-- comma-separated list, plus the access label.
-- Every non-aggregated select expression is repeated in GROUP BY, which is
-- what LISTAGG (an aggregate) requires.
SELECT
    p.person,
    r.role AS myrole,
    -- DISTINCT removes repeated departments (e.g. two identical person_role
    -- rows would otherwise yield "Sales, Sales"). Requires Oracle 19c or later;
    -- on older versions de-duplicate in an inline view before aggregating.
    LISTAGG(DISTINCT d.dept, ', ') WITHIN GROUP (ORDER BY d.dept) AS dept_list,
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END AS myaccess
FROM person_role pr
JOIN person p ON p.id = pr.person_id
JOIN myrole r ON r.id = pr.role_id
JOIN (
    -- lowest role rank per person: 0 = 'Auth', 1 = 'AAA'/'BBB', 2 = other
    SELECT
        p.id,
        MIN(CASE WHEN r.role = 'Auth' THEN 0 WHEN r.role IN ('AAA', 'BBB') THEN 1 ELSE 2 END) AS role_type
    FROM person_role pr
    JOIN person p ON p.id = pr.person_id
    JOIN myrole r ON r.id = pr.role_id
    GROUP BY p.id
) rl ON rl.id = pr.person_id
LEFT JOIN dept d ON d.id = pr.dept_id
GROUP BY
    p.person,
    r.role,
    CASE
        WHEN rl.role_type = 1 AND r.role IN ('AAA', 'BBB') THEN 'Add'
        WHEN rl.role_type = 0 AND r.role = 'Auth' THEN 'Remove'
    END
ORDER BY p.person

Join on multiple tables using distinct on

-- Employee master record.
create table emp (
    emp_id     serial primary key,
    emp_no     int,
    emp_ref_no varchar(15),
    emp_class  varchar(15)
);
-- Class assignments of an employee over time (created_at orders them).
create table emp_detail (
    emp_detail_id serial primary key,
    emp_id        int,
    class_no      int,
    created_at    timestamp,
    constraint con_fk foreign key (emp_id) references emp (emp_id)
);
-- Versioned details per class number; created_at defaults to the insert time.
create table class_detail (
    class_id   serial primary key,
    emp_id     int,
    class_no   int,
    col1       jsonb,
    created_at timestamp default now(),
    constraint cd_fk foreign key (emp_id) references emp (emp_id)
);
-- Sample rows. Serial keys are assigned in row order, so emp gets ids 1-3
-- and class_detail gets ids 1-4, matching the separate single-row inserts.
insert into emp (emp_no, emp_ref_no, emp_class)
values
    ('548251', '2QcW', 'abc'),
    ('548251', '2FQx', 'abc'),
    ('548251', '2yz', 'abc');

insert into emp_detail (emp_id, class_no, created_at)
values
    (1, 2, '2018-05-04 11:00:00'),
    (1, 1, '2018-04-04 11:00:00'),
    (2, 1, '2018-05-10 11:00:00'),
    (2, 2, '2018-02-01 11:00:00'),
    (3, 2, '2018-02-01 11:00:00');

insert into class_detail (emp_id, class_no, col1, created_at)
values
    (1, 1, '{"Name":"Nik"}', '2018-02-01 10:00:00'),
    (1, 1, '{"Name":"Nik Anderson"}', '2018-03-01 10:00:00'),
    (1, 2, '{"Name":"James Anderson TST"}', '2018-03-15 10:00:00'),
    (1, 2, '{"Name":"Tim Paine ST"}', '2018-04-01 10:00:00');
I want to display each employee's emp_id, emp_no, emp_ref_no and class_no (the latest one from the emp_detail table, based on created_at), along with all the columns of the class_detail table. For class_detail, the latest record for that class_no should be shown.
The expected output which I would like to see is something like below :-
emp id | emp_no | emp_ref_no | class_no | class_id | class.col1 | class.created_at | class.created_by
1 | 548251 | 2QcW | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
2 | 548251 | 2FQx | 1 | 2 |{"Name":"Nik Anderson"}|2018-03-01 10:00:00| NULL
3 | 548251 | 2yz | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
As I stated in the comments: It is exactly the same thing as in Inner join using distinct on. You simply have to add another join and another ORDER BY group (cd.created_at DESC)
demo:db<>fiddle
-- One row per employee: DISTINCT ON keeps the first row of each emp_id group,
-- and the ORDER BY puts the newest emp_detail row, then the newest
-- class_detail row for that class_no, first.
SELECT DISTINCT ON (ed.emp_id)
    e.emp_id,
    e.emp_no,
    e.emp_ref_no,
    ed.class_no,
    cd.*
FROM emp AS e
INNER JOIN emp_detail AS ed
    ON ed.emp_id = e.emp_id
INNER JOIN class_detail AS cd
    ON cd.class_no = ed.class_no
ORDER BY
    ed.emp_id,
    ed.created_at DESC,
    cd.created_at DESC
Note: I am not sure what the emp_id column in class_detail is for. It seems not well designed (this is also because it is always 1 in your example.) You should check whether you really need it.

Creating a Void Function in PostgreSQL

I am getting an error on this CREATE FUNCTION code in PostgreSQL. The error says it is happening around line 2 at DELETE, but it happens at WITH if I remove that line, so I think it is a problem with the format of my CREATE FUNCTION statement.
-- Rebuilds the retention rows for one shop.
--   shopId: shop whose rows in "retention" are deleted and recomputed from
--           "orders" with an order_date from the last 365 days up to today.
-- Returns nothing; the effect is the rewritten content of "retention".
create or replace function retention_data(shopId integer) returns void as $$
begin
    -- A PL/pgSQL body must be a BEGIN ... END block; without it the parser
    -- rejects the first statement (the syntax error reported at DELETE/WITH).
    delete from retention where shop_id = shopId;

    with ret_grid_step1 as (
        -- one row per order: keep the alphabetically first customer name and
        -- email recorded for each order_id
        select * from (
            select
                order_id as order_name,
                cust_name as cust_name,
                email as email,
                date(order_date) as created_at,
                count(*) as num_items_in_order,
                sum(total_price) as sales,
                rank() over (partition by order_id order by cust_name asc) as rnk_shipping_name,
                rank() over (partition by order_id order by email asc) as rnk_email
            from orders
            where shop_id = shopId
              and order_date is not null
              and order_date > now()::date - 365
              and order_date < now()::date + 1
            group by 1, 2, 3, 4
        ) x
        where rnk_shipping_name = 1 and rnk_email = 1
    )
    insert into retention(shop_id, cust_name, email, last_purchase_dt, total_sales, num_orders, days_since_last_order)
    select
        shopId as shop_id,
        coalesce(b.cust_name, 'null') as cust_name,
        a.email,
        a.last_purchase_dt,
        total_sales,
        num_orders,
        current_date - last_purchase_dt as days_since_last_order
    from (
        -- per-email totals
        select
            email,
            max(created_at) as last_purchase_dt,
            count(*) as num_orders,
            sum(sales) as total_sales
        from ret_grid_step1
        group by 1
    ) as a
    left join (
        -- customer name used on the most recent order of each email
        select
            email,
            cust_name,
            rank() over (partition by email order by created_at desc) as rnk
        from ret_grid_step1
        group by 1, 2, created_at
    ) as b
    on a.email = b.email
    -- filtering on b.rnk here drops rows without a match in b
    where b.rnk = 1
      and a.email <> '';
end;
$$ language plpgsql;

Need row returned from upsert

I have a table that I need to upsert. If the row already exists then I want to update and return the row. If the row doesn't already exist then I need to insert and return the row. With the query I have below I get the row returned on insert, but not on update.
Table "main.message_account_seen"
Column | Type | Modifiers
----------------+--------------------------+-------------------------------------------------------------------
id | integer | not null default nextval('message_account_seen_id_seq'::regclass)
field_config_id | integer | not null
edit_stamp | timestamp with time zone | not null default now()
audit_stamp | timestamp with time zone |
message_id | integer | not null
account_id | integer |
Here's the sql.
-- Update the matching row if one exists, otherwise insert it.
-- Only the INSERT's RETURNING reaches the client, so nothing is returned
-- when the UPDATE branch handled the row.
with upsert as (
    update message_account_seen
    set (message_id, account_id, field_config_id) = (1, 60, 980)
    where message_id = 1
      and account_id = 60
      and field_config_id = 980
    returning *
)
insert into message_account_seen (message_id, account_id, field_config_id)
select 1, 60, 980
where not exists (select 1 from upsert)
returning *;
I can't do a postgres function, it needs to be handled in a regular sql query. Also, there is no constraint on the table for uniqueness of row otherwise I would use on conflict. But I'm willing to scrap this query and go with something else if need be.
These are the results when I run the query, and then run it again. You can see that on the insert or first run I get the row returned. However on subsequent runs of the query I get 0 rows returned. I know that it's working because the edit_stamp increases in time. That's a good thing.
# with upsert as (
update message_account_seen set (message_id, account_id, field_config_id ) = (1, 60, 980)
where message_id = 1 and account_id = 60 and field_config_id = 980 returning *
)
insert into message_account_seen (message_id, account_id, field_config_id)
select 1, 60, 980
where not exists (select message_id, account_id, field_config_id from upsert) returning *;
id | field_config_id | edit_stamp | audit_stamp | message_id | account_id
--+-----------------+--------------------------------+-------------+------------+------------
38 | 980 | 09/27/2016 11:43:22.153908 MDT | | 1 | 60
(1 row)
INSERT 0 1
# with upsert as (
update message_account_seen set (message_id, account_id, field_config_id ) = (1, 60, 980)
where message_id = 1 and account_id = 60 and field_config_id = 980 returning *
)
insert into message_account_seen (message_id, account_id, field_config_id)
select 1, 60, 980
where not exists (select message_id, account_id, field_config_id from upsert) returning *;
id | field_config_id | edit_stamp | audit_stamp | message_id | account_id
----+-----------------+------------+-------------+------------+------------
(0 rows)
INSERT 0 0
When the update succeeds its result is not returned in your query. This does it:
-- Update-or-insert that returns the affected row in both cases:
-- "updated" holds the row when it already existed, "inserted" holds it when it
-- had to be created; exactly one of them is non-empty.
with updated as (
    update message_account_seen
    set (message_id, account_id, field_config_id) = (1, 60, 980)
    where message_id = 1
      and account_id = 60
      and field_config_id = 980
    returning *
),
inserted as (
    insert into message_account_seen (message_id, account_id, field_config_id)
    select 1, 60, 980
    where not exists (select 1 from updated)
    returning *
)
select * from updated
union all
select * from inserted
;
The best option here is to use the new upsert that postgres 9.5 offers, but this requires a unique index on (message_id, account_id, field_config_id). It can be used like this:
-- Native upsert (PostgreSQL 9.5+); needs a unique index on
-- (message_id, account_id, field_config_id) to detect the conflict.
INSERT INTO message_account_seen (message_id, account_id, field_config_id)
VALUES (1, 60, 980)
ON CONFLICT (message_id, account_id, field_config_id)
    DO UPDATE SET edit_stamp = now()  -- adjust to the columns that should change
RETURNING *;
This is probably the fastest way to do this and guarantees that nothing unexpected will happen if two processes try to upsert into the same table at the same time (your approach doesn't guarantee that).

PostgreSQL grouping

I would like to group values according to the values in other columns.
This is an example:
I would like to get the output:
{{-30,-50,20},{-20,30,60},{-30,NULL or other value, 20}}
I managed to arrive to:
-- One array of values per m_id for t_id = 1; missing s_id slots are simply absent.
SELECT array_agg(val)
FROM my_table
WHERE t_id = 1
GROUP BY m_id;
{{-30,-50,20},{-20,30,60},{-30,20}}
What would be the best approach?
-- Sample data: (t_id, m_id) groups with one value per s_id slot.
-- Group m_id = 3 deliberately has no row for s_id = 2.
create table my_table (
    t_id int,
    m_id int,
    s_id int,
    val  int
);

insert into my_table (t_id, m_id, s_id, val)
values
    (1, 1, 1, -30),
    (1, 1, 2, -50),
    (1, 1, 3, 20),
    (1, 2, 1, -20),
    (1, 2, 2, 30),
    (1, 2, 3, 60),
    (1, 3, 1, -30),
    (1, 3, 3, 20);
-- Build the full grid of (t_id, m_id) x s_id, then outer-join the real rows
-- onto it so that a missing slot shows up as NULL inside the array.
select array_agg(val order by s_id)
from
    (
        (
            select distinct t_id, m_id
            from my_table
        ) groups
        cross join
        (
            select distinct s_id
            from my_table
        ) slots
    ) grid
    left join my_table t using (t_id, m_id, s_id)
where t_id = 1
group by m_id
order by m_id
;
array_agg
---------------
{-30,-50,20}
{-20,30,60}
{-30,NULL,20}