JOIN inside a SELECT MAX in Postgres - postgresql

I have the following query that returns these two elements.
SELECT ticket_holds.id, ticket_holds.created_at, orders.user_id, users.id, charges.payment_method
FROM ticket_holds
LEFT JOIN order_items ON order_items.id = ticket_holds.order_item_id
LEFT JOIN orders ON orders.id = order_items.order_id
LEFT JOIN users ON users.id = orders.user_id
LEFT JOIN charges on charges.order_id = orders.id;
This result is okay.
id | created_at | user_id | id | payment_method
----+----------------------------+---------+----+----------------
1 | 2017-05-26 04:29:22.189628 | 4 | 4 |
2 | 2017-05-26 04:33:00.721404 | 4 | 4 | some_payment_method
When I try to filter the query to display the last record created per user it also works okay using SELECT MAX.
SELECT ticket_holds.id, ticket_holds.created_at, orders.user_id, users.id, charges.payment_method
FROM ticket_holds
LEFT JOIN order_items ON order_items.id = ticket_holds.order_item_id
LEFT JOIN orders ON orders.id = order_items.order_id
LEFT JOIN users ON users.id = orders.user_id
LEFT JOIN charges on charges.order_id = orders.id
WHERE ticket_holds.created_at = (SELECT MAX(ticket_holds.created_at) FROM ticket_holds WHERE orders.user_id = users.id);
And the result
id | created_at | user_id | id | payment_method
----+----------------------------+---------+----+----------------
2 | 2017-05-26 04:33:00.721404 | 4 | 4 | some_payment_method
(1 row)
However, when Im trying to filter to only select items without a payment_method I get no results.
SELECT ticket_holds.id, ticket_holds.created_at, orders.user_id, users.id, charges.payment_method
FROM ticket_holds
LEFT JOIN order_items ON order_items.id = ticket_holds.order_item_id
LEFT JOIN orders ON orders.id = order_items.order_id
LEFT JOIN users ON users.id = orders.user_id
LEFT JOIN charges on charges.order_id = orders.id
WHERE ticket_holds.created_at = (SELECT MAX(ticket_holds.created_at) FROM ticket_holds WHERE orders.user_id = users.id AND charges.payment_method IS NULL);
(Notice the AND charges.payment_method IS NULL) and the end of the query)
This is what I get instead
id | created_at | user_id | id | payment_method
----+------------+---------+----+----------------
(0 rows)
Any idea how can I retrieve the elements with no payment method, in this case would be the record with id 1?

As per discussion on chat. i think below query will help you to get required result.
with data as ( SELECT ticket_holds.id, ticket_holds.created_at, orders.user_id, users.id, charges.payment_method
FROM ticket_holds
LEFT JOIN order_items ON order_items.id = ticket_holds.order_item_id
LEFT JOIN orders ON orders.id = order_items.order_id
LEFT JOIN users ON users.id = orders.user_id
LEFT JOIN charges on charges.order_id = orders.id
where charges.payment_method is null
)
select * from data d1
where d1.created_at = (select max(created_at) from data d2
where d1.users_id = d2.user_id
)
There are few points while writing an sub query in where clause. Join your table carefully as in your case you are not utilizing any column for joining.

Related

Postgres Query Optimization without adding an extra index

I was trying to optimize this query differently, but before that, Can we make any slight change in this query to reduce the time without adding any index?
Postgres version: 13.5
Query:
SELECT
orders.id as order_id,
orders.*, u1.name as user_name,
u2.name as driver_name,
u3.name as payment_by_name, referrals.name as ref_name,
array_to_string(array_agg(orders_payments.payment_type_name), ',') as payment_type_name,
array_to_string(array_agg(orders_payments.amount), ',') as payment_type_amount,
array_to_string(array_agg(orders_payments.reference_code), ',') as reference_code,
array_to_string(array_agg(orders_payments.tips), ',') as tips,
array_to_string(array_agg(locations.name), ',') as location_name,
(select
SUM(order_items.tax) as tax from order_items
where order_items.order_id = orders.id and order_items.deleted = 'f'
) as tax,
(select
SUM(orders_surcharges.surcharge_tax) as surcharge_tax from orders_surcharges
where orders_surcharges.order_id = orders.id
)
FROM "orders"
LEFT JOIN
users as u1 ON u1.id = orders.user_id
LEFT JOIN
users as u2 ON u2.id = orders.driver_id
LEFT JOIN
users as u3 ON u3.id = orders.payment_received_by
LEFT JOIN
referrals ON referrals.id = orders.referral_id
INNER JOIN
locations ON locations.id = orders.location_id
LEFT JOIN
orders_payments ON orders_payments.order_id = orders.id
WHERE
(orders.company_id = '626')
AND
(orders.created_at BETWEEN '2021-04-23 20:00:00' AND '2021-07-24 20:00:00')
AND
orders.order_status_id NOT IN (10, 5, 50)
GROUP BY
orders.id, u1.name, u2.name, u3.name, referrals.name
ORDER BY
created_at ASC LIMIT 300 OFFSET 0
Current Index:
"orders_pkey" PRIMARY KEY, btree (id)
"idx_orders_company_and_location" btree (company_id, location_id)
"idx_orders_created_at" btree (created_at)
"idx_orders_customer_id" btree (customer_id)
"idx_orders_location_id" btree (location_id)
"idx_orders_order_status_id" btree (order_status_id)
Execution Plan
Seems this takes more time on the parallel heap scan.
You're looking for 300 orders and try to get some additional information about these records. I would see if I could first get these 300 records, instead of getting all the data and then limit it to 300. Something like this:
WITH orders_300 AS (
SELECT * -- just get the columns that you really need, never use * in production
FROM orders
INNER JOIN locations ON locations.id = orders.location_id
WHERE orders.company_id = '626'
AND orders.created_at BETWEEN '2021-04-23 20:00:00' AND '2021-07-24 20:00:00'
AND orders.order_status_id NOT IN (10, 5, 50)
ORDER BY
created_at ASC LIMIT 300 -- LIMIT
OFFSET 0
)
SELECT
orders.id as order_id,
orders.*, -- just get the columns that you really need, never use * in production
u1.name as user_name,
u2.name as driver_name,
u3.name as payment_by_name, referrals.name as ref_name,
array_to_string(array_agg(orders_payments.payment_type_name), ',') as payment_type_name,
array_to_string(array_agg(orders_payments.amount), ',') as payment_type_amount,
array_to_string(array_agg(orders_payments.reference_code), ',') as reference_code,
array_to_string(array_agg(orders_payments.tips), ',') as tips,
array_to_string(array_agg(locations.name), ',') as location_name,
(SELECT SUM(order_items.tax) as tax
FROM order_items
WHERE order_items.order_id = orders.id
AND order_items.deleted = 'f'
) as tax,
( SELECT SUM(orders_surcharges.surcharge_tax) as surcharge_tax
FROM orders_surcharges
WHERE orders_surcharges.order_id = orders.id
)
FROM "orders_300" AS orders
LEFT JOIN users as u1 ON u1.id = orders.user_id
LEFT JOIN users as u2 ON u2.id = orders.driver_id
LEFT JOIN users as u3 ON u3.id = orders.payment_received_by
LEFT JOIN referrals ON referrals.id = orders.referral_id
LEFT JOIN orders_payments ON orders_payments.order_id = orders.id
GROUP BY
orders.id, u1.name, u2.name, u3.name, referrals.name
ORDER BY
created_at;
This will at least have a huge impact on the slowest part of your query, all these index scans on orders_payments. Every single scan is fast, but the query is doing 165000 of them... Limit this to just 300 and will be much faster.
Another issue is that none of your indexes covers the entire WHERE condition on the table "orders". But if you can't create a new index, you're out of luck.

Postgresql select based on another select with order, limit and agregate

I have a tables like:
books:
id | title | rating_avr | visible
1 | 'Overlord' | 5 | true
2 | 'Avengers' | 10 | false
tags_books:
tag_id | book_id | likes
1 | 1 | 5
2 | 1 | 25
1 | 2 | 11
tags:
id | name
1 | 'Adventure'
2 | 'Drama'
Now i need to load books that have tag 'Drama' with LIMIT, ORDER and agregate tags for each book.
I managed to achive this using query:
SELECT b.id, b.title, b.rating_avr, json_agg(json_build_object('id', tb2.tag_id)) as tags
FROM books b
LEFT JOIN tags_books tb ON tb.book_id = b.id
LEFT JOIN tags_books tb2 ON tb2.book_id = b.id
WHERE tb.tag_id = 1 AND b.visible=true
GROUP BY b.id ORDER BY b.rating_avr DESC LIMIT 5
What i'm curious about:
1) Is it ok to join same table 2 times? First is for where clause and second to agregate tags.
2) How can i order agregated tags based on likes?
3) Is it a right approach, or maybe there is better way to do it?
It is strange that in your query, you don't use the table tags, although you want to fetch books with tag 'Drama' which is a column in the table tags.
What I would do is first get the ids of all the books with tag 'Drama' with a query like this:
SELECT b.id FROM books b
INNER JOIN tags_books tb ON tb.book_id = b.id
INNER JOIN tags t ON t.id = tb.tag_id
WHERE t.name = 'Drama' AND b.visible=true
and then use it to get the result:
SELECT
b.id, b.title, b.rating_avr,
json_agg(json_build_object('id', tb.tag_id) order by tb.likes desc) as tags
FROM books b INNER JOIN tags_books tb
ON tb.book_id = b.id
WHERE b.id IN (
SELECT b.id FROM books b
INNER JOIN tags_books tb ON tb.book_id = b.id
INNER JOIN tags t ON t.id = tb.tag_id
WHERE t.name = 'Drama' AND b.visible=true
)
GROUP BY b.id, b.title, b.rating_avr
ORDER BY b.rating_avr DESC LIMIT 5
See the demo.
Results:
> id | title | rating_avr | tags
> -: | :------- | ---------: | :-----------------------
> 1 | Overlord | 5 | [{"id" : 2}, {"id" : 1}]

Cascading sum hierarchy using recursive cte

I'm trying to perform recursive cte with postgres but I can't wrap my head around it. In terms of performance issue there are only 50 items in TABLE 1 so this shouldn't be an issue.
TABLE 1 (expense):
id | parent_id | name
------------------------------
1 | null | A
2 | null | B
3 | 1 | C
4 | 1 | D
TABLE 2 (expense_amount):
ref_id | amount
-------------------------------
3 | 500
4 | 200
Expected Result:
id, name, amount
-------------------------------
1 | A | 700
2 | B | 0
3 | C | 500
4 | D | 200
Query
WITH RECURSIVE cte AS (
SELECT
expenses.id,
name,
parent_id,
expense_amount.total
FROM expenses
WHERE expenses.parent_id IS NULL
LEFT JOIN expense_amount ON expense_amount.expense_id = expenses.id
UNION ALL
SELECT
expenses.id,
expenses.name,
expenses.parent_id,
expense_amount.total
FROM cte
JOIN expenses ON expenses.parent_id = cte.id
LEFT JOIN expense_amount ON expense_amount.expense_id = expenses.id
)
SELECT
id,
SUM(amount)
FROM cte
GROUP BY 1
ORDER BY 1
Results
id | sum
--------------------
1 | null
2 | null
3 | 500
4 | 200
You can do a conditional sum() for only the root row:
with recursive tree as (
select id, parent_id, name, id as root_id
from expense
where parent_id is null
union all
select c.id, c.parent_id, c.name, p.root_id
from expense c
join tree p on c.parent_id = p.id
)
select e.id,
e.name,
e.root_id,
case
when e.id = e.root_id then sum(ea.amount) over (partition by root_id)
else amount
end as amount
from tree e
left join expense_amount ea on e.id = ea.ref_id
order by id;
I prefer doing the recursive part first, then join the related tables to the result of the recursive query, but you could do the join to the expense_amount also inside the CTE.
Online example: http://rextester.com/TGQUX53703
However, the above only aggregates on the top-level parent, not for any intermediate non-leaf rows.
If you want to see intermediate aggregates as well, this gets a bit more complicated (and is probably not very scalable for large results, but you said your tables aren't that big)
with recursive tree as (
select id, parent_id, name, 1 as level, concat('/', id) as path, null::numeric as amount
from expense
where parent_id is null
union all
select c.id, c.parent_id, c.name, p.level + 1, concat(p.path, '/', c.id), ea.amount
from expense c
join tree p on c.parent_id = p.id
left join expense_amount ea on ea.ref_id = c.id
)
select e.id,
lpad(' ', (e.level - 1) * 2, ' ')||e.name as name,
e.amount as element_amount,
(select sum(amount)
from tree t
where t.path like e.path||'%') as sub_tree_amount,
e.path
from tree e
order by path;
Online example: http://rextester.com/MCE96740
The query builds up a path of all IDs belonging to a (sub)tree and then uses a scalar sub-select to get all child rows belonging to a node. That sub-select is what will make this quite slow as soon as the result of the recursive query can't be kept in memory.
I used the level column to create a "visual" display of the tree structure - this helps me debugging the statement and understanding the result better. If you need the real name of an element in your program you would obviously only use e.name instead of pre-pending it with blanks.
I could not get your query to work for some reason. Here's my attempt that works for the particular table you provided (parent-child, no grandchild) without recursion. SQL Fiddle
--- step 1: get parent-child data together
with parent_child as(
select t.*, amount
from
(select e.id, f.name as name,
coalesce(f.name, e.name) as pname
from expense e
left join expense f
on e.parent_id = f.id) t
left join expense_amount ea
on ea.ref_id = t.id
)
--- final step is to group by id, name
select id, pname, sum(amount)
from
(-- step 2: group by parent name and find corresponding amount
-- returns A, B
select e.id, t.pname, t.amount
from expense e
join (select pname, sum(amount) as amount
from parent_child
group by 1) t
on t.pname = e.name
-- step 3: to get C, D we union and get corresponding columns
-- results in all rows and corresponding value
union
select id, name, amount
from expense e
left join expense_amount ea
on e.id = ea.ref_id
) t
group by 1, 2
order by 1;

Query table with multiple joined values

I've created a query that joins six tables:
SELECT a.accession, b.value, c.name, d.description, e.value, f.seqlen, f.residues
FROM chado.dbxref a inner join chado.dbxrefprop b on a.dbxref_id = b.dbxref_id
inner join chado.biomaterial d on b.dbxref_id = d.dbxref_id
inner join chado.feature f on d.dbxref_id = f.dbxref_id
inner join chado.biomaterialprop e on d.biomaterial_id = e.biomaterial_id
inner join chado.contact c on d.biosourceprovider_id = c.contact_id;
The output:
I'm currently working with a PostgreSQL schema called Chado (http://gmod.org/wiki/Chado_Tables). My attempts to comply with the preexisting schema have led me to deposit multiple joined values within the same table (two different values within the dbxrefprop table, three different values within the biomaterialprop table). Querying the database results in a substantial amount of redundant output. Is there a way for me to reduce output redundancy by modifying my query statement? Ideally, I'd like the output to resemble the following:
test001 | GB0101 | source011 | Faaberg,K.; Lyoo,K.; Korol,D.M. | serum | T1 | Iowa, USA | 01 Jan 2005 | 1234 | AUGAACGCCUUGCAUUACUAUGACUAUGAUU
Working query statement:
SELECT a.accession, string_agg(distinct b.value, ' | ' ORDER BY b.value) AS bvalue_list, c.name, d.description, string_agg(distinct e.value, ' | ' ORDER BY e.value) AS evalue_list, f.seqlen, f.residues
FROM chado.dbxref a INNER JOIN chado.dbxrefprop b ON a.dbxref_id = b.dbxref_id
INNER JOIN chado.biomaterial d ON b.dbxref_id = d.dbxref_id
INNER JOIN chado.feature f ON d.dbxref_id = f.dbxref_id
INNER JOIN chado.biomaterialprop e ON d.biomaterial_id = e.biomaterial_id
INNER JOIN chado.contact c ON d.biosourceprovider_id = c.contact_id
GROUP BY a.accession, c.name, d.description, f.seqlen, f.residues;

How to perform != or <> in postgresql

i need to fetch the table1 content which is not present in the table2 i dont think which is possible by <> or != operator so can any one help me here
select t1._id, t1."categoryName", t1.company_id, t1.active from table1 t1
inner join table2 t2 on t1._id <> t2.category_id
inner join table3 t3 on t2 .department_id <> t3 ._id where t3._id = 1
for example:
a collage can have 10 departments , students enrolled to 5 department which is present 1 collage
table1 is dept
table 3 is col
table 2 looks like this:
col_id| dept_id | student_id
1 | 1 | 1
1 | 2 | 2
1 | 3 | 3
1 | 4 | 4
1 | 5 | 5
i need to fetch rest of the dept_id which is not present in table 2
i need to fetch the table1 content which is not present in the table2
Use a not exists query, something like this:
select *
from table1 t1
where not exists (select *
from table2 t2
where t1._id = t2.category_id)
Note sure how table3 relates to the question "not present in table2"
try not in, or outer join, like here:
select t1._id, t1."categoryName", t1.company_id, t1.active
from table1 t1
left outer join table2 t2 on t1._id = t2.category_id
left outer join table3 t3 on t2 .department_id = t3 ._id
where t3._id = 1
and t2.category_id is null
and t3 ._id is null
yep i got a solution to my question from dbastackexchange this might help others so pasting here
select
cat._id,
cat.cat_name
from
cat
where
not exists(
select
*
from
deptcat
inner join dept on(
dept._id = deptcat.dept_id
)
where
deptcat.cat_id = cat._id
and dept._id = 1
)