How to optimize a join and sub-query in PostgreSQL

I have 2 tables in PostgreSQL, in schema sts:
product_price_log contains the columns product_name (character varying), price (numeric), sale_date (date)
product_transaction contains the columns product_name (character varying), quantity (numeric), sale_date (date)
I want to get JSON data for sale_date = '2018-01-15' and '2018-01-01'.
The product price should come from the product_price_log table and the sale data from the product_transaction table, for a summarized report.
This is my query:
SELECT json_agg(j.*)
FROM (
    SELECT
        a.sale_date,
        a.product_name,
        sum(a.quantity) AS quantity,
        sum(a.quantity * r.price) AS total_sale
    FROM (
        SELECT sale_date, product_name, quantity
        FROM sts.product_transaction p
        WHERE sale_date = '2018-01-15' OR sale_date = '2018-01-01' AND quantity > 0
    ) a
    INNER JOIN (
        SELECT * FROM (
            (SELECT product_name, price, sale_date FROM sts.product_price_log WHERE sale_date <= '2018-01-15' ORDER BY sale_date DESC LIMIT 1)
            UNION ALL
            (SELECT product_name, price, sale_date FROM sts.product_price_log WHERE sale_date <= '2018-01-01' ORDER BY sale_date DESC LIMIT 1)
        ) a ORDER BY sale_date
    ) r ON (a.product_name = r.product_name AND a.sale_date = r.sale_date)
    GROUP BY a.sale_date, a.product_name
) j;
QUERY PLAN
Aggregate  (cost=230444.51..230444.52 rows=1 width=28)
  ->  Subquery Scan on j  (cost=230290.98..230444.50 rows=1 width=28)
        ->  GroupAggregate  (cost=230290.98..230444.49 rows=1 width=20)
How do I reduce the time cost or otherwise optimize this query?
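One direction to try (not from the original post, just a sketch against the schema described above): look up the latest price per product with a LATERAL subquery instead of the UNION/LIMIT 1 block, and filter the transactions once. An index on product_price_log (product_name, sale_date) would back the lateral lookup.
-- Sketch only: assumes sts.product_price_log(product_name, price, sale_date)
-- and sts.product_transaction(product_name, quantity, sale_date).
SELECT json_agg(j)
FROM (
    SELECT t.sale_date,
           t.product_name,
           sum(t.quantity)            AS quantity,
           sum(t.quantity * pl.price) AS total_sale
    FROM sts.product_transaction t
    CROSS JOIN LATERAL (
        SELECT l.price
        FROM sts.product_price_log l
        WHERE l.product_name = t.product_name
          AND l.sale_date   <= t.sale_date
        ORDER BY l.sale_date DESC
        LIMIT 1                -- latest price at or before the sale date
    ) pl                       -- rows without any price are dropped; use LEFT JOIN LATERAL ... ON true to keep them
    WHERE t.sale_date IN ('2018-01-15', '2018-01-01')
      AND t.quantity > 0
    GROUP BY t.sale_date, t.product_name
) j;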

Related

Optimise query: count latest win/lose streak for all teams

I'm not an expert in data warehousing or analytics, so I gave birth to a monster query that I'd like to optimise (if possible).
The problem is: I need to display the standings table for a given tournament. This table should display team id, total score, team position (i.e. a bucket for same-score teams), order and the latest (!) win/lose streak.
I.e. for the following sequence (most recent first) WWLLLWLD I should get 2W
W = win, L = lose, D = draw
Schema
create table matches (
id integer primary key,
stage_id integer not null,
scheduled_at timestamp not null,
winner_id integer null,
status text not null default 'finished' -- just to give extra context
);
create table teams (
id integer primary key
);
create table match_teams (
match_id integer,
team_id integer,
constraint fk_mt_m foreign key (match_id) references matches(id),
constraint fk_mt_t foreign key (team_id) references teams(id)
);
insert into teams(id) values(1),(2);
insert into matches(id, stage_id, scheduled_at, winner_id) values
(1, 1, now() - interval '1 day', 1),
(2, 1, now() - interval '2 days', 1),
(3, 1, now() - interval '3 days', 2),
(4, 1, now() - interval '4 days', 1),
(5, 1, now() - interval '5 days', null);
insert into match_teams(match_id, team_id) values
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2),
(4, 1),
(4, 2),
(5, 1),
(5, 2);
Query itself:
with v_mto as (
SELECT
m.id,
m."stage_id",
mt."team_id",
m."scheduled_at",
(
case
when m."winner_id" IS NULL then 0
when (m."winner_id" = mt."team_id") then 1
else -1
end
) win
FROM matches m
INNER JOIN match_teams mt ON m.id = mt."match_id"
WHERE m.status = 'finished'
ORDER BY "stage_id", "team_id", "scheduled_at" desc
),
v_lag as (
select
"stage_id",
"team_id",
win,
lag(win, 1, win) over (partition by "stage_id", "team_id" order by "scheduled_at" desc ) lag_win,
first_value(win) over (partition by "stage_id", "team_id" order by "scheduled_at" desc ) first_win
from v_mto
)
select
"stage_id",
"team_id",
v_lag.win,
count(1)
from v_lag
where v_lag.win = v_lag.lag_win and v_lag.win = v_lag.first_win
group by 1, 2, 3
-- This is the query for the final table (on a screenshot)
-- with team_scores as (
-- select
-- m."tournamentStageId",
-- "teamId",
-- sum(
-- -- each win gives 3 score, each draft gives 1 score
-- coalesce((m."winner_id" = mt."team_id")::integer, 0) * 3
-- +
-- (m."winner_id" IS NULL)::int
-- ) as score
-- from matches m
-- inner join match_teams mt on m.id = mt."matchId"
-- where m.status = 1
-- group by m."tournamentStageId", "teamId")
-- select
-- "tournamentStageId",
-- "teamId",
-- t.name,
-- score,
-- dense_rank() over (partition by "tournamentStageId" order by score desc) rank,
-- row_number() over (partition by "tournamentStageId" order by t.name) position
-- -- total number of wins/losses/drafts to be added (the "score" column from the screenshot)
-- from team_scores ts
-- inner join teams t on t.id = ts."teamId"
-- order by "tournamentStageId", rank, position
I've created a sandbox for those who are brave enough to take a deep dive into the task: https://www.db-fiddle.com/f/6jsFFnxQMKwNQWznR3VXHC/2
Also, I've already crafted the part that creates a list of teams together with scores and points, so the attached query will be used as a join or a sub-select.
Query plan for the real database and query (some indexes are probably missing, but that's OK for now):
GroupAggregate  (cost=24862.28..29423.68 rows=3 width=24)
  Group Key: v_lag."computerGameId", v_lag."tournamentStageId", v_lag."teamId", v_lag.win
  ->  Incremental Sort  (cost=24862.28..29423.61 rows=3 width=16)
        Sort Key: v_lag."computerGameId", v_lag."tournamentStageId", v_lag."teamId", v_lag.win
        Presorted Key: v_lag."computerGameId", v_lag."tournamentStageId", v_lag."teamId"
        ->  Subquery Scan on v_lag  (cost=22581.67..29423.47 rows=3 width=16)
              Filter: ((v_lag.win = v_lag.lag_win) AND (v_lag.lag_win = v_lag.first_win))
              ->  WindowAgg  (cost=22581.67..27468.67 rows=130320 width=32)
                    ->  Subquery Scan on v_mto  (cost=22581.67..24210.67 rows=130320 width=24)
                          ->  Sort  (cost=22581.67..22907.47 rows=130320 width=28)
                                Sort Key: m."computerGameId", m."tournamentStageId", mt."teamId", m."scheduledAt" DESC
                                ->  Hash Join  (cost=3863.39..8391.38 rows=130320 width=28)
                                      Hash Cond: (mt."matchId" = m.id)
                                      ->  Seq Scan on match_teams mt  (cost=0.00..2382.81 rows=137281 width=8)
                                      ->  Hash  (cost=2658.10..2658.10 rows=65623 width=24)
                                            ->  Seq Scan on matches m  (cost=0.00..2658.10 rows=65623 width=24)
                                                  Filter: (status = 1)
Thanks everyone for the help and suggestions!
The final result:
P.S. It is possible to convert the first query (v_mto) into a materialised view, or to de-normalise win into the match_teams table, as this piece will be used in different queries to build match/game stats.
So, the original query is wrong: it gives incorrect results for the standings.
I've moved to row_number() arithmetic to solve this task.
The final query (with scores) looks like this:
create materialized view vm_tournament_stage_standings as
with v_mto as (SELECT m.id,
m."computerGameId",
m."tournamentStageId",
mt."teamId",
m."scheduledAt",
(
case
when m."winnerId" IS NULL then 'D'
when m."winnerId" = mt."teamId" then 'W'
else 'L'
end
) win
FROM matches m
INNER JOIN match_teams mt ON
m.id = mt."matchId"
WHERE m.status = 1),
v_streaks as (select "computerGameId",
"tournamentStageId",
"teamId",
row_number()
over grp_ord_matches
- row_number()
over (partition by "computerGameId", "tournamentStageId", "teamId", win order by "scheduledAt" desc ) streak_index,
win
from v_mto
window grp_ord_matches as (partition by "computerGameId", "tournamentStageId", "teamId" order by "scheduledAt" desc)),
v_streak as (select "computerGameId",
"tournamentStageId",
"teamId",
count(1) || win as streak
from v_streaks
where streak_index = 0
group by "computerGameId", "tournamentStageId", "teamId", "win"),
team_scores as (select m."tournamentStageId",
"teamId",
sum((m."winnerId" = mt."teamId")::int) as wins,
sum((m."winnerId" is null)::int) draws,
sum((m."winnerId" <> mt."teamId")::int) loses,
sum(
coalesce((m."winnerId" = mt."teamId")::integer, 0) * 3
+
(m."winnerId" IS NULL)::int
) as score
from matches m
inner join match_teams mt on m.id = mt."matchId"
where m.status = 1
group by m."tournamentStageId", "teamId")
select ts."tournamentStageId",
ts."teamId",
score,
wins,
draws,
loses,
vs.streak as streak,
dense_rank() over (partition by ts."tournamentStageId" order by score desc) rank,
row_number() over (partition by ts."tournamentStageId" order by t.name) position
from team_scores ts
inner join teams t on t.id = ts."teamId"
inner join v_streak vs on vs."teamId" = t.id and vs."tournamentStageId" = ts."tournamentStageId"
order by "tournamentStageId", rank, position

Postgres Query Optimization without adding an extra index

I was trying to optimize this query differently, but before that: can we make any slight change in this query to reduce the time without adding any index?
Postgres version: 13.5
Query:
SELECT
orders.id as order_id,
orders.*, u1.name as user_name,
u2.name as driver_name,
u3.name as payment_by_name, referrals.name as ref_name,
array_to_string(array_agg(orders_payments.payment_type_name), ',') as payment_type_name,
array_to_string(array_agg(orders_payments.amount), ',') as payment_type_amount,
array_to_string(array_agg(orders_payments.reference_code), ',') as reference_code,
array_to_string(array_agg(orders_payments.tips), ',') as tips,
array_to_string(array_agg(locations.name), ',') as location_name,
(select
SUM(order_items.tax) as tax from order_items
where order_items.order_id = orders.id and order_items.deleted = 'f'
) as tax,
(select
SUM(orders_surcharges.surcharge_tax) as surcharge_tax from orders_surcharges
where orders_surcharges.order_id = orders.id
)
FROM "orders"
LEFT JOIN
users as u1 ON u1.id = orders.user_id
LEFT JOIN
users as u2 ON u2.id = orders.driver_id
LEFT JOIN
users as u3 ON u3.id = orders.payment_received_by
LEFT JOIN
referrals ON referrals.id = orders.referral_id
INNER JOIN
locations ON locations.id = orders.location_id
LEFT JOIN
orders_payments ON orders_payments.order_id = orders.id
WHERE
(orders.company_id = '626')
AND
(orders.created_at BETWEEN '2021-04-23 20:00:00' AND '2021-07-24 20:00:00')
AND
orders.order_status_id NOT IN (10, 5, 50)
GROUP BY
orders.id, u1.name, u2.name, u3.name, referrals.name
ORDER BY
created_at ASC LIMIT 300 OFFSET 0
Current Index:
"orders_pkey" PRIMARY KEY, btree (id)
"idx_orders_company_and_location" btree (company_id, location_id)
"idx_orders_created_at" btree (created_at)
"idx_orders_customer_id" btree (customer_id)
"idx_orders_location_id" btree (location_id)
"idx_orders_order_status_id" btree (order_status_id)
Execution Plan
It seems most of the time is spent on the parallel heap scan.
You're looking for 300 orders and trying to get some additional information about these records. I would first fetch those 300 records, instead of getting all the data and then limiting it to 300. Something like this:
WITH orders_300 AS (
SELECT orders.*, locations.name AS location_name -- just get the columns that you really need, never use * in production
FROM orders
INNER JOIN locations ON locations.id = orders.location_id
WHERE orders.company_id = '626'
AND orders.created_at BETWEEN '2021-04-23 20:00:00' AND '2021-07-24 20:00:00'
AND orders.order_status_id NOT IN (10, 5, 50)
ORDER BY
created_at ASC LIMIT 300 -- LIMIT
OFFSET 0
)
SELECT
orders.id as order_id,
orders.*, -- just get the columns that you really need, never use * in production
u1.name as user_name,
u2.name as driver_name,
u3.name as payment_by_name, referrals.name as ref_name,
array_to_string(array_agg(orders_payments.payment_type_name), ',') as payment_type_name,
array_to_string(array_agg(orders_payments.amount), ',') as payment_type_amount,
array_to_string(array_agg(orders_payments.reference_code), ',') as reference_code,
array_to_string(array_agg(orders_payments.tips), ',') as tips,
array_to_string(array_agg(orders.location_name), ',') as location_name,
(SELECT SUM(order_items.tax) as tax
FROM order_items
WHERE order_items.order_id = orders.id
AND order_items.deleted = 'f'
) as tax,
( SELECT SUM(orders_surcharges.surcharge_tax) as surcharge_tax
FROM orders_surcharges
WHERE orders_surcharges.order_id = orders.id
)
FROM "orders_300" AS orders
LEFT JOIN users as u1 ON u1.id = orders.user_id
LEFT JOIN users as u2 ON u2.id = orders.driver_id
LEFT JOIN users as u3 ON u3.id = orders.payment_received_by
LEFT JOIN referrals ON referrals.id = orders.referral_id
LEFT JOIN orders_payments ON orders_payments.order_id = orders.id
GROUP BY
orders.id, u1.name, u2.name, u3.name, referrals.name
ORDER BY
created_at;
This will at least have a huge impact on the slowest part of your query, all these index scans on orders_payments. Every single scan is fast, but the query is doing 165,000 of them... Limit this to just 300 and it will be much faster.
Another issue is that none of your indexes covers the entire WHERE condition on the table "orders". But if you can't create a new index, you're out of luck.
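For reference only, since the question explicitly rules out new indexes: an index matching the entire WHERE clause on orders would look roughly like the sketch below (the column order and the partial predicate are assumptions, not something from the thread).
CREATE INDEX idx_orders_company_created_at
    ON orders (company_id, created_at)
    WHERE order_status_id NOT IN (10, 5, 50);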

How to order by a column from a subquery which has an index for sorting

I use PostgreSQL 11.8. I'm facing this query and the task of sorting products by createdAt, price and numberOfEntries. For the products table's own columns (created_at and price) I have indexes, and I need to use them.
create index npdb_swedish_custom_index on products
    using GIN (to_tsvector('pg_catalog.swedish', name||price||description||brand));
CREATE INDEX IF NOT EXISTS created_desc_index ON products (created_at DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS created_asc_index ON products (created_at ASC NULLS LAST);
CREATE INDEX IF NOT EXISTS price_desc_index ON products (price DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS price_asc_index ON products (price ASC NULLS LAST);
To work around the problem I tried to LEFT JOIN the same table, but I did not find a solution, because when I add ORDER BY sp.price ASC I get
column "sp.price" must appear in the GROUP BY clause or be used in an aggregate function
My query:
SELECT
array_agg(DISTINCT main_products_alias.id) AS ids
FROM (
SELECT
products_alias.id,
products_alias.group_identity,
products_alias.price,
products_alias.extras,
products_alias.created_at AS "createdAt"
,COUNT(DISTINCT uip.id) as "numberOfEntries"
,ts_rank_cd(to_tsvector('pg_catalog.swedish', products_alias.name||products_alias.price||products_alias.description||products_alias.brand), to_tsquery('pg_catalog.swedish', 'Barnvagnar|Solskydd')) AS rank
FROM products products_alias
LEFT JOIN product_category cpt on cpt.product_id = products_alias.id
LEFT JOIN user_ip_product uip on uip.products_id = products_alias.id
WHERE to_tsvector('pg_catalog.swedish', products_alias.name||products_alias.price||products_alias.description||products_alias.brand) @@ to_tsquery('pg_catalog.swedish', 'Barnvagnar|Solskydd')
GROUP BY products_alias.id
) AS main_products_alias
LEFT JOIN products sp on sp.id = main_products_alias.id
GROUP BY main_products_alias.group_identity
ORDER BY sp.price ASC
The only thing I came up with is adding to the main select
(array_agg(DISTINCT main_products_alias.created_at))[1]::TIMESTAMP AS created_at_g,
and then ORDER BY created_at_g DESC, but in that case my indexes are not used :(
Does anyone have a suggestion for that?
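One possible direction (a sketch, not from the thread, with mp standing in for the main_products_alias subquery above): because the outer query groups by group_identity, sp.price can only drive the ordering through an aggregate such as min(). Note that sorting by a per-group aggregate cannot be served by the btree indexes on price, so this resolves the error but not the index question.
SELECT array_agg(DISTINCT mp.id) AS ids,
       min(sp.price)             AS min_price
FROM (
    SELECT id, group_identity
    FROM products          -- stands in for the full main_products_alias subquery
) AS mp
LEFT JOIN products sp ON sp.id = mp.id
GROUP BY mp.group_identity
ORDER BY min_price ASC;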

Postgres select works 3x faster than a function with that select

I have a SELECT in Postgres:
SELECT DISTINCT ON (price) price, quantity, is_ask, final_update_id
FROM (SELECT *
FROM ((SELECT price, quantity, is_ask, book_depth.final_update_id
FROM order_depth
LEFT JOIN book_depth ON book_depth_id = book_depth.id
WHERE book_depth_id IN (SELECT id
FROM book_depth
WHERE final_update_id > (SELECT last_update_id
FROM order_book
WHERE symbol_name = 'XRPRUB'
ORDER BY last_update_id DESC
LIMIT 1)
AND symbol_name = 'XRPRUB'))
UNION
(SELECT price, quantity, is_ask, order_book_id
FROM "order"
WHERE order_book_id = (SELECT id
FROM order_book
WHERE symbol_name = 'XRPRUB'
ORDER BY last_update_id DESC
LIMIT 1))
ORDER BY final_update_id DESC) AS t) AS t1
ORDER BY price, final_update_id DESC;
It runs for about 20 seconds.
But when I create a function with this select, the function runs for about 1 min 40 sec. Can someone explain whether this is normal, or did I make a mistake somewhere?
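A general diagnostic note, not from the thread: auto_explain can log the plan chosen for statements executed inside a function, which makes it possible to compare it against the plan of the hand-run SELECT (the function name below is hypothetical).
LOAD 'auto_explain';
SET auto_explain.log_min_duration = 0;
SET auto_explain.log_analyze = on;
SET auto_explain.log_nested_statements = on;   -- include statements run inside functions
SELECT * FROM order_book_snapshot('XRPRUB');   -- hypothetical wrapper function for the SELECT above
-- the nested plan then appears in the server log for this session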

CTE query performance improvement (postgres 9.6)

I have a query below that does what is needed, but it is super slow. Over a given period of time (in the example below, between 2017-05-01 00:00:00 and 2017-05-01 01:00:00, but it could be anything between 1 second and several days) it returns the first and last records of each interval of a given length (15 seconds in the example, but it could be anything from 1 second to several days).
It is unbearably slow. For example, with a 1-second interval over the period 2017-05-01 00:00:00 to 2017-05-01 00:00:01 it runs for 6 seconds on my i7-7700HQ; with a 1-second interval over the period 2017-05-01 00:00:00 to 2017-05-01 00:00:05 I never saw the result! The database has about 60 million rows now; in production it will be 1 billion, with approximately 50 million added every month.
QUERY:
WITH ranges as (
SELECT dd as start_range,
dd + '15 seconds'::interval as end_range,
ROW_NUMBER() over () as grp
FROM generate_series
( '2017-05-01 00:00:00'::timestamp
, '2017-05-01 01:00:00'::timestamp
, '15 seconds'::interval) dd
), create_grp as (
SELECT r.grp, r.start_range, r.end_range, p.*
FROM prices p
JOIN ranges r
ON p.dt >= r.start_range
AND p.dt < r.end_range
WHERE instrument='EURGBP'
), minmax as (
SELECT row_number() over (partition by grp
order by dt asc) as rn1,
row_number() over (partition by grp
order by dt desc) as rn2,
create_grp.*
FROM create_grp
)
SELECT *,
CASE WHEN rn1 = 1 and rn2 = 1 THEN 'first and last'
WHEN rn1 = 1 THEN 'first'
WHEN rn2 = 1 THEN 'last'
END as row_position
FROM minmax
WHERE
1 IN (rn1, rn2)
ORDER BY dt
;
SCHEMA:
CREATE TABLE public.prices
(
uid uuid NOT NULL DEFAULT uuid_generate_v4(),
instrument character varying COLLATE pg_catalog."default" NOT NULL,
bid double precision NOT NULL,
ask double precision NOT NULL,
dt timestamp without time zone NOT NULL DEFAULT now(),
CONSTRAINT prices_pkey PRIMARY KEY (uid)
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
INDEXES:
CREATE INDEX idx_dt_instrument
ON public.prices USING btree
(dt, instrument COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX idx_dt_instrument_bid_ask
ON public.prices USING btree
(dt, instrument COLLATE pg_catalog."default", bid, ask)
TABLESPACE pg_default;
CREATE INDEX idx_instrument
ON public.prices USING btree
(instrument COLLATE pg_catalog."default")
TABLESPACE pg_default;
EXAMPLE dataset for 5 seconds starting from 2017-05-01 00:00:00:
"uid","instrument","bid","ask","dt"
"4ecaa607-3733-4aba-9093-abc8f59e1638","EURGBP","0.84331","0.8434","2017-05-01 00:00:00.031"
"d1a41847-4945-4cf4-a45f-781db977ae07","GBPJPY","143.949005","143.970993","2017-05-01 00:00:00.031"
"34972c12-899b-404a-b0de-bae0fd3f6733","GBPJPY","143.947998","143.970993","2017-05-01 00:00:00.056"
"384b8246-3eac-4826-b6d2-d6caaa364f81","GBPUSD","1.29311","1.29323","2017-05-01 00:00:00.066"
"d879b04d-a4ed-452e-9208-7dfff672e860","GBPJPY","143.947006","143.970993","2017-05-01 00:00:00.067"
"a9e735ec-30d9-4c9a-a28e-5e5553273372","GBPJPY","143.945999","143.970993","2017-05-01 00:00:00.079"
"ee40ee5f-d8ac-41ce-9f39-ae50ef15d02d","GBPJPY","143.947006","143.964005","2017-05-01 00:00:00.091"
"7b605c3b-121f-46c2-a3e4-297d187f0a28","GBPJPY","143.947006","143.968994","2017-05-01 00:00:00.115"
"ccb307b0-7fa3-4354-8707-1426eded49e8","GBPJPY","143.942001","143.968994","2017-05-01 00:00:00.205"
"206c339d-bc36-469d-82d1-c7ae74002f44","EURGBP","0.84332","0.8434","2017-05-01 00:00:00.206"
"bc581318-91c7-4c80-85e0-e06f7236b277","GBPJPY","143.944","143.968994","2017-05-01 00:00:00.206"
"1fabf850-9045-4beb-81ae-bfa3adada62e","GBPUSD","1.29311","1.29324","2017-05-01 00:00:00.208"
"06d40f9a-a47e-466d-aebf-97a0154bdc74","GBPJPY","143.942001","143.968994","2017-05-01 00:00:00.209"
"b3b7fac9-340f-4e3b-8946-7bdecc383191","GBPUSD","1.29311","1.29327","2017-05-01 00:00:00.211"
"b5c28955-b40f-446f-9f43-9d5e9f145c1b","EURGBP","0.84331","0.8434","2017-05-01 00:00:00.212"
"192a40d6-8001-42ea-9430-96e800f2d4e8","GBPJPY","143.942993","143.968994","2017-05-01 00:00:00.212"
"e98dbba7-8231-4fa3-926b-291eb22b0f87","GBPJPY","143.944","143.968994","2017-05-01 00:00:00.215"
"7c6e952d-d01f-4dac-a1df-6b32168246fd","GBPUSD","1.29311","1.29326","2017-05-01 00:00:00.216"
"c86ba29f-3edb-4147-ba99-d9dfe594b0ff","GBPUSD","1.29312","1.29327","2017-05-01 00:00:00.243"
"35ca131e-b714-462d-827f-b5bc593aa3e6","GBPJPY","143.942993","143.968994","2017-05-01 00:00:00.262"
"91fc6fc0-7af9-4036-8e0e-29d4a3bb3e43","EURGBP","0.8433","0.8434","2017-05-01 00:00:00.283"
"e71946e0-1859-461a-b3eb-0539584ac4dc","GBPJPY","143.944","143.968994","2017-05-01 00:00:00.296"
"7321eea8-2610-408b-8dbf-4087f01e8c6e","GBPJPY","143.947998","143.968994","2017-05-01 00:00:00.377"
"f146716d-cadf-4e2f-9f17-6e7e8c5f2175","GBPUSD","1.29312","1.29327","2017-05-01 00:00:00.38"
"b3d81295-8cd5-44e7-879c-f15476ffac21","GBPUSD","1.29312","1.29326","2017-05-01 00:00:00.391"
"c037e27b-a8f4-4ec3-8472-b0f72a58fd33","EURGBP","0.8433","0.8434","2017-05-01 00:00:00.413"
"dba0f8f5-f218-49ea-8ebf-132f3ecf8910","GBPJPY","143.947998","143.968994","2017-05-01 00:00:00.443"
"a08449e3-44aa-4fed-b8e9-bf1a6bfc35c5","EURGBP","0.8433","0.84339","2017-05-01 00:00:00.585"
"a0b7ba20-653f-46db-93e9-d1edd8972dba","GBPUSD","1.29312","1.29326","2017-05-01 00:00:00.588"
"c2855ce8-8c5b-4de7-a92b-186d928e8f31","GBPJPY","143.947998","143.968994","2017-05-01 00:00:00.591"
"01b19a70-3ce7-44c7-9abd-9945321fdfd0","EURGBP","0.8433","0.84339","2017-05-01 00:00:00.621"
"4518aa9d-1f76-428e-ace4-7dcffeb6aa22","GBPJPY","143.947998","143.968994","2017-05-01 00:00:00.796"
"e4d4bac4-dd02-4da3-b231-20bfa6424412","EURGBP","0.8433","0.84339","2017-05-01 00:00:00.907"
"e48d3721-3157-4033-bd4f-09baae0f989c","GBPUSD","1.29312","1.29326","2017-05-01 00:00:00.909"
"64a33b2d-c756-4a0a-823e-075143ae7263","GBPJPY","143.947998","143.968994","2017-05-01 00:00:00.913"
"e477c47d-efd1-44dd-8058-cac17e08bc5d","GBPUSD","1.29314","1.29327","2017-05-01 00:00:00.914"
"cf6d5341-f7fd-47bc-89f6-a5448f78fb99","EURGBP","0.84329","0.84339","2017-05-01 00:00:00.943"
"4caa8bb9-094e-48ca-8a9a-7b2dd23a4fb4","GBPJPY","143.947006","143.968994","2017-05-01 00:00:00.967"
"274b7f51-3b07-430d-bfc5-a0b1ce62a750","GBPUSD","1.29312","1.29327","2017-05-01 00:00:00.975"
"2e3c2cd6-2525-46b3-86c5-b88e48bb0138","GBPJPY","143.947998","143.968994","2017-05-01 00:00:01.076"
"bcb12b90-2795-4789-bfa2-8e9a494eaeb1","GBPUSD","1.29312","1.29326","2017-05-01 00:00:01.076"
"d63d6037-fa81-47cc-bd62-4655850c0f80","GBPUSD","1.29312","1.29327","2017-05-01 00:00:01.077"
"6dbf8d8e-37c8-4537-80b5-c9219f4356b1","EURGBP","0.8433","0.84339","2017-05-01 00:00:01.079"
"7e3b6eaf-22e1-4f87-a7c1-226d3ee76146","EURGBP","0.84329","0.84339","2017-05-01 00:00:01.08"
"63e451c5-b8c6-4b57-ac9e-2171bc1dfbfa","GBPJPY","143.947998","143.968994","2017-05-01 00:00:01.121"
"866316e7-90a2-4c80-9a38-b3a062837415","GBPJPY","143.949005","143.968994","2017-05-01 00:00:01.143"
"fb11e963-cd36-4cfc-89b3-1bba3b8595b5","GBPUSD","1.29312","1.29327","2017-05-01 00:00:01.156"
"ad57e34f-5cbe-4b79-8579-b2c77c83b50b","EURGBP","0.84329","0.84339","2017-05-01 00:00:01.249"
"46b1840e-e424-41c2-8c71-691b201183ab","GBPJPY","143.949005","143.968994","2017-05-01 00:00:01.259"
"c5fa5e09-46df-4ea4-8640-9cf18e1f8fcc","GBPUSD","1.29312","1.29327","2017-05-01 00:00:01.265"
"926d092d-601e-43ec-b398-3300b0345cfd","GBPJPY","143.949997","143.968994","2017-05-01 00:00:01.267"
"4ddfbc84-20a1-4281-86c2-f0a4e77d0152","EURGBP","0.84329","0.84339","2017-05-01 00:00:01.305"
"e75af139-51a2-4a0a-acc8-e0adfbe5472a","GBPUSD","1.29313","1.29327","2017-05-01 00:00:01.346"
"408bca2d-2d57-471b-b6a7-819a3257741f","GBPJPY","143.951004","143.968994","2017-05-01 00:00:01.348"
"53c6f444-bd76-4a28-a370-7af98d5fa9ec","GBPUSD","1.29313","1.29326","2017-05-01 00:00:01.359"
"cd002ef4-925e-469b-8f2f-848cadd5943f","EURGBP","0.84329","0.84339","2017-05-01 00:00:01.443"
"13587e44-b694-4ce9-a626-e592d28c507d","EURGBP","0.8433","0.84339","2017-05-01 00:00:01.45"
"aabbacaf-3f09-4313-b992-e8b8d91df7ad","GBPJPY","143.951004","143.968994","2017-05-01 00:00:01.461"
"6b2111ef-c285-4482-b93c-238f27522ca3","GBPUSD","1.29313","1.29326","2017-05-01 00:00:01.477"
"19507f29-149c-4fdf-a1fa-aac312ab8479","EURGBP","0.8433","0.84338","2017-05-01 00:00:01.506"
"916cc759-a536-449d-b825-8203ebea9bf8","GBPJPY","143.949997","143.968994","2017-05-01 00:00:01.649"
"1fdd2b35-fd44-4dbb-81df-b98514af7004","GBPUSD","1.29313","1.29326","2017-05-01 00:00:01.649"
"3e9fc214-3cc6-4991-9eb3-2f5a5e557e96","GBPUSD","1.29312","1.29326","2017-05-01 00:00:01.65"
"5da9a29b-0d8a-42b1-98b6-f89dd2893c77","EURGBP","0.8433","0.84338","2017-05-01 00:00:01.651"
"b87f85e3-fba5-4556-8ca5-52625d978d53","EURGBP","0.84329","0.84338","2017-05-01 00:00:01.652"
"75b7624c-b90c-40d4-a7fc-09df7da9f659","GBPJPY","143.949997","143.968994","2017-05-01 00:00:01.715"
"b076534f-b893-4b55-9d9a-e3bac6e77ab2","GBPUSD","1.29312","1.29325","2017-05-01 00:00:01.732"
"6464da85-9eb5-4548-bd5e-3331a69f2121","EURGBP","0.84329","0.84338","2017-05-01 00:00:01.817"
"f9937464-e36a-4c57-a212-2f32943307d3","EURGBP","0.8433","0.84338","2017-05-01 00:00:01.83"
"1c7848e5-c101-4a50-87fe-1f5e980dbb95","GBPJPY","143.949005","143.968994","2017-05-01 00:00:01.83"
"30ad5bfc-f968-4b49-9b22-80c12e801f37","GBPUSD","1.29312","1.29325","2017-05-01 00:00:01.847"
"94eb4b51-4901-4e56-873a-05f93ef35e64","EURGBP","0.8433","0.84338","2017-05-01 00:00:02.007"
"97ef7e05-0a2f-4d70-80d1-1d3c748ea70f","GBPJPY","143.949005","143.968994","2017-05-01 00:00:02.007"
"af9274d9-02d6-43fc-9336-c3f512e4bd78","GBPUSD","1.29312","1.29325","2017-05-01 00:00:02.008"
"78c12ba8-992a-469e-bdda-bef004596aa1","GBPUSD","1.29311","1.29325","2017-05-01 00:00:02.062"
"4e1e31a9-bfdc-4f56-b8e1-0c7e83852fe0","GBPUSD","1.29311","1.29324","2017-05-01 00:00:02.071"
"e4f1c459-f75b-4005-8dd1-00c572ded4f9","GBPUSD","1.29311","1.29324","2017-05-01 00:00:02.203"
"2c8b38ce-50de-43e7-ba63-b6d1d7a7f76c","GBPJPY","143.947998","143.968994","2017-05-01 00:00:02.244"
"438f4439-a259-421f-8f45-a0dc88d80e1d","GBPJPY","143.949997","143.968994","2017-05-01 00:00:02.257"
"f12b7e18-84f0-4686-9588-0ecfab8f8bf8","GBPUSD","1.29311","1.29324","2017-05-01 00:00:02.301"
"f335ee32-cd5e-4cd2-aabe-06c14985778f","GBPJPY","143.949997","143.968994","2017-05-01 00:00:02.394"
"3c55786e-f9b8-424b-9c32-0b4b432a553f","EURGBP","0.8433","0.84338","2017-05-01 00:00:02.404"
"53c3e013-b016-41bb-a3ed-48158640831a","GBPUSD","1.29311","1.29324","2017-05-01 00:00:02.417"
"a944e2b6-3700-4ecd-994d-cb5ab97cf5ad","EURGBP","0.8433","0.84339","2017-05-01 00:00:02.508"
"ab32781d-1502-4047-b320-c3f4cda389fd","GBPJPY","143.949997","143.968994","2017-05-01 00:00:02.605"
"24759950-e095-4bb7-be3d-fda725423589","EURGBP","0.84329","0.84339","2017-05-01 00:00:02.625"
"cd7940d9-8a30-4a12-9dc9-a2c710cbb982","GBPUSD","1.29311","1.29325","2017-05-01 00:00:02.649"
"233e0a3d-eeae-403f-b512-2add055d6735","GBPJPY","143.949997","143.968994","2017-05-01 00:00:02.761"
"58c7c33c-75b2-45bd-a50c-615d9eaaec07","EURGBP","0.84329","0.84339","2017-05-01 00:00:02.763"
"675b2847-b0f9-44f1-ab75-d6ddafbddfd5","GBPUSD","1.29311","1.29325","2017-05-01 00:00:02.766"
"d2946714-e84c-4a36-9819-0cca4f2dd197","EURGBP","0.8433","0.84339","2017-05-01 00:00:02.81"
"5322418b-4c45-4771-b591-cc8fea167424","GBPJPY","143.949997","143.968994","2017-05-01 00:00:02.832"
"9a1c572e-28b9-4d16-b196-ccef3e48f602","EURGBP","0.84331","0.84339","2017-05-01 00:00:02.929"
"a4377169-5400-4dd6-81f0-8b455d178b65","EURGBP","0.8433","0.84339","2017-05-01 00:00:02.951"
"37cdddb0-db5e-4574-bf4d-45fb8d52c6be","GBPUSD","1.29311","1.29325","2017-05-01 00:00:02.951"
"1258c886-b797-43ed-9886-323daac83dde","GBPUSD","1.29311","1.29325","2017-05-01 00:00:02.999"
"453406c3-5902-4dab-b39c-2e0b85739767","EURGBP","0.8433","0.84339","2017-05-01 00:00:03.005"
"95699a6d-8412-4f2f-9dd4-86a1d3eed57d","GBPUSD","1.29312","1.29325","2017-05-01 00:00:03.129"
"2268fcc4-104e-434b-8237-bce765bda084","EURGBP","0.8433","0.84339","2017-05-01 00:00:03.131"
"ba771b59-b04c-4683-8ceb-8e9b8451bcc7","GBPUSD","1.29312","1.29325","2017-05-01 00:00:03.17"
"00541afa-79a6-4d09-8c6e-4d59e8d15bb2","GBPJPY","143.947006","143.968994","2017-05-01 00:00:03.216"
"b4dd6e15-0e25-4da1-b210-021b91e4245a","GBPUSD","1.29312","1.29325","2017-05-01 00:00:03.323"
"79b11513-97c5-4146-a4c8-7daf7d8ffe6f","GBPJPY","143.947006","143.966995","2017-05-01 00:00:03.324"
"c5283502-eb09-45d3-b92b-d5625de9e9fc","GBPJPY","143.947006","143.966003","2017-05-01 00:00:03.411"
"67715855-9171-4ae7-b205-5ba017d328f2","EURGBP","0.8433","0.84339","2017-05-01 00:00:03.501"
"00b7647e-7d53-407d-9704-78a8712ac580","GBPJPY","143.947006","143.966003","2017-05-01 00:00:03.525"
"b41b5483-5fb2-4c57-9892-0d02e5e6a823","GBPJPY","143.947998","143.966003","2017-05-01 00:00:03.549"
"881712b2-ff9a-4065-ad6a-9caee03283e3","GBPJPY","143.947998","143.964996","2017-05-01 00:00:03.56"
"7248bc13-5a9f-4fba-97a1-fe041e975c38","EURGBP","0.8433","0.84339","2017-05-01 00:00:03.581"
"0379b4bb-7691-48e8-a668-2388f6e5d510","GBPJPY","143.947998","143.964996","2017-05-01 00:00:03.672"
"2715fe9a-eb6f-445d-b7d0-850293bb5b2e","GBPJPY","143.947006","143.964996","2017-05-01 00:00:03.698"
"52ce7685-0964-46e4-ab8f-282e68bdb73d","GBPJPY","143.947006","143.964005","2017-05-01 00:00:03.711"
"6b31dee4-1aec-4d10-9b37-0c58bae7e449","GBPJPY","143.947006","143.964005","2017-05-01 00:00:03.848"
"7e61571a-ea8b-44cb-9481-ae31ba8b86c4","GBPUSD","1.29312","1.29325","2017-05-01 00:00:03.868"
"89e316cb-e01d-4f01-9dcd-a380e94ed4e8","GBPJPY","143.947006","143.964005","2017-05-01 00:00:03.943"
"966318e4-a6b5-4c1b-82e1-fbbf0029ce02","GBPUSD","1.29312","1.29325","2017-05-01 00:00:04.096"
"8fc7b075-7f5e-40f5-9879-55604f77a3c5","EURGBP","0.8433","0.84339","2017-05-01 00:00:04.227"
"23ab8b66-337e-4c4e-b73a-f31101716565","GBPJPY","143.947006","143.964005","2017-05-01 00:00:04.227"
"eb7469be-8a40-4498-af85-7b2c42630149","GBPJPY","143.947006","143.964005","2017-05-01 00:00:04.288"
"f945a821-9790-4760-beea-1b54bb1c2a85","EURGBP","0.8433","0.84339","2017-05-01 00:00:04.406"
"978c6d4c-22f9-4218-b447-940a3d3c436e","EURGBP","0.8433","0.84339","2017-05-01 00:00:04.505"
"2ace7d77-b228-491c-9cf0-610b8e48cdf7","GBPJPY","143.947006","143.966003","2017-05-01 00:00:04.786"
"0f89d5dd-52f2-44e9-aa54-cbb423cd7416","GBPJPY","143.947006","143.964996","2017-05-01 00:00:04.787"
"db7748a8-317b-4c62-a1f2-d6679430b343","GBPUSD","1.29312","1.29325","2017-05-01 00:00:04.895"
"ae29728e-773b-42a9-9f1e-199ba996731a","EURGBP","0.8433","0.84339","2017-05-01 00:00:04.986"
"19ffa8e4-63c5-4f47-90d5-771dd5a839ba","GBPJPY","143.947006","143.962997","2017-05-01 00:00:04.986"
The slowdown is caused by WHERE instrument='EURGBP'. As soon as I remove it, the query flies. However, I must filter by instrument.
EXPLAIN OUTPUT:
Sort  (cost=2356660289.95..2356711257.16 rows=20386886 width=144)
  Sort Key: minmax.dt
  CTE ranges
    ->  WindowAgg  (cost=0.00..25.00 rows=1000 width=24)
          ->  Function Scan on generate_series dd  (cost=0.00..10.00 rows=1000 width=8)
  CTE create_grp
    ->  Nested Loop  (cost=344299.48..460976348.77 rows=2043798111 width=71)
          Join Filter: ((p.dt >= r.start_range) AND (p.dt < r.end_range))
          ->  Bitmap Heap Scan on prices p  (cost=344299.48..1121763.77 rows=18394183 width=47)
                Recheck Cond: ((instrument)::text = 'EURGBP'::text)
                ->  Bitmap Index Scan on idx_instrument  (cost=0.00..339700.94 rows=18394183 width=0)
                      Index Cond: ((instrument)::text = 'EURGBP'::text)
          ->  CTE Scan on ranges r  (cost=0.00..20.00 rows=1000 width=24)
  CTE minmax
    ->  WindowAgg  (cost=1796644092.85..1837520055.07 rows=2043798111 width=112)
          ->  Sort  (cost=1796644092.85..1801753588.13 rows=2043798111 width=104)
                Sort Key: create_grp.grp, create_grp.dt DESC
                ->  WindowAgg  (cost=880857947.67..921733909.89 rows=2043798111 width=104)
                      ->  Sort  (cost=880857947.67..885967442.95 rows=2043798111 width=96)
                            Sort Key: create_grp.grp, create_grp.dt
                            ->  CTE Scan on create_grp  (cost=0.00..40875962.22 rows=2043798111 width=96)
  ->  CTE Scan on minmax  (cost=0.00..51298821.64 rows=20386886 width=144)
        Filter: ((1 = rn1) OR (1 = rn2))
Any suggestions to optimize it are very very welcome.
CREATE TEMP TABLE ranges
( start_range timestamp NOT NULL
, end_range timestamp NOT NULL
, grp INTEGER NOT NULL UNIQUE
, PRIMARY KEY (start_range)
);
INSERT INTO ranges(start_range, end_range, grp)
SELECT dd as start_range,
dd + '1 seconds'::interval as end_range,
ROW_NUMBER() over () as grp
FROM generate_series
( '2017-05-01 00:00:00'::timestamp
, '2017-05-01 01:00:00'::timestamp
--, '15 seconds'::interval) dd
, '1 seconds'::interval) dd
;
VACUUM ANALYZE ranges;
-- EXPLAIN ANALYZE
SELECT *,
CASE WHEN rn1 = 1 and rn2 = 1 THEN 'first and last'
WHEN rn1 = 1 THEN 'first'
WHEN rn2 = 1 THEN 'last'
END as row_position
FROM (
SELECT r.grp, p.uid, p.instrument, r.start_range , p.dt AS dt
, p.bid, p.ask
, row_number() over (partition by r.grp order by p.dt asc) as rn1
, row_number() over (partition by r.grp order by p.dt desc) as rn2
FROM prices p
JOIN ranges r ON p.dt >= r.start_range AND p.dt < r.end_range
WHERE p.instrument='EURGBP'
) zzzz
WHERE (rn1=1 OR rn2=1)
-- ORDER BY instrument,grp, dt
;
And, you don't actually need the row number, since you only want the beginning/end of a window:
-- EXPLAIN ANALYZE
SELECT *,
CASE WHEN (prev IS NULL AND next IS NULL) THEN 'first and last'
WHEN prev IS NULL THEN 'first'
WHEN next IS NULL THEN 'last'
END as row_position
FROM (
SELECT r.grp, p.uid, p.instrument, r.start_range , p.dt AS dt, p.bid, p.ask
, lag(uid) over (www) as prev
, lead(uid) over (www) as next
FROM prices p
JOIN ranges r ON p.dt >= r.start_range AND p.dt < r.end_range
WHERE p.instrument='EURGBP'
WINDOW www AS (partition by r.grp order by p.dt )
-- ORDER BY p.instrument,r.grp, p.dt
) qqqq
WHERE (prev IS NULL OR next IS NULL)
-- ORDER BY instrument,grp, dt
;
Thanks to all for the help and hints! Apparently, if I move the WHERE clause from create_grp to minmax, the query becomes extremely fast. I have no explanation for it whatsoever. But it works. Here it is:
WITH ranges as (
SELECT dd as start_range,
dd + '1 seconds'::interval as end_range,
ROW_NUMBER() over () as grp
FROM generate_series
( '2017-05-01 00:00:00'::timestamp
, '2017-05-01 00:01:00'::timestamp
, '1 seconds'::interval) dd
), create_grp as (
SELECT r.grp, r.start_range, r.end_range, p.*
FROM prices p
JOIN ranges r
ON p.dt >= r.start_range
AND p.dt < r.end_range
-- WHERE need to be moved out of here, which has no indexes as being temporary
-- WHERE instrument='EURGBP'
), minmax as (
SELECT row_number() over (partition by grp
order by dt asc) as rn1,
row_number() over (partition by grp
order by dt desc) as rn2,
create_grp.*
FROM create_grp
-- Here WHERE goes! It does use index here for some reason. And the query flies.
WHERE instrument='EURGBP'
)
SELECT *,
CASE WHEN rn1 = 1 and rn2 = 1 THEN 'first and last'
WHEN rn1 = 1 THEN 'first'
WHEN rn2 = 1 THEN 'last'
END as row_position
FROM minmax
WHERE
1 IN (rn1, rn2)
ORDER BY dt
;
This query (for a 1-minute range) runs in 84 ms; the old one never finished (I gave up after 10 minutes). The old query took 10 seconds to run for a range of 1 second.
Thanks to everyone for help!
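A side note, not part of the thread: for a query that filters on instrument and ranges over dt, a btree index with instrument as the leading column matches the access pattern more directly than the existing (dt, instrument) indexes, in case adding an index is an option.
CREATE INDEX IF NOT EXISTS idx_instrument_dt
    ON public.prices (instrument, dt);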