Related
i have this Postgres query where i left join a couple of tables. This query runs for hours and causes issues. When I run explain analyse I see that the most time is spent in one of the left joins, for which optimiser selects Right Hash Join. When I use inner join instead and run explain analyse, optimiser selects a different plan and query finishes in minutes.
I have to use left join because with inner join some data will be excluded.
How should i rewrite the query to avoid this hash right join?
Many thanks in advance!
Links to query plans are attached above. I am using PostgreSQL 12.11 on x86_64-pc-linux-gnu, compiled by Debian clang version 12.0.1, 64-bit
WITH memberships AS (
SELECT customer_sk
, membership_sk
, membership_state
, membership_b2b_type
, membership_sml_type
, membership_start_date
, membership_end_date
, membership_pause_from
, membership_pause_to
, covid_pause_start_date
, covid_pause_end_date
, city_sk AS membership_city_region_sk
, sport_persona_current
, membership_cancellation_reason
, membership_sequence_nr_reverse
, company_sk
, company_name
FROM dwh.fact_membership
WHERE membership_is_urban_sports IS TRUE
),
-- Data preparation
request_cancellation AS (
SELECT membership_sk,
requested_cancellation_last_date
FROM staging.request_cancellation
),
blacklisted_emails AS (
SELECT customer_sk, email, 'blacklisted' AS blacklisted
FROM dwh_userdata.blacklist_emails
),
nonanon_customer AS (
SELECT id
, first_name
, last_name
, email
FROM dwh_userdata.customer
),
nonanon_customer_address_prep AS (
SELECT customer_id
, city
, state
, country
, zip
, row_number() over (partition by customer_id order by created_at desc) as row_number
FROM dwh_userdata.customer_address
),
nonanon_customer_address AS (
SELECT *
FROM nonanon_customer_address_prep
WHERE row_number = 1
),
favorite_sport_category_prep_1 AS (
SELECT membership_sk
, service_top_category_name
, count(DISTINCT booking_sk) as cnt_booking
FROM dwh.report_venue_visitors
WHERE booking_is_valid
GROUP BY 1, 2
),
favorite_sport_category_prep_2 AS (
SELECT membership_sk
, service_top_category_name
, cnt_booking
, row_number()
over (partition by membership_sk order by cnt_booking DESC,service_top_category_name ) AS row_number
FROM favorite_sport_category_prep_1
),
favorite_sport_category AS (
SELECT membership_sk
, service_top_category_name AS favourite_sport_category
, cnt_booking
FROM favorite_sport_category_prep_2
WHERE row_number = 1
),
free_trial AS (
select distinct membership_sk
, customer_sk
, trial_status AS free_trial_status
, trial AS free_trial_length
, trial_start_date AS free_trial_start
, trial_end_date AS free_trial_end
FROM dwh.report_memberships
WHERE trial_status IS NOT NULL
and trial_start_date >= '2020-06-23'
)
-- #### OUTOPUT TABLE
SELECT c.customer_sk AS named_user
, CASE WHEN c.gender IN ('M', 'F') THEN c.gender ELSE NULL END AS gender
, nc.first_name
, nc.last_name
, customer_language
, anss.state AS newsletter_status
, dl.city_name AS membership_city_region
, dl.country_code AS membership_country_code
, dl.country_name AS membership_country_name
, dl.admin1 AS membership_administrative_state
, m.membership_sk
, m.membership_state
, m.membership_b2b_type
, m.company_sk
, m.company_name
, m.membership_sml_type
, CASE
WHEN m.membership_start_date IS NOT NULL
THEN CONCAT(TO_CHAR(m.membership_start_date, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS membership_start_date
, CASE
WHEN m.membership_end_date IS NOT NULL THEN CONCAT(TO_CHAR(m.membership_end_date, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS membership_end_date
, ft.free_trial_status
, ft.free_trial_length
, CASE
WHEN ft.free_trial_start IS NOT NULL THEN CONCAT(TO_CHAR(ft.free_trial_start, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS free_trial_start
, CASE
WHEN ft.free_trial_end IS NOT NULL THEN CONCAT(TO_CHAR(ft.free_trial_end, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS free_trial_end
, CASE
WHEN m.membership_pause_from IS NOT NULL
THEN CONCAT(TO_CHAR(m.membership_pause_from, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS membership_pause_from
, CASE
WHEN m.membership_pause_to IS NOT NULL THEN CONCAT(TO_CHAR(m.membership_pause_to, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS membership_pause_to
, CASE
WHEN m.covid_pause_start_date IS NOT NULL THEN CONCAT(TO_CHAR(m.covid_pause_start_date, 'YYYY-MM-DD'),
'T00:00:00')
ELSE NULL END AS covid_pause_start_date
, CASE
WHEN m.covid_pause_end_date IS NOT NULL
THEN CONCAT(TO_CHAR(m.covid_pause_end_date, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS covid_pause_end_date
, CASE
WHEN rc.requested_cancellation_last_date IS NOT NULL THEN CONCAT(
TO_CHAR(rc.requested_cancellation_last_date, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS requested_cancellation_last_date
, membership_cancellation_reason
, be.blacklisted AS blacklist_email
, fsc.favourite_sport_category AS fav_sports_category
, m.sport_persona_current
, ambd.membership_months_active
, ambd.membership_months_total
, ambd.is_gm1_positive
, ambd.cnt_bookings_total
, ambd.cnt_bookings_last_30_days_total
, ambd.cnt_bookings_last_30_days_onsite
, ambd.cnt_bookings_onsite
, ambd.cnt_bookings_online
, ambd.cnt_bookings_last_30_days_online
, CASE
WHEN ambd.latest_booking_date IS NOT NULL
THEN CONCAT(TO_CHAR(ambd.latest_booking_date, 'YYYY-MM-DD'), 'T00:00:00')
ELSE NULL END AS latest_booking_date
, ambd.avg_bookings_active_month
, ambd.last_checkin_type
, ambd.fav_sports_category_onsite
, ambd.fav_sports_category_online
, ambd.fav_studio_last_30_days
, ambd.fav_studio_group_website
FROM dwh.dim_customer c
LEFT JOIN nonanon_customer nc
ON nc.id = c.customer_sk
LEFT JOIN nonanon_customer_address nca
ON nca.customer_id = customer_sk
LEFT JOIN memberships m
ON c.customer_sk = m.customer_sk
AND membership_sequence_nr_reverse = 1
LEFT JOIN request_cancellation rc
ON m.membership_sk = rc.membership_sk
LEFT JOIN dwh.dim_location dl
ON m.membership_city_region_sk = dl.city_sk
LEFT JOIN blacklisted_emails be
ON be.email = nc.email
LEFT JOIN favorite_sport_category fsc
ON fsc.membership_sk = m.membership_sk
LEFT JOIN staging.airship_newsletter_subscription_status anss
ON anss.customer_id = c.customer_sk
LEFT JOIN free_trial ft
ON ft.customer_sk = m.customer_sk
LEFT JOIN staging.airship_membership_booking_details ambd
ON ambd.membership_sk = m.membership_sk
AND membership_sequence_nr_reverse = 1
WHERE be.blacklisted IS NULL
AND nc.email NOT LIKE '%delete%'
AND nc.email IS NOT NULL
AND ((m.membership_sk IS NULL AND anss.state = 'subscribed') OR membership_state IS NOT NULL)
Results of EXPLAIN ANALYSE:
Hash Left Join (cost=6667580.77..6764370.56 rows=3256 width=692) (actual time=4319030.909..4328353.358 rows=518825 loops=1)
Hash Cond: (fact_membership.customer_sk = ft.customer_sk)
-> Hash Left Join (cost=6663581.42..6759951.96 rows=3256 width=380) (actual time=4318059.369..4324841.032 rows=518825 loops=1)
Hash Cond: (fact_membership.membership_sk = ambd.membership_sk)
Join Filter: (fact_membership.membership_sequence_nr_reverse = 1)
-> Hash Left Join (cost=6655261.78..6748793.03 rows=3256 width=242) (actual time=4317733.942..4323056.862 rows=518825 loops=1)
Hash Cond: (c.customer_sk = anss.customer_id)
Filter: (((fact_membership.membership_sk IS NULL) AND (anss.state = 'subscribed'::text)) OR (fact_membership.membership_state IS NOT NULL))
Rows Removed by Filter: 129098
-> Merge Left Join (cost=6642237.84..6733674.25 rows=3256 width=227) (actual time=4317378.943..4321020.832 rows=647923 loops=1)
Merge Cond: (fact_membership.membership_sk = favorite_sport_category_prep_2.membership_sk)
-> Sort (cost=167496.47..167504.61 rows=3256 width=218) (actual time=4146517.144..4147134.144 rows=647923 loops=1)
Sort Key: fact_membership.membership_sk
Sort Method: external merge Disk: 82352kB
-> Merge Left Join (cost=150681.68..167306.50 rows=3256 width=218) (actual time=4142397.925..4145027.017 rows=647923 loops=1)
Merge Cond: (c.customer_sk = nonanon_customer_address_prep.customer_id)
-> Sort (cost=59476.20..59484.34 rows=3256 width=218) (actual time=4139725.733..4140241.833 rows=647923 loops=1)
Sort Key: c.customer_sk
Sort Method: external merge Disk: 82344kB
-> Hash Right Join (cost=52983.04..59286.23 rows=3256 width=218) (actual time=33403.336..4135281.108 rows=647923 loops=1)
Hash Cond: (request_cancellation.membership_sk = fact_membership.membership_sk)
-> Seq Scan on request_cancellation (cost=0.00..5128.40 rows=308340 width=8) (actual time=1.160..228.691 rows=308340 loops=1)
-> Hash (cost=52942.34..52942.34 rows=3256 width=214) (actual time=30038.787..30048.670 rows=647923 loops=1)
Buckets: 65536 (originally 4096) Batches: 131072 (originally 1) Memory Usage: 10511kB
-> Gather (cost=1064.24..52942.34 rows=3256 width=214) (actual time=11.564..12621.194 rows=647923 loops=1)
Workers Planned: 2
Workers Launched: 2
-> Hash Left Join (cost=64.24..51616.74 rows=1357 width=214) (actual time=5.510..22450.906 rows=215974 loops=3)
Hash Cond: (fact_membership.city_sk = dl.city_sk)
-> Nested Loop Left Join (cost=59.79..51608.59 rows=1357 width=191) (actual time=5.239..22013.464 rows=215974 loops=3)
-> Nested Loop (cost=59.37..50428.72 rows=1357 width=60) (actual time=4.923..6958.191 rows=215974 loops=3)
-> Hash Left Join (cost=58.94..49440.62 rows=1357 width=55) (actual time=3.419..2000.407 rows=215976 loops=3)
Hash Cond: ((customer.email)::text = blacklist_emails.email)
Filter: (('blacklisted'::text) IS NULL)
Rows Removed by Filter: 122
-> Parallel Seq Scan on customer (cost=0.00..46660.28 rows=271334 width=46) (actual time=0.999..1668.668 rows=216091 loops=3)
Filter: ((email IS NOT NULL) AND ((email)::text !~~ '%delete%'::text))
Rows Removed by Filter: 3191
-> Hash (cost=34.53..34.53 rows=1953 width=54) (actual time=2.222..2.226 rows=1953 loops=3)
Buckets: 2048 Batches: 1 Memory Usage: 144kB
-> Seq Scan on blacklist_emails (cost=0.00..34.53 rows=1953 width=54) (actual time=0.263..1.207 rows=1953 loops=3)
-> Index Scan using customer_pk on dim_customer c (cost=0.42..0.73 rows=1 width=13) (actual time=0.020..0.020 rows=1 loops=647929)
Index Cond: (customer_sk = customer.id)
-> Index Scan using dwh_fact_membership_3b307128 on fact_membership (cost=0.42..0.86 rows=1 width=131) (actual time=0.066..0.067 rows=1 loops=647923)
Index Cond: (customer_sk = c.customer_sk)
Filter: ((membership_is_urban_sports IS TRUE) AND (membership_sequence_nr_reverse = 1))
Rows Removed by Filter: 0
-> Hash (cost=3.09..3.09 rows=109 width=35) (actual time=0.148..0.214 rows=109 loops=3)
Buckets: 1024 Batches: 1 Memory Usage: 16kB
-> Seq Scan on dim_location dl (cost=0.00..3.09 rows=109 width=35) (actual time=0.031..0.098 rows=109 loops=3)
-> Materialize (cost=91205.48..107807.50 rows=2553 width=4) (actual time=2668.900..3946.682 rows=470415 loops=1)
-> Subquery Scan on nonanon_customer_address_prep (cost=91205.48..107801.12 rows=2553 width=4) (actual time=2666.188..3647.463 rows=470415 loops=1)
Filter: (nonanon_customer_address_prep.row_number = 1)
Rows Removed by Filter: 40218
-> WindowAgg (cost=91205.48..101418.18 rows=510635 width=148) (actual time=2664.902..3526.361 rows=510633 loops=1)
-> Sort (cost=91205.48..92482.07 rows=510635 width=12) (actual time=2664.083..2833.676 rows=510634 loops=1)
Sort Key: customer_address.customer_id, customer_address.created_at DESC
Sort Method: external merge Disk: 13032kB
-> Seq Scan on customer_address (cost=0.00..34063.35 rows=510635 width=12) (actual time=4.596..1522.444 rows=510635 loops=1)
-> Materialize (cost=6474741.37..6566128.10 rows=13051 width=13) (actual time=170857.053..173215.019 rows=465703 loops=1)
-> Subquery Scan on favorite_sport_category_prep_2 (cost=6474741.37..6566095.47 rows=13051 width=13) (actual time=170855.731..173002.743 rows=465703 loops=1)
Filter: (favorite_sport_category_prep_2.row_number = 1)
Rows Removed by Filter: 1343535
-> WindowAgg (cost=6474741.37..6533469.01 rows=2610117 width=29) (actual time=170854.901..172755.674 rows=1809238 loops=1)
-> Sort (cost=6474741.37..6481266.67 rows=2610117 width=21) (actual time=170853.124..171205.257 rows=1809238 loops=1)
Sort Key: report_venue_visitors.membership_sk, (count(DISTINCT report_venue_visitors.booking_sk)) DESC, report_venue_visitors.service_top_category_name
Sort Method: external merge Disk: 63696kB
-> GroupAggregate (cost=5839877.44..6063400.07 rows=2610117 width=21) (actual time=154838.978..169250.761 rows=1809238 loops=1)
Group Key: report_venue_visitors.membership_sk, report_venue_visitors.service_top_category_name
-> Sort (cost=5839877.44..5889232.80 rows=19742146 width=21) (actual time=154835.761..158654.645 rows=19827987 loops=1)
Sort Key: report_venue_visitors.membership_sk, report_venue_visitors.service_top_category_name
Sort Method: external merge Disk: 694120kB
-> Seq Scan on report_venue_visitors (cost=0.00..2233036.56 rows=19742146 width=21) (actual time=1.868..117392.591 rows=19827987 loops=1)
Filter: booking_is_valid
Rows Removed by Filter: 6441170
-> Hash (cost=7199.42..7199.42 rows=317242 width=19) (actual time=352.386..352.386 rows=317242 loops=1)
Buckets: 65536 Batches: 8 Memory Usage: 2606kB
-> Seq Scan on airship_newsletter_subscription_status anss (cost=0.00..7199.42 rows=317242 width=19) (actual time=1.120..154.407 rows=317242 loops=1)
-> Hash (cost=4207.06..4207.06 rows=121006 width=150) (actual time=320.770..320.771 rows=121006 loops=1)
Buckets: 32768 Batches: 8 Memory Usage: 3111kB
-> Seq Scan on airship_membership_booking_details ambd (cost=0.00..4207.06 rows=121006 width=150) (actual time=1.446..107.525 rows=121006 loops=1)
-> Hash (cost=3993.93..3993.93 rows=434 width=26) (actual time=951.259..951.264 rows=26392 loops=1)
Buckets: 32768 (originally 1024) Batches: 1 (originally 1) Memory Usage: 1760kB
-> Subquery Scan on ft (cost=3981.99..3993.93 rows=434 width=26) (actual time=857.944..888.163 rows=26392 loops=1)
-> Unique (cost=3981.99..3989.59 rows=434 width=30) (actual time=857.288..878.098 rows=26392 loops=1)
-> Sort (cost=3981.99..3983.08 rows=434 width=30) (actual time=856.675..863.298 rows=26392 loops=1)
Sort Key: report_memberships.membership_sk, report_memberships.customer_sk, report_memberships.trial_status, report_memberships.trial, report_memberships.trial_start_date, report_memberships.trial_end_date
Sort Method: quicksort Memory: 2830kB
-> Bitmap Heap Scan on report_memberships (cost=2256.96..3962.98 rows=434 width=30) (actual time=102.229..817.152 rows=26392 loops=1)
Recheck Cond: ((trial_start_date >= '2020-06-23'::date) AND (trial_status IS NOT NULL))
Heap Blocks: exact=1383
-> BitmapAnd (cost=2256.96..2256.96 rows=434 width=0) (actual time=99.478..99.479 rows=0 loops=1)
-> Bitmap Index Scan on dwh_report_memberships_bc76fe51 (cost=0.00..578.02 rows=31145 width=0) (actual time=7.497..7.497 rows=26392 loops=1)
Index Cond: (trial_start_date >= '2020-06-23'::date)
-> Bitmap Index Scan on dwh_report_memberships_35525e76 (cost=0.00..1678.48 rows=90406 width=0) (actual time=91.704..91.704 rows=92029 loops=1)
Index Cond: (trial_status IS NOT NULL)
Planning Time: 7.850 ms
Execution Time: 4328700.854 ms
In the below execution plan, the index scan on five_lima (table has 900m records) is where it's spending most of its time. I want to bring down the runtime to few seconds, how do I optimize it? Tried forcing seq scan and ran vacuum/analyze but it is not helping.
As per explain analysis from depesz, the index scan on five_lima is spending 86% of time.
five_lima 2 43,600.875 ms 86.6 %
Index Only Scan Backward 1 21,936.780 ms 50.3 %
Index Scan 1 21,664.095 ms 49.7 %
https://explain.depesz.com/s/7lXg
GroupAggregate (cost=5236122.79..5238409.75 rows=19058 width=392) (actual time=50337.968..50338.284 rows=76 loops=1)
Group Key: ((((((three.papa)::text || 'sierra_tango'::text) || (quebec_three.mike_india)::text) || bravo_five((quebec_three.sierra_uniform)::text, 3, 'november_golf'::text)) || 'lima_charlie'::text)), quebec_three.mike_india, quebec_three.sierra_uniform
-> Sort (cost=5236122.79..5236170.44 rows=19058 width=120) (actual time=50337.880..50337.903 rows=773 loops=1)
Sort Key: ((((((three.papa)::text || 'sierra_tango'::text) || (quebec_three.mike_india)::text) || bravo_five((quebec_three.sierra_uniform)::text, 3, 'november_golf'::text)) || 'lima_charlie'::text)), quebec_three.mike_india, quebec_three.sierra_uniform
Sort Method: quicksort Memory: 142kB
-> Hash Left Join (cost=5221327.29..5234767.95 rows=19058 width=120) (actual time=49423.721..50337.319 rows=773 loops=1)
Hash Cond: (((quebec_three.mike_india)::bpchar = three.mike_india) AND (quebec_three.sierra_uniform = three.sierra_uniform))
-> GroupAggregate (cost=5221204.51..5233639.85 rows=19058 width=292) (actual time=49422.982..50336.121 rows=773 loops=1)
Group Key: quebec_three.mike_india, quebec_three.sierra_uniform, quebec_three.whiskey, quebec_three.tango, quebec_three.juliet_charlie, quebec_three.victor_papa, quebec_three.yankee, quebec_three.india_papa, quebec_three.victor_charlie, quebec_three.november_hotel, quebec_three.hotel_november
-> Sort (cost=5221204.51..5221680.96 rows=190580 width=228) (actual time=49408.728..49416.532 rows=250551 loops=1)
Sort Key: quebec_three.mike_india, quebec_three.sierra_uniform, quebec_three.whiskey, quebec_three.tango, quebec_three.juliet_charlie, quebec_three.victor_papa, quebec_three.yankee, quebec_three.india_papa, quebec_three.victor_charlie, quebec_three.november_hotel, quebec_three.hotel_november
Sort Method: quicksort Memory: 27472kB
-> Subquery Scan on quebec_three (cost=5191626.46..5204490.61 rows=190580 width=228) (actual time=49045.224..49167.610 rows=250551 loops=1)
-> Unique (cost=5191626.46..5198773.21 rows=190580 width=286) (actual time=49045.204..49136.969 rows=250551 loops=1)
-> Sort (cost=5191626.46..5192102.91 rows=190580 width=286) (actual time=49045.190..49071.536 rows=252496 loops=1)
Sort Key: mike_november1.sierra_uniform, mike_november1.charlie_six, mike_november1.foxtrot_india, (xray(mike_november1.delta_xray, 'zulu'::text)), mike_november1.whiskey, quebec_sierra.tango, quebec_sierra.juliet_charlie, golf.oscar_lima, (CASE WHEN ((("six_four"((five_hotel.tango)::text, 2))::integer = 5) AND (five_hotel.juliet_charlie <> november_november ('charlie_tango'::bpchar[]))) THEN 'oscar_romeo'::text ELSE NULL::text END), (CASE WHEN ((("six_four"((five_hotel.tango)::text, 2))::integer = 5) AND (five_hotel.juliet_charlie = ANY ('charlie_tango'::bpchar[]))) THEN 'romeo'::text ELSE NULL::text END), (CASE WHEN ((("six_four"((five_hotel.tango)::text, 2))::integer = 6) AND (five_hotel.juliet_charlie <> november_november ('charlie_tango'::bpchar[]))) THEN 'oscar_romeo'::text ELSE NULL::text END), (CASE WHEN ((("six_four"((five_hotel.tango)::text, 2))::integer = 6) AND (five_hotel.juliet_charlie = ANY ('charlie_tango'::bpchar[]))) THEN 'romeo'::text ELSE NULL::text END), (CASE WHEN (golf.oscar_lima five_romeo NOT NULL) THEN 'delta_foxtrot'::text ELSE 'oscar_romeo'::text END), (CASE WHEN (("six_four"((five_hotel.tango)::text, 2))::integer = 15) THEN 'oscar_romeo'::text ELSE NULL::text END)
Sort Method: quicksort Memory: 41652kB
-> Nested Loop Left Join (cost=661986.99..5174912.56 rows=190580 width=286) (actual time=1737.304..47625.922 rows=252496 loops=1)
-> Gather (cost=661986.29..3041816.11 rows=190580 width=79) (actual time=1733.755..1827.448 rows=252383 loops=1)
Workers Planned: 2
Workers Launched: 2
-> Parallel Hash Left Join (cost=660986.29..3021758.11 rows=79408 width=79) (actual time=1723.881..8094.375 rows=84128 loops=3)
Hash Cond: (((mike_november1.mike_india)::text = (seven_quebec.mike_india)::text) AND ((mike_november1.foxtrot_india)::text = (seven_quebec.foxtrot_india)::text) AND (mike_november1.sierra_uniform = seven_quebec.sierra_uniform))
-> Nested Loop Left Join (cost=420698.30..2780844.78 rows=79408 width=74) (actual time=1263.213..7579.180 rows=84128 loops=3)
-> Nested Loop Left Join (cost=420697.72..2359299.65 rows=79408 width=69) (actual time=1262.995..6114.930 rows=84128 loops=3)
-> Nested Loop Left Join (cost=420697.15..1943062.02 rows=79408 width=53) (actual time=1262.617..4846.387 rows=83834 loops=3)
-> Parallel Hash Left Join (cost=420691.40..1289489.61 rows=79408 width=53) (actual time=1262.432..3244.712 rows=83191 loops=3)
Hash Cond: (((mike_november1.mike_india)::text = (five_hotel.mike_india)::text) AND ((mike_november1.foxtrot_india)::text = (five_hotel.foxtrot_india)::text) AND (mike_november1.sierra_uniform = five_hotel.sierra_uniform))
-> Parallel Index Scan using lima_papa on six_echo juliet_xray_xray (cost=0.57..867904.42 rows=79408 width=45) (actual time=1.304..1922.996 rows=83190 loops=3)
Index Cond: ((delta_xray >= 'four_kilo'::timestamp without time zone) AND (delta_xray <= 'uniform'::timestamp without time zone) AND ((mike_india)::text = 'five_papa'::text))
Filter: (oscar_quebec = 'quebec_golf'::numeric)
Rows Removed by Filter: 115955
-> Parallel Hash (cost=404875.54..404875.54 rows=421741 width=32) (actual time=1259.567..1259.567 rows=93164 loops=3)
Buckets: 1048576 Batches: 1 Memory Usage: 27936kB
-> Parallel Bitmap Heap Scan on delta_echo five_hotel (cost=87668.95..404875.54 rows=421741 width=32) (actual time=947.771..1217.127 rows=93164 loops=3)
Recheck Cond: ((mike_india)::text = 'five_papa'::text)
Heap Blocks: exact=24664
-> Bitmap Index Scan on india_three (cost=0.00..87415.90 rows=1012179 width=0) (actual time=935.805..935.805 rows=466562 loops=1)
Index Cond: ((mike_india)::text = 'five_papa'::text)
-> Bitmap Heap Scan on two_bravo juliet_xray_delta (cost=5.76..8.20 rows=1 width=32) (actual time=0.018..0.018 rows=1 loops=249572)
Recheck Cond: (((charlie_six)::text = (mike_november1.charlie_six)::text) AND ((foxtrot_india)::text = (mike_november1.foxtrot_india)::text))
Filter: (((mike_india)::text = 'five_papa'::text) AND ((mike_india)::text = (mike_november1.mike_india)::text) AND (sierra_uniform = mike_november1.sierra_uniform))
Heap Blocks: exact=16
-> BitmapAnd (cost=5.76..5.76 rows=1 width=0) (actual time=0.016..0.016 rows=0 loops=249572)
-> Bitmap Index Scan on victor_three (cost=0.00..2.74 rows=5 width=0) (actual time=0.010..0.010 rows=1 loops=249572)
Index Cond: ((charlie_six)::text = (mike_november1.charlie_six)::text)
-> Bitmap Index Scan on two_delta (cost=0.00..2.77 rows=16 width=0) (actual time=0.010..0.010 rows=1 loops=128367)
Index Cond: ((foxtrot_india)::text = (mike_november1.foxtrot_india)::text)
-> Index Scan using hotel_oscar on charlie_yankee golf (cost=0.57..5.21 rows=1 width=40) (actual time=0.015..0.015 rows=1 loops=251501)
Index Cond: ((foxtrot_india)::text = (mike_november1.foxtrot_india)::text)
Filter: (((mike_india)::text = 'five_papa'::text) AND ((mike_india)::text = (mike_november1.mike_india)::text) AND (sierra_uniform = mike_november1.sierra_uniform))
Rows Removed by Filter: 0
-> Index Scan using seven_victor on five_charlie bravo_oscar (cost=0.57..5.28 rows=1 width=23) (actual time=0.017..0.017 rows=1 loops=252383)
Index Cond: ((charlie_six)::text = (mike_november1.charlie_six)::text)
Filter: (((mike_india)::text = 'five_papa'::text) AND ((mike_india)::text = (mike_november1.mike_india)::text) AND (sierra_uniform = mike_november1.sierra_uniform))
Rows Removed by Filter: 0
-> Parallel Hash (cost=232855.56..232855.56 rows=198198 width=29) (actual time=459.697..459.697 rows=66317 loops=3)
Buckets: 524288 Batches: 1 Memory Usage: 16608kB
-> Parallel Bitmap Heap Scan on victor_four seven_quebec (cost=20722.12..232855.56 rows=198198 width=29) (actual time=111.386..434.496 rows=66317 loops=3)
Recheck Cond: ((mike_india)::text = 'five_papa'::text)
Heap Blocks: exact=27484
-> Bitmap Index Scan on four_charlie (cost=0.00..20603.20 rows=475676 width=0) (actual time=107.013..107.013 rows=227154 loops=1)
Index Cond: ((mike_india)::text = 'five_papa'::text)
-> Index Scan using hotel_whiskey on five_lima quebec_sierra (cost=0.70..11.08 rows=1 width=33) (actual time=0.179..0.180 rows=1 loops=252383)
Index Cond: (((foxtrot_india)::text = (mike_november1.foxtrot_india)::text) AND ((mike_india)::text = (mike_november1.mike_india)::text) AND ((mike_india)::text = 'five_papa'::text) AND (sierra_uniform = mike_november1.sierra_uniform))
Filter: (bravo_lima = (delta_four 2))
Rows Removed by Filter: 6
SubPlan
-> Result (cost=5.55..5.58 rows=1 width=8) (actual time=0.013..0.013 rows=1 loops=1828065)
InitPlan
-> Limit (cost=0.70..5.55 rows=1 width=8) (actual time=0.012..0.012 rows=1 loops=1828065)
-> Index Only Scan Backward using hotel_whiskey on five_lima foxtrot_four (cost=0.70..5.55 rows=1 width=8) (actual time=0.012..0.012 rows=1 loops=1828065)
Index Cond: ((foxtrot_india = (quebec_sierra.foxtrot_india)::text) AND (mike_india = (quebec_sierra.mike_india)::text) AND (sierra_uniform = quebec_sierra.sierra_uniform) AND (oscar_quebec = quebec_sierra.oscar_quebec) AND (bravo_lima five_romeo NOT NULL))
Heap Fetches: 18062
-> Hash (cost=70.67..70.67 rows=1489 width=20) (actual time=0.644..0.644 rows=1489 loops=1)
Buckets: 2048 Batches: 1 Memory Usage: 95kB
-> Seq Scan on bravo_zulu three (cost=0.00..70.67 rows=1489 width=20) (actual time=0.048..0.416 rows=1489 loops=1)
Planning time: 24.541 ms
Execution time: 50356.651 ms
Here is the query -
explain analyze select quebec_three.mike_india,quebec_three.sierra_uniform,papa ||'('||quebec_three.mike_india||bravo_five(quebec_three.sierra_uniform::text,3,'0')||')' as papa,
sum( dms_appl_pending + dms_appl_done + no_of_fee_pending + veri_appl_pending )no_of_appl_done,
sum(veri_appl_done)veri_appl_done,sum(veri_appl_rejected)veri_appl_rejected,
sum(veri_appl_pending)veri_appl_pending,sum(app_appl_done)appr_appl_done,sum(app_appl_rejected)appr_appl_rejected,
sum(app_appl_pending)appr_appl_pending
,sum(no_of_fee_pending)no_of_fee_pending,sum(no_of_fee_done)no_of_fee_done
,sum(dms_appl_pending)dms_appl_pending,sum(dms_appl_done)dms_appl_done
from(
select quebec_three.mike_india,quebec_three.sierra_uniform,
case when right(quebec_three.tango::text,2)::int=05 and quebec_three.juliet_charlie ='C' then count(distinct quebec_three.foxtrot_india) else 0 end veri_appl_done,
case when (victor_papa='R' ) then count(quebec_three.foxtrot_india) else 0 end as veri_appl_rejected,
case when (yankee='P' ) then count(distinct quebec_three.foxtrot_india) else 0 end as veri_appl_pending,
case when whiskey='A' and (right(quebec_three.tango::text,2)::int=06 and quebec_three.juliet_charlie ='C') then count(distinct quebec_three.foxtrot_india) else 0 end app_appl_done,
case when (india_papa='R') then count(distinct quebec_three.foxtrot_india) else 0 end as app_appl_rejected,
case when (victor_charlie='P') then count(distinct quebec_three.foxtrot_india) else 0 end as app_appl_pending,
case when november_hotel='P' then count(distinct quebec_three.foxtrot_india) else 0 end as no_of_fee_pending,
case when november_hotel='A' then count(distinct quebec_three.foxtrot_india) else 0 end as no_of_fee_done,
case when (hotel_november='P' ) then count(distinct quebec_three.foxtrot_india) else 0 end as dms_appl_pending,
case when right(quebec_three.tango::text,2)::int=15 and quebec_three.juliet_charlie ='C' then count(distinct quebec_three.foxtrot_india) else 0 end dms_appl_done
from(
select distinct quebec_three.mike_india,quebec_three.sierra_uniform ,quebec_three.charlie_six,quebec_three.foxtrot_india,xray(quebec_three.delta_xray,'dd-Mon-yyyy')delta_xray,quebec_three.whiskey,quebec_sierra.tango,quebec_sierra.juliet_charlie,golf.oscar_lima,
case when right(five_hotel.tango::text,2)::int=05 and five_hotel.juliet_charlie not in ('M','I') then oscar_romeo else null end yankee,
case when right(five_hotel.tango::text,2)::int=05 and five_hotel.juliet_charlie in ('M','I') then romeo else null end victor_papa,
case when right(five_hotel.tango::text,2)::int=06 and five_hotel.juliet_charlie not in ('M','I') then oscar_romeo else null end victor_charlie,
case when right(five_hotel.tango::text,2)::int=06 and five_hotel.juliet_charlie in ('M','I') then romeo else null end india_papa,
case when golf.oscar_lima is not null then delta_foxtrot else oscar_romeo end november_hotel,
case when right(five_hotel.tango::text,2)::int=15 then oscar_romeo else null end hotel_november
from six_echo quebec_three
left join delta_echo five_hotel on five_hotel.foxtrot_india=quebec_three.foxtrot_india and five_hotel.mike_india=quebec_three.mike_india and five_hotel.sierra_uniform=quebec_three.sierra_uniform
left join five_lima quebec_sierra on quebec_sierra.foxtrot_india=quebec_three.foxtrot_india and quebec_sierra.mike_india=quebec_three.mike_india and quebec_sierra.sierra_uniform=quebec_three.sierra_uniform
and quebec_sierra.bravo_lima =(select max(bravo_lima) from five_lima foxtrot_four where foxtrot_four.foxtrot_india=quebec_sierra.foxtrot_india
and foxtrot_four.mike_india=quebec_sierra.mike_india and foxtrot_four.sierra_uniform=quebec_sierra.sierra_uniform and foxtrot_four.oscar_quebec=quebec_sierra.oscar_quebec)
left join hsrp.vt_hsrp h on h.foxtrot_india=quebec_three.foxtrot_india and h.charlie_six=quebec_three.charlie_six and h.mike_india=quebec_three.mike_india and h.sierra_uniform=quebec_three.sierra_uniform
left join two_bravo juliet_xray_delta on juliet_xray_delta.foxtrot_india=quebec_three.foxtrot_india and juliet_xray_delta.charlie_six=quebec_three.charlie_six and juliet_xray_delta.mike_india=quebec_three.mike_india and juliet_xray_delta.sierra_uniform=quebec_three.sierra_uniform
left join charlie_yankee golf on golf.foxtrot_india=quebec_three.foxtrot_india and golf.mike_india=quebec_three.mike_india and golf.sierra_uniform=quebec_three.sierra_uniform
left join five_charlie bravo_oscar on bravo_oscar.charlie_six=quebec_three.charlie_six and bravo_oscar.mike_india=quebec_three.mike_india and bravo_oscar.sierra_uniform=quebec_three.sierra_uniform
left join victor_four seven_quebec on seven_quebec.foxtrot_india=quebec_three.foxtrot_india and seven_quebec.mike_india=quebec_three.mike_india and seven_quebec.sierra_uniform=quebec_three.sierra_uniform
left join vm_vh_class vh on vh.vh_class=COALESCE(bravo_oscar.vh_class,seven_quebec.vh_class)
where quebec_three.mike_india='UP' and case when 0=0 then true else quebec_three.sierra_uniform=0 end and quebec_three.delta_xray between '2021-03-01 00:00:00.000000 +05:30' and ('2021-04-02 23:59:59.999000 +05:30'::date + interval '1 day' - interval '1 sec')
and quebec_three.oscar_quebec in (123)
)quebec_three
group by 1,2,whiskey,quebec_three.tango,quebec_three.juliet_charlie,victor_papa,yankee,india_papa,victor_charlie,november_hotel,hotel_november
)quebec_three
left join bravo_zulu three on three.mike_india=quebec_three.mike_india and three.sierra_uniform=quebec_three.sierra_uniform
group by 1,2,3 order by 3;
Adding orignal partial query/indexes/plan -
Partial query:
.....
vow4(# left join vha_status c on c.appl_no=a.appl_no and c.state_cd=a.state_cd and c.off_cd=a.off_cd
vow4(# and c.moved_on =(select max(moved_on) from vha_status c1 where c1.appl_no=c.appl_no
vow4(# and c1.state_cd=c.state_cd and c1.off_cd=c.off_cd and c1.pur_cd=c.pur_cd)
.....
vow4(# where a.state_cd='UP' and case when 0=0 then true else a.off_cd=0 end and a.appl_dt between '2021-03-01 00:00:00.000000 +05:30' and ('2021-04-02 23:59:59.999000 +05:30'::date + interval '1 day' - interval '1 sec')
vow4(# and a.pur_cd in (123)
Indexes:
"vha_status_pkey" PRIMARY KEY, btree (appl_no, pur_cd, file_movement_slno)
"idx_state_cd_vha_status" btree (state_cd)
"va_status_moved_on_indx" btree (moved_on)
"vha_status_appl_no_state_cd_off_cd_pur_cd_moved_on_idx" btree (appl_no, state_cd, off_cd, pur_cd, moved_on)
"vha_status_movedon_state_cd_off_cd_idx" btree (moved_on, state_cd, off_cd)
Partial Plan:
-> Index Scan using vha_status_appl_no_state_cd_off_cd_pur_cd_moved_on_idx on vha_status c (cost=0.70..11.08 rows=1 width=33) (actual time=0.179..0.180 rows=1 loops=252383)
Index Cond: (((appl_no)::text = (a_1.appl_no)::text) AND ((state_cd)::text = (a_1.state_cd)::text) AND ((state_cd)::text = 'UP'::text) AND (off_cd = a_1.off_cd))
Filter: (moved_on = (SubPlan 2))
Rows Removed by Filter: 6
SubPlan 2
-> Result (cost=5.55..5.58 rows=1 width=8) (actual time=0.013..0.013 rows=1 loops=1828065)
InitPlan 1 (returns $4)
-> Limit (cost=0.70..5.55 rows=1 width=8) (actual time=0.012..0.012 rows=1 loops=1828065)
-> Index Only Scan Backward using vha_status_appl_no_state_cd_off_cd_pur_cd_moved_on_idx on vha_status c1 (cost=0.70..5.55 rows=1 width=8) (actual time=0.012..0.012 rows=1 loops=1828065)
Index Cond: ((appl_no = (c.appl_no)::text) AND (state_cd = (c.state_cd)::text) AND (off_cd = c.off_cd) AND (pur_cd = c.pur_cd) AND (moved_on IS NOT NULL))
Heap Fetches: 18062
This looks like primarily that there is a nested loop on five_lima (900 million) rows are not the problem , no. of times your are querying is looks like.
Most probable solution for this would be either don't do that much query if possible, can discuss on exact solution , or else try to first limit the record by filtering and then query
In general querying that many times is not preferable.
We have a PostgreSQL query with multiple tables and left outer joins, and is running very slow.
It is completing in 25-40s, so we want to optimize it more and want to decrease run time to 1-2 sec.
select a.campaignid, b.campaign_name , case when b.message_type_id = 1 then 'Promotional'
when b.message_type_id = 2 then 'Transactional'
else 'Other' end as Campaign_type, c.username , aggregator_type,
e.cli_manager_id as senderID,
b.schedule_time as campaign_schedule_date,
count(a.mobile) as campaign_submitted_count, count(case when a.status = 'DELIVRD' then mobile end) as Delivered,
count(a.mobile) as Total_count,
count(case when a.status = 'FAILED' then mobile end) as failure_count,
count(case when a.status = 'DND_check_failed' then mobile end) as DND_count,
sum(credits_used) as credits_used
from tbl_cdr_test a left outer join tbl_campaign b
on a.campaignid = b.tbl_campaign_id left outer join tbl_users_master c
on b.user_id =c.user_master_id
left outer join tbl_cli_manager e on b.user_id = e.user_id
left outer join tbl_user_channel f on b.user_id =f.user_id
left outer join tbl_user_configurations g on b.user_id = g.user_id
where date(insert_datetime) between '2020-05-23' and '2020-06-23'
and c.username = coalesce(null, c.username)
and g.msg_cat_id = coalesce(null, g.msg_cat_id)
and a.campaignid = coalesce(null, a.campaignid)
and e.cli_manager_id = coalesce(null, e.cli_manager_id)
group by a.campaignid, b.campaign_name , b.message_type_id,c.username , b.schedule_time,
aggregator_type, e.cli_manager_id;
We have create appropriate indexes as well, but still it is taking time.
Moreover there is "external merge disk" sorting method in execution plan whereas to resolve same I have set work_mem = 50MB. Still it is using disk sort instead of memory.Please suggest
Below is execution plan:
GroupAggregate (cost=4872.01..4872.07 rows=1 width=543) (actual time=20564.239..27415.264 rows=8 loops=1)
Group Key: a.campaignid, b.campaign_name, b.message_type_id, c.username, b.schedule_time, f.aggregator_type, e.cli_manager_id
-> Sort (cost=4872.01..4872.01 rows=1 width=483) (actual time=19627.424..25020.702 rows=3206196 loops=1)
Sort Key: a.campaignid, b.campaign_name, b.message_type_id, c.username, b.schedule_time, f.aggregator_type, e.cli_manager_id
Sort Method: external merge Disk: 281456kB
-> Nested Loop (cost=22.03..4872.00 rows=1 width=483) (actual time=99.704..12086.244 rows=3206196 loops=1)
Join Filter: (b.user_id = g.user_id)
-> Nested Loop Left Join (cost=21.89..4871.79 rows=1 width=495) (actual time=99.688..4518.533 rows=3206196 loops=1)
-> Nested Loop (cost=21.75..4871.54 rows=1 width=77) (actual time=99.664..935.689 rows=356244 loops=1)
-> Nested Loop (cost=21.33..31.57 rows=1 width=65) (actual time=0.295..2.376 rows=588 loops=1)
Join Filter: (b.user_id = c.user_master_id)
-> Merge Join (cost=21.18..30.22 rows=6 width=46) (actual time=0.246..0.663 rows=588 loops=1)
Merge Cond: (e.user_id = b.user_id)
-> Index Scan using "idx_FK_7hc6agd_tbl_cli_ma_1592228110_32" on tbl_cli_manager e (cost=0.42..6281.84 rows=762 width=12) (actual time=0.014..0.035 rows=5 loops=1)
Filter: (cli_manager_id = COALESCE(cli_manager_id))
-> Sort (cost=20.76..21.13 rows=147 width=34) (actual time=0.225..0.333 rows=585 loops=1)
Sort Key: b.user_id
Sort Method: quicksort Memory: 36kB
-> Seq Scan on tbl_campaign b (cost=0.00..15.47 rows=147 width=34) (actual time=0.013..0.154 rows=147 loops=1)
-> Index Scan using ind_user_master_c_user on tbl_users_master c (cost=0.14..0.21 rows=1 width=19) (actual time=0.002..0.002 rows=1 loops=588)
Index Cond: (user_master_id = e.user_id)
Filter: ((username)::text = (COALESCE(username))::text)
-> Append (cost=0.42..4839.94 rows=3 width=20) (actual time=0.546..1.426 rows=606 loops=588)
-> Index Scan using testh11_campaignid_idx on testh11 a (cost=0.42..4253.99 rows=2 width=20) (actual time=0.543..0.543 rows=0 loops=588)
Index Cond: (campaignid = b.tbl_campaign_id)
Filter: ((campaignid = COALESCE(campaignid)) AND (date(insert_datetime) >= '2020-05-23'::date) AND (date(insert_datetime) <= '2020-06-23'::date))
Rows Removed by Filter: 656
-> Index Scan using testh21_campaignid_idx on testh21 a_1 (cost=0.42..585.94 rows=1 width=20) (actual time=0.002..0.796 rows=606 loops=588)
Index Cond: (campaignid = b.tbl_campaign_id)
Filter: ((campaignid = COALESCE(campaignid)) AND (date(insert_datetime) >= '2020-05-23'::date) AND (date(insert_datetime) <= '2020-06-23'::date))
-> Index Scan using idx_user_id_tbl_user_c_1592227657_19 on tbl_user_channel f (cost=0.14..0.24 rows=1 width=422) (actual time=0.002..0.004 rows=9 loops=356244)
Index Cond: (user_id = b.user_id)
-> Index Scan using "idx_FK_6958qvy_tbl_user_c_1592228774_151" on tbl_user_configurations g (cost=0.14..0.20 rows=1 width=8) (actual time=0.002..0.002 rows=1 loops=3206196)
Index Cond: (user_id = e.user_id)
Filter: (msg_cat_id = COALESCE(msg_cat_id))
Planning Time: 6.561 ms
Execution Time: 27477.860 ms
There is a gross underestimate of the result rows for the index scan on testh21. The consequence is that PostgreSQL chooses nested loop joins, which is where your time is spent.
Try the following:
New statistics:
ANALYZE testh21;
If that improves the estimate, make sure that autoanalyze treats the table more often.
Prevent bad estimates caused by correlation:
CREATE STATISTICS testh21_stat (dependencies)
ON campaignid, insert_datetime FROM testh21;
ANALYZE testh21;
Perhaps there is a correlation between the columns, and that improves the estimate.
More detailed statistics: try raising default_statistics_target before ANALYZE of the table.
If you cannot improve the estimates, take the hammer and set enable_nestloop = off for the duration of the query.
I have a query where the Postgres is performing a Hash join with sequence scan instead of an Index join with Nested loop, when I use an OR condition. This is causing the query to take 2 seconds instead of completing in < 100ms. I have run VACUUM ANALYZE and have rebuilt the index on the PATIENTCHARTNOTE table (which is about 5GB) but its still using hash join. Do you have any suggestions on how I can improve this?
explain analyze
SELECT Count (_pcn.id) AS total_open_note
FROM patientchartnote _pcn
INNER JOIN appointment _appt
ON _appt.id = _pcn.appointment_id
INNER JOIN patient _pt
ON _pt.id = _appt.patient_id
LEFT OUTER JOIN person _ps
ON _ps.id = _pt.appuser_id
WHERE _pcn.active = true
AND _pt.active = true
AND _appt.datecomplete IS NULL
AND _pcn.title IS NOT NULL
AND _pcn.title <> ''
AND ( _pt.assigned_to_user_id = '136964'
OR _pcn.createdby_id = '136964'
);
Aggregate (cost=237655.59..237655.60 rows=1 width=8) (actual time=1602.069..1602.069 rows=1 loops=1)
-> Hash Join (cost=83095.43..237645.30 rows=4117 width=4) (actual time=944.850..1602.014 rows=241 loops=1)
Hash Cond: (_appt.patient_id = _pt.id)
Join Filter: ((_pt.assigned_to_user_id = 136964) OR (_pcn.createdby_id = 136964))
Rows Removed by Join Filter: 94036
-> Hash Join (cost=46650.68..182243.64 rows=556034 width=12) (actual time=415.862..1163.812 rows=94457 loops=1)
Hash Cond: (_pcn.appointment_id = _appt.id)
-> Seq Scan on patientchartnote _pcn (cost=0.00..112794.20 rows=1073978 width=12) (actual time=0.016..423.262 rows=1
073618 loops=1)
Filter: (active AND (title IS NOT NULL) AND ((title)::text <> ''::text))
Rows Removed by Filter: 22488
-> Hash (cost=35223.61..35223.61 rows=696486 width=8) (actual time=414.749..414.749 rows=692839 loops=1)
Buckets: 131072 Batches: 16 Memory Usage: 2732kB
-> Seq Scan on appointment _appt (cost=0.00..35223.61 rows=696486 width=8) (actual time=0.010..271.208 rows=69
2839 loops=1)
Filter: (datecomplete IS NULL)
Rows Removed by Filter: 652426
-> Hash (cost=24698.57..24698.57 rows=675694 width=12) (actual time=351.566..351.566 rows=674929 loops=1)
Buckets: 131072 Batches: 16 Memory Usage: 2737kB
-> Seq Scan on patient _pt (cost=0.00..24698.57 rows=675694 width=12) (actual time=0.013..197.268 rows=674929 loops=
1)
Filter: active
Rows Removed by Filter: 17426
Planning time: 1.533 ms
Execution time: 1602.715 ms
When I replace "OR _pcn.createdby_id = '136964'" with "AND _pcn.createdby_id = '136964'", Postgres performs an index scan
Aggregate (cost=29167.56..29167.57 rows=1 width=8) (actual time=937.743..937.743 rows=1 loops=1)
-> Nested Loop (cost=1.28..29167.55 rows=7 width=4) (actual time=19.136..937.669 rows=37 loops=1)
-> Nested Loop (cost=0.85..27393.03 rows=1654 width=4) (actual time=2.154..910.250 rows=1649 loops=1)
-> Index Scan using patient_activeassigned_idx on patient _pt (cost=0.42..3075.00 rows=1644 width=8) (actual time=1.
599..11.820 rows=1627 loops=1)
Index Cond: ((active = true) AND (assigned_to_user_id = 136964))
Filter: active
-> Index Scan using appointment_datepatient_idx on appointment _appt (cost=0.43..14.75 rows=4 width=8) (actual time=
0.543..0.550 rows=1 loops=1627)
Index Cond: ((patient_id = _pt.id) AND (datecomplete IS NULL))
-> Index Scan using patientchartnote_activeappointment_idx on patientchartnote _pcn (cost=0.43..1.06 rows=1 width=8) (actual time=0.014..0.014 rows=0 loops=1649)
Index Cond: ((active = true) AND (createdby_id = 136964) AND (appointment_id = _appt.id) AND (title IS NOT NULL))
Filter: (active AND ((title)::text <> ''::text))
Planning time: 1.489 ms
Execution time: 937.910 ms
(13 rows)
Using OR in SQL queries usually results in bad performance.
That is because – different from AND – it does not restrict, but extend the number of rows in the query result. With AND, you can use an index scan for one part of the condition and further restrict the result set with a filter on the second condition. That is not possible with OR.
So PostgreSQL does the only thing left: it computes the whole join and then filters out all rows that do not match the condition. Of course that is very inefficient when you are joining three tables (I didn't count the outer join).
Assuming that all columns called id are primary keys, you could rewrite the query as follows:
SELECT count(*) FROM
(SELECT _pcn.id
FROM patientchartnote _pcn
INNER JOIN appointment _appt
ON _appt.id = _pcn.appointment_id
INNER JOIN patient _pt
ON _pt.id = _appt.patient_id
LEFT OUTER JOIN person _ps
ON _ps.id = _pt.appuser_id
WHERE _pcn.active = true
AND _pt.active = true
AND _appt.datecomplete IS NULL
AND _pcn.title IS NOT NULL
AND _pcn.title <> ''
AND _pt.assigned_to_user_id = '136964'
UNION
SELECT _pcn.id
FROM patientchartnote _pcn
INNER JOIN appointment _appt
ON _appt.id = _pcn.appointment_id
INNER JOIN patient _pt
ON _pt.id = _appt.patient_id
LEFT OUTER JOIN person _ps
ON _ps.id = _pt.appuser_id
WHERE _pcn.active = true
AND _pt.active = true
AND _appt.datecomplete IS NULL
AND _pcn.title IS NOT NULL
AND _pcn.title <> ''
AND _pcn.createdby_id = '136964'
) q;
Even though this is running the query twice, indexes can be used to filter out all but a few rows early on, so this query should perform better.
I did a tree structure using id and parent_id in the same table. For query's I'm using CTE provide by PostgreSQL, but it's spend so much time to do the joins with recursive results. For example, by the time I have 100 records on sadt_lot table, and this query is spend 8 seconds to return the results. Someone have a better idea to do it?
WITH RECURSIVE downlots as (
SELECT s1.sadt_lot_id, 0 AS level, s1.sadt_lot_id as root_id
FROM sadt_lot s1
WHERE s1.parent_lot_id IS NULL
UNION
SELECT s2.sadt_lot_id, d.level + 1, d.sadt_lot_id as root_id
FROM sadt_lot s2
INNER JOIN downlots d ON d.sadt_lot_id = s2.parent_lot_id
)
SELECT
"s"."sadt_lot_id",
"s"."name", concat(lpad(s.sadt_lot_id::TEXT, 3, '0'), '-', to_char(to_timestamp(s.created_at), 'DDMMYY')) sadt_lot_code,
"s"."created_at" AS "created_at",
"s"."version" AS "version", "s"."sadt_lot_status_id",
SUM(procedure_performed.amount_requested) procedures_total,
SUM(procedure_performed.total_value) procedures_total_value
FROM "sadt_lot" "s"
LEFT JOIN "sadt" ON sadt.sadt_lot_id = any(SELECT sadt_lot_id FROM downlots WHERE root_id = s.sadt_lot_id)
LEFT JOIN "procedure_auth" ON sadt.procedure_auth_id = procedure_auth.procedure_auth_id
LEFT JOIN "procedure_performed" ON procedure_auth.procedure_auth_id = procedure_performed.procedure_auth_id
WHERE "s"."parent_lot_id" IS NULL
GROUP BY "s"."sadt_lot_id"
ORDER BY "created_at" DESC
Other example listing all sadt's grouped by root sadt_lot's:
EXPLAIN ANALYZE WITH RECURSIVE downlots as (
SELECT sl1.sadt_lot_id, 0 AS level, sl1.sadt_lot_id as root_id
FROM sadt_lot sl1
WHERE sl1.parent_lot_id IS NULL
UNION
SELECT sl2.sadt_lot_id, d.level + 1, d.sadt_lot_id as root_id
FROM sadt_lot sl2
INNER JOIN downlots d ON d.sadt_lot_id = sl2.parent_lot_id
)
SELECT sl.sadt_lot_id, array_agg(s.sadt_id)
FROM sadt_lot sl
LEFT JOIN sadt s ON s.sadt_lot_id = any(SELECT sadt_lot_id FROM downlots WHERE root_id = sl.sadt_lot_id)
WHERE sl.parent_lot_id IS NULL
group by sl.sadt_lot_id
ORDEr By sl.sadt_lot_id
Query Plan
GroupAggregate (cost=42.53..15077.74 rows=1 width=36) (actual time=104.090..8436.505 rows=90 loops=1)
Group Key: sl.sadt_lot_id
CTE downlots
-> Recursive Union (cost=0.00..42.39 rows=101 width=12) (actual time=0.006..0.104 rows=95 loops=1)
-> Seq Scan on sadt_lot sl1 (cost=0.00..2.94 rows=1 width=12) (actual time=0.005..0.019 rows=90 loops=1)
Filter: (parent_lot_id IS NULL)
Rows Removed by Filter: 5
-> Hash Join (cost=0.33..3.74 rows=10 width=12) (actual time=0.027..0.028 rows=2 loops=2)
Hash Cond: (sl2.parent_lot_id = d.sadt_lot_id)
-> Seq Scan on sadt_lot sl2 (cost=0.00..2.94 rows=94 width=8) (actual time=0.002..0.008 rows=95 loops=2)
-> Hash (cost=0.20..0.20 rows=10 width=8) (actual time=0.010..0.010 rows=48 loops=2)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> WorkTable Scan on downlots d (cost=0.00..0.20 rows=10 width=8) (actual time=0.001..0.004 rows=48 loops=2)
-> Nested Loop Left Join (cost=0.14..15004.14 rows=6242 width=8) (actual time=8.234..8434.229 rows=11345 loops=1)
Join Filter: (SubPlan 2)
Rows Removed by Join Filter: 1112125
-> Index Only Scan using sadt_lot_sadt_lot_id_parent_lot_id_idx on sadt_lot sl (cost=0.14..12.86 rows=1 width=4) (actual time=0.011..0.252 rows=90 loops=1)
Index Cond: (parent_lot_id IS NULL)
Heap Fetches: 90
-> Seq Scan on sadt s (cost=0.00..635.83 rows=12483 width=8) (actual time=0.002..1.785 rows=12483 loops=90)
SubPlan 2
-> CTE Scan on downlots (cost=0.00..2.27 rows=1 width=4) (actual time=0.003..0.007 rows=1 loops=1123470)
Filter: (root_id = sl.sadt_lot_id)
Rows Removed by Filter: 94
Planning time: 0.203 ms
Execution time: 8436.598 ms
try EXPLAIN ANALYZE
WITH RECURSIVE downlots as (
SELECT sl1.sadt_lot_id, 0 AS level, sl1.sadt_lot_id as root_id
FROM sadt_lot sl1
WHERE sl1.parent_lot_id IS NULL
UNION
SELECT sl2.sadt_lot_id, d.level + 1, d.sadt_lot_id as root_id
FROM sadt_lot sl2
INNER JOIN downlots d ON d.sadt_lot_id = sl2.parent_lot_id
)
SELECT downlots.root_id AS sadt_lot_id, array_agg(s.sadt_id)
FROM downlots
LEFT JOIN sadt s ON s.sadt_lot_id = downlots.sadt_lot_id
GROUP BY downlots.root_id
ORDER BY downlots.root_id
I found the solution. I was using the recursive expression how parameter to join and it was do several lops on the table used on join, a better aprouch is before join with this table(sadt), do the join with recursive expression(downlots "table") and after, using result, do join with the sadt, with that, the query jump from 8sec to 8ms.
Follow the solution:
EXPLAIN ANALYZE SELECT sl.sadt_lot_id, array_agg(s.sadt_id)
FROM sadt_lot sl
LEFT JOIN (WITH RECURSIVE downlots as (
SELECT sl1.sadt_lot_id, 0 AS level, sl1.sadt_lot_id as root_id
FROM sadt_lot sl1
WHERE sl1.parent_lot_id IS NULL
UNION
SELECT sl2.sadt_lot_id, d.level + 1, d.sadt_lot_id as root_id
FROM sadt_lot sl2
INNER JOIN downlots d ON d.sadt_lot_id = sl2.parent_lot_id
)SELECT * FROM downlots) d ON d.sadt_lot_id = sl.sadt_lot_id
LEFT JOIN sadt s ON s.sadt_lot_id = d.root_id
WHERE sl.parent_lot_id IS NULL
group by sl.sadt_lot_id
ORDEr By sl.sadt_lot_id
Query Plan
Sort (cost=1935.35..1935.56 rows=82 width=36) (actual time=8.230..8.234 rows=82 loops=1)
Sort Key: sl.sadt_lot_id
Sort Method: quicksort Memory: 75kB
-> HashAggregate (cost=1931.72..1932.74 rows=82 width=36) (actual time=8.085..8.197 rows=82 loops=1)
Group Key: sl.sadt_lot_id
-> Hash Right Join (cost=469.73..1839.25 rows=18493 width=8) (actual time=0.328..6.273 rows=10742 loops=1)
Hash Cond: (s.sadt_lot_id = downlots.root_id)
-> Seq Scan on sadt s (cost=0.00..645.78 rows=12678 width=8) (actual time=0.007..1.406 rows=12493 loops=1)
-> Hash (cost=465.72..465.72 rows=321 width=8) (actual time=0.242..0.242 rows=82 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 12kB
-> Hash Right Join (cost=432.42..465.72 rows=321 width=8) (actual time=0.049..0.232 rows=82 loops=1)
Hash Cond: (downlots.sadt_lot_id = sl.sadt_lot_id)
-> CTE Scan on downlots (cost=428.41..444.05 rows=782 width=12) (actual time=0.007..0.167 rows=96 loops=1)
CTE downlots
-> Recursive Union (cost=0.00..428.41 rows=782 width=12) (actual time=0.006..0.143 rows=96 loops=1)
-> Seq Scan on sadt_lot sl1 (cost=0.00..2.99 rows=82 width=12) (actual time=0.004..0.018 rows=82 loops=1)
Filter: (parent_lot_id IS NULL)
Rows Removed by Filter: 14
-> Hash Join (cost=4.23..40.98 rows=70 width=12) (actual time=0.030..0.031 rows=5 loops=3)
Hash Cond: (d.sadt_lot_id = sl2.parent_lot_id)
-> WorkTable Scan on downlots d (cost=0.00..16.40 rows=820 width=8) (actual time=0.000..0.002 rows=32 loops=3)
-> Hash (cost=2.99..2.99 rows=99 width=8) (actual time=0.069..0.069 rows=14 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> Seq Scan on sadt_lot sl2 (cost=0.00..2.99 rows=99 width=8) (actual time=0.004..0.061 rows=96 loops=1)
-> Hash (cost=2.99..2.99 rows=82 width=4) (actual time=0.039..0.039 rows=82 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 11kB
-> Seq Scan on sadt_lot sl (cost=0.00..2.99 rows=82 width=4) (actual time=0.014..0.028 rows=82 loops=1)
Filter: (parent_lot_id IS NULL)
Rows Removed by Filter: 14
Planning time: 0.225 ms
Execution time: 8.300 ms