Why isn't PostgreSQL using index for this join query - postgresql

-- Return every event_user_detail row that belongs to a guest list of event
-- 2985739029; EXPLAIN (ANALYZE) executes the statement and reports actual timings.
EXPLAIN (ANALYZE)
SELECT event_user_detail.*
FROM event_user_detail
INNER JOIN guest_list
    ON guest_list.id = event_user_detail.guest_list_id
WHERE guest_list.event_id = 2985739029
Results in the following query plan:
Gather (cost=1052.56..43408.58 rows=244 width=97) (actual time=66.570..67.810 rows=0 loops=1)
Workers Planned: 2
Workers Launched: 2
-> Nested Loop (cost=52.56..42384.18 rows=102 width=97) (actual time=57.183..57.183 rows=0 loops=3)
-> Parallel Seq Scan on guest_list (cost=0.00..13151.33 rows=5 width=8) (actual time=56.941..57.169 rows=2 loops=3)
Filter: (event_id = '2985739029'::bigint)
Rows Removed by Filter: 254489
-> Bitmap Heap Scan on event_user_detail (cost=52.56..5830.93 rows=1564 width=97) (actual time=0.007..0.007 rows=0 loops=5)
Recheck Cond: (guest_list_id = guest_list.id)
-> Bitmap Index Scan on idx_event_user_detail_guest_list_id (cost=0.00..52.17 rows=1564 width=0) (actual time=0.005..0.005 rows=0 loops=5)
Index Cond: (guest_list_id = guest_list.id)
Planning time: 0.252 ms
Execution time: 67.838 ms
Even though there is an index on guest_list(event_id), the planner does a parallel sequential scan on guest_list. Can someone explain why this is happening and whether there is some way to fix it?
If I split this up into two queries, one of which just fetches the guest_list ids, and then do a simple IN (...ids), the query is super quick. I tried doing the same with a subquery, but I think the optimiser turned it into a join.
-- ----------------------------
-- guest_list: table, keys, indexes and foreign keys
-- ----------------------------
DROP TABLE IF EXISTS public.guest_list;

CREATE TABLE public.guest_list (
    id                      bigint       NOT NULL,
    creation_date           timestamp(6),
    last_modification_date  timestamp(6),
    uuid                    uuid,
    deleted                 boolean      NOT NULL,
    name                    varchar(255),
    version                 integer,
    event_id                bigint,
    permanent_guest_list_id bigint,
    color                   varchar(255),
    CONSTRAINT uk_o4sa0dw6lcdjv96gl2p96xwki UNIQUE (uuid),
    CONSTRAINT guest_list_pkey PRIMARY KEY (id)
);

-- Plain btree indexes on the two foreign-key columns. The default operator
-- class and the default ASC NULLS LAST ordering are left implicit.
CREATE INDEX idx_guest_list_event_id
    ON public.guest_list USING btree (event_id);
CREATE INDEX idx_guest_list_permanent_guest_list_id
    ON public.guest_list USING btree (permanent_guest_list_id);

-- Foreign keys to event and permanent_guest_list.
ALTER TABLE public.guest_list
    ADD CONSTRAINT fk7tk6fxgyo4h7ykelb9c0pe5ap
        FOREIGN KEY (event_id) REFERENCES public.event (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION,
    ADD CONSTRAINT guest_list_permanent_guest_list_id_fkey
        FOREIGN KEY (permanent_guest_list_id) REFERENCES public.permanent_guest_list (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION;
-- ----------------------------
-- event_user_detail: table, keys, indexes and foreign keys
-- ----------------------------
DROP TABLE IF EXISTS public.event_user_detail;

CREATE TABLE public.event_user_detail (
    id                     bigint       NOT NULL,
    creation_date          timestamp(6),
    last_modification_date timestamp(6),
    uuid                   uuid,
    deleted                boolean      NOT NULL,
    name                   varchar(255),
    value                  text,
    version                integer,
    event_id               bigint,
    user_id                bigint,
    guest_list_id          bigint,
    reference_user_id      bigint,
    CONSTRAINT uk_orfh8fkwtk681af38a65everr UNIQUE (uuid),
    CONSTRAINT event_user_detail_pkey PRIMARY KEY (id)
);

-- Single-column btree indexes. The default operator class and the default
-- ASC NULLS LAST ordering are left implicit.
CREATE INDEX idx_event_user_detail_deleted
    ON public.event_user_detail USING btree (deleted);
CREATE INDEX idx_event_user_detail_event_id
    ON public.event_user_detail USING btree (event_id);
CREATE INDEX idx_event_user_detail_guest_list_id
    ON public.event_user_detail USING btree (guest_list_id);
CREATE INDEX idx_event_user_detail_user_id
    ON public.event_user_detail USING btree (user_id);

-- Foreign keys. "user" is a reserved word and therefore stays quoted.
ALTER TABLE public.event_user_detail
    ADD CONSTRAINT fk8bffonom9l1fgcanegl9nm641
        FOREIGN KEY (user_id) REFERENCES public."user" (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION,
    ADD CONSTRAINT fk_event_user_detail_guest_list_id
        FOREIGN KEY (guest_list_id) REFERENCES public.guest_list (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION,
    ADD CONSTRAINT fk_event_user_detail_reference_user_id
        FOREIGN KEY (reference_user_id) REFERENCES public."user" (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION,
    ADD CONSTRAINT fkisr2ccpapw537ntw4c0mlytcw
        FOREIGN KEY (event_id) REFERENCES public.event (id)
        ON DELETE NO ACTION ON UPDATE NO ACTION;

It vastly overestimates how many rows in event_user_detail it is going to find for each row in guest_list (probably because there are some rows in guest_list which do have a lot of entries in event_user_detail, just not the ones you are selecting here). The large number of rows it thinks it is going to find makes parallel query look attractive, but the way to get that parallel query is by using the seq scan on guest_list. So that is what it does.
You can disable parallel queries by setting max_parallel_workers_per_gather to 0. If you don't get much benefit from parallel query anyway, this may be a good "real" solution for you. If you do get a benefit from it and don't want to disable it, then you can at least do this just in the current session to see if my theory is correct.

I concur with jjanes' answer, but I want to suggest these additional experiments:
Try to ANALYZE event_user_detail; and see if that improves the estimate.
It could be that random_page_cost is set too high: it is designed for spinning disks and estimates index scans as comparatively expensive. If you lower that parameter, PostgreSQL will be more ready to use index scans.

You can do this using a CTE:
-- Resolve the event's guest lists first, then look up event_user_detail by
-- guest_list_id.
-- There is deliberately no LIMIT in the CTE: guest_list.event_id is not unique,
-- and the join being replaced returns the details of every guest list of the
-- event, not just one arbitrary list.
WITH guest AS (
    SELECT id
    FROM guest_list
    WHERE event_id = 2985739029
)
SELECT event_user_detail.*
FROM event_user_detail
WHERE guest_list_id IN (SELECT id FROM guest);
In older versions of PostgreSQL, a CTE is planned independently and runs like a separate query within the same transaction, but its result is not sent to the client.
You can read about CTEs in the docs. Beware that this behaviour changed in PostgreSQL 12; if you want to preserve the old behaviour, you should write it like this:
-- PostgreSQL 12+ form: AS MATERIALIZED keeps the CTE as a separately planned
-- step instead of letting the planner fold it back into the outer query.
-- There is deliberately no LIMIT in the CTE: guest_list.event_id is not unique,
-- and the join being replaced returns the details of every guest list of the
-- event, not just one arbitrary list.
WITH guest AS MATERIALIZED (
    SELECT id
    FROM guest_list
    WHERE event_id = 2985739029
)
SELECT event_user_detail.*
FROM event_user_detail
WHERE guest_list_id IN (SELECT id FROM guest);
Also they are very useful to avoid deadlocks in updates:
-- Lock the target rows in ascending id order, then update exactly those rows,
-- so concurrent statements acquire their row locks in the same sequence.
-- The UPDATE has to read from the CTE: PostgreSQL evaluates a SELECT inside
-- WITH only as far as the primary query demands its output, so a CTE that is
-- never referenced would not run and would not take any locks.
WITH to_update AS (
    SELECT id
    FROM my_table
    WHERE condition
    ORDER BY id ASC
    FOR UPDATE
)
UPDATE my_table
SET ...
FROM to_update
WHERE my_table.id = to_update.id;
This makes all rows get locked in a fixed order, which prevents the deadlocks that are possible with plain UPDATE queries (e.g. when two queries both need to modify ids 1 and 2: with this CTE it cannot happen that the first locks 1 and waits for 2 while the second locks 2 and waits for 1).

Related

Simple POSTGRESQL SELECT query too slow

I have a table that stores logs from an Electronic Invoicing System webservice, this is my SQL Structure
-- Log table for calls to the Electronic Invoicing System web service.
CREATE TABLE public.eis_transactions (
    id                   bigint    NOT NULL DEFAULT nextval('eis_transactions_id_seq'::regclass),
    operation_type       varchar,
    sale_id              integer,
    delivery_note_id     integer,
    sale_credit_note_id  integer,
    debit_note_id        integer,
    cdc                  text,
    transaction_id       text,
    response_code        varchar,
    response_description text,
    xml                  text,
    response_xml         text,
    response_datetime    timestamp,
    created              timestamp,
    modified             timestamp,
    user_id              integer,
    async                boolean   DEFAULT false,
    url                  varchar,
    final_xml            text,
    CONSTRAINT eis_transactions_pkey PRIMARY KEY (id),
    -- MATCH SIMPLE is the default match type and is left implicit below.
    CONSTRAINT eis_transactions_debit_note_id_fkey
        FOREIGN KEY (debit_note_id) REFERENCES public.debit_notes (id)
        ON UPDATE RESTRICT ON DELETE RESTRICT,
    CONSTRAINT eis_transactions_delivery_note_id_fkey
        FOREIGN KEY (delivery_note_id) REFERENCES public.delivery_notes (id)
        ON UPDATE RESTRICT ON DELETE RESTRICT,
    CONSTRAINT eis_transactions_sale_credit_note_id_fkey
        FOREIGN KEY (sale_credit_note_id) REFERENCES public.sale_credit_notes (id)
        ON UPDATE RESTRICT ON DELETE RESTRICT,
    CONSTRAINT eis_transactions_sale_id_fkey
        FOREIGN KEY (sale_id) REFERENCES public.sales (id)
        ON UPDATE RESTRICT ON DELETE RESTRICT,
    CONSTRAINT eis_transactions_user_id_fkey
        FOREIGN KEY (user_id) REFERENCES public.users (id)
        ON UPDATE RESTRICT ON DELETE RESTRICT
)
WITH (OIDS = FALSE)
TABLESPACE pg_default;

ALTER TABLE public.eis_transactions OWNER TO postgres;
-- Secondary indexes on eis_transactions. ASC and NULLS LAST are the btree
-- defaults, and NULLS FIRST is the default for DESC, so only non-default
-- orderings are spelled out.
-- NOTE(review): the primary key eis_transactions_pkey already provides a btree
-- index on id, so the three single-column id indexes below look redundant.
-- Confirm that nothing relies on them before dropping any.
CREATE INDEX eis_transactions_id_idx
    ON public.eis_transactions USING btree (id)
    TABLESPACE pg_default;

CREATE INDEX eis_transactions_id_idx1
    ON public.eis_transactions USING btree (id NULLS FIRST)
    TABLESPACE pg_default;

CREATE INDEX eis_transactions_id_idx2
    ON public.eis_transactions USING btree (id DESC)
    TABLESPACE pg_default;

-- Composite index over all five foreign-key columns.
CREATE INDEX eis_transactions_sale_id_delivery_note_id_sale_credit_note__idx
    ON public.eis_transactions USING btree
    (sale_id, delivery_note_id, sale_credit_note_id, debit_note_id, user_id)
    TABLESPACE pg_default;
It contains ~800 rows; this is the query:
SELECT * FROM eis_transactions LIMIT 1000;
It takes more than 60 seconds to complete the query.
And this is the EXPLAIN ANALYZE result i got:
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM eis_transactions LIMIT 100;
Limit (cost=0.00..15.94 rows=100 width=1108) (actual time=0.013..0.121 rows=100 loops=1)
Buffers: shared read=15
-> Seq Scan on eis_transactions (cost=0.00..128.03 rows=803 width=1108) (actual time=0.012..0.106 rows=100 loops=1)
Buffers: shared read=15
Total runtime: 0.180 ms
But doing a SELECT * FROM eis_transactions (with or without LIMIT) takes more than 60 seconds, while I have other tables with more than 1000 rows that don't take nearly as long as this particular table.
What could be wrong ?
Thank you !

Postgres not using index in simple query

I have a simple table storing a few values.
-- Usage table; the primary key and the foreign key to crm.customer are
-- declared as named table-level constraints.
CREATE TABLE customer_usage (
    id           bigserial                NOT NULL,
    customer_id  bigint                   NOT NULL,
    profile_id   bigint                   NOT NULL,
    supplier_id  bigint                   NOT NULL,
    direction    smallint                 NOT NULL,
    pod          varchar                  NOT NULL,
    trunced_date date                     NOT NULL,
    period_end   timestamptz              NOT NULL,
    usage_wh     real                     NOT NULL,
    created_at   timestamptz DEFAULT now() NOT NULL,
    CONSTRAINT customer_usage_pk PRIMARY KEY (id),
    CONSTRAINT customer_usage_customer_id_fk
        FOREIGN KEY (customer_id) REFERENCES crm.customer
);
-- Secondary indexes on customer_usage.
-- NOTE(review): customer_usage_pk already enforces uniqueness of id through its
-- own index, so customer_usage_id_uindex looks redundant. Confirm before dropping.
CREATE UNIQUE INDEX customer_usage_id_uindex
    ON customer_usage (id);
CREATE INDEX customer_usage_profile_id_index
    ON customer_usage (profile_id);
CREATE INDEX customer_usage_supplier_id_index
    ON customer_usage (supplier_id);
CREATE INDEX customer_usage_trunced_date_index
    ON customer_usage (trunced_date);

-- CLUSTER ON only records which index a later CLUSTER command should use; it
-- does not reorder the table by itself.
-- NOTE(review): the table is created unqualified but altered as
-- edm.customer_usage; presumably search_path resolves to schema edm. Verify.
ALTER TABLE edm.customer_usage
    CLUSTER ON "customer_usage_trunced_date_index";
When I try to query data for a specific "trunced_date", the explain plan shows that it is NOT using the clustered index for this very column.
explain analyze select * from edm.customer_usage where trunced_date = '2021-05-26';
Explain plan:
QUERY PLAN
Seq Scan on customer_usage (cost=0.00..48792.40 rows=1495664 width=92) (actual time=0.053..20115.107 rows=1494912 loops=1)
Filter: (trunced_date = '2021-05-26'::date)
Rows Removed by Filter: 254880
Planning Time: 0.370 ms
Execution Time: 37914.739 ms
I don't understand, why -- in the simplest query asking specifically for one column that has a clustered index -- this index is not used.
Thank you very much for your help.
Fritz

Postgres selecting bad execution plan. vaccuum analyze doesn't seem to change its mind

This query
select "key","job","sentDate","scheduledDate","status","recipient","mergeVariables","opens","clicks","smtpEvents", "$$meta.deleted", "$$meta.created", "$$meta.modified", "$$meta.version", "$$meta.deleted", "$$meta.created", "$$meta.modified" from "emails"
where "emails"."$$meta.deleted" = false
and "job" in ('6f0b0288-6edd-408f-a0a9-8406fcf4bd88','a36c901c-b2df-427f-8a83-8e7072c1ad55','87127ee7-b13f-4a60-b981-65ea91988bcd','76a3eef0-32b3-4cd2-b6df-2e3360ec484a','893fd688-e789-49b8-9f95-cbaf84520852','3dc85b85-2de4-4e71-b9e0-26dbb122acfd','ae0615b1-2520-45d0-9159-b7794e535bc5','39562342-afa3-4054-82c6-cda103b205d7','6995b876-1781-4e84-b6cb-437ccb45fd4c','adfd15ce-e68a-405c-a18c-fa01daa711ea','901a9e3a-2c0d-476d-97b3-c64b954f0ecc','e6dec9f0-f670-4187-b0a4-fb8a676f0016','a373c541-32c8-4070-8ac0-209683257fe5','5ff6cec9-794f-49f1-9043-cc9120c3b1d4','3d2226c2-7559-41c7-b1e5-688830693ca2','b08bcf0b-fde2-4079-bfc4-aeff9bac48a7','e82eae7f-4e41-410d-9eb9-4b49d7ccde11','d0ae300f-5f8a-4851-9c56-3d87dad3ce2e','fc3c11df-7cd6-4819-888f-8abd2e32367b','0a27151a-3f33-488c-a3e6-4ffcfe9f7020','9b89d3f2-4484-4109-aaeb-382ef480b0e1','9ef54c01-2fe2-4ed9-8d34-6f3fa8108040','21a63e3a-bbdc-43e3-9c73-73c94e8a3ac3','f90ac6c0-d422-4e8c-9dcc-64a8a38f15e4','39d2c420-fbb1-4883-a184-9670c3b5ecb6','63681ea2-e567-4f6f-9b64-32c63d7d7f67','38d7e27b-86ce-4e05-a2eb-a50925e8afab','96dd4ec2-f2d6-44b4-bb0c-97025f6af7e5','4a9cb1ab-6a3e-47d5-9348-51efd918883f','cdd6b061-3a05-47cf-bba3-067ce03d81e7','ef9b60da-b26e-4f60-805a-5b1778a08288','0dcb9ea7-fa78-4c64-bf4b-4d62eb27be8e','104f4306-042c-4df9-bbf2-7c6d7ec5999d','340e95e3-0ff1-435b-babb-029533cd67ff','f5e8c4a1-a0ec-44b7-a84c-4e2bea03dab6','3acb7147-1fc9-4911-93b1-28b0b3316027','7874342e-b3d2-48e1-9ab8-bf3896ff5d69','3c1083e9-9b62-4ae4-9969-55e5813cb566','77afa7ae-436c-4d81-8917-a7bd787447ca','6615613e-5e22-48ce-bca6-1098c086d194','e2e28dab-9e68-41ca-8f98-a95e5c711d7d','ac1140cb-b3f0-4d4c-9236-8242851a4594','53254e80-eaee-4609-b141-b5f3eb50b33d','565fc864-5088-47f4-a5d3-4d1ea2b74e4b','fa4c7805-7208-4a17-9fcb-bfb4cdbbb6b8','7b5a0507-b59c-4de5-b738-7095c561fdd7','4727cc2b-7cb2-4009-bcd5-d20b22390b7d','7ed66544-7d5a-4eda-9cbf-f2a9ab2e8714','327989bd-83d7-4950-81a3-d9569f4b9bd8','aa4ae2b3-b3b0-41ef-8e7b-894fa85c9c70','3fe328f5-5ee6-46bb-8448-dd7e306400f
b','29b2c9e0-8302-44bd-939d-3d8a5e242902','7303852d-6b5f-4210-a2df-b755dcc81417','6cb6d3ca-9ba5-4c2d-8cf3-08168cd14933','12fd7ace-5755-4b71-ac9f-cd0fb529873e','dd3a2020-3378-4603-8d70-60047e8189cc','cc9240c4-9d28-4d82-825d-bb88be3ec640','bbf5f70d-d828-44e1-867e-ff29c8945c1b','a11f458e-3176-4ffd-aaf4-71c6aca70a53','24d574d0-57de-41b3-9380-8968c0ab02f5','57de56d7-cdc2-4004-9853-65df9c3e0871','cef46cd7-bac5-4f46-a9a5-b0941553e3ce','9d344984-8164-45ce-a58a-715599f5fd0c','59598d21-90d5-4952-88be-e128aedda324','0f7c1a47-0a65-4d6e-a4e6-7ad5ea5e3afc','78a517bb-9686-4049-9dc2-43bba20916cb','fcf999fe-bbbf-462a-be89-2b6993501c6f','44f9ec05-5408-4778-9dc8-adbd43443af7','25766690-819a-42c1-84af-04149691a852','a923dfec-b368-49bb-af76-7a542bb5b3fb','fee7d0ca-3d74-46a1-8c22-31285af660fb','3b24f58d-1203-45d5-b718-1e2bc51bd811','48d36f2f-aa31-4018-8318-1a0e8c7d20ba','b43093b2-50ad-498f-84b1-b0181ac54d0e','b0310d94-f516-49ea-97f1-2289725a7bdb','9a58a202-91e0-47b5-bc2c-346885ab21ab','0430cc10-7141-4cc3-b4e0-07a4327e9f75','63986387-5157-4f7e-9a22-b667bc82de8f','78339b8b-1351-401d-8c03-1b5674c87f9c','9b97dadb-366a-431f-9af4-192844e9ea86','b5504148-8231-41dd-a316-96b2ec2b4b24','58ab8320-5a21-42f6-b8f2-c487cff59116','a322eb6e-6fb6-4a2a-9dee-293bf9285ae5','2f621d24-7927-4be5-a31b-8ac896cb5c21','865bfbbc-c2d7-466d-8ab3-3b1e30e87b68','f33fb2d8-9a12-4aea-bfa4-55eadbbe63c8','7f929ff1-da47-41e9-abc9-dc26f9158ae2','7a33ccb8-5728-4153-97bc-9bf85b715b20','1736fe52-9a78-442d-8d3d-d14e50791b47','9eb276eb-273d-4e20-8bf3-7fa59ab41cc5','1767d575-c13d-46be-903c-23c667341968','ad6c5b99-4840-4970-b621-0b24992ddcd7','f6b6e795-e53b-4443-8922-3011814651d9','91e17445-4349-4577-b327-90482baa177c','90c42f66-40c4-4121-901f-e27aa94818b8','7aa8f73a-b6f8-4b76-9ad7-c0e0e9ca5158')
order by "$$meta.created" asc,"key" asc limit 500
takes about 4 minutes, because it is using the ORDER BY index (https://explain.depesz.com/s/FXqx). If, however, I change it to LIMIT 5000, it is done in <100 ms (https://explain.depesz.com/s/OajM).
I've run VACUUM ANALYZE etc., but it doesn't seem to change the execution plan.
I'm looking for a fix that doesn't require changing the query for LIMIT 500 vs 5000. Ideally the sorted index is only used if there is no job filter.
Edit:
i have these indexes:
CREATE INDEX "emails_$$meta.created_key_idx"
ON vsko_mailer_api_prod.emails USING btree
("$$meta.created" ASC NULLS LAST, key ASC NULLS LAST)
TABLESPACE pg_default
WHERE NOT "$$meta.deleted";
also:
CREATE INDEX emails_emailjob
ON vsko_mailer_api_prod.emails USING btree
(job ASC NULLS LAST)
TABLESPACE pg_default;
and a few others that aren't relevant, i think.
I just added this one:
CREATE INDEX emails_emailjob_not_deleted
ON vsko_mailer_api_prod.emails USING hash
(job)
TABLESPACE pg_default
WHERE NOT "$$meta.deleted";
it made the limit 5000 even faster, but no difference on limit 500
edit 2:
https://explain.depesz.com/s/DOyT (limit 5000)
https://explain.depesz.com/s/Pi93 (limit 500)
all indexes:
"emails" "emails_$$meta.created_key_idx" "CREATE INDEX ""emails_$$meta.created_key_idx"" ON vsko_mailer_api_prod.emails USING btree (""$$meta.created"", key) WHERE (NOT ""$$meta.deleted"")"
"emails" "emails_created" "CREATE INDEX emails_created ON vsko_mailer_api_prod.emails USING btree (""$$meta.created"")"
"emails" "emails_deleted" "CREATE INDEX emails_deleted ON vsko_mailer_api_prod.emails USING btree (""$$meta.deleted"")"
"emails" "emails_emailjob" "CREATE INDEX emails_emailjob ON vsko_mailer_api_prod.emails USING btree (job)"
"emails" "emails_emailjob_not_deleted" "CREATE INDEX emails_emailjob_not_deleted ON vsko_mailer_api_prod.emails USING hash (job) WHERE (NOT ""$$meta.deleted"")"
"emails" "emails_lowered_job" "CREATE INDEX emails_lowered_job ON vsko_mailer_api_prod.emails USING btree (lower((job)::text))"
"emails" "emails_modified" "CREATE INDEX emails_modified ON vsko_mailer_api_prod.emails USING btree (""$$meta.modified"")"
"emails" "emails_ordered_created" "CREATE INDEX emails_ordered_created ON vsko_mailer_api_prod.emails USING btree (""$$meta.created"") WHERE (""$$meta.deleted"" = false)"
"emails" "emails_ordered_created_and_keys" "CREATE INDEX emails_ordered_created_and_keys ON vsko_mailer_api_prod.emails USING btree (""$$meta.created"", key)"
"emails" "emails_ordered_sentdate" "CREATE INDEX emails_ordered_sentdate ON vsko_mailer_api_prod.emails USING btree (""sentDate"" DESC)"
"emails" "emails_pkey" "CREATE UNIQUE INDEX emails_pkey ON vsko_mailer_api_prod.emails USING btree (key)"
"emails" "emails_status" "CREATE INDEX emails_status ON vsko_mailer_api_prod.emails USING btree (status)"
"emails" "lowered_recipient_emailaddress_emails" "CREATE INDEX lowered_recipient_emailaddress_emails ON vsko_mailer_api_prod.emails USING btree (lower(((recipient)::json ->> 'emailAddress'::text)))"
"emails" "lowered_recipient_person_href" "CREATE INDEX lowered_recipient_person_href ON vsko_mailer_api_prod.emails USING btree (lower(((((recipient)::json ->> 'person'::text))::json ->> 'href'::text)))"
Your first try should be to improve the estimate, so that PostgreSQL chooses the correct plan. This could be done with better statistics:
-- Raise the per-column statistics target for emails.job, then re-gather
-- planner statistics so the new target takes effect.
ALTER TABLE emails
    ALTER COLUMN job SET STATISTICS 1000;

ANALYZE emails;
You can experiment with values up to 10000.
If that fails, you can change the ORDER BY clause so that it cannot be supported by the index, then PostgreSQL will always use the index on job:
...
ORDER BY "$$meta.created" + INTERVAL '0 days', key
Here I assume that "$$meta.created" is a timestamp; add something else if it is not.
Creating test data:
-- Build a 1,000,000-row test table with random key/job values, then add the
-- indexes used in the experiment.
BEGIN;

CREATE TABLE foo (
    id      integer NOT NULL,
    key     integer NOT NULL,
    job     integer NOT NULL,
    created integer NOT NULL,
    dummy   integer NOT NULL,
    deleted boolean NOT NULL
);

INSERT INTO foo (id, key, job, created, dummy, deleted)
SELECT
    n,
    random() * 10000,
    random() * 10000,
    n + random() * 10000,
    1,
    random() > 0.1
FROM generate_series(1, 1000000) AS n;

ALTER TABLE foo ADD PRIMARY KEY (id);

COMMIT;

VACUUM ANALYZE foo;

CREATE INDEX foo_job_not_deleted ON foo (job) WHERE NOT deleted;
CREATE INDEX foo_created ON foo (created, key) WHERE NOT deleted;
CREATE INDEX foo_created1 ON foo (created);
With these, I also get your bad plan.
One solution is to force a nested loop by using a LATERAL JOIN:
-- Look up the non-deleted rows of each requested job separately, then sort and
-- limit the combined result.
-- The job list is de-duplicated first: unlike "job IN (...)", a join emits a
-- matching row once per occurrence of the job id, and this list repeats
-- 5911, 2876 and 9981.
SELECT foo2.*
FROM (
    SELECT DISTINCT column1 AS job
    FROM (VALUES (6479),(672),(6264),(5911),(6161),(7704),(2609),(4095),(271),(2363),(7299),(7330),(1990),(6523),(9261),(9490),(5013),(1131),(585),(8881),(8379),(1543),(5911),(7243),(3608),(9199),(8950),(1485),(7159),(2126),(2876),(779),(6890),(4315),(2253),(3909),(7355),(2876),(9981),(6653),(8407),(1772),(1348),(5689),(2857),(3535),(7607),(6275),(7596),(1885),(6827),(4180),(4638),(1876),(9403),(4195),(2548),(2827),(7972),(5571),(8426),(7761),(6400),(9175),(7486),(589),(3538),(8495),(2864),(5349),(4834),(1357),(6778),(6232),(7457),(6740),(5011),(946),(2918),(9981),(6903),(5565),(9396),(4482),(9796),(5925),(4971),(1304),(71),(7926),(2173),(3439),(7508),(7763),(4890),(5660),(8436),(8828),(5524),(6418)) AS v
) AS jobs
CROSS JOIN LATERAL (
    SELECT *
    FROM foo
    WHERE foo.job = jobs.job
      AND NOT foo.deleted
) AS foo2
ORDER BY created, key
LIMIT 500;
For each job in the VALUES clause, the LATERAL JOIN subquery will be evaluated independently. Since it hits only one job value, which is a tiny fraction of the table, this forces the optimizer to use the index to execute the subquery.
If the table contains lots of columns, most notably large TEXT columns, and the subquery returns lots of rows that will then be thrown away by the LIMIT clause, it may be beneficial to fetch only the primary key in the subquery and then, after the LIMIT, join back to the main table to get all the columns that you want, only for the rows that will actually be in the final result.

PostgreSQL does not choose my indexes well

-- Lookup table.
CREATE TABLE public.tabla (
    cod_tabla bigint      NOT NULL,
    tabla     varchar(31) NOT NULL,
    CONSTRAINT pk_tabla PRIMARY KEY (cod_tabla)
);

-- Each entidad row references one tabla row; updates and deletes of the
-- referenced row cascade.
CREATE TABLE public.entidad (
    cod_entidad       bigint NOT NULL,
    cod_tabla         bigint NOT NULL,
    cod_entidad_tabla bigint NOT NULL,
    CONSTRAINT pk_entidad PRIMARY KEY (cod_entidad),
    CONSTRAINT fk_tabla_entidad
        FOREIGN KEY (cod_tabla) REFERENCES public.tabla (cod_tabla)
        ON UPDATE CASCADE
        ON DELETE CASCADE
);
-- Plain index on the foreign-key column.
CREATE INDEX idx_tabla_entidad
    ON public.entidad USING btree (cod_tabla);

-- Partial covering index for the cod_tabla = 4 slice.
-- cod_entidad_tabla is the index key and therefore already stored in the
-- index, so it is not repeated in INCLUDE. cod_tabla is declared bigint, so
-- the predicate needs no cast.
CREATE INDEX idx_entidad_tabla_4
    ON public.entidad USING btree (cod_entidad_tabla)
    INCLUDE (cod_entidad, cod_tabla)
    WHERE cod_tabla = 4;
I think PostgreSQL doesn't use the index idx_entidad_tabla_4;
it scans idx_tabla_entidad instead, applying the WHERE condition:
explain (analyze, buffers, format text) select * from entidad where cod_tabla = 4
Index Scan using idx_tabla_entidad on entidad (cost=0.56..51121.41 rows=1405216 width=20) (actual time=0.037..242.609 rows=1409985 loops=1)
Index Cond: ((cod_tabla)::bigint = 4)
Buffers: shared hit=12839
Planning Time: 0.158 ms
Execution Time: 311.828 ms
SELECT count(*) from entidad;
34.413.354
SELECT count(*) from entidad where cod_tabla = 4;
1.409.985
My questions are:
Why doesn't it use the index idx_entidad_tabla_4?
How could I force its use?

Postgresql very slow query

This query runs very slow. Why? Others are fine. Indexes are good, I think.
-- List each distinct si_id once, in ascending order with NULLs last.
EXPLAIN ANALYZE
SELECT e_inst.si_id AS c0
FROM e_inst
GROUP BY e_inst.si_id
ORDER BY e_inst.si_id ASC NULLS LAST
Query Plan:
Sort (cost=12221.87..12221.90 rows=68 width=4) (actual time=1115.377..1115.433 rows=81 loops=1)
Sort Key: si_id
Sort Method: quicksort Memory: 28kB
-> HashAggregate (cost=12221.25..12221.45 rows=68 width=4) (actual time=1115.198..1115.261 rows=81 loops=1)
-> Seq Scan on e_inst (cost=0.00..11920.07 rows=602357 width=4) (actual time=0.021..611.570 rows=602357 loops=1)
Total runtime: 1115.538 ms
Create table and indexes:
-- Table definition as posted; no primary key or other constraints besides
-- NOT NULL on id are declared here.
CREATE TABLE e_inst (
    id           integer NOT NULL,
    ip           numeric,
    gu           varchar,
    referrer     varchar,
    proc         integer,
    loke_id      integer,
    top_id       integer,
    si_id        integer,
    kop          integer,
    count        integer,
    created      integer,
    modified     integer,
    timepop      integer,
    count_active varchar,
    country      char(3),
    info         varchar
);
-- Secondary indexes on e_inst. The mixed-case index names must stay
-- double-quoted, otherwise PostgreSQL folds them to lower case.
CREATE INDEX "topEnhance" ON e_inst USING btree (created, top_id);
CREATE INDEX "procEnhance" ON e_inst USING btree (created, proc);
-- Created on e_inst like every other index here; the statement previously
-- named e_install, a table that is not defined in this script.
CREATE INDEX "countryEnhance" ON e_inst USING btree (created, country);
CREATE INDEX "createdE" ON e_inst USING btree (created);
-- Records "createdE" as the index for future CLUSTER runs; it does not
-- physically reorder the table on its own.
ALTER TABLE e_inst CLUSTER ON "createdE";
CREATE INDEX "lokeE" ON e_inst USING btree (loke_id);
CREATE INDEX "lokeEnhance" ON e_inst USING btree (created, loke_id);
CREATE INDEX "siE" ON e_inst USING btree (si_id);
CREATE INDEX "siEnhance" ON e_inst USING btree (created, si_id);
CREATE INDEX "kopEnhance" ON e_inst USING btree (created, kop);
Indexes aren't going to be used by a query which processes the whole table.
The fact is you are retrieving and processing 600k records. That it does this in just over a second is actually kind of impressive.
Now in this case, you are trying to pull out the 81 distinct values from the 600k records. What you may want to do is to construct a recursive query such that it fetches one row 81 times. This may be faster but there is no guarantee. Normally I use these where there are far fewer rows returned. However here is an example:
-- Emulated "loose index scan": step through the distinct si_id values one at
-- a time, each step being a min() probe that an index on si_id can answer.
WITH RECURSIVE sparse_scan AS (
    -- Seed: the smallest si_id in the table.
    SELECT min(si_id) AS si_id
    FROM e_inst
    UNION ALL
    -- Step: the next si_id strictly greater than the previous one.
    -- The recursive reference stays in FROM (PostgreSQL rejects it inside a
    -- subquery). The scalar subquery yields NULL once the values are
    -- exhausted, and the IS NOT NULL guard then ends the recursion.
    SELECT (SELECT min(e.si_id) FROM e_inst AS e WHERE e.si_id > s.si_id)
    FROM sparse_scan AS s
    WHERE s.si_id IS NOT NULL
)
SELECT si_id AS c0
FROM sparse_scan
WHERE si_id IS NOT NULL
UNION ALL
-- GROUP BY si_id also reports a NULL group when NULL values exist; keep that.
SELECT NULL
WHERE EXISTS (SELECT 1 FROM e_inst WHERE si_id IS NULL)
ORDER BY c0 ASC NULLS LAST;
Note that this replaces a sequential scan with 81 index scans.
Upgraded to PostgreSQL 9.2. That's now an index only scan!
Works good, thanks for a_horse_with_no_name who advised me to upgrade.