Postgres not using index in simple query - postgresql

I have a simple table storing a few values.
create table customer_usage
(
id bigserial not null
constraint customer_usage_pk
primary key,
customer_id bigint not null
constraint customer_usage_customer_id_fk
references crm.customer,
profile_id bigint not null,
supplier_id bigint not null,
direction smallint not null,
pod varchar not null,
trunced_date date not null,
period_end timestamp with time zone not null,
usage_wh real not null,
created_at timestamp with time zone default now() not null
);
create unique index customer_usage_id_uindex
on customer_usage (id);
create index customer_usage_profile_id_index
on customer_usage (profile_id);
create index customer_usage_supplier_id_index
on customer_usage (supplier_id);
create index customer_usage_trunced_date_index
on customer_usage (trunced_date);
alter table edm.customer_usage cluster on "customer_usage_trunced_date_index";
When I query data for a specific "trunced_date", the explain plan shows that it is NOT using the clustered index on that very column.
explain analyze select * from edm.customer_usage where trunced_date = '2021-05-26';
Explain plan:
QUERY PLAN
Seq Scan on customer_usage (cost=0.00..48792.40 rows=1495664 width=92) (actual time=0.053..20115.107 rows=1494912 loops=1)
Filter: (trunced_date = '2021-05-26'::date)
Rows Removed by Filter: 254880
Planning Time: 0.370 ms
Execution Time: 37914.739 ms
I don't understand why -- in the simplest possible query, filtering on exactly the column that the table is clustered on -- this index is not used.
Thank you very much for your help.
Fritz

Related

Why isn't PostgreSQL using an index for this join query

explain (analyze) select
event_user_detail.*
from event_user_detail
inner join guest_list on event_user_detail.guest_list_id = guest_list.id
where
guest_list.event_id=2985739029
Results in the following query plan:
Gather (cost=1052.56..43408.58 rows=244 width=97) (actual time=66.570..67.810 rows=0 loops=1)
Workers Planned: 2
Workers Launched: 2
-> Nested Loop (cost=52.56..42384.18 rows=102 width=97) (actual time=57.183..57.183 rows=0 loops=3)
-> Parallel Seq Scan on guest_list (cost=0.00..13151.33 rows=5 width=8) (actual time=56.941..57.169 rows=2 loops=3)
Filter: (event_id = '2985739029'::bigint)
Rows Removed by Filter: 254489
-> Bitmap Heap Scan on event_user_detail (cost=52.56..5830.93 rows=1564 width=97) (actual time=0.007..0.007 rows=0 loops=5)
Recheck Cond: (guest_list_id = guest_list.id)
-> Bitmap Index Scan on idx_event_user_detail_guest_list_id (cost=0.00..52.17 rows=1564 width=0) (actual time=0.005..0.005 rows=0 loops=5)
Index Cond: (guest_list_id = guest_list.id)
Planning time: 0.252 ms
Execution time: 67.838 ms
This happens even though there is an index on guest_list(event_id). Can someone explain why, and whether there is some way to fix it?
If I split this up into two queries, the first of which just fetches the guest_list ids, and then run a simple IN (...ids), the query is super quick. I tried doing the same with a subquery, but I think the optimiser turned it into a join.
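The two-step workaround described above looks roughly like this (a sketch; the list in step 2 is whatever ids step 1 returns):
-- step 1: fetch the guest_list ids for the event
SELECT id FROM guest_list WHERE event_id = 2985739029;
-- step 2: plug those ids into a simple IN list
SELECT * FROM event_user_detail WHERE guest_list_id IN (/* ids from step 1 */);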
-- ----------------------------
-- Table structure for guest_list
-- ----------------------------
DROP TABLE IF EXISTS "public"."guest_list";
CREATE TABLE "public"."guest_list" (
"id" int8 NOT NULL,
"creation_date" timestamp(6),
"last_modification_date" timestamp(6),
"uuid" uuid,
"deleted" bool NOT NULL,
"name" varchar(255) COLLATE "pg_catalog"."default",
"version" int4,
"event_id" int8,
"permanent_guest_list_id" int8,
"color" varchar(255) COLLATE "pg_catalog"."default"
)
;
-- ----------------------------
-- Indexes structure for table guest_list
-- ----------------------------
CREATE INDEX "idx_guest_list_event_id" ON "public"."guest_list" USING btree (
"event_id" "pg_catalog"."int8_ops" ASC NULLS LAST
);
CREATE INDEX "idx_guest_list_permanent_guest_list_id" ON "public"."guest_list" USING btree (
"permanent_guest_list_id" "pg_catalog"."int8_ops" ASC NULLS LAST
);
-- ----------------------------
-- Uniques structure for table guest_list
-- ----------------------------
ALTER TABLE "public"."guest_list" ADD CONSTRAINT "uk_o4sa0dw6lcdjv96gl2p96xwki" UNIQUE ("uuid");
-- ----------------------------
-- Primary Key structure for table guest_list
-- ----------------------------
ALTER TABLE "public"."guest_list" ADD CONSTRAINT "guest_list_pkey" PRIMARY KEY ("id");
-- ----------------------------
-- Foreign Keys structure for table guest_list
-- ----------------------------
ALTER TABLE "public"."guest_list" ADD CONSTRAINT "fk7tk6fxgyo4h7ykelb9c0pe5ap" FOREIGN KEY ("event_id") REFERENCES "public"."event" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
ALTER TABLE "public"."guest_list" ADD CONSTRAINT "guest_list_permanent_guest_list_id_fkey" FOREIGN KEY ("permanent_guest_list_id") REFERENCES "public"."permanent_guest_list" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
-- ----------------------------
-- Table structure for event_user_detail
-- ----------------------------
DROP TABLE IF EXISTS "public"."event_user_detail";
CREATE TABLE "public"."event_user_detail" (
"id" int8 NOT NULL,
"creation_date" timestamp(6),
"last_modification_date" timestamp(6),
"uuid" uuid,
"deleted" bool NOT NULL,
"name" varchar(255) COLLATE "pg_catalog"."default",
"value" text COLLATE "pg_catalog"."default",
"version" int4,
"event_id" int8,
"user_id" int8,
"guest_list_id" int8,
"reference_user_id" int8
)
;
-- ----------------------------
-- Indexes structure for table event_user_detail
-- ----------------------------
CREATE INDEX "idx_event_user_detail_deleted" ON "public"."event_user_detail" USING btree (
"deleted" "pg_catalog"."bool_ops" ASC NULLS LAST
);
CREATE INDEX "idx_event_user_detail_event_id" ON "public"."event_user_detail" USING btree (
"event_id" "pg_catalog"."int8_ops" ASC NULLS LAST
);
CREATE INDEX "idx_event_user_detail_guest_list_id" ON "public"."event_user_detail" USING btree (
"guest_list_id" "pg_catalog"."int8_ops" ASC NULLS LAST
);
CREATE INDEX "idx_event_user_detail_user_id" ON "public"."event_user_detail" USING btree (
"user_id" "pg_catalog"."int8_ops" ASC NULLS LAST
);
-- ----------------------------
-- Uniques structure for table event_user_detail
-- ----------------------------
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "uk_orfh8fkwtk681af38a65everr" UNIQUE ("uuid");
-- ----------------------------
-- Primary Key structure for table event_user_detail
-- ----------------------------
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "event_user_detail_pkey" PRIMARY KEY ("id");
-- ----------------------------
-- Foreign Keys structure for table event_user_detail
-- ----------------------------
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "fk8bffonom9l1fgcanegl9nm641" FOREIGN KEY ("user_id") REFERENCES "public"."user" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "fk_event_user_detail_guest_list_id" FOREIGN KEY ("guest_list_id") REFERENCES "public"."guest_list" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "fk_event_user_detail_reference_user_id" FOREIGN KEY ("reference_user_id") REFERENCES "public"."user" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
ALTER TABLE "public"."event_user_detail" ADD CONSTRAINT "fkisr2ccpapw537ntw4c0mlytcw" FOREIGN KEY ("event_id") REFERENCES "public"."event" ("id") ON DELETE NO ACTION ON UPDATE NO ACTION;
The planner vastly overestimates how many rows in event_user_detail it is going to find for each row in guest_list (probably because some rows in guest_list do have a lot of entries in event_user_detail, just not the ones you are selecting here). The large number of rows it expects to find makes a parallel query look attractive, but the way to get that parallel query is to use the seq scan on guest_list. So that is what it does.
You can disable parallel queries by setting max_parallel_workers_per_gather to 0. If you don't get much benefit from parallel query anyway, this may be a good "real" solution for you. If you do get a benefit from it and don't want to disable it, then you can at least do this just in the current session to see if my theory is correct.
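A session-level test of that theory might look like this (a sketch; the setting only affects the current session):
SET max_parallel_workers_per_gather = 0;   -- no parallel plans in this session
EXPLAIN (ANALYZE)
SELECT event_user_detail.*
FROM event_user_detail
INNER JOIN guest_list ON event_user_detail.guest_list_id = guest_list.id
WHERE guest_list.event_id = 2985739029;
RESET max_parallel_workers_per_gather;     -- restore the default afterwards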
I concur with jjanes' answer, but I want to suggest these additional experiments:
Run ANALYZE event_user_detail; and see if that improves the estimate.
It could be that random_page_cost is set too high: its default value is tuned for spinning disks, which makes index scans look comparatively expensive. If you lower that parameter, PostgreSQL will be more willing to use index scans.
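A sketch of both experiments (the random_page_cost value is only an illustration, e.g. for SSD storage; the default is 4):
ANALYZE event_user_detail;      -- refresh the planner statistics
SET random_page_cost = 1.1;     -- session-only change
-- re-run the EXPLAIN (ANALYZE) from the question and compare the plans
RESET random_page_cost;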
You can do this using a CTE:
WITH guest AS (
SELECT id FROM guest_list WHERE event_id=2985739029 LIMIT 1
)
SELECT * FROM event_user_detail WHERE guest_list_id IN (SELECT id FROM guest)
In older versions of PostgreSQL, a CTE runs like a separate query in the same transaction: it is planned independently, and its results are not sent to the client.
You can read about them in the docs. Beware that this behaviour changed in PostgreSQL 12; if you want to preserve the old behaviour, you should write it like:
WITH guest AS MATERIALIZED (
SELECT id FROM guest_list WHERE event_id=2985739029 LIMIT 1
)
SELECT * FROM event_user_detail WHERE guest_list_id IN (SELECT id FROM guest)
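For completeness, PostgreSQL 12 and later also accept the opposite hint, NOT MATERIALIZED, which explicitly allows the CTE to be folded into the outer query (a sketch using the same query):
WITH guest AS NOT MATERIALIZED (
SELECT id FROM guest_list WHERE event_id=2985739029 LIMIT 1
)
SELECT * FROM event_user_detail WHERE guest_list_id IN (SELECT id FROM guest)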
CTEs are also very useful for avoiding deadlocks in updates:
WITH to_update AS (
SELECT id FROM my_table WHERE condition
ORDER BY id ASC FOR UPDATE
)
-- joining on the CTE makes sure it is evaluated, so the locks are actually taken
UPDATE my_table SET ...
FROM to_update
WHERE my_table.id = to_update.id;
This locks all the rows in a consistent order, which prevents the deadlocks that are possible with plain UPDATE queries (e.g. if two queries both need to modify ids 1 and 2, with this CTE it cannot happen that the first one locks 1 and waits for 2 while the second locks 2 and waits for 1).

PostgreSQL does not choose my indexes well

create table public.tabla
(
cod_tabla bigint not null,
tabla varchar(31) not null,
constraint pk_tabla primary key (cod_tabla)
);
create table public.entidad
(
cod_entidad bigint not null,
cod_tabla bigint not null,
cod_entidad_tabla bigint not null,
constraint pk_entidad primary key (cod_entidad),
constraint fk_tabla_entidad foreign key (cod_tabla)
references public.tabla (cod_tabla) match simple
on update cascade
on delete cascade
);
CREATE INDEX idx_tabla_entidad
ON public.entidad USING btree
(cod_tabla ASC NULLS LAST);
CREATE INDEX idx_entidad_tabla_4
ON public.entidad USING btree
(cod_entidad_tabla ASC NULLS LAST)
INCLUDE(cod_entidad, cod_tabla, cod_entidad_tabla)
WHERE cod_tabla::bigint = 4;
I think PostgreSQL doesn't use the index idx_entidad_tabla_4; it answers the WHERE condition through the general idx_tabla_entidad index instead:
explain (analyze, buffers, format text) select * from entidad where cod_tabla = 4
Index Scan using idx_tabla_entidad on entidad (cost=0.56..51121.41 rows=1405216 width=20) (actual time=0.037..242.609 rows=1409985 loops=1)
Index Cond: ((cod_tabla)::bigint = 4)
Buffers: shared hit=12839
Planning Time: 0.158 ms
Execution Time: 311.828 ms
SELECT count(*) from entidad;
34,413,354
SELECT count(*) from entidad where cod_tabla = 4;
1,409,985
My questions are:
Why doesn't it use the index idx_entidad_tabla_4?
How could I force its use?

PostgreSQL query does not use index

Table definition is as follows:
CREATE TABLE public.the_table
(
id integer NOT NULL DEFAULT nextval('the_table_id_seq'::regclass),
report_timestamp timestamp without time zone NOT NULL,
value_id integer NOT NULL,
text_value character varying(255),
numeric_value double precision,
bool_value boolean,
dt_value timestamp with time zone,
exported boolean NOT NULL DEFAULT false,
CONSTRAINT the_table_fkey_valdef FOREIGN KEY (value_id)
REFERENCES public.value_defs (value_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE RESTRICT
)
WITH (
OIDS=FALSE
);
ALTER TABLE public.the_table
OWNER TO postgres;
Indices:
CREATE INDEX the_table_idx_id ON public.the_table USING brin (id);
CREATE INDEX the_table_idx_timestamp ON public.the_table USING btree (report_timestamp);
CREATE INDEX the_table_idx_tsvid ON public.the_table USING brin (report_timestamp, value_id);
CREATE INDEX the_table_idx_valueid ON public.the_table USING btree (value_id);
The query is:
SELECT * FROM the_table r WHERE r.value_id = 1064 ORDER BY r.report_timestamp desc LIMIT 1;
While running the query PostgreSQL does not use the_table_idx_valueid index.
Why?
If anything, this index will help:
CREATE INDEX ON the_table (value_id, report_timestamp);
Depending on the selectivity of the condition and the number of rows in the table, PostgreSQL may correctly deduce that a sequential scan and a sort is faster than an index scan.
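If the query always asks for the newest row per value_id, an explicitly descending second column works too (a sketch; the index name is made up):
CREATE INDEX the_table_idx_valueid_ts_desc
ON the_table (value_id, report_timestamp DESC);
-- a plain (value_id, report_timestamp) index can also be scanned backwards,
-- so either variant can satisfy ORDER BY report_timestamp DESC LIMIT 1 without a sort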

Why is this GIN index (jsonb) not used in my case

My code is here: https://rextester.com/CBYD42261. I use '#>' with a GIN index to search, but PostgreSQL uses a Seq Scan.
DROP TABLE IF EXISTS sponsor_projects;
CREATE TABLE sponsor_projects (
project_id BIGSERIAL PRIMARY KEY,
sponsor_id bigint NOT NULL,
status smallint NOT NULL DEFAULT 0,
name character varying(64) NOT NULL,
category character varying(10) NOT NULL,
purpose jsonb NOT NULL,
qrcode character varying(200) NOT NULL,
plaform character varying(10) NOT NULL,
budget double precision NOT NULL,
budget_used double precision NOT NULL,
sticker character varying(10) NOT NULL,
spread character varying(10) NOT NULL,
areas jsonb NOT NULL,
paused boolean DEFAULT false,
terminated boolean DEFAULT false,
start_time timestamp(0) without time zone NOT NULL,
end_time timestamp(0) without time zone,
shops jsonb NOT NULL,
created_by integer,
created_at timestamp(0) without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp(0) without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
deleted boolean DEFAULT false,
deleted_at timestamp(0) without time zone DEFAULT NULL::timestamp without time zone
);
-- Indices -------------------------------------------------------
CREATE INDEX idx_sponsor_projects_sponsor_id ON sponsor_projects(sponsor_id int8_ops);
CREATE INDEX idx_sponsor_projects ON sponsor_projects USING GIN (shops);
-- Insert Data ---------------------------------------------------
INSERT INTO "sponsor_projects"("sponsor_id","status","name","category","purpose","qrcode","plaform","budget","budget_used","sticker","spread","areas","paused","terminated","start_time","end_time","shops","created_by","created_at","updated_at","deleted","deleted_at")
VALUES
(1,0,E'京东广告',E'3C数码产品',E'["品牌宣传", "流量拉新"]',E'https://jd.com',E'不限',1000,0,E'标准桌贴',E'CPC',E'[{"id": "fc5d1d71-14b1-473d-9db2-c804b0d9ab6c", "path": [[116.68767, 39.877689], [116.712303, 39.868862], [116.704149, 39.8601], [116.6887, 39.865041]]}]',FALSE,FALSE,E'2019-08-20 06:51:26',NULL,E'[{"shop_id": 5}]',NULL,E'2019-08-21 14:55:01',E'2019-08-21 14:55:01',FALSE,NULL);
EXPLAIN ANALYZE
SELECT * FROM sponsor_projects t WHERE t.shops #> '[{"shop_id": 5}]';
It shows:
Seq Scan on sponsor_projects t (cost=0.00..11.00 rows=1 width=893) (actual time=0.013..0.016 rows=1 loops=1)
Filter: (shops #> '[{"shop_id": 1}]'::jsonb)
Rows Removed by Filter: 8
Planning Time: 0.076 ms
Execution Time: 0.033 ms
It is using a Seq Scan, not the GIN index.
What is wrong with my code? Can someone help me?
That is because the table contains only a single row, and a sequential scan is always faster with small tables like that.
To test if the index can be used, discourage the use of sequential scans in your database session:
SET enable_seqscan = off;
Then PostgreSQL will use the index if possible.
Make sure to reset the setting afterwards:
RESET enable_seqscan;
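Putting the test together for this case (a sketch; run it in one session and check whether the plan now mentions idx_sponsor_projects):
SET enable_seqscan = off;
EXPLAIN ANALYZE
SELECT * FROM sponsor_projects t WHERE t.shops #> '[{"shop_id": 5}]';
RESET enable_seqscan;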

PostgreSQL simple select is slow

I have a table:
CREATE TABLE my_table
(
id integer NOT NULL DEFAULT nextval('seq_my_table_id'::regclass),
fk_id1 integer NOT NULL,
fk_id2 smallint NOT NULL,
name character varying(255) NOT NULL,
description text,
currency_name character varying(3) NOT NULL,
created timestamp with time zone NOT NULL DEFAULT now(),
updated timestamp with time zone NOT NULL DEFAULT now(),
CONSTRAINT "PK_my_table_id" PRIMARY KEY (id ),
CONSTRAINT "FK_my_table_fk_id1" FOREIGN KEY (fk_id1)
REFERENCES my_table2 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED,
CONSTRAINT "FK_my_table_fk_id2" FOREIGN KEY (fk_id2)
REFERENCES my_table3 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
)
WITH (
OIDS=FALSE,
autovacuum_enabled=true,
autovacuum_vacuum_threshold=50,
autovacuum_vacuum_scale_factor=0.2,
autovacuum_analyze_threshold=50,
autovacuum_analyze_scale_factor=0.1,
autovacuum_vacuum_cost_delay=20,
autovacuum_vacuum_cost_limit=200,
autovacuum_freeze_min_age=50000000,
autovacuum_freeze_max_age=200000000,
autovacuum_freeze_table_age=150000000
);
ALTER TABLE my_table
OWNER TO postgres;
CREATE INDEX my_table_fk_id1
ON my_table
USING btree
(fk_id1 );
CREATE INDEX my_table_fk_id2
ON my_table
USING btree
(fk_id2 );
Table record counts:
select count(id) from my_table; --24061
select count(id) from my_table2; --24061
select count(id) from my_table3; --123
Execution time:
select * from my_table -- ~17 sec
vacuum/analyze - no effect
description - length ~4000 chars in each row
postgres.conf - standard settings
Version: 9.1
Selecting all fields except description reduces the execution time to ~1.5 sec.
How can I increase the select speed when description is included?
Update:
--explain analyze select * from my_table
"Seq Scan on my_table (cost=0.00..3425.79 rows=24079 width=1015) (actual time=0.019..17.238 rows=24079 loops=1)"
"Total runtime: 18.649 ms"
The question is how to make this fast. The issue is not on the server, since the query takes only about 18 ms there. The simple solution is to select fewer columns so that there is less data to transfer over the network. My guess is that you have long descriptions on some rows. Leave that column off your select and try again.
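A sketch of the narrower select suggested above (the column list is simply the table's columns minus description; keep whichever ones you actually need):
SELECT id, fk_id1, fk_id2, name, currency_name, created, updated
FROM my_table;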