My code is here https://rextester.com/CBYD42261, I use '#>' with gin index to search, but postgresql using Seq Scan.
DROP TABLE IF EXISTS sponsor_projects;
CREATE TABLE sponsor_projects (
project_id BIGSERIAL PRIMARY KEY,
sponsor_id bigint NOT NULL,
status smallint NOT NULL DEFAULT 0,
name character varying(64) NOT NULL,
category character varying(10) NOT NULL,
purpose jsonb NOT NULL,
qrcode character varying(200) NOT NULL,
plaform character varying(10) NOT NULL,
budget double precision NOT NULL,
budget_used double precision NOT NULL,
sticker character varying(10) NOT NULL,
spread character varying(10) NOT NULL,
areas jsonb NOT NULL,
paused boolean DEFAULT false,
terminated boolean DEFAULT false,
start_time timestamp(0) without time zone NOT NULL,
end_time timestamp(0) without time zone,
shops jsonb NOT NULL,
created_by integer,
created_at timestamp(0) without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp(0) without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
deleted boolean DEFAULT false,
deleted_at timestamp(0) without time zone DEFAULT NULL::timestamp without time zone
);
-- Indices -------------------------------------------------------
CREATE INDEX idx_sponsor_projects_sponsor_id ON sponsor_projects(sponsor_id int8_ops);
CREATE INDEX idx_sponsor_projects ON sponsor_projects USING GIN (shops);
-- Insert Data ---------------------------------------------------
INSERT INTO "sponsor_projects"("sponsor_id","status","name","category","purpose","qrcode","plaform","budget","budget_used","sticker","spread","areas","paused","terminated","start_time","end_time","shops","created_by","created_at","updated_at","deleted","deleted_at")
VALUES
(1,0,E'京东广告',E'3C数码产品',E'["品牌宣传", "流量拉新"]',E'https://jd.com',E'不限',1000,0,E'标准桌贴',E'CPC',E'[{"id": "fc5d1d71-14b1-473d-9db2-c804b0d9ab6c", "path": [[116.68767, 39.877689], [116.712303, 39.868862], [116.704149, 39.8601], [116.6887, 39.865041]]}]',FALSE,FALSE,E'2019-08-20 06:51:26',NULL,E'[{"shop_id": 5}]',NULL,E'2019-08-21 14:55:01',E'2019-08-21 14:55:01',FALSE,NULL);
EXPLAIN ANALYZE
SELECT * FROM sponsor_projects t WHERE t.shops #> '[{"shop_id": 5}]';
It shows:
Seq Scan on sponsor_projects t (cost=0.00..11.00 rows=1 width=893) (actual time=0.013..0.016 rows=1 loops=1)
Filter: (shops #> '[{"shop_id": 1}]'::jsonb)
Rows Removed by Filter: 8
Planning Time: 0.076 ms
Execution Time: 0.033 ms
Using Seq Scan, not gin index.
What happend with my code?
Please someone can help me?
That is because the table contains only a single row, and a sequential scan is always faster with small tables like that.
To test if the index can be used, discourage the use of sequential scans in your database session:
SET enable_seqscan = off;
Then PostgreSQL will use the index if possible.
Make sure to reset the setting afterwards:
RESET enable_seqscan;
Related
I have a simple table storing a few values.
create table customer_usage
(
id bigserial not null
constraint customer_usage_pk
primary key,
customer_id bigint not null
constraint customer_usage_customer_id_fk
references crm.customer,
profile_id bigint not null,
supplier_id bigint not null,
direction smallint not null,
pod varchar not null,
trunced_date date not null,
period_end timestamp with time zone not null,
usage_wh real not null,
created_at timestamp with time zone default now() not null
);
create unique index customer_usage_id_uindex
on customer_usage (id);
create index customer_usage_profile_id_index
on customer_usage (profile_id);
create index customer_usage_supplier_id_index
on customer_usage (supplier_id);
create index customer_usage_trunced_date_index
on customer_usage (trunced_date);
alter table edm.customer_usage cluster on "customer_usage_trunced_date_index";
When I try to query data for for a specific "trunced_date" the explain plan shows, that it is NOT using the clustered index for this very column.
explain analyze select * from edm.customer_usage where trunced_date = '2021-05-26';
Explain plan:
QUERY PLAN
Seq Scan on customer_usage (cost=0.00..48792.40 rows=1495664 width=92) (actual time=0.053..20115.107 rows=1494912 loops=1)
Filter: (trunced_date = '2021-05-26'::date)
Rows Removed by Filter: 254880
Planning Time: 0.370 ms
Execution Time: 37914.739 ms
I don't understand, why -- in the simplest query asking specifically for one column that has a clustered index -- this index is not used.
Thank you very much for your help.
Fritz
I've been struggling for hours and I can't find why this query takes too long (> 60 minutes). All 4 tables have less than 50.000 records.
Also if I remove any table (gel6, gf6 or ger6) the query takes less than 500 ms to execute. What am I doing wrong?
Explain plan:
https://explain.depesz.com/s/ldm2
SELECT COUNT(*)
FROM agroapp.ganado g
INNER JOIN (SELECT gel5.ganado_id, gel5.estado_leche
FROM agroapp.ganado_estado_leche gel5
INNER JOIN (SELECT MAX(gel3.ganado_estado_leche_id) ganado_estado_leche_id
FROM agroapp.ganado_estado_leche gel3
INNER JOIN (SELECT gel.ganado_id, MAX(gel.created) created
FROM agroapp.ganado_estado_leche gel
GROUP BY gel.ganado_id) gel2 ON (gel2.ganado_id = gel3.ganado_id AND gel2.created = gel3.created)
GROUP BY gel3.ganado_id) gel4 ON gel4.ganado_estado_leche_id = gel5.ganado_estado_leche_id
) gel6 ON gel6.ganado_id = g.ganado_id
INNER JOIN (SELECT gf5.ganado_id, gf5.fundo_id
FROM agroapp.ganado_fundo gf5
INNER JOIN (SELECT MAX(gf3.ganado_fundo_id) ganado_fundo_id
FROM agroapp.ganado_fundo gf3
INNER JOIN (SELECT gf.ganado_id, MAX(gf.created) created
FROM agroapp.ganado_fundo gf
GROUP BY gf.ganado_id) gf2 ON (gf2.ganado_id = gf3.ganado_id AND gf2.created = gf3.created)
GROUP BY gf3.ganado_id) gf4 ON gf4.ganado_fundo_id = gf5.ganado_fundo_id
) gf6 ON gf6.ganado_id = g.ganado_id
INNER JOIN (SELECT ger5.ganado_id, ger5.estado_reproductivo
FROM agroapp.ganado_estado_reproductivo ger5
INNER JOIN (SELECT MAX(ger3.ganado_estado_reproductivo_id) ganado_estado_reproductivo_id
FROM agroapp.ganado_estado_reproductivo ger3
INNER JOIN (SELECT ger.ganado_id, MAX(ger.created) created
FROM agroapp.ganado_estado_reproductivo ger
GROUP BY ger.ganado_id) ger2 ON (ger2.ganado_id = ger3.ganado_id AND ger2.created = ger3.created)
GROUP BY ger3.ganado_id) ger4 ON ger4.ganado_estado_reproductivo_id = ger5.ganado_estado_reproductivo_id
) ger6 ON ger6.ganado_id = g.ganado_id
WHERE g.organizacion_id = 21
Tables
CREATE TABLE agroapp.ganado_estado_leche
(
ganado_estado_leche_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
estado_leche character varying(80) NOT NULL,
ganado_id integer NOT NULL,
fecha_manejo timestamp without time zone NOT NULL,
CONSTRAINT ganado_estado_leche_pk PRIMARY KEY (ganado_estado_leche_id),
CONSTRAINT ganado_fk FOREIGN KEY (ganado_id)
REFERENCES agroapp.ganado (ganado_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
CREATE TABLE agroapp.ganado_fundo
(
ganado_fundo_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
fundo_id integer NOT NULL,
ganado_id integer NOT NULL,
CONSTRAINT ganado_fundo_pk PRIMARY KEY (ganado_fundo_id),
CONSTRAINT ganado_fk FOREIGN KEY (ganado_id)
REFERENCES agroapp.ganado (ganado_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
CREATE TABLE agroapp.ganado_estado_reproductivo
(
ganado_estado_reproductivo_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
estado_reproductivo character varying(80) NOT NULL,
ganado_id integer NOT NULL,
fecha_manejo timestamp without time zone NOT NULL,
CONSTRAINT ganado_estado_reproductivo_pk PRIMARY KEY (ganado_estado_reproductivo_id),
CONSTRAINT ganado_fk FOREIGN KEY (ganado_id)
REFERENCES agroapp.ganado (ganado_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
CREATE TABLE agroapp.ganado
(
ganado_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
fecha_nacimiento timestamp without time zone NOT NULL,
tipo_ganado character varying(80) NOT NULL,
diio_id integer NOT NULL,
fundo_id integer NOT NULL,
raza_id integer NOT NULL,
estado_reproductivo character varying(80) NOT NULL,
estado_leche character varying(80),
CONSTRAINT ganado_pk PRIMARY KEY (ganado_id),
CONSTRAINT diio_fk FOREIGN KEY (diio_id)
REFERENCES agroapp.diio (diio_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT fundo_fk FOREIGN KEY (fundo_id)
REFERENCES agroapp.fundo (fundo_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT raza_fk FOREIGN KEY (raza_id)
REFERENCES agroapp.raza (raza_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
Table design
This looks very much like a boolean column (yes / no):
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar
If so, replace with:
isactive bool NOT NULL DEFAULT TRUE
If you might involve multiple times zones in any way, use timestamptz instead of timestamp here:
created timestamp without time zone NOT NULL DEFAULT now(),
The default now() produces timestamptz and after the assignment cast results in the current time according to the time zone of the session. I.e., the value changes with the timezone of the session, which is a sneaky point of failure. See:
- Ignoring time zones altogether in Rails and PostgreSQL
And:
createdby numeric(10,0) NOT NULL
et al. look like they should really be just integer. (Or maybe bigint if you really think you might burn through more than 2147483648 numbers ...)
Query
Looking at the first subquery:
SELECT gel5.ganado_id, gel5.estado_leche
FROM agroapp.ganado_estado_leche gel5
INNER JOIN (
SELECT MAX(gel3.ganado_estado_leche_id) ganado_estado_leche_id
FROM agroapp.ganado_estado_leche gel3
INNER JOIN (
SELECT gel.ganado_id, MAX(gel.created) created
FROM agroapp.ganado_estado_leche gel
GROUP BY gel.ganado_id
) gel2 ON (gel2.ganado_id = gel3.ganado_id AND gel2.created = gel3.created)
GROUP BY gel3.ganado_id
) gel4 ON gel4.ganado_estado_leche_id = gel5.ganado_estado_leche_id
The innermost subquery gets the max. created per ganado_id, the next one the max ganado_estado_leche_id of those rows. And finally you join back and retrieve all ganado_id that appear in combination with the identified max ganado_estado_leche_id per partition. I have a hard time making sense of this, but it can be simplified to:
SELECT gel2.ganado_id
FROM agroapp.ganado_estado_leche gel2
JOIN (
SELECT DISTINCT ON (ganado_id) ganado_estado_leche_id
FROM agroapp.ganado_estado_leche
ORDER BY ganado_id, created DESC NULLS LAST, ganado_estado_leche_id DESC NULLS LAST
) gel1 USING (ganado_estado_leche_id)
See:
Select first row in each GROUP BY group?
Looks like an incorrect query to me. Same with the rest of the query: the joins multiply rows in an odd fashion. Not sure what you are trying to count, but I doubt the query counts just that. You did not provide enough information to make sense of it.
The query in this state takes more than 5 minutes to execute. If I remove any of the ::DATE conversions (see comment in code) the execution time goes < 500 ms.
For example, if I change gf.created::DATE to gf.created the performance is dramatically increased. Same happens if I change gtg.created::DATE to gtg.created.
Why is there a huge difference when using both ::DATE conversions if each shows great performance on its own?
SELECT gtg6.tipo_ganado, COUNT(gtg6.tipo_ganado) animales
FROM agroapp.ganado g
INNER JOIN (SELECT gf5.ganado_id, gf5.fundo_id
FROM agroapp.ganado_fundo gf5
INNER JOIN (SELECT MAX(gf3.ganado_fundo_id) ganado_fundo_id
FROM agroapp.ganado_fundo gf3
INNER JOIN (SELECT gf.ganado_id, MAX(gf.created) created
FROM agroapp.ganado_fundo gf
WHERE gf.isactive = 'Y'
-- HERE CHANGING gf.created::DATE TO gf.created
AND gf.created::DATE <= '20181030'::DATE
GROUP BY gf.ganado_id) gf2 ON (gf2.ganado_id = gf3.ganado_id AND gf2.created = gf3.created)
WHERE gf3.isactive = 'Y'
GROUP BY gf3.ganado_id) gf4 ON gf4.ganado_fundo_id = gf5.ganado_fundo_id
) gf6 ON gf6.ganado_id = g.ganado_id
INNER JOIN (SELECT gtg5.ganado_id, gtg5.tipo_ganado
FROM agroapp.ganado_tipo_ganado gtg5
INNER JOIN (SELECT MAX(gtg3.ganado_tipo_ganado_id) ganado_tipo_ganado_id
FROM agroapp.ganado_tipo_ganado gtg3
INNER JOIN (SELECT gtg.ganado_id, MAX(gtg.created) created
FROM agroapp.ganado_tipo_ganado gtg
WHERE gtg.isactive = 'Y'
-- OR HERE CHANGING gtg.created::DATE TO gtg.created
AND gtg.created::DATE <= '20181030'::DATE
GROUP BY gtg.ganado_id) gtg2 ON (gtg2.ganado_id = gtg3.ganado_id AND gtg2.created = gtg3.created)
WHERE gtg3.isactive = 'Y'
GROUP BY gtg3.ganado_id) gtg4 ON gtg4.ganado_tipo_ganado_id = gtg5.ganado_tipo_ganado_id
) gtg6 ON gtg6.ganado_id = g.ganado_id
WHERE g.organizacion_id = 21
GROUP BY gtg6.tipo_ganado
ORDER BY gtg6.tipo_ganado;
Table definitions
All 3 tables have around 50000 rows:
CREATE TABLE agroapp.ganado_fundo
(
ganado_fundo_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
fundo_id integer NOT NULL,
ganado_id integer NOT NULL,
CONSTRAINT ganado_fundo_pk PRIMARY KEY (ganado_fundo_id),
CONSTRAINT ganado_fk FOREIGN KEY (ganado_id)
REFERENCES agroapp.ganado (ganado_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
CREATE TABLE agroapp.ganado_tipo_ganado
(
ganado_tipo_ganado_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
tipo_ganado character varying(80) NOT NULL,
ganado_id integer NOT NULL,
CONSTRAINT ganado_tipo_ganado_pk PRIMARY KEY (ganado_tipo_ganado_id),
CONSTRAINT ganado_fk FOREIGN KEY (ganado_id)
REFERENCES agroapp.ganado (ganado_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
CREATE TABLE agroapp.ganado
(
ganado_id serial NOT NULL,
organizacion_id integer NOT NULL,
isactive character(1) NOT NULL DEFAULT 'Y'::bpchar,
created timestamp without time zone NOT NULL DEFAULT now(),
createdby numeric(10,0) NOT NULL,
updated timestamp without time zone NOT NULL DEFAULT now(),
updatedby numeric(10,0) NOT NULL,
fecha_nacimiento timestamp without time zone NOT NULL,
tipo_ganado character varying(80) NOT NULL,
diio_id integer NOT NULL,
fundo_id integer NOT NULL,
raza_id integer NOT NULL,
estado_reproductivo character varying(80) NOT NULL,
estado_leche character varying(80),
CONSTRAINT ganado_pk PRIMARY KEY (ganado_id),
CONSTRAINT diio_fk FOREIGN KEY (diio_id)
REFERENCES agroapp.diio (diio_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT fundo_fk FOREIGN KEY (fundo_id)
REFERENCES agroapp.fundo (fundo_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT raza_fk FOREIGN KEY (raza_id)
REFERENCES agroapp.raza (raza_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
Most probably because the forced cast voids the option to use an index on the column agroapp.ganado_fundo.created
Guessing (for lack of information) that gf.created is of type timestamp with time zone (or timestamp), replace
AND gf.created::DATE <= '20181030'::DATE
with:
AND gf.created < '2018-10-31'::timestamp -- match the data type of the column!
to achieve the same result, but with index support.
If you operate with timestamtptz, be aware of implications on the date: it depends on the current time zone. Details:
Ignoring time zones altogether in Rails and PostgreSQL
The below ALTER command is taking long time, but not executing.
alter table DETAILS alter column row_id type numeric(20);
DDL is as follows:
CREATE TABLE Details
(
row_id numeric(15,0) NOT NULL,
intfid character varying(20) NOT NULL,
seqno numeric(15,0) NOT NULL,
record_id numeric(15,0) NOT NULL,
lstmoddate timestamp without time zone NOT NULL,
rcvddate timestamp without time zone NOT NULL DEFAULT current_date,
record_type character varying(60),
xmldata bytea,
CONSTRAINT mrd_pk PRIMARY KEY (rcvddate, intfid, seqno, record_id)
)
i have a table:
CREATE TABLE my_table
(
id integer NOT NULL DEFAULT nextval('seq_my_table_id'::regclass),
fk_id1 integer NOT NULL,
fk_id2 smallint NOT NULL,
name character varying(255) NOT NULL,
description text,
currency_name character varying(3) NOT NULL,
created timestamp with time zone NOT NULL DEFAULT now(),
updated timestamp with time zone NOT NULL DEFAULT now(),
CONSTRAINT "PK_my_table_id" PRIMARY KEY (id ),
CONSTRAINT "FK_my_table_fk_id1" FOREIGN KEY (fk_id1)
REFERENCES my_table2 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED,
CONSTRAINT "FK_my_table_fk_id2" FOREIGN KEY (fk_id2)
REFERENCES my_table3 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
)
WITH (
OIDS=FALSE,
autovacuum_enabled=true,
autovacuum_vacuum_threshold=50,
autovacuum_vacuum_scale_factor=0.2,
autovacuum_analyze_threshold=50,
autovacuum_analyze_scale_factor=0.1,
autovacuum_vacuum_cost_delay=20,
autovacuum_vacuum_cost_limit=200,
autovacuum_freeze_min_age=50000000,
autovacuum_freeze_max_age=200000000,
autovacuum_freeze_table_age=150000000
);
ALTER TABLE my_table
OWNER TO postgres;
CREATE INDEX my_table_fk_id1
ON my_table
USING btree
(fk_id1 );
CREATE INDEX my_table_fk_id2
ON my_table
USING btree
(fk_id2 );
tables records count
select count(id) from my_table; --24061
select count(id) from my_table2; --24061
select count(id) from my_table3; --123
execution time
select * from my_table -- ~17sec
vacuum/analyze - no effect
description - length ~ 4000 chars in each row
postgres.conf - standart settings
Version: 9.1
select all fields except description reduce execution time to ~1,5 sec
How to icrease select speed with description ?
upd
--explain analyze select * from my_table
"Seq Scan on my_table (cost=0.00..3425.79 rows=24079 width=1015) (actual time=0.019..17.238 rows=24079 loops=1)"
"Total runtime: 18.649 ms"
The question is how to make this fast. The issue is not on the server since it takes 18ms there. The simple solution is to select fewer columns so that there is less to transfer over the network. My guess is that you have long descriptions on some. Leave that column off your select and try again.