Query on Postgres totally holds down server - postgresql

I am currently moving our database from Microsoft SQL Server to PostgreSQL 9.1.
There is a simple query that calculates a summary of our store:
-- Per-part-name summary of parts.spareparts: for each distinct part_name_id,
-- the total quantity and the total value (price * quantity).
-- NOTE(review): both scalar subqueries are correlated on p.part_name_id and the
-- outer query has no GROUP BY, so as written they are evaluated for the rows of
-- p before DISTINCT removes duplicates; confirm the actual cost with EXPLAIN.
-- NOTE(review): part_name_id is nullable in the table definition; for an outer
-- row with NULL part_name_id the equality below matches nothing, so both sums
-- come back NULL for that row.
SELECT DISTINCT p.part_name_id,
-- Total quantity over every row sharing the outer row's part_name_id.
(SELECT SUM(p1.quantity)
FROM parts.spareparts p1
WHERE p1.part_name_id = p.part_name_id) AS AllQuantity,
-- Total value (price * quantity) over the same set of rows.
(SELECT SUM(p2.price * p2.quantity)
FROM parts.spareparts p2
WHERE p2.part_name_id = p.part_name_id) AS AllPrice
FROM parts.spareparts p
It runs very fast on MSSQL (less than one second); there are about 150,000 records in the spareparts table.
In PostgreSQL I waited for 200,000 milliseconds and still did not get a result.
What am I doing wrong?
P.S.: table definitions:
-- Table: parts.spareparts
-- One row per spare-part record; this is the table the summary query reads.
-- DROP TABLE parts.spareparts;
CREATE TABLE parts.spareparts
(
-- Auto-incrementing key; declared as the primary key by spareparts_pk below.
id serial NOT NULL,
big_id bigint NOT NULL,
part_unique integer NOT NULL,
store_address integer,
-- The *_id columns below are plain integers; no FOREIGN KEY constraints are
-- declared in this definition.
brand_id integer,
model_id integer,
category_id integer,
-- Grouping key of the summary query. Nullable.
part_name_id integer,
-- PostgreSQL money type (currency amount with fixed fractional precision).
-- Nullable, so price * quantity is NULL for rows missing either value.
price money,
quantity integer,
description character varying(250),
private_info character varying(600),
manager_id integer,
company_id integer,
part_type smallint,
box_number integer,
com_person character varying(200),
com_phone character varying(200),
vendor_id integer,
-- Boolean flags; each has a default but none is NOT NULL, so NULL is possible.
is_publish boolean DEFAULT true,
is_comission boolean DEFAULT false,
is_new boolean DEFAULT false,
is_warning boolean DEFAULT false,
catalog_no character varying(200),
disc_id integer,
is_set boolean,
-- numeric(3,2): three significant digits, two after the decimal point,
-- i.e. values from -9.99 to 9.99.
w_height numeric(3,2),
w_width numeric(3,2),
w_diam numeric(3,2),
w_type integer,
page_url character varying(150),
last_edit_manager_id integer,
CONSTRAINT spareparts_pk PRIMARY KEY (id)
)
-- Create the table without the legacy system OID column.
WITH (
OIDS=FALSE
);
ALTER TABLE parts.spareparts
OWNER TO asap;
-- Secondary btree indexes on parts.spareparts.
-- NOTE(review): none of these indexes has part_name_id as its leading column;
-- part_name_id only appears as the 4th column of spareparts_4param_idx and the
-- 2nd column of spareparts_brand_id_part_id_quantity_idx.
-- Index: parts.sparepart_part_unique_idx
-- DROP INDEX parts.sparepart_part_unique_idx;
-- NOTE(review): covers the same two columns as
-- spareparts_company_id_part_unique_idx below, in the opposite order.
CREATE INDEX sparepart_part_unique_idx
ON parts.spareparts
USING btree
(part_unique, company_id);
-- Index: parts.spareparts_4param_idx
-- DROP INDEX parts.spareparts_4param_idx;
CREATE INDEX spareparts_4param_idx
ON parts.spareparts
USING btree
(brand_id, model_id, category_id, part_name_id);
-- Index: parts.spareparts_bigid_idx
-- DROP INDEX parts.spareparts_bigid_idx;
CREATE INDEX spareparts_bigid_idx
ON parts.spareparts
USING btree
(big_id);
-- Index: parts.spareparts_brand_id_part_id_quantity_idx
-- DROP INDEX parts.spareparts_brand_id_part_id_quantity_idx;
CREATE INDEX spareparts_brand_id_part_id_quantity_idx
ON parts.spareparts
USING btree
(brand_id, part_name_id, quantity);
-- Index: parts.spareparts_brand_id_quantity_idx
-- DROP INDEX parts.spareparts_brand_id_quantity_idx;
CREATE INDEX spareparts_brand_id_quantity_idx
ON parts.spareparts
USING btree
(brand_id, quantity);
-- Index: parts.spareparts_company_id_part_unique_idx
-- DROP INDEX parts.spareparts_company_id_part_unique_idx;
CREATE INDEX spareparts_company_id_part_unique_idx
ON parts.spareparts
USING btree
(company_id, part_unique);
-- Index: parts.spareparts_model_id_company_id
-- DROP INDEX parts.spareparts_model_id_company_id;
CREATE INDEX spareparts_model_id_company_id
ON parts.spareparts
USING btree
(model_id, company_id);
-- The stored index comment below is Russian for "For frmFilter".
COMMENT ON INDEX parts.spareparts_model_id_company_id
IS 'Для frmFilter';
-- Index: parts.spareparts_url_idx
-- DROP INDEX parts.spareparts_url_idx;
CREATE INDEX spareparts_url_idx
ON parts.spareparts
USING btree
(page_url COLLATE pg_catalog."default");
-- Row-level triggers on parts.spareparts. The trigger functions they call are
-- defined elsewhere and are not shown here.
-- Trigger: spareparts_delete_trigger on parts.spareparts
-- DROP TRIGGER spareparts_delete_trigger ON parts.spareparts;
-- Fires once per deleted row, after the delete.
CREATE TRIGGER spareparts_delete_trigger
AFTER DELETE
ON parts.spareparts
FOR EACH ROW
EXECUTE PROCEDURE parts.spareparts_delete_fn();
-- Trigger: spareparts_update_trigger on parts.spareparts
-- DROP TRIGGER spareparts_update_trigger ON parts.spareparts;
-- Fires once per inserted or updated row, after the change.
CREATE TRIGGER spareparts_update_trigger
AFTER INSERT OR UPDATE
ON parts.spareparts
FOR EACH ROW
EXECUTE PROCEDURE parts.spareparts_update_fn();

I think you can rewrite the query without the need of the nested selects:
-- One output row per part_name_id: total quantity and total value
-- (price * quantity), computed with a single GROUP BY over parts.spareparts.
SELECT
    sp.part_name_id,
    SUM(sp.quantity) AS AllQuantity,
    SUM(sp.price * sp.quantity) AS AllPrice
FROM parts.spareparts AS sp
GROUP BY sp.part_name_id

I don't think you actually need the subqueries; you can write simply:
-- One output row per part_name_id: total quantity and total value
-- (price * quantity). Rows with a NULL part_name_id form a single group.
SELECT
    part_name_id,
    SUM(quantity) AS AllQuantity,
    SUM(price * quantity) AS AllPrice
-- The table is parts.spareparts (no underscore), matching the question's DDL.
FROM parts.spareparts
GROUP BY part_name_id
;
which should be much more efficient.

Related

Simple POSTGRESQL SELECT query too slow

I have a table that stores logs from an Electronic Invoicing System webservice, this is my SQL Structure
CREATE TABLE public.eis_transactions
(
id bigint NOT NULL DEFAULT nextval('eis_transactions_id_seq'::regclass),
operation_type character varying COLLATE pg_catalog."default",
sale_id integer,
delivery_note_id integer,
sale_credit_note_id integer,
debit_note_id integer,
cdc text COLLATE pg_catalog."default",
transaction_id text COLLATE pg_catalog."default",
response_code character varying COLLATE pg_catalog."default",
response_description text COLLATE pg_catalog."default",
xml text COLLATE pg_catalog."default",
response_xml text COLLATE pg_catalog."default",
response_datetime timestamp without time zone,
created timestamp without time zone,
modified timestamp without time zone,
user_id integer,
async boolean DEFAULT false,
url character varying COLLATE pg_catalog."default",
final_xml text COLLATE pg_catalog."default",
CONSTRAINT eis_transactions_pkey PRIMARY KEY (id),
CONSTRAINT eis_transactions_debit_note_id_fkey FOREIGN KEY (debit_note_id)
REFERENCES public.debit_notes (id) MATCH SIMPLE
ON UPDATE RESTRICT
ON DELETE RESTRICT,
CONSTRAINT eis_transactions_delivery_note_id_fkey FOREIGN KEY (delivery_note_id)
REFERENCES public.delivery_notes (id) MATCH SIMPLE
ON UPDATE RESTRICT
ON DELETE RESTRICT,
CONSTRAINT eis_transactions_sale_credit_note_id_fkey FOREIGN KEY (sale_credit_note_id)
REFERENCES public.sale_credit_notes (id) MATCH SIMPLE
ON UPDATE RESTRICT
ON DELETE RESTRICT,
CONSTRAINT eis_transactions_sale_id_fkey FOREIGN KEY (sale_id)
REFERENCES public.sales (id) MATCH SIMPLE
ON UPDATE RESTRICT
ON DELETE RESTRICT,
CONSTRAINT eis_transactions_user_id_fkey FOREIGN KEY (user_id)
REFERENCES public.users (id) MATCH SIMPLE
ON UPDATE RESTRICT
ON DELETE RESTRICT
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.eis_transactions
OWNER to postgres;
-- Secondary btree indexes on public.eis_transactions.
-- NOTE(review): id is already the primary key (eis_transactions_pkey), and
-- PostgreSQL creates a unique btree index for every PRIMARY KEY constraint.
-- The three single-column indexes on id below are therefore in addition to it.
-- Index: eis_transactions_id_idx
-- DROP INDEX public.eis_transactions_id_idx;
-- ASC NULLS LAST is the default btree ordering, i.e. the same ordering as the
-- primary-key index.
CREATE INDEX eis_transactions_id_idx
ON public.eis_transactions USING btree
(id ASC NULLS LAST)
TABLESPACE pg_default;
-- Index: eis_transactions_id_idx1
-- DROP INDEX public.eis_transactions_id_idx1;
-- Differs from the default ordering only in NULL placement; id is NOT NULL.
CREATE INDEX eis_transactions_id_idx1
ON public.eis_transactions USING btree
(id ASC NULLS FIRST)
TABLESPACE pg_default;
-- Index: eis_transactions_id_idx2
-- DROP INDEX public.eis_transactions_id_idx2;
-- Exact reverse of the default ordering (a btree index can also be scanned
-- backward to produce this order).
CREATE INDEX eis_transactions_id_idx2
ON public.eis_transactions USING btree
(id DESC NULLS FIRST)
TABLESPACE pg_default;
-- Index: eis_transactions_sale_id_delivery_note_id_sale_credit_note__idx
-- DROP INDEX public.eis_transactions_sale_id_delivery_note_id_sale_credit_note__idx;
-- Composite index over the five foreign-key columns, led by sale_id.
CREATE INDEX eis_transactions_sale_id_delivery_note_id_sale_credit_note__idx
ON public.eis_transactions USING btree
(sale_id ASC NULLS LAST, delivery_note_id ASC NULLS LAST, sale_credit_note_id ASC NULLS LAST, debit_note_id ASC NULLS LAST, user_id ASC NULLS LAST)
TABLESPACE pg_default;
It contains ~800 rows; this is the query:
SELECT * FROM eis_transactions LIMIT 1000;
It takes more than 60 seconds to complete the query.
And this is the EXPLAIN ANALYZE result I got:
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM eis_transactions LIMIT 100;
Limit (cost=0.00..15.94 rows=100 width=1108) (actual time=0.013..0.121 rows=100 loops=1)
Buffers: shared read=15
-> Seq Scan on eis_transactions (cost=0.00..128.03 rows=803 width=1108) (actual time=0.012..0.106 rows=100 loops=1)
Buffers: shared read=15
Total runtime: 0.180 ms
But doing a SELECT * FROM eis_transactions (with or without LIMIT) takes more than 60 seconds, while I have other tables with more than 1000 rows and they don't take as long as this particular table.
What could be wrong ?
Thank you !

PostgreSQL query does not use index

Table definition is as follows:
-- public.the_table: one row per reported value. Each row references
-- public.value_defs through value_id.
-- NOTE(review): no PRIMARY KEY or UNIQUE constraint is declared in this
-- definition; id is only NOT NULL with a sequence default.
CREATE TABLE public.the_table
(
id integer NOT NULL DEFAULT nextval('the_table_id_seq'::regclass),
-- Stored without time zone, unlike dt_value below.
report_timestamp timestamp without time zone NOT NULL,
value_id integer NOT NULL,
-- Four nullable value columns of different types.
text_value character varying(255),
numeric_value double precision,
bool_value boolean,
dt_value timestamp with time zone,
exported boolean NOT NULL DEFAULT false,
-- Deleting a referenced value_defs row is rejected (ON DELETE RESTRICT).
CONSTRAINT the_table_fkey_valdef FOREIGN KEY (value_id)
REFERENCES public.value_defs (value_id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE RESTRICT
)
-- Create the table without the legacy system OID column.
WITH (
OIDS=FALSE
);
ALTER TABLE public.the_table
OWNER TO postgres;
Indices:
-- Indexes on public.the_table: two BRIN (block-range) and two btree.
-- NOTE(review): no btree index combines value_id with report_timestamp; the
-- only index containing both columns (the_table_idx_tsvid) is BRIN.
-- BRIN index on id.
CREATE INDEX the_table_idx_id ON public.the_table USING brin (id);
-- btree index on report_timestamp alone.
CREATE INDEX the_table_idx_timestamp ON public.the_table USING btree (report_timestamp);
-- BRIN index over (report_timestamp, value_id).
CREATE INDEX the_table_idx_tsvid ON public.the_table USING brin (report_timestamp, value_id);
-- btree index on value_id alone.
CREATE INDEX the_table_idx_valueid ON public.the_table USING btree (value_id);
The query is:
-- Most recent row (by report_timestamp) for value_id 1064.
SELECT *
FROM the_table AS r
WHERE r.value_id = 1064
ORDER BY r.report_timestamp DESC
LIMIT 1;
While running the query PostgreSQL does not use the_table_idx_valueid index.
Why?
If anything, this index will help:
-- Composite btree index: equality on value_id first, then report_timestamp,
-- so the matching rows can be read in timestamp order (scanning backward for
-- ORDER BY ... DESC). btree is the default access method, spelled out here.
CREATE INDEX
    ON the_table
    USING btree (value_id, report_timestamp);
Depending on the selectivity of the condition and the number of rows in the table, PostgreSQL may correctly deduce that a sequential scan and a sort is faster than an index scan.

Postgres date search slower for less than vs greater than

I am dealing with a weird issue where a date based query runs much slower when using >= vs <=. The execution plans are here:
Slow
Fast
It looks like when it is doing the slow one, it does 3 nested loops and when it is doing the fast one it does a join but I don't get why. I've done vacuum, analyze etc to no result.
Here are the SQLs too
-- Table: public.hfj_spidx_date
-- DROP TABLE public.hfj_spidx_date;
CREATE TABLE public.hfj_spidx_date
(
sp_id bigint NOT NULL,
sp_missing boolean,
sp_name character varying(100) COLLATE pg_catalog."default" NOT NULL,
res_id bigint,
res_type character varying(255) COLLATE pg_catalog."default" NOT NULL,
sp_updated timestamp without time zone,
hash_identity bigint,
sp_value_high timestamp without time zone,
sp_value_low timestamp without time zone,
CONSTRAINT hfj_spidx_date_pkey PRIMARY KEY (sp_id),
CONSTRAINT fk17s70oa59rm9n61k9thjqrsqm FOREIGN KEY (res_id)
REFERENCES public.hfj_resource (res_id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.hfj_spidx_date
OWNER to dbadmin;
-- Index: idx_sp_date_hash
-- DROP INDEX public.idx_sp_date_hash;
CREATE INDEX idx_sp_date_hash
ON public.hfj_spidx_date USING btree
(hash_identity, sp_value_low, sp_value_high)
TABLESPACE pg_default;
-- Index: idx_sp_date_resid
-- DROP INDEX public.idx_sp_date_resid;
CREATE INDEX idx_sp_date_resid
ON public.hfj_spidx_date USING btree
(res_id)
TABLESPACE pg_default;
-- Index: idx_sp_date_updated
-- DROP INDEX public.idx_sp_date_updated;
CREATE INDEX idx_sp_date_updated
ON public.hfj_spidx_date USING btree
(sp_updated)
TABLESPACE pg_default;
-------------------------------------
-- Table: public.hfj_res_link
-- DROP TABLE public.hfj_res_link;
CREATE TABLE public.hfj_res_link
(
pid bigint NOT NULL,
src_path character varying(200) COLLATE pg_catalog."default" NOT NULL,
src_resource_id bigint NOT NULL,
source_resource_type character varying(30) COLLATE pg_catalog."default" NOT NULL,
target_resource_id bigint,
target_resource_type character varying(30) COLLATE pg_catalog."default" NOT NULL,
target_resource_url character varying(200) COLLATE pg_catalog."default",
sp_updated timestamp without time zone,
CONSTRAINT hfj_res_link_pkey PRIMARY KEY (pid),
CONSTRAINT fk_reslink_source FOREIGN KEY (src_resource_id)
REFERENCES public.hfj_resource (res_id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION,
CONSTRAINT fk_reslink_target FOREIGN KEY (target_resource_id)
REFERENCES public.hfj_resource (res_id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.hfj_res_link
OWNER to dbadmin;
-- Index: idx_rl_dest
-- DROP INDEX public.idx_rl_dest;
CREATE INDEX idx_rl_dest
ON public.hfj_res_link USING btree
(target_resource_id)
TABLESPACE pg_default;
-- Index: idx_rl_src
-- DROP INDEX public.idx_rl_src;
CREATE INDEX idx_rl_src
ON public.hfj_res_link USING btree
(src_resource_id)
TABLESPACE pg_default;
-- Index: idx_rl_tpathres
-- DROP INDEX public.idx_rl_tpathres;
CREATE INDEX idx_rl_tpathres
ON public.hfj_res_link USING btree
(src_path COLLATE pg_catalog."default", target_resource_id)
TABLESPACE pg_default;
As I said in my answer to what is pretty much the same question, the problem is the bad estimate in the slow query.
In the fast query PostgreSQL obviously doesn't make the mistake to think that the condition is very selective, so it chooses a different and better plan.

postgres inner JOIN query out of memory

I am trying to query a database using pgAdmin3 and I need to join two tables. I am using the following code:
-- Pairs every table1 row with every table2 row that has the same species.
SELECT
    table1.species,
    table1.trait,
    table1.value,
    table1.units,
    table2.id,
    table2.family,
    table2.latitude,
    table2.longitude,
    table2.species AS speciescheck
FROM table1
INNER JOIN table2
    ON table1.species = table2.species
But I keep running into this error:
an out of memory error
So I've tried to insert my result in a new table, as follow:
-- Materializes the species join into new_table: one row per matching
-- (table1 row, table2 row) pair.
CREATE TABLE new_table AS
SELECT
    table1.species,
    table1.trait,
    table1.value,
    table1.units,
    table2.id,
    table2.family,
    table2.latitude,
    table2.longitude,
    table2.species AS speciescheck
FROM table1
INNER JOIN table2
    ON table1.species = table2.species
And still got an error:
ERROR: could not extend file "base/17675/43101.15": No space left on device
SQL state: 53100
Hint: Check free disk space.
I am very new at this (it is the first time I have had to deal with PostgreSQL) and I guess I can do something to optimize this query and avoid this type of error. I have no privileges in the database. Can anyone help?
Thanks in advance!
Updated:
Table 1 description
-- Table: table1
-- DROP TABLE table1;
CREATE TABLE table1
(
species character varying(100),
trait character varying(50),
value double precision,
units character varying(50)
)
WITH (
OIDS=FALSE
);
ALTER TABLE table1
OWNER TO postgres;
GRANT ALL ON TABLE table1 TO postgres;
GRANT SELECT ON TABLE table1 TO banco;
-- Index: speciestable1_idx
-- DROP INDEX speciestable1_idx;
CREATE INDEX speciestable1_idx
ON table1
USING btree
(species COLLATE pg_catalog."default");
-- Index: traittype_idx
-- DROP INDEX traittype_idx;
CREATE INDEX traittype_idx
ON table1
USING btree
(trait COLLATE pg_catalog."default");
and table2 as:
-- Table: table2
-- DROP TABLE table2;
CREATE TABLE table2
(
id integer NOT NULL,
family character varying(40),
species character varying(100),
plotarea real,
latitude double precision,
longitude double precision,
source integer,
latlon geometry,
CONSTRAINT table2_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
ALTER TABLE table2
OWNER TO postgres;
GRANT ALL ON TABLE table2 TO postgres;
GRANT SELECT ON TABLE table2 TO banco;
-- Index: latlon_gist
-- DROP INDEX latlon_gist;
CREATE INDEX latlon_gist
ON table2
USING gist
(latlon);
-- Index: species_idx
-- DROP INDEX species_idx;
CREATE INDEX species_idx
ON table2
USING btree
(species COLLATE pg_catalog."default");
You're performing a join between two tables on the column species.
Not sure what's in your data, but if species is a column with significantly fewer values than the number of records (e.g. if species is "elephant", "giraffe" and you're analyzing all animals in Africa), this join will match every elephant with every elephant.
When joining two tables most of the time you try to use a unique or close to unique attribute, like id (not sure what id means in your case, but could be it).

postgresql simple select is slow

I have a table:
CREATE TABLE my_table
(
id integer NOT NULL DEFAULT nextval('seq_my_table_id'::regclass),
fk_id1 integer NOT NULL,
fk_id2 smallint NOT NULL,
name character varying(255) NOT NULL,
description text,
currency_name character varying(3) NOT NULL,
created timestamp with time zone NOT NULL DEFAULT now(),
updated timestamp with time zone NOT NULL DEFAULT now(),
CONSTRAINT "PK_my_table_id" PRIMARY KEY (id ),
CONSTRAINT "FK_my_table_fk_id1" FOREIGN KEY (fk_id1)
REFERENCES my_table2 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED,
CONSTRAINT "FK_my_table_fk_id2" FOREIGN KEY (fk_id2)
REFERENCES my_table3 (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
)
WITH (
OIDS=FALSE,
autovacuum_enabled=true,
autovacuum_vacuum_threshold=50,
autovacuum_vacuum_scale_factor=0.2,
autovacuum_analyze_threshold=50,
autovacuum_analyze_scale_factor=0.1,
autovacuum_vacuum_cost_delay=20,
autovacuum_vacuum_cost_limit=200,
autovacuum_freeze_min_age=50000000,
autovacuum_freeze_max_age=200000000,
autovacuum_freeze_table_age=150000000
);
ALTER TABLE my_table
OWNER TO postgres;
CREATE INDEX my_table_fk_id1
ON my_table
USING btree
(fk_id1 );
CREATE INDEX my_table_fk_id2
ON my_table
USING btree
(fk_id2 );
tables records count
select count(id) from my_table; --24061
select count(id) from my_table2; --24061
select count(id) from my_table3; --123
execution time
select * from my_table -- ~17sec
vacuum/analyze - no effect
description - length ~ 4000 chars in each row
postgresql.conf - standard settings
Version: 9.1
Selecting all fields except description reduces the execution time to ~1.5 sec
How can I increase the select speed with description included?
upd
--explain analyze select * from my_table
"Seq Scan on my_table (cost=0.00..3425.79 rows=24079 width=1015) (actual time=0.019..17.238 rows=24079 loops=1)"
"Total runtime: 18.649 ms"
The question is how to make this fast. The issue is not on the server since it takes 18ms there. The simple solution is to select fewer columns so that there is less to transfer over the network. My guess is that you have long descriptions on some. Leave that column off your select and try again.