**Second issue - productivity*4.
There are two tables, test.purchase and test.stock. test.purchase is filled with an INSERT query like the one below, and four triggers calculate the derived values in this table.
-- Sample purchase row used to reproduce the slowdown.
INSERT INTO test.purchase
    (import_id, product_id, euro_price, qty, expiry_date)
VALUES
    (2, 125, 16, 27, '2021-03-03');
Everything is calculated and all values are inserted correctly, but each new query executes much more slowly than the previous one. For example, this query for row #87 took more than two minutes:
Query returned successfully in 2 min 22 secs.
The delay grows so fast that, at this rate, the query for row #1000 would not finish until tomorrow (((.
Please advise.
-- Purchased goods, one row per (product_id, import_id) pair.
-- Both key columns reference their parent tables in the test schema.
CREATE TABLE test.purchase (
    import_id        integer,
    product_id       integer,
    usd_price        numeric(10,2),
    euro_price       numeric(10,2),
    qty              integer,
    euro_stock_price numeric(10,2),
    expiry_date      date,
    euro_stock_amt   numeric(10,2),

    CONSTRAINT purchase_product_id_import_id_key
        UNIQUE (product_id, import_id),

    CONSTRAINT purchase_import_id_fkey
        FOREIGN KEY (import_id)
        REFERENCES test.imports (import_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION,

    CONSTRAINT purchase_product_id_fkey
        FOREIGN KEY (product_id)
        REFERENCES test.products (product_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
);
-- Row-level trigger: after an insert, or an update that lists usd_price in
-- its SET clause, run the USD -> EUR conversion function.
CREATE TRIGGER "0_tr_purchase_convert_price_to_euro"
    AFTER INSERT OR UPDATE OF usd_price ON test.purchase
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_purchase_convert_price_to_euro();
-- Trigger function for "0_tr_purchase_convert_price_to_euro".
--
-- Converts the USD price of the row that fired the trigger into euros,
-- dividing by the rate of the matching test.imports row. Rows whose
-- usd_price is NULL are left untouched.
--
-- The previous version had no row filter: every fired trigger recomputed
-- euro_price for ALL purchases, and each of those row updates fired the
-- "UPDATE OF euro_price" trigger again, so the cost grew with every inserted
-- row. The update is now limited to the row identified by the table's
-- unique key (product_id, import_id). Rows with a NULL key column are not
-- matched by the equality tests below.
--
-- The value is still written with an UPDATE statement (not by assigning
-- NEW.euro_price) so that the column-specific trigger on euro_price keeps
-- firing for this row.
--
-- Returns NEW (the return value of an AFTER row trigger is ignored).
CREATE FUNCTION test.fn_purchase_convert_price_to_euro()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    UPDATE test.purchase pr
    SET euro_price = pr.usd_price / i.rate
    FROM test.imports i
    WHERE pr.import_id = i.import_id
      AND pr.import_id = NEW.import_id
      AND pr.product_id = NEW.product_id
      AND pr.usd_price IS NOT NULL;

    RETURN NEW;
END
$BODY$;
-- Row-level trigger: after an insert, or an update that lists euro_price in
-- its SET clause, recompute the stock price.
CREATE TRIGGER "1_tr_purchase_euro_stock_price"
    AFTER INSERT OR UPDATE OF euro_price ON test.purchase
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_purchase_euro_stock_price();
-- Trigger function for "1_tr_purchase_euro_stock_price".
--
-- Sets euro_stock_price = euro_price + euro_price * costs_per_euro for the
-- row that fired the trigger, where costs_per_euro comes from the matching
-- test.imports row.
--
-- The previous version updated every row of test.purchase on each call,
-- and each updated row fired the "UPDATE OF euro_stock_price" trigger in
-- turn. The update is now limited to the row identified by the unique key
-- (product_id, import_id); rows with a NULL key column are not matched.
--
-- Returns NEW (the return value of an AFTER row trigger is ignored).
CREATE FUNCTION test.fn_purchase_euro_stock_price()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    UPDATE test.purchase pr
    SET euro_stock_price = pr.euro_price + pr.euro_price * i.costs_per_euro
    FROM test.imports i
    WHERE pr.import_id = i.import_id
      AND pr.import_id = NEW.import_id
      AND pr.product_id = NEW.product_id;

    RETURN NEW;
END
$BODY$;
-- Row-level trigger: after an insert, or an update that lists qty or
-- euro_stock_price in its SET clause, recompute the stock amount.
CREATE TRIGGER "2_tr_purchase_euro_stock_amt"
    AFTER INSERT OR UPDATE OF qty, euro_stock_price ON test.purchase
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_purchase_euro_stock_amt();
-- Trigger function for "2_tr_purchase_euro_stock_amt".
--
-- NOTE(review): this position originally held a second, identical copy of
-- test.fn_purchase_stock_amount_in(), which left the function the trigger
-- executes (test.fn_purchase_euro_stock_amt) undefined and defined
-- fn_purchase_stock_amount_in twice. The body below is reconstructed from
-- the stand-alone statement "SET euro_stock_amt = euro_stock_price * qty"
-- shown with the execution plans - confirm against the live database.
--
-- Sets euro_stock_amt = euro_stock_price * qty for the row that fired the
-- trigger only, identified by the unique key (product_id, import_id).
--
-- Returns NEW (the return value of an AFTER row trigger is ignored).
CREATE FUNCTION test.fn_purchase_euro_stock_amt()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    UPDATE test.purchase pr
    SET euro_stock_amt = pr.euro_stock_price * pr.qty
    WHERE pr.import_id = NEW.import_id
      AND pr.product_id = NEW.product_id;

    RETURN NEW;
END
$BODY$;
-- Row-level trigger: after each insert into test.purchase, create the
-- matching stock row.
CREATE TRIGGER "3_tr_purchase_stock_plus_new"
    AFTER INSERT ON test.purchase
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_purchase_stock_plus_new();
-- Trigger function for "3_tr_purchase_stock_plus_new".
-- Copies the key, expiry date and quantity of the newly inserted purchase
-- into a new test.stock row (qty becomes stock_in).
-- Returns NEW.
CREATE FUNCTION test.fn_purchase_stock_plus_new()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    INSERT INTO test.stock (product_id, import_id, expiry_date, stock_in)
    VALUES (NEW.product_id, NEW.import_id, NEW.expiry_date, NEW.qty);

    RETURN NEW;
END
$BODY$;
-- Row-level trigger: after each insert into test.purchase, copy the
-- purchase amount into test.stock.
CREATE TRIGGER "4_tr_purchase_stock_amount_in"
    AFTER INSERT ON test.purchase
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_purchase_stock_amount_in();
-- Trigger function for "4_tr_purchase_stock_amount_in".
--
-- Copies euro_stock_amt of the purchase that fired the trigger into
-- amount_in of the stock row with the same (product_id, import_id).
--
-- The amount is read from test.purchase rather than from NEW: NEW holds the
-- values as originally inserted, while euro_stock_amt is written to the
-- table afterwards by other statements.
--
-- The previous version joined ALL purchases to ALL stock rows on every
-- insert, rewriting the whole stock table each time. The update is now
-- limited to the key of the inserted row; rows with a NULL key column are
-- not matched.
--
-- Returns NEW (the return value of an AFTER row trigger is ignored).
CREATE FUNCTION test.fn_purchase_stock_amount_in()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    UPDATE test.stock s
    SET amount_in = pr.euro_stock_amt
    FROM test.purchase pr
    WHERE pr.import_id = s.import_id
      AND pr.product_id = s.product_id
      AND s.import_id = NEW.import_id
      AND s.product_id = NEW.product_id;

    RETURN NEW;
END
$BODY$;
-- Stock position, one row per (product_id, import_id) pair.
-- stock_balance may never go negative (stock_nonnegative).
CREATE TABLE test.stock (
    product_id     integer,
    import_id      integer,
    expiry_date    date,
    stock_in       integer,
    stock_out      integer,
    stock_balance  integer,
    amount_in      numeric(10,2),
    amount_sold    numeric(10,2),
    amount_balance numeric(10,2),

    CONSTRAINT stock_product_id_import_id_key
        UNIQUE (product_id, import_id),

    CONSTRAINT stock_import_id_fkey
        FOREIGN KEY (import_id)
        REFERENCES test.imports (import_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION,

    CONSTRAINT stock_product_id_fkey
        FOREIGN KEY (product_id)
        REFERENCES test.products (product_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION,

    CONSTRAINT stock_nonnegative
        CHECK (stock_balance >= 0)
);
-- Row-level trigger: after an insert, or an update that lists stock_out in
-- its SET clause, recompute the balances.
CREATE TRIGGER tr_stock_balance_update
    AFTER INSERT OR UPDATE OF stock_out ON test.stock
    FOR EACH ROW
    EXECUTE PROCEDURE test.fn_stock_balance_update();
-- Trigger function for tr_stock_balance_update.
-- For every stock row that shares product_id and expiry_date with the row
-- that fired the trigger, recomputes:
--   stock_balance  = stock_in - stock_out
--   amount_balance = amount_sold - amount_in
-- Returns NEW.
CREATE FUNCTION test.fn_stock_balance_update()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF
AS $BODY$
BEGIN
    UPDATE test.stock s
    SET stock_balance  = s.stock_in - s.stock_out,
        amount_balance = s.amount_sold - s.amount_in
    WHERE s.product_id = NEW.product_id
      AND s.expiry_date = NEW.expiry_date;

    RETURN NEW;
END
$BODY$;
-- Stand-alone run of the statement used by fn_purchase_stock_amount_in:
-- joins every stock row to its purchase on (import_id, product_id).
EXPLAIN (ANALYSE, BUFFERS, SETTINGS)
UPDATE test.stock s
SET amount_in = euro_stock_amt
FROM test.purchase pr
WHERE pr.import_id = s.import_id
  AND pr.product_id = s.product_id;
Update on stock s (cost=18.18..1951.72 rows=45 width=54) (actual time=1.781..1.782 rows=0 loops=1).
Buffers: shared hit=540
-> Nested Loop (cost=18.18..1951.72 rows=45 width=54) (actual time=0.051..1.108 rows=88 loops=1)
Buffers: shared hit=354
-> Seq Scan on stock s (cost=0.00..2.88 rows=88 width=42) (actual time=0.015..0.118 rows=88 loops=1)
Buffers: shared hit=2
-> Bitmap Heap Scan on purchase pr (cost=18.18..22.19 rows=1 width=20) (actual time=0.007..0.007 rows=1 loops=88)
Recheck Cond: ((product_id = s.product_id) AND (import_id = s.import_id))
Heap Blocks: exact=88
Buffers: shared hit=352
-> Bitmap Index Scan on purchase_product_id_import_id_key (cost=0.00..18.18 rows=1 width=0) (actual time=0.004..0.004 rows=1 loops=88)
Index Cond: ((product_id = s.product_id) AND (import_id = s.import_id))
Buffers: shared hit=264
Planning Time: 0.751 ms
Execution Time: 1.914 ms
(15 rows)
EXPLAIN (ANALYSE,BUFFERS,SETTINGS)
INSERT INTO test.stock(
product_id,
import_id,
expiry_date,
stock_in
)
VALUES
(
NEW.product_id, -- NEW is only defined inside a row-level trigger function; outside one it is parsed as a table reference, hence the error
NEW.import_id,
NEW.expiry_date,
NEW.qty
);
ERROR: missing FROM-clause entry for table "new"
LINE 10: NEW.product_id,
It's a very strange result, because it is a direct insert from one table into another, and it works well.
-- Stand-alone run of the statement used by fn_purchase_euro_stock_price.
-- The "RETURN NEW; END;" lines that followed belong to the PL/pgSQL function
-- body and are not valid after a plain SQL statement, so they are omitted.
EXPLAIN (ANALYSE, BUFFERS, SETTINGS)
UPDATE test.purchase pr
SET euro_stock_price = euro_price + euro_price * i.costs_per_euro
FROM test.imports i
WHERE pr.import_id = i.import_id;
Update on purchase pr (cost=2.07..139.16 rows=88 width=60) (actual time=1.128..1.129 rows=0 loops=1)
Buffers: shared hit=627
-> Hash Join (cost=2.07..139.16 rows=88 width=60) (actual time=0.179..0.469 rows=88 loops=1)
Hash Cond: (pr.import_id = i.import_id)
Buffers: shared hit=140
-> Seq Scan on purchase pr (cost=0.00..135.88 rows=88 width=38) (actual time=0.117..0.199 rows=88 loops=1)
Buffers: shared hit=135
-> Hash (cost=2.03..2.03 rows=3 width=14) (actual time=0.029..0.030 rows=3 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
Buffers: shared hit=2
-> Seq Scan on imports i (cost=0.00..2.03 rows=3 width=14) (actual time=0.004..0.013 rows=3 loops=1)
Buffers: shared hit=2
Planning Time: 0.303 ms
Trigger 2_tr_purchase_euro_stock_amt: time=319.528 calls=88
Execution Time: 320.858 ms
(15 rows)
-- No WHERE clause: recomputes euro_stock_amt for every purchase row.
EXPLAIN (ANALYSE, BUFFERS, SETTINGS)
UPDATE test.purchase pr
SET euro_stock_amt = euro_stock_price * qty;
Update on purchase pr (cost=0.00..69.54 rows=88 width=54) (actual time=0.684..0.684 rows=0 loops=1)
Buffers: shared hit=535 dirtied=61
-> Seq Scan on purchase pr (cost=0.00..69.54 rows=88 width=54) (actual time=0.059..0.164 rows=88 loops=1)
Buffers: shared hit=68
Planning Time: 0.140 ms
Execution Time: 0.734 ms
(6 rows)
-- Stand-alone run of the statement used by fn_purchase_convert_price_to_euro:
-- converts usd_price to euros for every purchase that has a USD price.
EXPLAIN (ANALYSE, BUFFERS, SETTINGS)
UPDATE test.purchase pr
SET euro_price = usd_price / i.rate
FROM test.imports i
WHERE pr.import_id = i.import_id
  AND pr.usd_price IS NOT NULL;
Update on purchase pr (cost=2.07..71.28 rows=29 width=61) (actual time=0.448..0.449 rows=0 loops=1)
Buffers: shared hit=130 dirtied=3
-> Hash Join (cost=2.07..71.28 rows=29 width=61) (actual time=0.047..0.258 rows=29 loops=1)
Hash Cond: (pr.import_id = i.import_id)
Buffers: shared hit=70
-> Seq Scan on purchase pr (cost=0.00..68.88 rows=29 width=39) (actual time=0.012..0.138 rows=29 loops=1)
Filter: (usd_price IS NOT NULL)
Rows Removed by Filter: 59
Buffers: shared hit=68
-> Hash (cost=2.03..2.03 rows=3 width=17) (actual time=0.017..0.018 rows=3 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
Buffers: shared hit=2
-> Seq Scan on imports i (cost=0.00..2.03 rows=3 width=17) (actual time=0.004..0.009 rows=3 loops=1)
Buffers: shared hit=2
Planning Time: 0.447 ms
Trigger 1_tr_purchase_euro_stock_price: time=148624.503 calls=29
Execution Time: 148625.095 ms
(17 rows)
It looks like fn_purchase_convert_price_to_euro is the source of the slowdown, so I need to rewrite it.
Thanks to everybody for the advice )
I'm having some doubts about the logic in the triggers. This is just one example of an update, that is not needed (I think):
-- Runs against every purchase with a USD price, not only the inserted row.
UPDATE test.purchase pr
SET euro_price = usd_price / i.rate
FROM test.imports i
WHERE pr.import_id = i.import_id
  AND pr.usd_price IS NOT NULL;
This one fires after the INSERT, see trigger 0_tr_purchase_convert_price_to_euro. You first INSERT the data, then the TRIGGER fires and it updates the record you just inserted. Why? I would have created the euro_price before the INSERT and then INSERT the correct value. And there wouldn't be a need for an UPDATE.
The other functions that do updates, have the same issues.
Related
I would like to have a function that returns a TABLE. I know the user can use the function call in selects and joins just like a table. However, will the select/join be able to use the indexes of the source table that the function TABLE returned?
For example:
Will "select id from permitted_resources() where id = 1" be the same as "select id from resources where id = 1"? (Assuming that there is an index on the id column of the resources table.)
-- PL/pgSQL variant: returns id and name of every row in resources.
CREATE OR REPLACE FUNCTION permitted_resources()
    RETURNS TABLE (id int, name varchar(10))
    LANGUAGE plpgsql
AS $func$
BEGIN
    RETURN QUERY
        SELECT r.id, r.name
        FROM resources AS r;
END
$func$;
Will "select id from permitted_resources() where id = 1" be the same as "select id from resources where id = 1"?
No it will not. A PL/pgSQL function is a black box for the optimizer.
If you want to achieve something like that, use a language sql function:
-- SQL-language variant: returns id and name of every row in resources.
CREATE OR REPLACE FUNCTION permitted_resources()
    RETURNS TABLE (id int, name varchar(10))
    LANGUAGE sql
    STABLE
AS $func$
    SELECT r.id, r.name
    FROM resources AS r;
$func$;
We can test this with the following setup:
-- Demo table; every column except id is filled from its default.
CREATE TABLE test (
    id        integer PRIMARY KEY,
    some_nr   integer DEFAULT random() * 1000 + 1,
    some_date date    DEFAULT current_date,
    some_text text    DEFAULT md5(random()::text)
);

-- Populate ids 1 .. 1,000,000.
INSERT INTO test (id)
SELECT g.n
FROM generate_series(1, 1e6) AS g (n);
Now create one PL/pgSQL function:
-- PL/pgSQL variant: returns every row of test.
CREATE FUNCTION get_data1()
    RETURNS SETOF test
    LANGUAGE plpgsql
    STABLE
AS $$
BEGIN
    RETURN QUERY
        SELECT *
        FROM test;
END;
$$;
And a SQL function:
-- SQL-language variant: returns every row of test.
CREATE FUNCTION get_data2()
    RETURNS SETOF test
    LANGUAGE sql
    STABLE
AS $$
    SELECT *
    FROM test;
$$;
Let's see how the execution plans look:
-- get_data1() is the PL/pgSQL function
EXPLAIN (ANALYZE)
SELECT *
FROM get_data1()
WHERE id = 1234;
Yields the following execution plan:
Function Scan on get_data1 (cost=0.25..4.75 rows=5 width=44) (actual time=261.033..361.218 rows=1 loops=1)
Filter: (id = 1234)
Rows Removed by Filter: 999999
Planning Time: 0.033 ms
Execution Time: 371.302 ms
Apparently it first retrieves all rows and then discards all but one of them.
However,
-- get_data2() is the "SQL" function
EXPLAIN (ANALYZE)
SELECT *
FROM get_data2()
WHERE id = 1234;
Yields the following execution plan:
Index Scan using test_pkey on test (cost=0.42..2.43 rows=1 width=45) (actual time=0.015..0.017 rows=1 loops=1)
Index Cond: (id = 1234)
Planning Time: 0.119 ms
Execution Time: 0.031 ms
The function isn't even mentioned anymore in the plan. Not surprisingly, a plain select yields the same plan:
-- Same lookup directly against the table, for comparison.
EXPLAIN (ANALYZE)
SELECT *
FROM test
WHERE id = 1234;
Index Scan using test_pkey on test (cost=0.42..2.43 rows=1 width=45) (actual time=0.014..0.014 rows=1 loops=1)
Index Cond: (id = 1234)
Planning Time: 0.058 ms
Execution Time: 0.026 ms
I don't know if this holds true for more complex queries, but a simple join between such a function and another table shows the same behaviour.
-- Lookup table keyed by cod_tabla.
CREATE TABLE public.tabla (
    cod_tabla bigint      NOT NULL,
    tabla     varchar(31) NOT NULL,
    CONSTRAINT pk_tabla PRIMARY KEY (cod_tabla)
);
-- Entity table; each row references one public.tabla row, and updates or
-- deletes of the parent cascade to it.
CREATE TABLE public.entidad (
    cod_entidad       bigint NOT NULL,
    cod_tabla         bigint NOT NULL,
    cod_entidad_tabla bigint NOT NULL,
    CONSTRAINT pk_entidad PRIMARY KEY (cod_entidad),
    CONSTRAINT fk_tabla_entidad
        FOREIGN KEY (cod_tabla)
        REFERENCES public.tabla (cod_tabla) MATCH SIMPLE
        ON UPDATE CASCADE
        ON DELETE CASCADE
);
-- Plain b-tree index on the foreign-key column.
CREATE INDEX idx_tabla_entidad
    ON public.entidad
    USING btree (cod_tabla ASC NULLS LAST);
-- Partial covering index for the rows with cod_tabla = 4, keyed on
-- cod_entidad_tabla.
-- cod_entidad_tabla is no longer repeated in INCLUDE: it is already the key
-- column, so listing it again only stored the value twice in every index
-- entry. cod_entidad and cod_tabla stay as payload so that the index still
-- carries every column of the table.
CREATE INDEX idx_entidad_tabla_4
    ON public.entidad USING btree
    (cod_entidad_tabla ASC NULLS LAST)
    INCLUDE (cod_entidad, cod_tabla)
    WHERE cod_tabla::bigint = 4;
I think PostgreSQL doesn't use the index idx_entidad_tabla_4;
it seems to be scanning the entire table sequentially and applying the WHERE condition:
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT *
FROM entidad
WHERE cod_tabla = 4
Index Scan using idx_tabla_entidad on entidad (cost=0.56..51121.41 rows=1405216 width=20) (actual time=0.037..242.609 rows=1409985 loops=1)
Index Cond: ((cod_tabla)::bigint = 4)
Buffers: shared hit=12839
Planning Time: 0.158 ms
Execution Time: 311.828 ms
SELECT count(*)
FROM entidad;
34.413.354
SELECT count(*)
FROM entidad
WHERE cod_tabla = 4;
1.409.985
My questions are:
Why doesn't it use the index idx_entidad_tabla_4?
How could I force its use?
It is working properly: INSERT, UPDATE, DELETE, COPY, etc. work as planned.
But for some reason SELECT is slow.
This is partitioned table:
/** TABLE PARTITIONING EVENT RECORD **/

-- Schema that holds the monthly child tables.
CREATE SCHEMA IF NOT EXISTS test_par_pool;

-- Parent table; rows are redirected to the child tables by a trigger.
CREATE TABLE test_part (
    id                   bigserial                NOT NULL,
    device_id            bigint,
    device_type          bigint,
    record_time          timestamp,
    module_serial_number bigint,
    module_id            bigint,
    message_type         bigint,
    event_code           bigint,
    device_status        bytea,
    sequence_number      bigint,
    data_bytes           bigint,
    device_data          bytea,
    active               boolean,
    deleted              boolean,
    created_time         timestamp                DEFAULT now() NOT NULL,
    created_on           timestamp with time zone DEFAULT now() NOT NULL,
    updated_on           timestamp with time zone DEFAULT now() NOT NULL,
    CONSTRAINT test_part_pkey PRIMARY KEY (id)
);
-- Minimal indexes on the parent table: BRIN on the two id columns and an
-- expression index on the calendar date of created_time.
CREATE INDEX idx_device_id
    ON public.test_part
    USING brin (device_id)
    TABLESPACE pg_default;

CREATE INDEX idx_module_id
    ON public.test_part
    USING brin (module_id)
    TABLESPACE pg_default;

CREATE INDEX idx_er_created_time
    ON public.test_part (CAST(created_time AS date));
-- CREATE INSERT FUNCTIONS
-- BEFORE INSERT row trigger function that routes each new row into a monthly
-- child table test_par_pool.<parent>_YYYY_MM, creating that table (inheriting
-- from the parent) on first use.
--
-- Returns NULL so that the row is NOT also stored in the parent table.
--
-- Changes compared with the previous version:
--   * The existence check looks up the table in test_par_pool only; the old
--     pg_class lookup matched a relation of that name in any schema.
--   * Dynamic SQL is built with format() (%I for identifiers, %L for
--     literals) and the row is passed as a bind parameter instead of being
--     round-tripped through quote_literal(NEW).
--   * The CHECK constraint compares created_time directly. This accepts
--     exactly the same rows as the date_trunc('day', ...) form, but lets
--     constraint exclusion work for queries that filter on created_time.
--   * TG_TABLE_NAME replaces the deprecated TG_RELNAME.
--   * The notice is raised after the table has actually been created.
--
-- NOTE(review): two sessions inserting the first row of a new month at the
-- same time can still both try to create the table; one of them will fail.
CREATE OR REPLACE FUNCTION test_par_insert_function()
    RETURNS TRIGGER AS
$$
DECLARE
    part_name   TEXT;  -- child table name, e.g. test_part_2019_11
    month_start DATE;  -- first day of the row's month (inclusive bound)
    next_month  DATE;  -- first day of the following month (exclusive bound)
BEGIN
    part_name   := TG_TABLE_NAME || '_' || to_char(NEW.created_time, 'YYYY_MM');
    month_start := date_trunc('month', NEW.created_time)::date;
    next_month  := (date_trunc('month', NEW.created_time) + interval '1 month')::date;

    IF to_regclass(format('test_par_pool.%I', part_name)) IS NULL THEN
        EXECUTE format(
            'CREATE TABLE test_par_pool.%I ('
            || ' CHECK (created_time >= %L AND created_time < %L)'
            || ' ) INHERITS (public.%I)',
            part_name, month_start, next_month, TG_TABLE_NAME
        );
        RAISE NOTICE 'A partition has been created %', part_name;
    END IF;

    EXECUTE format('INSERT INTO test_par_pool.%I SELECT ($1).*', part_name)
    USING NEW;

    RETURN NULL;
END
$$
LANGUAGE plpgsql;
-- Redirect every row inserted into the parent table.
CREATE TRIGGER insert_test_part_trigger
    BEFORE INSERT
    ON public.test_part
    FOR EACH ROW
    EXECUTE PROCEDURE public.test_par_insert_function();
This is a simple partitioned table, and I am trying to analyze select count(*).
There are about 1.5 million records in the database. Is there a valid reason why the query would take so much time?
EXPLAIN ANALYSE
SELECT count(*)
FROM public.test_part;
Finalize Aggregate (cost=41076.07..41076.08 rows=1 width=8) (actual time=243.842..243.842 rows=1 loops=1)
-> Gather (cost=41075.75..41076.06 rows=3 width=8) (actual time=243.477..267.547 rows=4 loops=1)
Workers Planned: 3
Workers Launched: 3
-> Partial Aggregate (cost=40075.75..40075.76 rows=1 width=8) (actual time=165.999..165.999 rows=1 loops=4)
-> Parallel Append (cost=0.00..38793.96 rows=512716 width=0) (actual time=0.025..130.111 rows=397354 loops=4)
-> Parallel Seq Scan on test_part_2019_11 (cost=0.00..11934.27 rows=171427 width=0) (actual time=0.022..41.875 rows=132856 loops=4)
-> Parallel Seq Scan on test_part_2019_10 (cost=0.00..10984.80 rows=157780 width=0) (actual time=0.018..56.238 rows=244560 loops=2)
-> Parallel Seq Scan on test_part_2019_12 (cost=0.00..8505.66 rows=151466 width=0) (actual time=0.017..47.168 rows=181759 loops=2)
-> Parallel Seq Scan on test_part_2019_09 (cost=0.00..4805.65 rows=85565 width=0) (actual time=0.009..36.941 rows=205356 loops=1)
-> Parallel Seq Scan on test_part (cost=0.00..0.00 rows=1 width=0) (actual time=0.002..0.002 rows=0 loops=1)
Planning Time: 0.179 ms
Execution Time: 267.587 ms
Partitioned tables can gain performance by allowing the planner to eliminate partitions when planning queries.
For this reason you should try to always include your partition key in the where clause of your queries, such that it overlaps as few partitions as possible (ideally 1).
Because you're making a query that does not use the partition key as a filter, postgres has to query every partition, which is possibly even slower than just using a single large table.
I suggest that you give this page a read, as it provides a lot of good information on partitioning in postgres: https://www.postgresql.org/docs/12/ddl-partitioning.html
I have stumbled upon very weird issue with my SQL functions. They appear to have different execution plans between function language SQL and language plpgsql but I cannot tell what execution plan is set for SQL version, since it requires this: Function's final statement must be SELECT or INSERT/UPDATE/DELETE RETURNING. and won't let me use EXPLAIN.
As for why I know they have different plans, it is because SQL version fails to execute, complaining it cannot connect to one of the foreign servers that is currently taken down. Connection is done using foreign tables and that table is partitioned by date (column date_col) with some of its partitions being physically on the same server and some on foreign. Date parameter used in function makes sure it should only scan one partition and that partition is on the same server. This is also shown in explain below used on plain SQL (not in function):
Append (cost=2.77..39.52 rows=2 width=36)
CTE ct
-> Result (cost=0.00..0.51 rows=100 width=4)
InitPlan 2 (returns $1)
-> Aggregate (cost=2.25..2.26 rows=1 width=32)
-> CTE Scan on ct (cost=0.00..2.00 rows=100 width=4)
-> Seq Scan on table1 (cost=0.00..0.00 rows=1 width=36)
Filter: ((date_col = '2017-07-30'::date) AND (some_col = ANY ($1)))
-> Seq Scan on "part$_table1_201707" (cost=0.00..36.75 rows=1 width=36)
Filter: ((date_col = '2017-07-30'::date) AND (some_col = ANY ($1)))
Foreign partitions are before year 2017 and it shows that planner chooses correct partition and does not bother scanning any others. This is true for plain SQL and plpgsql function but not for sql function. Why could that be and can I avoid it without rewriting my functions?
From what I figured, there must be some difference between how parameters are passed in SQL function, since hard coding date in it prevents query from scanning unnecessary partitions. Maybe something like that happens:
-- Both filter values are supplied through sub-selects instead of literals.
WITH ct AS (
    SELECT unnest(array[1,2]) AS arr
)
SELECT
    col1,
    col2
FROM table1
WHERE date_col = (SELECT '2017-07-30'::date)
  AND some_col = ANY((SELECT array_agg(arr) FROM ct)::int[])
Producing such EXPLAIN:
Append (cost=2.78..183.67 rows=3 width=36)
CTE ct
-> Result (cost=0.00..0.51 rows=100 width=4)
InitPlan 2 (returns $1)
-> Result (cost=0.00..0.01 rows=1 width=4)
InitPlan 3 (returns $2)
-> Aggregate (cost=2.25..2.26 rows=1 width=32)
-> CTE Scan on ct (cost=0.00..2.00 rows=100 width=4)
-> Seq Scan on table1 (cost=0.00..0.00 rows=1 width=36)
Filter: ((date_col = $1) AND (some_col = ANY ($2)))
-> Seq Scan on "part$_table1_201707" (cost=0.00..36.75 rows=1 width=36)
Filter: ((date_col = $1) AND (some_col = ANY ($2)))
-> Foreign Scan on "part$_table1_201603" (cost=100.00..144.14 rows=1 width=36)
For the reference, you can reproduce issue on PostgreSQL 9.6.4 using code below:
-- Foreign server that cannot be reached, plus credentials for it.
CREATE SERVER broken_server
    FOREIGN DATA WRAPPER postgres_fdw
    OPTIONS (host 'broken_server', dbname 'postgres', port '5432');

CREATE USER MAPPING FOR postgres
    SERVER broken_server
    OPTIONS (user 'foreign_username', password 'foreign_password');

-- Parent table.
CREATE TABLE table1 (
    id       serial PRIMARY KEY,
    date_col date,
    some_col int,
    col1     int,
    col2     text
);

-- Local child table for July 2017.
CREATE TABLE part$_table1_201707 ()
    INHERITS (table1);

ALTER TABLE part$_table1_201707
    ADD CONSTRAINT part$_table1_201707_date_chk
    CHECK (date_col BETWEEN '2017-07-01'::date AND '2017-07-31'::date);

-- Foreign child table for March 2016, located on the broken server.
CREATE FOREIGN TABLE part$_table1_201603 ()
    INHERITS (table1)
    SERVER broken_server
    OPTIONS (schema_name 'public', table_name 'part$_table1_201603');

ALTER TABLE part$_table1_201603
    ADD CONSTRAINT part$_table1_201603_date_chk
    CHECK (date_col BETWEEN '2016-03-01'::date AND '2016-03-31'::date);
-- PL/pgSQL variant.
-- Returns col1 and col2 of the table1 rows whose date_col equals param1 and
-- whose some_col is one of the values in param2.
CREATE OR REPLACE FUNCTION function_plpgsql(param1 date, param2 int[])
    RETURNS TABLE (col1 int, col2 text)
    LANGUAGE plpgsql
    SECURITY DEFINER
AS $function$
BEGIN
    RETURN QUERY
        WITH ct AS (
            SELECT unnest(param2) AS arr
        )
        SELECT
            t.col1,
            t.col2
        FROM table1 AS t
        WHERE t.date_col = param1
          AND t.some_col = ANY((SELECT array_agg(arr) FROM ct)::int[]); -- reasons
END;
$function$;
-- SQL-language variant of the same query; the date filter comes from the
-- param1 argument.
CREATE OR REPLACE FUNCTION function_sql(param1 date, param2 int[])
    RETURNS TABLE (col1 int, col2 text)
    LANGUAGE sql
    SECURITY DEFINER
AS $function$
    WITH ct AS (
        SELECT unnest(param2) AS arr
    )
    SELECT
        t.col1,
        t.col2
    FROM table1 AS t
    WHERE t.date_col = param1
      AND t.some_col = ANY((SELECT array_agg(arr) FROM ct)::int[])
$function$;
-- SQL-language variant with the date written as a literal; param1 is
-- accepted but not used.
CREATE OR REPLACE FUNCTION function_sql_hardcoded(param1 date, param2 int[])
    RETURNS TABLE (col1 int, col2 text)
    LANGUAGE sql
    SECURITY DEFINER
AS $function$
    WITH ct AS (
        SELECT unnest(param2) AS arr
    )
    SELECT
        t.col1,
        t.col2
    FROM table1 AS t
    WHERE t.date_col = '2017-07-30'::date
      AND t.some_col = ANY((SELECT array_agg(arr) FROM ct)::int[])
$function$;
-- SQL function with a parameter:
-- ERROR: could not connect to server "broken_server"
EXPLAIN ANALYZE
SELECT *
FROM function_sql('2017-07-30'::date, array[1,2]);

-- PL/pgSQL function: works
EXPLAIN ANALYZE
SELECT *
FROM function_plpgsql('2017-07-30'::date, array[1,2]);

-- SQL function with the hard-coded date: works, but useless
EXPLAIN ANALYZE
SELECT *
FROM function_sql_hardcoded('2017-07-30'::date, array[1,2]);
https://www.postgresql.org/docs/current/static/ddl-partitioning.html
Constraint exclusion only works when the query's WHERE clause contains
constants (or externally supplied parameters). For example, a
comparison against a non-immutable function such as CURRENT_TIMESTAMP
cannot be optimized, since the planner cannot know which partition the
function value might fall into at run time.
That would explain scanning unnecessary partitions: PL/pgSQL processes the query before giving it to the optimizer, I assume, and an SQL function with a constant should work, as should a prepared statement, I guess. But comparing an attribute value to a function parameter is probably not a suitable case :)
This query runs very slow. Why? Others are fine. Indexes are good, I think.
-- One output row per distinct si_id, NULL (if any) last.
EXPLAIN ANALYZE
SELECT e_inst.si_id AS c0
FROM e_inst
GROUP BY e_inst.si_id
ORDER BY e_inst.si_id ASC NULLS LAST
Query Plan:
Sort (cost=12221.87..12221.90 rows=68 width=4) (actual time=1115.377..1115.433 rows=81 loops=1)
Sort Key: si_id
Sort Method: quicksort Memory: 28kB
-> HashAggregate (cost=12221.25..12221.45 rows=68 width=4) (actual time=1115.198..1115.261 rows=81 loops=1)
-> Seq Scan on e_inst (cost=0.00..11920.07 rows=602357 width=4) (actual time=0.021..611.570 rows=602357 loops=1)
Total runtime: 1115.538 ms
Create table and indexes:
-- Event table that the indexes and the distinct-si_id query operate on.
CREATE TABLE e_inst (
    id           integer NOT NULL,
    ip           numeric,
    gu           character varying,
    referrer     character varying,
    proc         integer,
    loke_id      integer,
    top_id       integer,
    si_id        integer,
    kop          integer,
    count        integer,
    created      integer,
    modified     integer,
    timepop      integer,
    count_active character varying,
    country      character(3),
    info         character varying
);
-- Indexes on e_inst. Most pair "created" with one other column; "lokeE" and
-- "siE" index a single column on its own.
-- "countryEnhance" previously named the table e_install, which is not
-- defined anywhere here; every other index and the CLUSTER statement target
-- e_inst, so it now does too.
CREATE INDEX "topEnhance"     ON e_inst USING btree (created, top_id);
CREATE INDEX "procEnhance"    ON e_inst USING btree (created, proc);
CREATE INDEX "countryEnhance" ON e_inst USING btree (created, country);
CREATE INDEX "createdE"       ON e_inst USING btree (created);
-- Marks "createdE" as the index to use for future CLUSTER operations.
ALTER TABLE e_inst CLUSTER ON "createdE";
CREATE INDEX "lokeE"          ON e_inst USING btree (loke_id);
CREATE INDEX "lokeEnhance"    ON e_inst USING btree (created, loke_id);
CREATE INDEX "siE"            ON e_inst USING btree (si_id);
CREATE INDEX "siEnhance"      ON e_inst USING btree (created, si_id);
CREATE INDEX "kopEnhance"     ON e_inst USING btree (created, kop);
Indexes aren't going to be used by a query which processes the whole table.
The fact is you are retrieving and processing 600k records. That it does this in just over a second is actually kind of impressive.
Now in this case, you are trying to pull out the 81 distinct values from the 600k records. what you may want to do is to construct a recursive query such that it fetches one row 81 times. This may be faster but there is no guarantee. Normally I use these where there are far fewer rows returned. However here is an example:
-- Loose index scan: fetches the distinct si_id values one at a time, each
-- step asking for the smallest si_id greater than the previous one.
--
-- The previous form could not run: the JOIN had no ON clause, the recursive
-- reference sat under an aggregate, and because min() always yields a row
-- the recursion had no way to stop. Here the scalar sub-select returns NULL
-- once no larger value exists, and the "IS NOT NULL" test on the previous
-- step ends the recursion.
--
-- Unlike GROUP BY si_id, this does not return a row for NULL si_id values.
WITH RECURSIVE sparse_scan AS (
    (
        SELECT si_id
        FROM e_inst
        WHERE si_id IS NOT NULL
        ORDER BY si_id
        LIMIT 1
    )
    UNION ALL
    SELECT (
        SELECT e.si_id
        FROM e_inst AS e
        WHERE e.si_id > s.si_id
        ORDER BY e.si_id
        LIMIT 1
    )
    FROM sparse_scan AS s
    WHERE s.si_id IS NOT NULL
)
SELECT si_id AS c0
FROM sparse_scan
WHERE si_id IS NOT NULL
ORDER BY si_id;
Note that this replaces a sequential scan with 81 index scans.
Upgraded to PostgreSQL 9.2. That's now an index only scan!
Works good, thanks for a_horse_with_no_name who advised me to upgrade.