User Defined Function performance tuning in PostgreSQL

I have the following function, which takes around 6 seconds to execute over 25,000 records.
Function:
CREATE OR REPLACE FUNCTION public.fun_getData
(
    p_pin VARCHAR(15),
    p_datetime TIMESTAMP
)
RETURNS NUMERIC(5,1)
AS
$BODY$
DECLARE
    v_pfcode    VARCHAR(20);
    v_starttime TIMESTAMP;
    v_endtime   TIMESTAMP;
    v_num       NUMERIC(5,2);
    v_gpdata    NUMERIC(5,2);
    v_timedata  NUMERIC(5,2);
    v_sales     VARCHAR(10);
BEGIN
    SELECT col_sale INTO v_sales FROM public.tbl_Sales WHERE sales_id = 'A01';
    IF v_sales = '-' THEN
        RETURN 0;
    END IF;

    SELECT pfcode, gpdata INTO v_pfcode, v_gpdata
    FROM public.tbl_fdata
    WHERE fpin = p_pin;

    v_gpdata := COALESCE(v_gpdata, 0);
    IF v_pfcode IS NULL THEN
        RETURN v_gpdata;
    END IF;

    SELECT StartTime, EndTime, num INTO v_starttime, v_endtime, v_num
    FROM public.salesyears
    WHERE s_pfcode = v_pfcode AND year = date_part('year', p_datetime);

    IF v_starttime IS NULL OR v_endtime IS NULL THEN
        RETURN v_gpdata;
    END IF;

    IF v_starttime < v_endtime THEN
        IF p_datetime >= v_starttime AND p_datetime < v_endtime THEN
            v_timedata := v_gpdata + v_num;
        ELSE
            v_timedata := v_gpdata;
        END IF;
    ELSE
        IF p_datetime >= v_endtime AND p_datetime < v_starttime THEN
            v_timedata := v_gpdata;
        ELSE
            v_timedata := v_gpdata + v_num;
        END IF;
    END IF;

    RETURN v_timedata;
END;
$BODY$
LANGUAGE plpgsql;
Function Call:
select 120*public.fun_getData(col_pin, modifieddate)
from public.tbl_prddata
where code = 'XMOP';
It takes around 00:00:06 to execute.
Note: I created the same function in a SQL Server environment on the same data set, and there it executes within a second.
Execution Plan: explain(analyze,verbose,buffers)
"Seq Scan on public.tbl_prddata (cost=0.00..8141.61 rows=23647 width=32) (actual time=0.253..5970.663 rows=25011 loops=1)"
" Output: ('120'::numeric * public.fun_getData((col_pin)::character varying, (modifieddate)::timestamp without time zone))"
" Filter: ((tbl_prddata.code)::sys.""varchar"" = 'XMOP'::sys.""varchar"")"
" Rows Removed by Filter: 86508"
" Buffers: shared hit=116845"
"Query Identifier: 5592079453045444499"
"Planning Time: 0.076 ms"
"Execution Time: 5980.916 ms"
Edit: Added more execution plans:
Query 1:
EXPLAIN(ANALYZE, VERBOSE, BUFFERS)
SELECT col_sale FROM public.tbl_Sales WHERE sales_id ='A01';
--Output:
"Index Scan using idx_tbl_Sales_colob on public.tbl_Sales (cost=0.14..8.16 rows=1 width=7) (actual time=0.250..0.251 rows=1 loops=1)"
" Output: col_sale"
" Index Cond: ((tbl_Sales.sales_id)::sys.""varchar"" = 'A01'::sys.""varchar"")"
" Buffers: shared hit=9"
"Query Identifier: 6306884246769163476"
"Planning:"
" Buffers: shared hit=50"
"Planning Time: 0.183 ms"
"Execution Time: 0.266 ms"
Query 2:
EXPLAIN(ANALYZE, VERBOSE, BUFFERS)
SELECT pfcode, gpdata FROM public.tbl_fdata WHERE fpin='PRD01';
--Output:
"Index Scan using idx_tbl_fdata_colx on public.tbl_fdata (cost=0.12..8.14 rows=1 width=46) (actual time=0.027..0.027 rows=1 loops=1)"
" Output: pfcode, gpdata"
" Index Cond: ((tbl_fdata.fpin)::sys.""varchar"" = 'PRD01'::sys.""varchar"")"
" Buffers: shared hit=2"
"Query Identifier: -4137289472330654813"
"Planning:"
" Buffers: shared hit=655"
"Planning Time: 1.415 ms"
"Execution Time: 0.047 ms"
Query 3:
EXPLAIN(ANALYZE, VERBOSE, BUFFERS)
SELECT StartTime, EndTime, num
FROM public.salesyears
WHERE s_pfcode = 'all' AND year = date_part('year', '2023-01-01 00:20:00'::timestamp);
--Output:
"Seq Scan on public.salesyears y (cost=0.00..1.47 rows=1 width=30) (actual time=0.064..0.064 rows=0 loops=1)"
" Output: StartTime, EndTime, num"
" Filter: (((s_pfcode)::sys.""varchar"" = 'all'::sys.""varchar"") AND ((year)::double precision = '2023'::double precision))"
" Rows Removed by Filter: 27"
" Buffers: shared hit=1"
"Query Identifier: 7071532767670856741"
"Planning:"
" Buffers: shared hit=56"
"Planning Time: 0.280 ms"
"Execution Time: 0.074 ms"
Query 4:
explain(analyze,verbose,buffers)
select * from public.tbl_prddata
where code= 'XMOP';
--Output:
"Seq Scan on public.tbl_prddata (cost=0.00..2372.74 rows=22847 width=263) (actual time=0.075..104.757 rows=23031 loops=1)"
" Output: cola,colb,colc,cold,cole,colf,colg,colh"
" Filter: ((tbl_prddata.code)::sys.""varchar"" = 'XMOP'::sys.""varchar"")"
" Rows Removed by Filter: 31588"
" Buffers: shared hit=1690"
"Query Identifier: 7714647889345744025"
"Planning Time: 0.093 ms"
"Execution Time: 106.360 ms"


How to speed up the query in GCP PostgreSQL

My Postgres server details: PostgreSQL 13, 52 GB RAM, 1000 GB SSD, and the DB size is 300 GB.
Here is my query:
select distinct "col2","col3","col1"
from table1(foreign table)
where "col2" not in (select "col4"
from table2(foreign table)
where "col9" = 'data1'
and "col10"='A')
and "col2" not in (select "col13"
from table5(foreign table)
where "col11" = 'A'
and "col12" in ('data1', 'data2', 'data3', 'data4'))
and "col6" > '2022-01-01' and "col10" = 'A' and "col18" = 'P'
and not "col7" = 'V' and "Type" = 'A'
order by "col1"
Here is my Explain Plan
"Unique (cost=372.13..372.14 rows=1 width=1074) (actual time=145329.010..145329.136 rows=336 loops=1)"
" Output: table1.""col2"", table1.""col3"", table1.""col1"""
" Buffers: shared hit=3"
" -> Sort (cost=372.13..372.14 rows=1 width=1074) (actual time=145329.008..145329.027 rows=336 loops=1)"
" Output: table1.""col2"", table1.""col3"", table1.""col1"""
" Sort Key: table1.""col1"", table1.""col2"", table1.""col3"""
" Sort Method: quicksort Memory: 63kB"
" Buffers: shared hit=3"
" -> Foreign Scan on public.table1 (cost=360.38..372.12 rows=1 width=1074) (actual time=144430.980..145327.532 rows=336 loops=1)"
" Output: table1.""col2"", table1.""col3"", table1.""col1"""
" Filter: ((NOT (hashed SubPlan 1)) AND (NOT (hashed SubPlan 2)))"
" Rows Removed by Filter: 253144"
" Remote SQL: SELECT ""col2"", ""col3"", ""col1"" FROM dbo.table4 WHERE ((""col6"" > '2022-01-01 00:00:00'::timestamp without time zone)) AND ((""col7"" <> 'V'::text)) AND ((""col8"" = 'A'::text))"
" SubPlan 1"
" -> Foreign Scan on public.table2 (cost=100.00..128.63 rows=1 width=42) (actual time=2.169..104702.862 rows=50573365 loops=1)"
" Output: table2.""col4"""
" Remote SQL: SELECT ""col5"" FROM dbo.table3 WHERE ((""col9"" = 'data1'::text)) AND ((""col10"" = 'A'::text))"
" SubPlan 2"
" -> Foreign Scan on public.table5 (cost=100.00..131.74 rows=1 width=42) (actual time=75.363..1015.498 rows=360240 loops=1)"
" Output: table5.""col13"""
" Remote SQL: SELECT ""col14"" FROM dbo.table6 WHERE ((""col11"" = 'A'::text)) AND ((""col12"" = ANY ('{data1,data2,data3,data4}'::text[])))"
"Planning:"
" Buffers: shared hit=142"
"Planning Time: 1.887 ms"
"Execution Time: 145620.958 ms"
table1: 4 million rows
table2: 250 million rows
table3: 400 million rows
Table Definition table1
CREATE TABLE IF NOT EXISTS table1
(
"col1" character varying(12) ,
"col" character varying(1) ,
"col" character varying(1) ,
...
...
);
Indexes exist on other columns, but not on the query columns "col2", "col3", "col1".
Table Definition table2
CREATE TABLE IF NOT EXISTS table2
(
"col4" character varying(12) ,
"col9" character varying(1) ,
"col10" character varying(1) ,
...
...
);
These indexes exist on table2:
CREATE INDEX index1 ON table2("col4" ASC,"col9" ASC,"col" ASC,"col10" ASC);
CREATE INDEX index1 ON table2("col" ASC,"col9" ASC,"col4" ASC,"col10" ASC);
CREATE INDEX index1 ON table2("col9" ASC,"col4" ASC,"col" ASC,"col10" ASC);
CREATE INDEX index1 ON table2("col" ASC,"col9" ASC,"col10" ASC,"col" ASC);
Table Definition table5
CREATE TABLE IF NOT EXISTS table5
(
"col11" character varying(12) ,
"col13" character varying(1) ,
"col" character varying(1) ,
...
...
);
These indexes exist on table5:
CREATE INDEX index ON table5("col" ASC, "col" ASC,"col11" ASC);
CREATE INDEX index ON table5("col13" ASC,"col11" ASC);
CREATE INDEX index ON table5("col" ASC,"col13" ASC,"col11" ASC)INCLUDE ("col");
CREATE INDEX index ON table5("col" ASC, "col" ASC,"col11" ASC);
How can I speed up this query's execution? It took 3 minutes just to retrieve 365 records.
Here is my EXPLAIN (ANALYZE, BUFFERS)
"Unique (cost=372.13..372.14 rows=1 width=1074) (actual time=110631.114..110631.262 rows=336 loops=1)"
" -> Sort (cost=372.13..372.14 rows=1 width=1074) (actual time=110631.111..110631.142 rows=336 loops=1)"
" Sort Key: table1.""col1"", table1.""col2"", table1.""col3"""
" Sort Method: quicksort Memory: 63kB"
" -> Foreign Scan on table1 (cost=360.38..372.12 rows=1 width=1074) (actual time=110432.132..110629.640 rows=336 loops=1)"
" Filter: ((NOT (hashed SubPlan 1)) AND (NOT (hashed SubPlan 2)))"
" Rows Removed by Filter: 253144"
" SubPlan 1"
" -> Foreign Scan on table2 (cost=100.00..128.63 rows=1 width=42) (actual time=63638.173..71979.772 rows=50573365 loops=1)"
" SubPlan 2"
" -> Foreign Scan on table5 (cost=100.00..131.74 rows=1 width=42) (actual time=569.126..630.782 rows=360240 loops=1)"
"Planning Time: 0.266 ms"
"Execution Time: 111748.715 ms"
Here is my EXPLAIN (ANALYZE, BUFFERS) of the "remote SQL" when executed on the remote database:
"Limit (cost=4157478.69..4157602.66 rows=1000 width=47) (actual time=68356.908..68681.831 rows=336 loops=1)"
" Buffers: shared hit=66205118"
" -> Unique (cost=4157478.69..4164948.04 rows=60253 width=47) (actual time=68356.905..68681.801 rows=336 loops=1)"
" Buffers: shared hit=66205118"
" -> Gather Merge (cost=4157478.69..4164496.14 rows=60253 width=47) (actual time=68356.901..68681.718 rows=336 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" Buffers: shared hit=66205118"
" -> Sort (cost=4156478.66..4156541.43 rows=25105 width=47) (actual time=66154.447..66154.459 rows=112 loops=3)"
" Sort Key: table4.""col1"", table4.""col2"", table4.""col3"""
" Sort Method: quicksort Memory: 63kB"
" Buffers: shared hit=66205118"
" Worker 0: Sort Method: quicksort Memory: 25kB"
" Worker 1: Sort Method: quicksort Memory: 25kB"
" -> Parallel Seq Scan on table4 (cost=3986703.25..4154644.03 rows=25105 width=47) (actual time=66041.929..66153.663 rows=112 loops=3)"
" Filter: ((NOT (hashed SubPlan 1)) AND (NOT (hashed SubPlan 2)) AND (""col6"" > '2022-01-01 00:00:00'::timestamp without time zone) AND ((""col7"")::text <> 'V'::text) AND ((""col8"")::text = 'A'::text))"
" Rows Removed by Filter: 1236606"
" Buffers: shared hit=66205102"
" SubPlan 1"
" -> Index Only Scan using col20 on table3 (cost=0.70..2696555.01 rows=50283867 width=13) (actual time=0.134..25085.583 rows=50573365 loops=3)"
" Index Cond: ((""col9"" = 'data1'::text) AND (""col10"" = 'A'::text))"
" Heap Fetches: 0"
" Buffers: shared hit=65737946"
" SubPlan 2"
" -> Bitmap Heap Scan on table6 (cost=4962.91..1163549.12 rows=355779 width=13) (actual time=160.770..440.978 rows=360240 loops=3)"
" Recheck Cond: (((""col12"")::text = ANY ('{data1,data2,data3,data4}'::text[])) AND ((""col11"")::text = 'A'::text))"
" Heap Blocks: exact=110992"
" Buffers: shared hit=333992"
" -> Bitmap Index Scan on col21 (cost=0.00..4873.97 rows=355779 width=0) (actual time=120.354..120.354 rows=360240 loops=3)"
" Index Cond: (((""col12"")::text = ANY ('{data1,data2,data3,data4}'::text[])) AND ((""col11"")::text = 'A'::text))"
" Buffers: shared hit=1016"
"Planning:"
" Buffers: shared hit=451"
"Planning Time: 4.039 ms"
"Execution Time: 69001.171 ms"

Stuck with timeout issue

I am getting this timeout error:
Message: SQLSTATE[57014]: Query canceled: 7 ERROR: canceling statement due to statement timeout
This is the query that is timing out:
SELECT
log.id,
integration.id AS intid,
log.integration_id AS integration_id,
integration.name,
log.createddate
FROM integration log
LEFT JOIN integration__sf integration on ( integration.id = log.integration_id)
LEFT JOIN property prop on ( log.property_id = prop.id )
LEFT JOIN account acc on ( acc.sfid = integration.account )
WHERE
log.id IS NOT NULL
AND log.script_type = 'Pull'
AND log.script_name = 'ModifyTags'
AND log.createddate >= '2018-11-01 00:00:00'
AND log.createddate <= '2018-11-30 23:59:59'
ORDER BY log.id desc LIMIT 100 OFFSET 0;
Is there any scope to optimize this query any more?
Here is the EXPLAIN (ANALYZE, BUFFERS) output:
"Limit (cost=30809.27..30820.93 rows=100 width=262) (actual time=11.793..11.803 rows=21 loops=1)"
" Buffers: shared hit=5 read=935"
" -> Gather Merge (cost=30809.27..31199.66 rows=3346 width=262) (actual time=11.791..11.799 rows=21 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" Buffers: shared hit=5 read=935"
" -> Sort (cost=29809.24..29813.43 rows=1673 width=262) (actual time=6.844..6.844 rows=7 loops=3)"
" Sort Key: log.id DESC"
" Sort Method: quicksort Memory: 27kB"
" Buffers: shared hit=1967 read=937"
" -> Hash Left Join (cost=3003.36..29719.67 rows=1673 width=262) (actual time=6.774..6.819 rows=7 loops=3)"
" Hash Cond: ((integration.account__c)::text = (acc.sfid)::text)"
" Buffers: shared hit=1953 read=937"
" -> Nested Loop Left Join (cost=2472.13..29167.33 rows=1673 width=254) (actual time=3.643..3.686 rows=7 loops=3)"
" Buffers: shared hit=969 read=468"
" -> Hash Left Join (cost=2471.71..17895.82 rows=1673 width=228) (actual time=3.635..3.673 rows=7 loops=3)"
" Hash Cond: (log.integration_id = integration.id)"
" Buffers: shared hit=969 read=468"
" -> Parallel Bitmap Heap Scan on integration_log log (cost=1936.93..17339.92 rows=1673 width=148) (actual time=0.097..0.132 rows=7 loops=3)"
" Recheck Cond: (((script_name)::text = 'ModifyTags'::text) AND ((script_type)::text = 'Pull'::text) AND (createddate >= '2018-11-01 00:00:00+05:30'::timestamp with time zone) AND (createddate <= '2018-12-07 23:59:59+05: (...)"
" Filter: (id IS NOT NULL)"
" Heap Blocks: exact=19"
" Buffers: shared read=26"
" -> Bitmap Index Scan on ah_idx_integeration_log_script_name (cost=0.00..1935.93 rows=4016 width=0) (actual time=0.201..0.201 rows=21 loops=1)"
" Index Cond: (((script_name)::text = 'ModifyTags'::text) AND ((script_type)::text = 'Pull'::text) AND (createddate >= '2018-11-01 00:00:00+05:30'::timestamp with time zone) AND (createddate <= '2018-12-07 23:59:59 (...)"
" Buffers: shared read=5"
" -> Hash (cost=483.79..483.79 rows=4079 width=80) (actual time=3.463..3.463 rows=4079 loops=3)"
" Buckets: 4096 Batches: 1 Memory Usage: 481kB"
" Buffers: shared hit=887 read=442"
" -> Seq Scan on integration__c integration (cost=0.00..483.79 rows=4079 width=80) (actual time=0.012..2.495 rows=4079 loops=3)"
" Buffers: shared hit=887 read=442"
" -> Index Scan using property__c_pkey on property__c prop (cost=0.42..6.74 rows=1 width=30) (actual time=0.001..0.001 rows=0 loops=21)"
" Index Cond: (log.property_id = id)"
" -> Hash (cost=498.88..498.88 rows=2588 width=42) (actual time=3.098..3.098 rows=2577 loops=3)"
" Buckets: 4096 Batches: 1 Memory Usage: 220kB"
" Buffers: shared hit=950 read=469"
" -> Seq Scan on account acc (cost=0.00..498.88 rows=2588 width=42) (actual time=0.011..2.531 rows=2577 loops=3)"
" Buffers: shared hit=950 read=469"
"Planning time: 2.513 ms"
"Execution time: 13.904 ms"
Actually I have found the optimization solution; the query would look like this:
SELECT
log.id,
integration.id AS intid,
log.integration_id AS integration_id,
integration.name,
log.createddate
FROM integration log
LEFT JOIN integration__sf integration on ( integration.id = log.integration_id)
LEFT JOIN property prop on ( log.property_id = prop.id )
LEFT JOIN account acc on ( acc.sfid = integration.account AND prop.account = acc.sfid AND prop.group_membership = integration.grouping)
WHERE log.id IS NOT NULL
AND log.script_type = 'Pull'
AND log.script_name = 'ModifyTags'
AND log.createddate >= '2018-11-01 00:00:00'
AND log.createddate <= '2018-11-30 23:59:59'
ORDER BY log.id desc LIMIT 100 OFFSET 0
If you can suggest anything more, I will be grateful.
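One more idea, offered tentatively since the plan above was clearly captured on a fast run (13.9 ms): for the ORDER BY log.id DESC LIMIT 100 pattern, an index whose leading columns match the equality filters and whose last column matches the sort lets the planner walk the newest ids first and stop after 100 matches. A hypothetical example (the index name is made up; integration_log is the table name shown in the plan):
-- Equality columns first, then the sort column, so the scan can stream in id order.
CREATE INDEX idx_log_script_id_desc
    ON integration_log (script_name, script_type, id DESC);
This pays off when rows matching script_name and script_type are plentiful in the date range; when matches are sparse, the existing bitmap plan remains the better choice.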

Boolean column in multicolumn index

Test table and indexes:
CREATE TABLE public.t (id serial, cb boolean, ci integer, co integer);
INSERT INTO t(cb, ci, co)
SELECT ((round(random()*1))::int)::boolean, round(random()*100), round(random()*100)
FROM generate_series(1, 1000000);
CREATE INDEX "right" ON public.t USING btree (ci, cb, co);
CREATE INDEX wrong ON public.t USING btree (ci, co);
CREATE INDEX right_hack ON public.t USING btree (ci, (cb::integer), co);
The problem is that I can't force PostgreSQL to use the "right" index. The next query uses the "wrong" index. It's not optimal because it uses "Filter" (condition: cb = TRUE) and so reads more data from memory (and execution becomes longer):
explain (analyze, buffers)
SELECT * FROM t WHERE cb = TRUE AND ci = 46 ORDER BY co LIMIT 1000
"Limit (cost=0.42..4063.87 rows=1000 width=13) (actual time=0.057..4.405 rows=1000 loops=1)"
" Buffers: shared hit=1960"
" -> Index Scan using wrong on t (cost=0.42..21784.57 rows=5361 width=13) (actual time=0.055..4.256 rows=1000 loops=1)"
" Index Cond: (ci = 46)"
" Filter: cb"
" Rows Removed by Filter: 967"
" Buffers: shared hit=1960"
"Planning time: 0.318 ms"
"Execution time: 4.530 ms"
But when I cast the bool column to int, it works fine. This is unclear to me, because the selectivity of both indexes ("right" and right_hack) is the same.
explain (analyze, buffers)
SELECT * FROM t WHERE cb::int = 1 AND ci = 46 ORDER BY co LIMIT 1000
"Limit (cost=0.42..2709.91 rows=1000 width=13) (actual time=0.027..1.484 rows=1000 loops=1)"
" Buffers: shared hit=1003"
" -> Index Scan using right_hack on t (cost=0.42..14525.95 rows=5361 width=13) (actual time=0.025..1.391 rows=1000 loops=1)"
" Index Cond: ((ci = 46) AND ((cb)::integer = 1))"
" Buffers: shared hit=1003"
"Planning time: 0.202 ms"
"Execution time: 1.565 ms"
Are there any limitations on using a boolean column inside a multicolumn index?
A partial index (or two) does seem to work:
CREATE INDEX true_bits ON ttt (ci, co)
WHERE cb = True ;
CREATE INDEX false_bits ON ttt (ci, co)
WHERE cb = False ;
VACUUM ANALYZE ttt;
EXPLAIN (ANALYZE, buffers)
SELECT * FROM ttt
WHERE cb = TRUE AND ci = 46 ORDER BY co LIMIT 1000
;
Plan
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.25..779.19 rows=1000 width=13) (actual time=0.024..1.804 rows=1000 loops=1)
Buffers: shared hit=1001
-> Index Scan using true_bits on ttt (cost=0.25..3653.46 rows=4690 width=13) (actual time=0.020..1.570 rows=1000 loops=1)
Index Cond: (ci = 46)
Buffers: shared hit=1001
Planning time: 0.468 ms
Execution time: 1.949 ms
(7 rows)
Still, there is very little gain from indexes on low-cardinality columns. The chance that an index entry can avoid a page read is very small. For a page size of 8 KB and a row size of ~20 bytes, there are ~400 records on a page, so (almost) any page will contain both a true record and a false record and will have to be read anyway.
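A quick way to quantify that trade-off is to compare index sizes; each partial index covers roughly half of the rows, yet, as argued above, nearly every heap page still has to be read. For example:
-- Compare the on-disk size of the full and partial indexes from the examples above.
SELECT c.relname,
       pg_size_pretty(pg_relation_size(c.oid)) AS index_size
FROM pg_class c
WHERE c.relname IN ('right', 'wrong', 'right_hack', 'true_bits', 'false_bits');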

postgres index not used

Postgres 9.5
I have a table with a jsonb column:
CREATE TABLE public.test
(
objectstate jsonb
);
and an index on it:
CREATE INDEX "test.type"
ON public.test
USING btree
((objectstate ->> 'type'::text) COLLATE pg_catalog."default");
I also have a function returning dependent types... it's more complex, so I'll give an example:
CREATE OR REPLACE FUNCTION testfunc(sxtype text)
RETURNS text AS
$BODY$
BEGIN
return '{type1, type2}';
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
Now, here's what I've got:
select testfunc('type1') gives me '{type1, type2}'
The next syntax works well and DOES use the index:
select * from test where objectstate->>'type' = ANY('{type1, type2}'::text[])
But once I try to combine them, the index is not used:
select * from test
where objectstate->>'type' = ANY((select testfunc('type1'))::text[])
The weird thing is that the next query DOES use the index again! (but I can't use this workaround everywhere)
select * from test
where objectstate->>'type' = ANY((select testfunc('type1'))::text[])
order by objectstate->>'type'
explain analyze gives me:
"Seq Scan on test (cost=0.26..530872.27 rows=2238634 width=743) (actual time=1107.155..7992.825 rows=129 loops=1)"
" Filter: ((test ->> 'type'::text) = ANY (($0)::text[]))"
" Rows Removed by Filter: 4063727"
" InitPlan 1 (returns $0)"
" -> Result (cost=0.00..0.26 rows=1 width=0) (actual time=0.718..0.718 rows=1 loops=1)"
"Planning time: 0.319 ms"
"Execution time: 7992.870 ms"
and when ORDER BY is applied:
"Index Scan using "test.type" on test (cost=0.70..545058.44 rows=2238634 width=743) (actual time=0.645..0.740 rows=129 loops=1)"
" Index Cond: ((objectstate ->> 'type'::text) = ANY (($0)::text[]))"
" InitPlan 1 (returns $0)"
" -> Result (cost=0.00..0.26 rows=1 width=0) (actual time=0.617..0.617 rows=1 loops=1)"
"Planning time: 0.300 ms"
"Execution time: 0.782 ms"
Any ideas how I can force Postgres to use the index without adding ORDER BY?
Maybe it is not an answer, but it seems you can change the function definition from VOLATILE to IMMUTABLE:
CREATE OR REPLACE FUNCTION testfunc(sxtype text)
RETURNS text AS
$BODY$
BEGIN
return '{type1, type2}';
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 100;
With a VOLATILE function, Postgres does not apply such optimizations: a VOLATILE function may change data, and its result is not predictable. More in the documentation: https://www.postgresql.org/docs/9.5/static/sql-createfunction.html
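Going one step further, if the function can return a real text[] instead of an array literal packed into text, the call site no longer needs the subselect-and-cast at all. A sketch using the same toy body as above; with IMMUTABLE, the planner can pre-evaluate the call to a constant array at plan time, which is exactly what lets it use the index:
CREATE OR REPLACE FUNCTION testfunc(sxtype text)
RETURNS text[] AS
$BODY$
BEGIN
    -- Toy body; the real function would compute the dependent types.
    RETURN ARRAY['type1', 'type2'];
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE;

-- No scalar subquery or cast needed now:
SELECT * FROM test
WHERE objectstate->>'type' = ANY (testfunc('type1'));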

Query too slow in Postgresql in table with > 12M rows

I have a simple table in my web app with more than 12 million rows, and it is growing all the time.
+----+-----+-----+-------+--------+
| id | dtt | cus | event | server |
+----+-----+-----+-------+--------+
I'm getting the count of today's events by customer using this query:
SELECT COUNT(*) FROM events
WHERE dtt AT TIME ZONE 'America/Santiago' >=date(now() AT TIME ZONE 'America/Santiago') + interval '1s'
AND cus=2
And the performance is very bad for my web app: 22,702 ms.
"Aggregate (cost=685814.54..685814.55 rows=1 width=0) (actual time=21773.451..21773.452 rows=1 loops=1)"
" -> Seq Scan on events (cost=0.00..675644.52 rows=4068008 width=0) (actual time=10277.508..21732.548 rows=409808 loops=1)"
" Filter: ((cus = 2) AND (timezone('America/Santiago'::text, dtt) >= (date(timezone('America/Santiago'::text, now())) + '00:00:01'::interval)))"
" Rows Removed by Filter: 12077798"
"Planning time: 0.127 ms"
"Execution time: 21773.509 ms"
I have the following indexes created:
CREATE INDEX events_dtt_idx
ON events
USING btree
(dtt);
CREATE INDEX events_id_desc
ON events
USING btree
(id DESC NULLS LAST);
CREATE INDEX events_cus_idx
ON events
USING btree
(cus);
CREATE INDEX events_id_idx
ON events
USING btree
(id);
Using PostgreSQL 9.4 on Linux x64.
How can I improve that? Thanks in advance.
Something like:
CREATE INDEX dtt_tz_idx ON events (DATE(dtt AT TIME ZONE 'America/Santiago'));
then query
SELECT COUNT(*) FROM events
WHERE DATE(TIMEZONE('America/Santiago'::text, dtt)) >=date(now() AT TIME ZONE 'America/Santiago') + interval '1s'
AND cus=2
If it doesn't work, try "\d dtt_tz_idx" in psql and try to match the datatypes on your query with the index.
Finally, I fixed the problem with this index:
CREATE INDEX dtt_tz_idx ON events (TIMEZONE('America/Santiago'::text, dtt));
Thanks sivan & vyegorov for your guidance; now the plan is:
"Aggregate (cost=567240.43..567240.44 rows=1 width=0) (actual time=238.440..238.440 rows=1 loops=1)"
" -> Bitmap Heap Scan on events (cost=82620.28..556463.97 rows=4310584 width=0) (actual time=41.445..208.870 rows=344453 loops=1)"
" Recheck Cond: (timezone('America/Santiago'::text, dtt) >= (date(timezone('America/Santiago'::text, now())) + '00:00:01'::interval))"
" Filter: (cus = 2)"
" Rows Removed by Filter: 9433"
" Heap Blocks: exact=9426"
" -> Bitmap Index Scan on dtt_tz_idx (cost=0.00..81542.63 rows=4415225 width=0) (actual time=38.866..38.866 rows=353886 loops=1)"
" Index Cond: (timezone('America/Santiago'::text, dtt) >= (date(timezone('America/Santiago'::text, now())) + '00:00:01'::interval))"
"Planning time: 0.221 ms"
"Execution time: 238.509 ms"