Why postgres function so slow but single query is fast? - postgresql

I have a function to get employees in 'Create' status:
-- get_probation_contract: lists employees whose contract row with the latest
-- date_start (per employee) has a contract type in group code '1'. Because of
-- that filter the CASE expression always yields 'Probation' for "ContractType";
-- "Signal" is the constant 'CREATE'.
--
-- Parameters:
--   AccountOrEmpcode  optional filter: NULL or '' disables it, otherwise it must
--                     equal resource_resource.code or hr_employee.login.
--   FromDate, ToDate  inclusive range (BETWEEN) checked against he.join_date,
--                     contract.date_start and he.create_date.
-- Only employees whose resource_resource row has active = TRUE are returned.
CREATE OR REPLACE FUNCTION get_probation_contract(AccountOrEmpcode TEXT, FromDate DATE,
ToDate DATE)
RETURNS TABLE("EmpId" INTEGER, "EmpCode" CHARACTER VARYING,
"DomainAccount" CHARACTER VARYING, "JoinDate" DATE,
"ContractTypeCode" CHARACTER VARYING, "ContractTypeName" CHARACTER VARYING,
"ContractFrom" DATE, "ContractTo" DATE, "ContractType" CHARACTER VARYING,
"Signal" CHARACTER VARYING) AS $$
BEGIN
-- The query text below is executed as dynamic SQL. $1, $2 and $3 are bound by
-- the USING clause at the end to AccountOrEmpcode, FromDate and ToDate.
RETURN QUERY
EXECUTE 'SELECT
he.id "EmpId",
rr.code "EmpCode",
he.login "DomainAccount",
he.join_date "JoinDate",
contract_type.code "ContractTypeCode",
contract_type.name "ContractTypeName",
contract.date_start "ContractFrom",
contract.date_end "ContractTo",
CASE WHEN contract_group.code = ''1'' THEN ''Probation''
WHEN contract_group.code IN (''3'', ''4'', ''5'') THEN ''Official''
WHEN contract_group.code = ''2'' THEN ''Collaborator'' END :: CHARACTER VARYING "ContractType",
''CREATE'' :: CHARACTER VARYING "Signal"
FROM
hr_employee he
INNER JOIN resource_resource rr
ON rr.id = he.resource_id
INNER JOIN hr_contract contract
ON contract.employee_id = he.id AND contract.date_start = (
SELECT max(date_start) "date_start"
FROM hr_contract cc
WHERE cc.employee_id = contract.employee_id
)
INNER JOIN hr_contract_type contract_type
ON contract_type.id = contract.type_id
INNER JOIN hr_contract_type_group contract_group
ON contract_group.id = contract_type.contract_type_group_id
WHERE
contract_group.code = ''1''
AND
($1 IS NULL OR $1 = '''' OR rr.code = $1 OR
he.login = $1)
AND (
(he.join_date BETWEEN $2 AND $3)
OR (he.join_date IS NOT NULL AND (contract.date_start BETWEEN $2 AND $3))
OR (he.create_date BETWEEN $2 AND $3 AND he.create_date > he.join_date)
)
AND rr.active = TRUE
'using AccountOrEmpcode, FromDate, ToDate ;
END;
$$ LANGUAGE plpgsql;
It took 37 seconds to execute:
SELECT *
FROM get_probation_contract('', '2014-01-01', '2014-06-01');
When I use single query
-- Stand-alone form of the probation-contract query with literal dates.
-- NOTE(review): this text has no filter on rr.code / he.login, and the second
-- and third range checks end at '2014-01-06' while the first one ends at
-- '2014-06-01' - confirm whether the differing end dates are intended before
-- comparing timings.
SELECT
he.id "EmpId",
rr.code "EmpCode",
he.login "DomainAccount",
he.join_date "JoinDate",
contract_type.code "ContractTypeCode",
contract_type.name "ContractTypeName",
contract.date_start "ContractFrom",
contract.date_end "ContractTo",
CASE WHEN contract_group.code = '1' THEN 'Probation'
WHEN contract_group.code IN ('3', '4', '5') THEN 'Official'
WHEN contract_group.code = '2' THEN 'Collaborator' END :: CHARACTER VARYING "ContractType",
'CREATE' :: CHARACTER VARYING "Signal"
FROM
hr_employee he
INNER JOIN resource_resource rr
ON rr.id = he.resource_id
-- Keep only the contract row(s) carrying the employee's latest date_start.
INNER JOIN hr_contract contract
ON contract.employee_id = he.id AND contract.date_start = (
SELECT max(date_start) "date_start"
FROM hr_contract
WHERE employee_id = he.id
)
INNER JOIN hr_contract_type contract_type
ON contract_type.id = contract.type_id
INNER JOIN hr_contract_type_group contract_group
ON contract_group.id = contract_type.contract_type_group_id
WHERE
contract_group.code = '1'
AND (
(he.join_date BETWEEN '2014-01-01' AND '2014-06-01')
OR (he.join_date IS NOT NULL AND (contract.date_start BETWEEN '2014-01-01' AND '2014-01-06'))
OR (he.create_date BETWEEN '2014-01-01' AND '2014-01-06' AND he.create_date > he.join_date)
)
AND rr.active = TRUE
It takes 5 seconds to complete.
How can I optimize the function above?
And why is the function so much slower than the single query, even though I use EXECUTE 'select ...' in the function?
Each table has an index on its id field.

A possible reason is blind optimization of prepared statements (embedded SQL). This is a little better in newer PostgreSQL releases, although it can still be an issue there too. The execution plan of embedded SQL in PL/pgSQL is reused across calls, and it is optimized for the most common value (not for the value actually used). Sometimes this difference can cause really big slowdowns.
In that case you can use dynamic SQL - the EXECUTE statement. Dynamic SQL plans are used only once and are built with the real parameter values. That should fix this issue.
Example of embedded SQL with reused prepared plans.
-- fx1: counts the rows of people whose surname equals _surname.
-- The query is embedded (static) SQL, so PL/pgSQL prepares it once and may
-- reuse the same plan for later calls with other _surname values.
CREATE OR REPLACE FUNCTION fx1(_surname text)
RETURNS int AS $$
BEGIN
  -- count(*) is bigint; PL/pgSQL coerces it to the declared int result.
  RETURN (SELECT count(*) FROM people WHERE surname = _surname);
END;
$$ LANGUAGE plpgsql;
Example with dynamic SQL:
-- fx2: counts the rows of people whose surname equals _surname.
-- The query runs through EXECUTE, so it is planned on every call with the
-- actual value bound to $1.
CREATE OR REPLACE FUNCTION fx2(_surname text)
  RETURNS int
  LANGUAGE plpgsql
AS $$
DECLARE
  _matches int;  -- receives the count produced by the dynamic query
BEGIN
  EXECUTE 'SELECT count(*) FROM people WHERE surname = $1'
    INTO _matches
    USING _surname;

  RETURN _matches;
END;
$$;
The second function can be faster if your dataset contains some extremely frequent surname: the common plan will then be a sequential scan, but most of the time you will ask for some other surname, and for those you want an index scan. Dynamic query parametrization (like ($1 IS NULL OR $1 = '''' OR rr.code = $1 OR he.login = $1)) has the same effect.

Your queries are not the same.
The first one has
WHERE cc.employee_id = contract.employee_id
where the second one has:
WHERE employee_id = he.id
And also:
($1 IS NULL OR $1 = '''' OR rr.code = $1 OR
he.login = $1)
Please test again with identical queries and identical values.

Related

How to Write IF ELSE Statements inside RETURN QUERY EXECUTE in PostgreSql

I'm getting syntax error while writing IF ELSE statements inside RETURN QUERY EXECUTE in PostgreSQL 11.
-- tds_master.a_report (as posted in the question): intended to return salary
-- and tax columns from tds<i_finyear>.saldet joined to
-- tds<i_finyear>.employee_deductee for one entity / client / branch, with an
-- extra case-insensitive name match when i_name is not empty.
CREATE OR REPLACE FUNCTION tds_master.a_report(
i_entity_id integer,
i_client_id integer,
i_branch_id integer,
i_name text,
i_finyear integer)
RETURNS TABLE(employee_name character varying(100), pan character varying(10), optfor115bac character varying(1),
taxable_income numeric, income_tax numeric, credit_us_87a numeric, surcharge numeric, education_cess numeric)
LANGUAGE 'plpgsql'AS
$BODY$
BEGIN
-- NOTE(review): the text handed to EXECUTE starts with "if ... then ... else",
-- which is PL/pgSQL control flow rather than an SQL statement; this matches the
-- syntax error the question reports.
-- NOTE(review): i_name is referenced inside the query text, but USING binds
-- only i_entity_id, i_client_id and i_branch_id ($1..$3).
RETURN QUERY EXECUTE
'if i_name != '' then
select
ed.name as employee_name,
ed.pan,
sd.opting_for_us115bac,
sd.total_taxable_income,
sd.income_tax_on_total_income,
sd.rebate_us87a,
sd.surcharge,
sd.education_cess
from tds' || i_finyear || '.saldet sd
inner join tds' || i_finyear || '.employee_deductee ed on ed.ed_id = sd.employee_id
where sd.entity_id = $1 and sd.client_id = $2 and sd.branch_id = $3 and upper(ed.name)=upper(i_name);
else
select
ed.name as employee_name,
ed.pan,
sd.opting_for_us115bac,
sd.total_taxable_income,
sd.income_tax_on_total_income,
sd.rebate_us87a,
sd.surcharge,
sd.education_cess
from tds' || i_finyear || '.saldet sd
inner join tds' || i_finyear || '.employee_deductee ed on ed.ed_id = sd.employee_id
where sd.entity_id = $1 and sd.client_id = $2 and sd.branch_id = $3'
USING i_entity_id, i_client_id, i_branch_id;
END;
$BODY$;
CALL Function:
select *
from tds_master.a_report(1547,6393,0,'MADAKE VINOD BABURAO',2021);
OUTPUT:
Read up about PL/pgSQL syntax as hinted in the comments.
However, you can just merge your query variants and use a plain OR for the possibly empty / null input parameter i_name:
-- tds_master.a_report: returns salary / tax columns from schema
-- tds<i_finyear> (tables saldet and employee_deductee) for one entity,
-- client and branch.
--
-- Parameters:
--   i_entity_id, i_client_id, i_branch_id  exact-match filters on saldet.
--   i_name     optional employee name; NULL or '' disables the filter,
--              otherwise names are compared case-insensitively via upper().
--   i_finyear  financial year; selects the schema named 'tds' || i_finyear.
CREATE OR REPLACE FUNCTION tds_master.a_report(
    i_entity_id integer,
    i_client_id integer,
    i_branch_id integer,
    i_name      text,
    i_finyear   integer)
  RETURNS TABLE(employee_name varchar(100), pan varchar(10), optfor115bac varchar(1)
              , taxable_income numeric, income_tax numeric, credit_us_87a numeric
              , surcharge numeric, education_cess numeric)
  LANGUAGE plpgsql AS
$func$
BEGIN
   -- The schema name is an identifier, so it is injected with format() / %I.
   -- All values, including i_name, are bound through USING: PL/pgSQL
   -- variables are not visible inside the text of a dynamic command, so the
   -- name filter has to reference $4 rather than i_name.
   RETURN QUERY EXECUTE format(
      $q$
      SELECT ed.name  -- AS employee_name
           , ed.pan
           , sd.opting_for_us115bac
           , sd.total_taxable_income
           , sd.income_tax_on_total_income
           , sd.rebate_us87a
           , sd.surcharge
           , sd.education_cess
      FROM   %1$I.saldet            sd
      JOIN   %1$I.employee_deductee ed ON ed.ed_id = sd.employee_id
      WHERE  sd.entity_id = $1
      AND    sd.client_id = $2
      AND    sd.branch_id = $3
      AND   ($4 = '' OR $4 IS NULL OR upper(ed.name) = upper($4))
      $q$, 'tds' || i_finyear::text)
   USING i_entity_id, i_client_id, i_branch_id, i_name;
END
$func$;
Much simpler. Same performance.
I use format(), which is the clean approach. Granted, while you only concatenate an integer value, plain concatenation is reasonably safe.

unaccent() does not work with Greek letters in plpgsql dynamic query

I use PostgreSQL 10 and I ran CREATE EXTENSION unaccent; successfully. I have a plpgsql function that contains the following
whereText := 'lower(unaccent(place.name)) LIKE lower(unaccent($1))';
later, according to what user chose, more clauses may be added to the whereText.
The whereText is finally used in the query:
placewithkeys := '%'||placename||'%';
RETURN QUERY EXECUTE format('SELECT id, name FROM '||fromText||' WHERE '||whereText)
USING placewithkeys , event, date;
The whereText := 'LOWER(unaccent(place.name)) LIKE LOWER(unaccent($1))'; does not work, even if I remove the LOWER part.
I do select __my_function('Τζι'); and I get nothing back, even though I should get back results, because in the database there is the name Τζίμα
If I remove the unaccent and leave the LOWER it works, but not for accents : τζ brings Τζίμα back as it should. It seems like the unaccent is causing a problem.
What am I missing? How can I fix this?
Since there were comments about the syntax and possible SQLi , I provide the whole function definition, now changed to work accent-insensitive and case-insensitive in Greek:
-- __a_search_place: returns id, name and geom of places whose unaccent()-ed
-- name matches the unaccent()-ed pattern '%' || placename || '%' with ILIKE.
-- Each non-NULL optional argument adds one equality condition; the
-- construction-related tables are joined only when one of the construction /
-- style / type arguments is given. Rows are grouped by place.id, place.geom
-- and place.name.
--
-- Parameter-to-placeholder mapping used in the dynamic query:
--   $1 placename pattern, $2 eventtype, $3 eventdate, $4 eventcentury,
--   $5 constructiondate, $6 constructioncentury, $7 arstyle, $8 artype.
CREATE FUNCTION __a_search_place
(placename text, eventtype integer, eventdate integer, eventcentury integer, constructiondate integer, constructioncentury integer, arstyle integer, artype integer)
RETURNS TABLE
(place_id bigint, place_name text, place_geom geometry)
AS $$
DECLARE
fromText text; -- FROM list of the dynamic query
whereText text; -- WHERE conditions, followed by the trailing GROUP BY
placewithkeys text; -- placename wrapped in % wildcards
BEGIN
fromText := '
place
JOIN cep ON place.id = cep.place_id
JOIN event ON cep.event_id = event.id
';
whereText := 'unaccent(place.name) iLIKE unaccent($1)';
placewithkeys := '%'||placename||'%';
-- Join the construction tables only if a construction filter was supplied.
IF constructiondate IS NOT NULL OR constructioncentury IS NOT NULL OR arstyle IS NOT NULL OR artype IS NOT NULL THEN
fromText := fromText || '
JOIN construction ON cep.construction_id = construction.id
JOIN construction_atype ON construction.id = construction_atype.construction_id
JOIN construction_astyle ON construction.id = construction_astyle.construction_id
JOIN atype ON atype.id = construction_atype.atype_id
JOIN astyle ON astyle.id = construction_astyle.astyle_id
';
END IF;
IF eventtype IS NOT NULL THEN
whereText := whereText || 'AND event.type = $2 ';
END IF;
IF eventdate IS NOT NULL THEN
whereText := whereText || 'AND event.date = $3 ';
END IF;
IF eventcentury IS NOT NULL THEN
whereText := whereText || 'AND event.century = $4 ';
END IF;
IF constructiondate IS NOT NULL THEN
whereText := whereText || 'AND construction.date = $5 ';
END IF;
IF constructioncentury IS NOT NULL THEN
whereText := whereText || 'AND construction.century = $6 ';
END IF;
IF arstyle IS NOT NULL THEN
whereText := whereText || 'AND astyle.id = $7 ';
END IF;
IF artype IS NOT NULL THEN
whereText := whereText || 'AND atype.id = $8 ';
END IF;
whereText := whereText || '
GROUP BY place.id, place.geom, place.name
';
-- The text is passed to EXECUTE directly: it contains no format() placeholders,
-- and format() would misinterpret any literal % that ended up in it.
-- All values are bound through USING, never concatenated into the text.
RETURN QUERY EXECUTE 'SELECT place.id, place.name, place.geom FROM '||fromText||' WHERE '||whereText
USING placewithkeys, eventtype, eventdate, eventcentury, constructiondate, constructioncentury, arstyle, artype ;
END;
$$
LANGUAGE plpgsql;
Postgres 12
unaccent() now works for Greek letters, too. Diacritic signs are removed:
db<>fiddle here
Quoting the release notes:
Allow unaccent to remove accents from Greek characters (Tasos Maschalidis)
Postgres 11 or older
unaccent() does not yet work for Greek letters. The call:
SELECT unaccent('
ἀ ἁ ἂ ἃ ἄ ἅ ἆ ἇ Ἀ Ἁ Ἂ Ἃ Ἄ Ἅ Ἆ Ἇ
ἐ ἑ ἒ ἓ ἔ ἕ Ἐ Ἑ Ἒ Ἓ Ἔ Ἕ
ἠ ἡ ἢ ἣ ἤ ἥ ἦ ἧ Ἠ Ἡ Ἢ Ἣ Ἤ Ἥ Ἦ Ἧ
ἰ ἱ ἲ ἳ ἴ ἵ ἶ ἷ Ἰ Ἱ Ἲ Ἳ Ἴ Ἵ Ἶ Ἷ
ὀ ὁ ὂ ὃ ὄ ὅ Ὀ Ὁ Ὂ Ὃ Ὄ Ὅ
ὐ ὑ ὒ ὓ ὔ ὕ ὖ ὗ Ὑ Ὓ Ὕ Ὗ
ὠ ὡ ὢ ὣ ὤ ὥ ὦ ὧ Ὠ Ὡ Ὢ Ὣ Ὤ Ὥ Ὦ Ὧ
ὰ ά ὲ έ ὴ ή ὶ ί ὸ ό ὺ ύ ὼ ώ
ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾈ ᾉ ᾊ ᾋ ᾌ ᾍ ᾎ ᾏ
ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾘ ᾙ ᾚ ᾛ ᾜ ᾝ ᾞ ᾟ
ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ ᾨ ᾩ ᾪ ᾫ ᾬ ᾭ ᾮ ᾯ
ᾰ ᾱ ᾲ ᾳ ᾴ ᾶ ᾷ Ᾰ Ᾱ Ὰ Ά ᾼ ᾽ ι ᾿
῀ ῁ ῂ ῃ ῄ ῆ ῇ Ὲ Έ Ὴ Ή ῌ ῍ ῎ ῏
ῐ ῑ ῒ ΐ ῖ ῗ Ῐ Ῑ Ὶ Ί ῝ ῞ ῟
ῠ ῡ ῢ ΰ ῤ ῥ ῦ ῧ Ῠ Ῡ Ὺ Ύ Ῥ ῭ ΅ `
ῲ ῳ ῴ ῶ ῷ Ὸ Ό Ὼ Ώ ῼ ´ ῾ ');
... returns all letters unchanged, no diacritic signs removed as we would expect.
(I extracted this list from the Wikipedia page on Greek diacritics.)
db<>fiddle here
Looks like a shortcoming of the unaccent module. You can extend the default unaccent dictionary or create your own. There are instructions in the manual. I created several dictionaries in the past and it's simple. And you are not the first to need this:
Postgres unaccent rules for greek characters:
https://gist.github.com/jfragoulis/9914900
Unaccent rules plus greek characters for Postgres 9.6:
https://gist.github.com/marinoszak/7d5d6a8670faae0f4589c2da988f2ba3
You need write access to the file system of the server, though - the directory containing the unaccent files. So, not possible on most cloud services ...
Or you might report a bug and ask to include Greek diacritic signs.
Aside: Dynamic SQL and SQLi
The code fragments you presented are not vulnerable to SQL injection. $1 is concatenated as literal string and only resolved in the EXECUTE command later, where the value is safely passed with the USING clause. So, no unsafe concatenation there. I would do it like this, though:
RETURN QUERY EXECUTE format(
$q$
SELECT id, name
FROM place ...
WHERE lower(unaccent(place.name)) LIKE '%' || lower(unaccent($1)) || '%'
$q$
)
USING placename, event, date;
Notes:
Less confusing - your original even confused Pavel in the comments, a professional in the field.
Assignments in plpgsql are slightly expensive (more so than in other PL), so adopt a coding style with few assignments.
Concatenate the two % symbols for LIKE into the main query directly, giving the query planner the information that the pattern is not anchored to start or end, which may help a more efficient plan. Only the user input is (safely) passed as variable.
Since your WHERE clause references table place, The FROM clause needs to include this table anyway. So you cannot concatenate the FROM clause independently to begin with. Probably better to keep it all in a single format().
Use dollar-quoting so you don't have to escape single quotes additionally.
Insert text with single quotes in PostgreSQL
What are '$$' used for in PL/pgSQL
Maybe just use ILIKE instead of lower(...) LIKE lower(...). If you work with trigram indexes (like would seem best for this query): those work with ILIKE as well:
LOWER LIKE vs iLIKE
I assume you are aware that you may need to escape characters with special meanings in LIKE pattern?
How to escape string while matching pattern in PostgreSQL
Escape function for regular expression or LIKE patterns
Audited function
After you provided your complete function ...
-- __a_search_place: returns id, name and geom of places whose unaccent()-ed
-- name matches '%' || unaccent(placename) || '%' with ILIKE.
-- When any optional argument is non-NULL, an EXISTS subquery over cep / event
-- (plus the construction, type and style tables, only as far as the supplied
-- arguments require them) is appended, with one equality condition per
-- non-NULL argument.
--
-- All optional parameters default to NULL, so trailing arguments may be
-- omitted in the call. Inside the PL/pgSQL CASE expressions $2..$8 denote the
-- function parameters by position; inside the query text they denote the
-- values bound by USING, in the same order.
CREATE OR REPLACE FUNCTION __a_search_place(
placename text
, eventtype int = NULL
, eventdate int = NULL
, eventcentury int = NULL
, constructiondate int = NULL
, constructioncentury int = NULL
, arstyle int = NULL
, artype int = NULL)
RETURNS TABLE(place_id bigint, place_name text, place_geom geometry) AS
$func$
BEGIN
-- RAISE NOTICE '%', concat_ws(E'\n' -- to debug
-- concat_ws() skips NULL pieces, so every CASE without a match simply
-- contributes nothing to the query text.
RETURN QUERY EXECUTE concat_ws(E'\n'
,'SELECT p.id, p.name, p.geom
FROM place p
WHERE unaccent(p.name) ILIKE (''%'' || unaccent($1) || ''%'')' -- no $-quotes
-- any input besides placename ($1)
, CASE WHEN NOT ($2,$3,$4,$5,$6,$7,$8) IS NULL THEN
'AND EXISTS (
SELECT
FROM cep
JOIN event e ON e.id = cep.event_id' END
-- constructiondate, constructioncentury, arstyle, artype
, CASE WHEN NOT ($5,$6,$7,$8) IS NULL THEN
'JOIN construction con ON cep.construction_id = con.id
JOIN construction_atype ON con.id = construction_atype.construction_id
JOIN construction_astyle ON con.id = construction_astyle.construction_id' END
-- arstyle, artype
, CASE WHEN NOT ($7,$8) IS NULL THEN
'JOIN atype ON atype.id = construction_atype.atype_id
JOIN astyle ON astyle.id = construction_astyle.astyle_id' END
, CASE WHEN NOT ($2,$3,$4,$5,$6,$7,$8) IS NULL THEN
'WHERE cep.place_id = p.id' END
, CASE WHEN eventtype IS NOT NULL THEN 'AND e.type = $2' END
, CASE WHEN eventdate IS NOT NULL THEN 'AND e.date = $3' END
, CASE WHEN eventcentury IS NOT NULL THEN 'AND e.century = $4' END
, CASE WHEN constructiondate IS NOT NULL THEN 'AND con.date = $5' END
, CASE WHEN constructioncentury IS NOT NULL THEN 'AND con.century = $6' END
, CASE WHEN arstyle IS NOT NULL THEN 'AND astyle.id = $7' END
, CASE WHEN artype IS NOT NULL THEN 'AND atype.id = $8' END
, CASE WHEN NOT ($2,$3,$4,$5,$6,$7,$8) IS NULL THEN
')' END
)
-- No semicolon before USING: the clause belongs to the RETURN QUERY EXECUTE
-- statement and supplies the values for $1..$8 in the query text.
USING placename
, eventtype
, eventdate
, eventcentury
, constructiondate
, constructioncentury
, arstyle
, artype;
END
$func$ LANGUAGE plpgsql;
This is a complete rewrite with several improvements. Should make the function considerably faster. Also SQLi-safe (like your original). Should be functionally identical except for the cases where I join fewer tables, which might not filter rows that are filtered by joining to the tables alone.
Major features:
Use EXISTS() instead of lots of joins in the outer level plus GROUP BY. This contributes the lion share to the better performance. Related:
Search a JSON array for an object containing a value matching a pattern
format() is typically a good choice to concatenate SQL from user input. But since you encapsulated all code elements and only pass flags, you don't need it in this case. Instead, concat_ws() is of help. Related:
How to concatenate columns in a Postgres SELECT?
Only concatenate JOINs you actually need.
Fewer assignments, shorter code.
Default values for parameters. Allows simplified call with missing parameters. Like:
SELECT __a_search_place('foo', 2, 3, 4);
SELECT __a_search_place('foo');
Related:
Optional argument in PL/pgSQL function
About the short ROW() syntax for testing whether any value is NOT NULL:
Why is IS NOT NULL false when checking a row type?

Resolve PgAttribute and Where Conditional Clause

What I'm trying to do:
Modular function querying for time-range of any table I specify (pg_attribute as string input)
Chain multiple conditions as function inputs in where clause, after ::regclass is called
Reference the attrelid = _tablename::regclass in the chained where clauses
-- the_func (as posted in the question): meant to return the rows of the table
-- named by _tablename that have the given id and a time in (rngstart, rngend].
-- NOTE(review): the second parameter is declared as rngdend, but the body
-- references rngend.
-- NOTE(review): no RETURNS clause or OUT parameter is declared.
CREATE OR REPLACE FUNCTION the_func(
rngstart timestamptz,
rngdend timestamptz,
_tablename VARCHAR(16),
_id INT
) AS $$
-- NOTE(review): this selects from the catalog pg_attribute itself (filtered by
-- the table's OID), so id and time are looked up as pg_attribute columns, not
-- as columns of the table named in _tablename - presumably not what is
-- intended; a table name cannot be supplied as a value in static SQL.
SELECT *
FROM pg_attribute
WHERE attrelid = _tablename::regclass
AND id = _id
AND time > rngstart
AND time <= rngend
$$ LANGUAGE sql STABLE;
You have to write a PL/pgSQL function and use dynamic SQL, something along the lines of
BEGIN
RETURN QUERY EXECUTE format(
'SELECT ... FROM %I '
'WHERE id = $1 '
'AND time > $2 AND time <= $3',
_tablename::text)
USING _id, rngstart, rngend;
END;

How to use variable inside quoted string in postgres dynamic SQL

I created the following function with plpgsql which takes 3 parameters.
-- public.most_service_calls (as posted in the question): per state / city /
-- customer name, returns the number of distinct service orders (num_calls) and
-- the rounded sum of the average labour and average travel amounts, for one
-- company and for customers whose sap_sls_org_name contains 'Stanley US'.
--
-- Parameters:
--   comp_id  company.company_id to report on ($1).
--   calls    minimum num_calls a group needs to be returned ($2).
--   months   intended look-back window in months ($3).
CREATE OR REPLACE FUNCTION public.most_service_calls(
comp_id integer,
calls integer,
months integer)
RETURNS TABLE(state character varying, city character varying, cust_name character varying, num_calls bigint, cost numeric)
LANGUAGE 'plpgsql'
COST 100
VOLATILE
ROWS 1000
AS $BODY$
Begin
-- NOTE(review): in the query text, $3 appears inside the quoted interval
-- literal ('' $3 months''), where it is plain text and not a parameter
-- placeholder, so the months argument bound by USING does not control the
-- window.
return query execute
'select * from
(select l.state, l.city, l.cust_name, count(distinct a.svc_ord_nbr) num_calls,
round(avg(a.std_labr_net_amt_dcrncy) + avg(a.travel_net_amt_dcrncy), 2)
from dmt_mas_svc_ord_fact a
inner join dmt_mas_cust_dim b on a.shipto_cust_id = b.cust_id
inner join store_location l on b.cust_name = l.cust_name
inner join company co on b.cust_lvl_2_nbr = co.company_nbr
where b.sap_sls_org_name like ''%Stanley US%''
and a.create_dtm >= now() - interval '' $3 months''
and co.company_id = $1
group by l.state, l.city, l.cust_name
order by l.state, l.city, l.cust_name ) q
where num_calls >= $2'
using comp_id, calls, months;
end;
$BODY$;
Since the query is quoted, all the single quoted strings are double quoted. Three variables are represented by $1, $2, $3. It is the variable inside a string that is causing the trouble. a.create_dtm >= now() - interval '' $3 months''
When I run the function, it seems to ignore whatever third parameter I provided. Therefore, all the following return the same result.
select * from most_service_calls(1,5,1)
select * from most_service_calls(1,5,12)
select * from most_service_calls(1,5,24)
And it turned out, $3 inside '' '' is taken as 3 since the result matches that of the query with 3 months hardcoded in the query.
What is the correct way to include the variable inside a string in a quoted query like this?
Your problem is not specific to dynamic SQL - you can't refer to a placeholder within a quoted string even in a normal SQL query.
Instead you could use:
$3 * interval '1 month'
or:
($3 || ' months')::interval
The first form multiplies your supplied numeric value by a one month interval. The second constructs a string specifying the number of months and then casts it to an interval.

EXECUTE...INTO...USING statement in PL/pgSQL can't execute into a record?

I'm attempting to write an area of a function in PL/pgSQL that loops through an hstore and sets a record's column(the key of the hstore) to a specific value (the value of the hstore). I'm using Postgres 9.1.
The hstore will look like: ' "column1"=>"value1","column2"=>"value2" '
Generally, here is what I want from a function that takes in an hstore and has a record with values to modify:
FOR my_key, my_value IN
SELECT key,
value
FROM EACH( in_hstore )
LOOP
EXECUTE 'SELECT $1'
INTO my_row.my_key
USING my_value;
END LOOP;
The error which I am getting with this code:
"myrow" has no field "my_key". I've been searching for quite a while now for a solution, but everything else I've tried to achieve the same result hasn't worked.
Simpler alternative to your posted answer. Should perform much better.
This function retrieves a row from a given table (in_table_name) and primary key value (in_row_pk), and inserts it as new row into the same table, with some values replaced (in_override_values). The new primary key value as per default is returned (pk_new).
-- f_clone_row: copies one row of a table into a new row of the same table,
-- with selected column values replaced, and returns the new primary key.
--
-- Parameters:
--   in_table_name       table to operate on (regclass, so the name is resolved
--                       and validated when the function is called).
--   in_row_pk           primary key value of the row to copy.
--   in_override_values  hstore of column => value pairs applied to the copied
--                       row with the #= operator.
--   pk_new (OUT)        primary key value of the inserted row, as produced by
--                       the INSERT's RETURNING clause.
-- Uses the first column of the primary key index only (single-column PK).
CREATE OR REPLACE FUNCTION f_clone_row(in_table_name regclass
                                     , in_row_pk int
                                     , in_override_values hstore
                                     , OUT pk_new int)
  LANGUAGE plpgsql AS
$func$
DECLARE
   _pk   text;  -- name of PK column
   _cols text;  -- list of names of other columns
BEGIN
   -- Get name of PK column
   SELECT INTO _pk  a.attname
   FROM   pg_catalog.pg_index     i
   JOIN   pg_catalog.pg_attribute a ON a.attrelid = i.indrelid
                                   AND a.attnum   = i.indkey[0]  -- single PK col!
   WHERE  i.indrelid = in_table_name
   AND    i.indisprimary;

   -- Get list of columns excluding PK column
   SELECT INTO _cols  string_agg(quote_ident(attname), ',')
   FROM   pg_catalog.pg_attribute
   WHERE  attrelid = in_table_name  -- regclass used as OID
   AND    attnum > 0                -- exclude system columns
   AND    attisdropped = FALSE      -- exclude dropped columns
   AND    attname <> _pk;           -- exclude PK column

   -- INSERT cloned row with override values, returning new PK.
   -- The table name is inserted with %s, not %I: the text form of a regclass
   -- is already double-quoted and schema-qualified where necessary, so %I
   -- would quote it a second time and break such names. _cols is a list of
   -- already-quoted identifiers (%s); _pk is a raw column name (%I).
   EXECUTE format('
      INSERT INTO %1$s (%2$s)
      SELECT %2$s
      FROM  (SELECT (t #= $1).* FROM %1$s t WHERE %3$I = $2) x
      RETURNING %3$I'
    , in_table_name, _cols, _pk)
   USING  in_override_values, in_row_pk  -- use override values directly
   INTO   pk_new;                        -- return new pk directly
END
$func$;
Call:
SELECT f_clone_row('tbl', 1, '"col1"=>"foo_new","col2"=>"bar_new"');
db<>fiddle here
Old sqlfiddle
Use regclass as input parameter type, so only valid table names can be used to begin with and SQL injection is ruled out. The function also fails earlier and more gracefully if you should provide an illegal table name.
Use an OUT parameter (pk_new) to simplify the syntax.
No need to figure out the next value for the primary key manually. It is inserted automatically and returned after the fact. That's not only simpler and faster, you also avoid wasted or out-of-order sequence numbers.
Use format() to simplify the assembly of the dynamic query string and make it less error-prone. Note how I use positional parameters for identifiers and unquoted strings respectively.
I build on your implicit assumption that allowed tables have a single primary key column of type integer with a column default. Typically serial columns.
Key element of the function is the final INSERT:
Merge override values with the existing row using the #= operator in a subselect and decompose the resulting row immediately.
Then you can select only relevant columns in the main SELECT.
Let Postgres assign the default value for the PK and get it back with the RETURNING clause.
Write the returned value into the OUT parameter directly.
All done in a single SQL command, that is generally fastest.
Since I didn't want to have to use any external functions for speed purposes, I created a solution using hstores to insert a record into a table:
-- fn_clone_row: copies one row of a table in schema public into a new row of
-- the same table, with selected column values replaced, and returns the new
-- primary key value.
--
-- Parameters:
--   in_table_name       unqualified table name, looked up in schema public.
--   in_row_pk           primary key value of the row to copy.
--   in_override_values  hstore of column => value pairs to apply to the copy.
-- Returns the value obtained by evaluating the primary key column's default
-- expression, which is also stored as the new row's primary key.
-- Uses the first column of the primary key constraint only.
CREATE OR REPLACE FUNCTION fn_clone_row(in_table_name character varying, in_row_pk integer, in_override_values hstore)
RETURNS integer
LANGUAGE plpgsql
AS $function$
DECLARE
    my_table_pk_col_name varchar;
    my_row record;
    my_pk_default text;
    my_pk_new integer;
    my_pk_new_text text;
    my_row_hstore hstore;
    my_row_keys text[];
    my_row_keys_list text;
    my_row_values text[];
    my_row_values_list text;
BEGIN
    -- Get the default expression and the name of the pk column for the table.
    -- pg_get_expr(adbin, adrelid) is used instead of pg_attrdef.adsrc, which
    -- no longer exists as of PostgreSQL 12.
    SELECT pg_get_expr( ad.adbin, ad.adrelid ),
           at.attname
    INTO my_pk_default,
         my_table_pk_col_name
    FROM pg_attrdef ad
    JOIN pg_attribute at
      ON at.attnum = ad.adnum
     AND at.attrelid = ad.adrelid
    JOIN pg_class c
      ON c.oid = at.attrelid
    JOIN pg_constraint cn
      ON cn.conrelid = c.oid
     AND cn.contype = 'p'
     AND cn.conkey[1] = at.attnum
    JOIN pg_namespace n
      ON n.oid = c.relnamespace
    WHERE c.relname = in_table_name
      AND n.nspname = 'public';

    -- Evaluate the default expression to get the next value of the pk.
    EXECUTE ' SELECT ' || my_pk_default
    INTO my_pk_new;

    -- Set the integer value back to text for the hstore
    my_pk_new_text := my_pk_new::text;

    -- Add the new pk value to the hstore of changes to make.
    in_override_values := in_override_values || hstore( my_table_pk_col_name, my_pk_new_text );

    -- Copy over only the given row to the record.
    EXECUTE ' SELECT * '
            ' FROM ' || quote_ident( in_table_name ) ||
            ' WHERE ' || quote_ident( my_table_pk_col_name ) ||
            ' = ' || quote_nullable( in_row_pk )
    INTO my_row;

    -- Replace the values that need to be changed in the record
    my_row := my_row #= in_override_values;

    -- Create an hstore of my record
    my_row_hstore := hstore( my_row );

    -- Create a string of comma-delimited, quote-enclosed column names
    my_row_keys := akeys( my_row_hstore );
    SELECT array_to_string( array_agg( quote_ident( x.colname ) ), ',' )
    INTO my_row_keys_list
    FROM ( SELECT unnest( my_row_keys ) AS colname ) x;

    -- Create a string of comma-delimited, quote-enclosed column values
    my_row_values := avals( my_row_hstore );
    SELECT array_to_string( array_agg( quote_nullable( x.value ) ), ',' )
    INTO my_row_values_list
    FROM ( SELECT unnest( my_row_values ) AS value ) x;

    -- Insert the values into the columns of a new row. The table name is
    -- quoted here as well, consistent with the SELECT above, so names that
    -- need quoting work and cannot inject SQL.
    EXECUTE 'INSERT INTO ' || quote_ident( in_table_name ) || '(' || my_row_keys_list || ')'
            ' VALUES (' || my_row_values_list || ')';

    RETURN my_pk_new;
END
$function$;
It's quite a bit longer than what I had envisioned, but it works and is actually quite speedy.