PostgreSQL log trigger optimization - postgresql

I spent a lot of time trying to optimize our pgsql log trigger, which had started to become a problem. I made huge progress (from 18 min down to 2.5 min when inserting 3M rows), but I would like to know whether some pgSQL masters can do even better.
-- Row-level audit trigger: writes one tab_logs row per logged column whose
-- text value is new (INSERT) or has changed (UPDATE).
--
-- Trigger argument (optional):
--   TG_ARGV[0]  comma-separated list of column names to log. When absent,
--               every column of the row is logged except id, created_at
--               and updated_at.
--
-- Returns NEW, so the triggering row passes through unchanged.
CREATE OR REPLACE FUNCTION table_log_trig()
RETURNS trigger AS
$BODY$
DECLARE
    col      TEXT;    -- Single column name to save
    newVal   TEXT;    -- New value for column
    oldVal   TEXT;    -- Old value for column (stays NULL on INSERT)
    colLimit TEXT[];  -- Columns that should be logged
BEGIN
    IF TG_ARGV[0] IS NOT NULL THEN
        -- Trigger specifies columns to log
        colLimit := string_to_array(TG_ARGV[0], ',');
    ELSE
        -- Trigger with no params. Log all columns
        SELECT array_agg(k.col_name)
        INTO colLimit
        FROM json_object_keys(row_to_json(NEW)) AS k(col_name)
        WHERE k.col_name NOT IN ('id', 'created_at', 'updated_at'); -- Exceptions
    END IF;

    -- array_agg() over zero rows yields NULL and FOREACH raises an error on a
    -- NULL array, so fall back to an empty array (nothing to log).
    colLimit := COALESCE(colLimit, '{}');

    -- Loop over columns that should be saved in log
    FOREACH col IN ARRAY colLimit
    LOOP
        -- INSERT & UPDATE
        -- NOTE: col is spliced into the statement unquoted, so the names must
        -- be valid SQL identifiers exactly as written.
        EXECUTE 'SELECT ($1).' || col || '::text' INTO newVal USING NEW;

        -- UPDATE
        IF TG_OP = 'UPDATE' THEN
            EXECUTE 'SELECT ($1).' || col || '::text' INTO oldVal USING OLD;
        END IF;

        -- Add only new or changed data (NULL-safe comparison)
        IF newVal IS DISTINCT FROM oldVal THEN
            INSERT INTO tab_logs (record_id, field_name, old_value, new_value, created_at, created_by, action)
            VALUES (NEW.id, col, oldVal, newVal, NOW(), 999, 'O');
        END IF;
    END LOOP;

    RETURN NEW;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;

row_to_json() returns both column names and values; you may as well make use of these values, rather than extracting them later via dynamic SQL.
I haven't thoroughly tested this, let alone benchmarked it, but here's the gist of it:
-- Set-based variant of the audit trigger: converts OLD/NEW to jsonb once and
-- inserts all changed columns into tab_logs with a single INSERT ... SELECT.
--
-- Trigger argument (optional):
--   TG_ARGV[0]  comma-separated list of column names to log. When absent,
--               all columns except id, created_at and updated_at are logged.
--
-- Returns NEW so the function is safe for BEFORE triggers as well (returning
-- NULL from a BEFORE ROW trigger would silently skip the row); the return
-- value is ignored for AFTER triggers.
CREATE OR REPLACE FUNCTION table_log_trig() RETURNS trigger AS
$$
DECLARE
    OldJson JSONB = NULL;  -- stays NULL on INSERT, so the LEFT JOIN finds no old values
BEGIN
    IF TG_OP <> 'INSERT' THEN
        OldJson := to_jsonb(old);
    END IF;

    -- jsonb_each_text() yields plain text values and maps JSON null to SQL
    -- NULL, so strings are stored without JSON quoting and NULL columns of a
    -- freshly inserted row are not reported as a change to 'null'.
    INSERT INTO tab_logs (record_id, field_name, old_value, new_value, created_at, created_by, action)
    SELECT new.id, key, OldValues.value, NewValues.value, now(), 999, 'O'
    FROM jsonb_each_text(to_jsonb(new)) NewValues
    LEFT JOIN jsonb_each_text(OldJson) OldValues USING (key)
    WHERE
        (
            (TG_ARGV[0] IS NULL AND key NOT IN ('id', 'created_at', 'updated_at')) OR
            (TG_ARGV[0] IS NOT NULL AND key = ANY(string_to_array(TG_ARGV[0], ',')))
        ) AND
        OldValues.value IS DISTINCT FROM NewValues.value;

    RETURN NEW;
END
$$
LANGUAGE plpgsql VOLATILE;

Related

PL/pgSQL: How to use IF NEW.<variable_column_name> <> OLD.<variable_column_name>

I am pretty new to PL/pgSQL programming. I have a requirement of audit logging updated columns in my table
Table
-- Example source table; mail is the primary key.
CREATE TABLE sample_table (
    name varchar(15),
    city varchar(15),
    age  int,
    mail varchar(20) PRIMARY KEY
);
Audit table
-- Audit table: one row per changed column of sample_table.
-- columnchanged/oldvalue are unbounded text: the audited columns are up to
-- varchar(20), so a varchar(10) target could reject the audit row and thereby
-- abort the audited UPDATE.
CREATE TABLE sample_table__audits_dynamicols (
    mail          varchar(20),            -- mail of the audited sample_table row
    columnchanged text,                   -- name of the column that changed
    oldvalue      text,                   -- previous value, as text
    changed_on    timestamp(6) NOT NULL   -- when the change was recorded
);
Trigger Function
-- Trigger function from the question (kept as posted; it does NOT work).
-- Intent: loop over the columns of public.sample_table and, for each column
-- whose value differs between OLD and NEW, insert an audit row.
-- Known problems in this version:
--   * NEW._colname / OLD._colname refer to a column literally named
--     "_colname"; PL/pgSQL does not substitute the variable's value there.
--   * OLD.:_colname is not valid syntax.
--   * _changed_on is declared as time, while the audit column changed_on is
--     timestamp(6).
CREATE FUNCTION public.log_sample_table_allchanges() RETURNS trigger AS $BODY$DECLARE
_colname text;
_tablename varchar(15) := 'sample_table';
_schema varchar(15) := 'public';
_changed_on time := now();
BEGIN
FOR _colname IN SELECT column_name FROM information_schema.Columns WHERE table_schema = _schema AND table_name = _tablename LOOP
IF NEW._colname <> OLD._colname THEN
INSERT INTO sample_table__audits_dynamicols(mail,columnchanged, oldvalue ,changed_on)
VALUES(OLD.mail,_colname,OLD.:_colname,_changed_on);
END IF;
END LOOP;
RETURN NEW;
END$BODY$
LANGUAGE plpgsql VOLATILE NOT LEAKPROOF;
Trigger
-- Run the audit function once for every row about to be updated.
CREATE TRIGGER log_sample_table_allchanges
    BEFORE UPDATE ON sample_table
    FOR EACH ROW
    EXECUTE PROCEDURE log_sample_table_allchanges();
Requirement: Whenever a column value is changed i want to log it as
(mail, columnname, columnvalue, date)
E.g:
insert into sample_table (name, mail, city, age) values('kanta','mk#foo.com','hyd',23);
insert into sample_table (name, mail, city, age) values('kmk','mk#gmail.com','hyd',23);
So when i update like the following
update sample_table set age=24 where mail='mk#foo.com';
update sample_table set city='bza' where mail='mk#gmail.com'
I want audit table to record like
(mk#foo.com,age,23, timestamp)
(mk#gmail.com, city, hyd, timestamp)
Right now I am facing an issue with the column comparison in my trigger function. Please help me fix the trigger function so that it meets this requirement.
You may use EXECUTE to get the values of columns dynamically and do the comparison.
-- Row-level UPDATE audit trigger: for every column of the triggering table
-- whose value changed, stores (mail, column name, old value, timestamp) in
-- sample_table__audits_dynamicols.
--
-- Returns NEW so the UPDATE proceeds unchanged.
CREATE OR REPLACE FUNCTION public.log_sample_table_allchanges() RETURNS trigger AS
$BODY$
DECLARE
    _colname    text;
    _changed_on timestamp := now();
    _old_val    text;
    _new_val    text;
BEGIN
    -- Use the table the trigger actually fired on instead of a hard-coded name.
    FOR _colname IN
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = TG_TABLE_SCHEMA::text
          AND table_name = TG_TABLE_NAME::text
        ORDER BY ordinal_position
    LOOP
        -- Fetch the old and new value of this column; %I quotes the column
        -- name so mixed-case or reserved-word names work too.
        EXECUTE format('SELECT ($1).%1$I, ($2).%1$I', _colname)
        INTO _old_val, _new_val
        USING OLD, NEW;

        -- NULL-safe comparison: also catches value -> NULL and NULL -> value,
        -- which a plain <> would silently skip.
        IF _new_val IS DISTINCT FROM _old_val THEN
            INSERT INTO sample_table__audits_dynamicols (mail, columnchanged, oldvalue, changed_on)
            VALUES (OLD.mail, _colname, _old_val, _changed_on);
        END IF;
    END LOOP;

    RETURN NEW;
END$BODY$
LANGUAGE plpgsql VOLATILE NOT LEAKPROOF;
I'm not sure why you have defined mail as a PRIMARY KEY in the audits table, it will cause unique constraint violation if the same mail gets updated twice.

Having an ordinal column with no gaps

I want to have an ordinal column in which values always start from 1 and have no gaps. I have devised a solution with triggers, but I'd like to know if there is a better or more elegant way.
BEFORE INSERT trigger renumbers the rows that come after the inserted value. If value is not provided or too high, it is set to row count + 1. Similarly, AFTER DELETE trigger renumbers the rows that come after the deleted value. Both triggers lock rows before changing the value.
-- BEFORE INSERT OR UPDATE trigger that keeps "id" a gap-free ordinal.
--   INSERT: rejects ids below 1; a missing or too-large id becomes
--           row count + 1; otherwise rows with id >= NEW.id are locked and
--           shifted up by one, highest first.
--   other:  rejects any change of id.
CREATE OR REPLACE FUNCTION ids_insert() RETURNS trigger AS $BODY$
DECLARE
    next_free bigint;  -- row count + 1
    shift_id  bigint;  -- id currently being moved up
BEGIN
    -- Non-INSERT path first: only guard against direct id changes.
    IF TG_OP <> 'INSERT' THEN
        IF NEW.id != OLD.id THEN
            RAISE EXCEPTION 'Changing the ID directly is not allowed.';
        END IF;
        RETURN NEW;
    END IF;

    IF NEW.id < 1 THEN
        RAISE EXCEPTION 'ID must be greater than zero.';
    END IF;

    EXECUTE format('SELECT COUNT(*) + 1 FROM %I', TG_TABLE_NAME)
    INTO next_free;

    -- Appending: no existing row has to move.
    IF NEW.id IS NULL OR NEW.id > next_free THEN
        NEW.id := next_free;
        RETURN NEW;
    END IF;

    -- Inserting in the middle: lock the tail and move it up one row at a
    -- time, in descending id order.
    FOR shift_id IN EXECUTE
        format('SELECT id FROM %I WHERE id >= %s ORDER BY id DESC FOR UPDATE', TG_TABLE_NAME, NEW.id)
    LOOP
        EXECUTE format('UPDATE %I SET id = id + 1 WHERE id = %s', TG_TABLE_NAME, shift_id);
    END LOOP;

    RETURN NEW;
END;
$BODY$ LANGUAGE plpgsql;
-- DELETE trigger companion of ids_insert(): closes the gap left by the
-- deleted row by locking all rows with a higher id and moving each down by
-- one, lowest first.
CREATE OR REPLACE FUNCTION ids_delete() RETURNS trigger AS $BODY$
DECLARE
    following_id bigint;  -- id currently being moved down
BEGIN
    FOR following_id IN EXECUTE
        format('SELECT id FROM %I WHERE id > %s ORDER BY id FOR UPDATE', TG_TABLE_NAME, OLD.id)
    LOOP
        EXECUTE format('UPDATE %I SET id = id - 1 WHERE id = %s', TG_TABLE_NAME, following_id);
    END LOOP;

    RETURN OLD;
END;
$BODY$ LANGUAGE plpgsql;
CREATE TABLE test (
id bigint PRIMARY KEY,
...
)
-- Renumber on insert / guard id updates; the depth check keeps the trigger
-- from firing on UPDATEs issued by another trigger.
CREATE TRIGGER test_insert
    BEFORE INSERT OR UPDATE OF id ON test
    FOR EACH ROW
    WHEN (pg_trigger_depth() < 1)
    EXECUTE PROCEDURE ids_insert();

-- Close the gap after a row has been deleted.
CREATE TRIGGER test_delete
    AFTER DELETE ON test
    FOR EACH ROW
    EXECUTE PROCEDURE ids_delete();

"INSERT INTO ... FETCH ALL FROM ..." can't be compiled

I have some function on PostgreSQL 9.6 returning a cursor (refcursor):
-- Opens the named cursor "test_returning_cursor_ref1" over three sample rows
-- and returns it.
-- Declared VOLATILE (not IMMUTABLE): opening a cursor is a side effect, and
-- an IMMUTABLE function may be evaluated once at plan time or have repeated
-- calls collapsed by the planner.
CREATE OR REPLACE FUNCTION public.test_returning_cursor()
RETURNS refcursor
VOLATILE
LANGUAGE plpgsql
AS $$
DECLARE
    _ref refcursor = 'test_returning_cursor_ref1';  -- fixed portal name callers FETCH from
BEGIN
    OPEN _ref FOR
        SELECT 'a' :: text AS col1
        UNION
        SELECT 'b'
        UNION
        SELECT 'c';
    RETURN _ref;
END
$$;
I need to write another function in which a temp table is created and all data from this refcursor are inserted to it. But INSERT INTO ... FETCH ALL FROM ... seems to be impossible. Such function can't be compiled:
-- Example from the question (kept as posted; it does NOT compile).
-- Intent: create a temp table, fill it with every row of the already-open
-- cursor "test_returning_cursor_ref1", and return the table's contents.
-- The INSERT below uses FETCH ALL as its row source, which INSERT does not
-- accept.
-- NOTE(review): the function is declared IMMUTABLE although it creates a
-- temp table and inserts rows.
CREATE OR REPLACE FUNCTION public.test_insert_from_cursor()
RETURNS table(col1 text)
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
CREATE TEMP TABLE _temptable (
col1 text
) ON COMMIT DROP;
INSERT INTO _temptable (col1)
FETCH ALL FROM "test_returning_cursor_ref1";
RETURN QUERY
SELECT col1
FROM _temptable;
END
$$;
I know that I can use:
FOR _rec IN
FETCH ALL FROM "test_returning_cursor_ref1"
LOOP
INSERT INTO ...
END LOOP;
But is there better way?
Unfortunately, INSERT and SELECT don't have access to cursors as a whole.
To avoid expensive single-row INSERT, you could have intermediary functions with RETURNS TABLE and return the cursor as table with RETURN QUERY. See:
Return a query from a function?
-- Exposes the rows remaining in the open cursor test_returning_cursor_ref1
-- as a set-returning function, so they can be used in a FROM clause.
CREATE OR REPLACE FUNCTION f_cursor1_to_tbl()
    RETURNS TABLE (col1 text)
    LANGUAGE plpgsql  -- not IMMUTABLE
AS $body$
BEGIN
    -- MOVE BACKWARD ALL FROM test_returning_cursor_ref1;  -- optional, see below
    RETURN QUERY
        FETCH ALL FROM test_returning_cursor_ref1;
END
$body$;
Then create the temporary table(s) directly like:
-- Materialise the cursor rows in one set-based statement; the table is
-- dropped at the end of the transaction.
CREATE TEMP TABLE t1
ON COMMIT DROP
AS
SELECT col1
FROM f_cursor1_to_tbl();
See:
Creating temporary tables in SQL
Still not very elegant, but much faster than single-row INSERT.
Note: Since the source is a cursor only the first call succeeds. Executing the function a second time would return an empty set. You would need a cursor with the SCROLL option and move to the start for repeated calls.
This function performs an INSERT INTO from a refcursor. It works for any table. The only requirement is that the columns of the table correspond to the columns of the refcursor by type and order (not necessarily by name).
to_json() does the trick to convert any primitive data types to string with double-quotes "", which are later replaced with ''.
-- Inserts all remaining rows of an open refcursor into a table using one
-- multi-row INSERT ... VALUES statement.
--
-- Parameters:
--   _table_name  name of the target table (optionally schema-qualified).
--   _ref         open cursor; its columns must match the table's columns by
--                position and be assignable to them (names are irrelevant).
--
-- Each value is taken as text via json_each_text() and emitted with
-- quote_nullable(), so apostrophes, double quotes and backslashes inside the
-- data are escaped correctly and SQL NULL stays NULL. (Globally replacing
-- '"' with '''' in the generated statement would corrupt such values.)
-- Nothing is executed when the cursor yields no rows.
CREATE OR REPLACE FUNCTION public.insert_into_from_refcursor(_table_name text, _ref refcursor)
RETURNS void
LANGUAGE plpgsql
AS $$
DECLARE
    _row    record;
    _tuples text[] := '{}';  -- one "(v1,v2,...)" literal list per fetched row
BEGIN
    LOOP --for each row
        FETCH _ref INTO _row;
        EXIT WHEN NOT found; --there are no more rows

        _tuples := array_append(
            _tuples,
            (SELECT '(' || string_agg(quote_nullable(val.value), ',') || ')'
             FROM json_each_text(to_json(_row)) AS val)
        );
    END LOOP;

    IF cardinality(_tuples) > 0 THEN --to avoid error when trying to insert 0 values
        -- The regclass cast validates the table name and renders it safely
        -- quoted, so it cannot inject arbitrary SQL.
        EXECUTE format(
            'INSERT INTO %s VALUES %s',
            _table_name::regclass,
            array_to_string(_tuples, ',')
        );
    END IF;
END;
$$;
Usage:
CREATE TABLE public.table1 (...);
PERFORM my_func_opening_refcursor();
PERFORM public.insert_into_from_refcursor('public.table1', 'name_of_refcursor_portal'::refcursor);
where my_func_opening_refcursor() contains
DECLARE
_ref refcursor = 'name_of_refcursor_portal';
OPEN _ref FOR
SELECT ...;

PostgreSQL update trigger Comparing Hstore values

I am creating a trigger in PostgreSQL. On update, I would like to compare all of the values in an hstore representation of the row and apply the changes to my mirror table. I managed to get the names of my columns into the variable k, but I am not able to use it to get the values from NEW and OLD.
-- Trigger function from the question (kept as posted; it does NOT work).
-- Intent: for every column of the updated row whose value changed, apply the
-- new value to the mirror table "<table>_2".
-- Known problems in this version:
--   * NEW.k / OLD.k refer to a column literally named "k"; PL/pgSQL does not
--     substitute the loop variable's value there.
--   * '_2' || 'SET ' leaves no space between the table name and SET.
--   * The new value is concatenated into the statement without quoting.
CREATE OR REPLACE FUNCTION function_replication() RETURNS TRIGGER AS
$BODY$
DECLARE
k text;
BEGIN
FOR k IN SELECT key FROM EACH(hstore(NEW)) LOOP
IF NEW.k != OLD.k THEN
EXECUTE 'UPDATE ' || TG_TABLE_NAME || '_2' || 'SET ' || k || '=' || new.k || ' WHERE ID=$1.ID;' USING OLD;
END IF;
END LOOP;
RETURN NEW;
END;
$BODY$
language plpgsql;
You should operate on hstore representations of the records new and old. Also, use the format() function for better control and readability.
-- Row-level UPDATE trigger: mirrors every changed column of the updated row
-- into the table "<table>_2", one UPDATE per changed column, matching the
-- mirror row by the old id.
--
-- Returns new so the original UPDATE proceeds unchanged.
create or replace function function_replication()
returns trigger as
$body$
declare
    newh hstore = hstore(new);  -- column name => new value (as text)
    oldh hstore = hstore(old);  -- column name => old value (as text)
    key text;
begin
    foreach key in array akeys(newh) loop
        -- null-safe comparison: a plain != yields null when either side is
        -- null, so changes to or from null would never be replicated
        if (newh->key) is distinct from (oldh->key) then
            -- %I quotes identifiers, %L quotes values (and prints NULL for null)
            execute format(
                'update %I set %I = %L where id = %L',
                tg_table_name || '_2', key, newh->key, oldh->'id');
        end if;
    end loop;
    return new;
end;
$body$
language plpgsql;
Another version - with minimalistic numbers of updates - in partially functional design (where it is possible).
This trigger should be an AFTER trigger to ensure correct behavior.
-- AFTER UPDATE row trigger: replicates all changed columns of the updated
-- row into the mirror table "<table>_2" with a single UPDATE statement.
--
-- Raises an exception when the id column itself was changed.
-- Returns NEW (the return value is ignored for AFTER triggers).
CREATE OR REPLACE FUNCTION function_replication()
RETURNS trigger AS $$
DECLARE
    set_clause text;  -- "col = 'value', ..." for every changed column
BEGIN
    IF new IS DISTINCT FROM old THEN
        IF new.id <> old.id THEN
            RAISE EXCEPTION 'id should be immutable';
        END IF;

        -- hstore(new) - hstore(old) keeps only the pairs whose text value
        -- differs; %I / %L take care of all identifier and literal quoting
        -- (%L prints NULL for a null value).
        SELECT string_agg(format('%I = %L', diff.key, diff.value), ', ')
        INTO set_clause
        FROM each(hstore(new) - hstore(old)) AS diff;

        -- The rows can differ while their text forms are identical; the SET
        -- list would then be empty and the UPDATE a syntax error, so skip it.
        IF set_clause IS NOT NULL THEN
            EXECUTE format('UPDATE %I SET %s WHERE id = $1',
                           tg_table_name || '_2',
                           set_clause)
            USING old.id;
        END IF;
    END IF;

    RETURN NEW; -- the value is not important in AFTER trg
END;
$$ LANGUAGE plpgsql;
-- Demo: source table, mirror table and the replication trigger.
CREATE TABLE foo (
    id int PRIMARY KEY,
    a  int,
    b  int
);

CREATE TABLE foo_2 (LIKE foo INCLUDING ALL);

CREATE TRIGGER xxx
    AFTER UPDATE ON foo
    FOR EACH ROW
    EXECUTE PROCEDURE function_replication();

-- Seed both tables with identical rows.
INSERT INTO foo (id, a, b) VALUES (1, NULL, NULL);
INSERT INTO foo (id, a, b) VALUES (2, 1, 1);
INSERT INTO foo_2 (id, a, b) VALUES (1, NULL, NULL);
INSERT INTO foo_2 (id, a, b) VALUES (2, 1, 1);

-- Exercise the trigger: NULL -> value, then value -> NULL.
UPDATE foo SET a = 20, b = 30 WHERE id = 1;
UPDATE foo SET a = NULL WHERE id = 1;
This code is a little more complex, but everything that needs escaping is escaped, and it reduces the number of executed UPDATE commands. UPDATE is a full SQL command, and the overhead of running full SQL commands should be significantly higher than that of the code that reduces their number.

Dynamic SELECT in trigger function gives syntax error

I need help debugging some syntax errors in the code below. There are only a couple of them, near keywords such as INSERT and SELECT.
-- Row-level audit trigger writing per-column history rows into audit_temp.
--
--   INSERT / UPDATE: stamps NEW.record_modified_ with clock_timestamp(), then
--       logs one row per significant column whose text value changed.
--       Columns whose name contains '_x_', and the columns pkey_ and
--       record_modified_, are never logged. On INSERT the old value is
--       recorded as '' and NULL or empty new values are not logged.
--   DELETE: logs a single row carrying only the operation and the key.
--
-- Returns NEW for INSERT/UPDATE and OLD for DELETE.
CREATE OR REPLACE FUNCTION audit_temp() RETURNS TRIGGER LANGUAGE plpgsql AS $BODY$
DECLARE
    ri RECORD;
    oldValue TEXT;
    newValue TEXT;
    isValueModified BOOLEAN;
BEGIN
    IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN
        NEW.record_modified_ = clock_timestamp();

        FOR ri IN
            -- Columns of this trigger's table that are worth auditing.
            -- information_schema stores raw (unquoted) names, so compare with
            -- the raw TG_* values rather than with quote_ident() output.
            -- Filtering here avoids running dynamic SQL for ignored columns.
            SELECT ordinal_position, column_name
            FROM information_schema.columns
            WHERE table_schema = TG_TABLE_SCHEMA::text
              AND table_name = TG_TABLE_NAME::text
              AND position('_x_' in column_name::text) < 1
              AND column_name <> 'pkey_'
              AND column_name <> 'record_modified_'
            ORDER BY ordinal_position
        LOOP
            -- NEW value; %I quotes the column name for the dynamic statement.
            EXECUTE format('SELECT ($1).%I::text', ri.column_name) INTO newValue USING NEW;

            IF TG_OP = 'INSERT' THEN
                -- No OLD row on INSERT: record an empty old value and log only
                -- columns that actually received a non-empty value.
                oldValue := '';
                isValueModified := newValue IS NOT NULL AND newValue <> '';
            ELSE
                EXECUTE format('SELECT ($1).%I::text', ri.column_name) INTO oldValue USING OLD;
                -- NULL-safe: also detects value -> NULL and NULL -> value.
                isValueModified := oldValue IS DISTINCT FROM newValue;
            END IF;

            IF isValueModified THEN
                /*RAISE NOTICE E'Inserting history_ row for INSERT or UPDATE.\n';*/
                INSERT INTO audit_temp ( operation_, table_oid_, table_name_, uuid_, column_name_, ordinal_position_of_column_, old_value_, new_value_ )
                VALUES ( TG_OP, TG_RELID, TG_TABLE_NAME, NEW.pkey_, ri.column_name::VARCHAR, ri.ordinal_position, oldValue::VARCHAR, newValue::VARCHAR );
            END IF;
        END LOOP;

        RETURN NEW;
    ELSIF TG_OP = 'DELETE' THEN
        /*RAISE NOTICE E'Inserting history_ row for DELETE.\n';*/
        -- Similar to INSERT above, but refers to OLD instead of NEW, and passes empty values for last 4 fields.
        INSERT INTO audit_temp ( operation_, table_oid_, table_name_, uuid_, column_name_, ordinal_position_of_column_, old_value_, new_value_ )
        VALUES ( TG_OP, TG_RELID, TG_TABLE_NAME, OLD.pkey_, ''::VARCHAR, 0, ''::VARCHAR, ''::VARCHAR );
        RETURN OLD;
    END IF;

    /* Should never reach this point. Branching in code above should always reach a call to RETURN. */
    RAISE EXCEPTION 'Unexpectedly reached the bottom of this function without calling RETURN.';
END; $BODY$;
The error is as follows & mostly around Select Insert keywords only:
>[Error] Script lines: 1-42 -------------------------
ERROR: syntax error at or near "SELECT"
Any suggestions?????
Syntax error
The offending statement is this (and the others like it):
EXECUTE 'SELECT ($1).' || ri.column_name || '::text' INTO newValue USING NEW;
According to the documentation parameter symbols can only be used for data values — if you want to use dynamically determined table or column names, you must insert them into the command string textually. So the solution would be:
EXECUTE 'SELECT (' || NEW || ').' || ri.column_name || '::text' INTO newValue;
Improvements
You can make a few improvements to your trigger function to make it faster and more efficient:
You should check isColumnSignificant before you populate oldValue and newValue.
When updating, you do not have to record the OLD values: they are already in the audit table when the data was first inserted or later updated.
When deleting, don't store empty strings and 0, just leave the columns NULL.