PostgreSQL: compare two jsonb objects

With PostgreSQL (v9.5), the JSONB format opens up great opportunities. But now I'm stuck with what seems like a relatively simple operation:
comparing two jsonb objects to see what is different or missing in one document compared to the other.
What I have so far
WITH reports(id, data) AS (
  VALUES
    (1, '{"a":"aaa", "b":"bbb", "c":"ccc"}'::jsonb),
    (2, '{"a":"aaa", "b":"jjj", "d":"ddd"}'::jsonb)
)
SELECT jsonb_object_agg(anon_1.key, anon_1.value)
FROM (
  SELECT anon_2.key AS key,
         reports.data -> anon_2.key AS value
  FROM reports,
       (SELECT DISTINCT jsonb_object_keys(reports.data) AS key
        FROM reports) AS anon_2
  ORDER BY reports.id DESC
) AS anon_1;
Should return the difference of row 1 compared to row 2:
'{"b":"bbb", "c":"ccc", "d":null}'
Instead, it also returns duplicates ({"a": "aaa"}). Also, there might be a more elegant approach in general!

UPDATED
CREATE OR REPLACE FUNCTION jsonb_diff_val(val1 JSONB, val2 JSONB)
RETURNS JSONB AS $$
DECLARE
  result JSONB;
  v RECORD;
BEGIN
  result = val1;
  FOR v IN SELECT * FROM jsonb_each(val2) LOOP
    IF result @> jsonb_build_object(v.key, v.value) THEN
      -- same key and value in both objects: not part of the diff
      result = result - v.key;
    ELSIF result ? v.key THEN
      -- key exists with a different value: keep val1's value
      CONTINUE;
    ELSE
      -- key only present in val2: mark it with a JSON null
      result = result || jsonb_build_object(v.key, 'null'::jsonb);
    END IF;
  END LOOP;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
Query:
SELECT jsonb_diff_val(
'{"a":"aaa", "b":"bbb", "c":"ccc"}'::jsonb,
'{"a":"aaa", "b":"jjj", "d":"ddd"}'::jsonb
);
jsonb_diff_val
---------------------------------------
{"b": "bbb", "c": "ccc", "d": "null"}
(1 row)

I have created a similar function that scans the object recursively and returns the difference between the new object and the old one. I was not able to find a nicer way to determine whether a jsonb object is empty, so I would be grateful for any suggestion on how to simplify that. I plan to use it to keep track of updates made to jsonb objects, so that I store only what has changed.
Here is the function:
CREATE OR REPLACE FUNCTION jsonb_diff_val(val1 JSONB, val2 JSONB)
RETURNS JSONB AS $$
DECLARE
  result JSONB;
  object_result JSONB;
  i int;
  v RECORD;
BEGIN
  IF jsonb_typeof(val1) = 'null' THEN
    RETURN val2;
  END IF;

  result = val1;
  -- Start by marking every key of val1 as removed (null); keys that are unchanged
  -- or updated are fixed up in the loop below.
  FOR v IN SELECT * FROM jsonb_each(val1) LOOP
    result = result || jsonb_build_object(v.key, null);
  END LOOP;

  FOR v IN SELECT * FROM jsonb_each(val2) LOOP
    IF jsonb_typeof(val1 -> v.key) = 'object' AND jsonb_typeof(val2 -> v.key) = 'object' THEN
      object_result = jsonb_diff_val(val1 -> v.key, val2 -> v.key);
      -- check if the recursive result is not empty
      i := (SELECT count(*) FROM jsonb_each(object_result));
      IF i = 0 THEN
        result = result - v.key;  -- if empty, remove the key
      ELSE
        result = result || jsonb_build_object(v.key, object_result);
      END IF;
    ELSIF val1 -> v.key = val2 -> v.key THEN
      result = result - v.key;
    ELSE
      result = result || jsonb_build_object(v.key, v.value);
    END IF;
  END LOOP;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
Then a simple query looks like this:
SELECT jsonb_diff_val(
'{"a":"aaa", "b":{"b1":"b","b2":"bb","b3":{"b3a":"aaa","b3c":"ccc"}}, "c":"ccc"}'::jsonb,
'{"a":"aaa", "b":{"b1":"b1","b3":{"b3a":"aaa","b3c":"cccc"}}, "d":"ddd"}'::jsonb
);
jsonb_diff_val
-------------------------------------------------------------------------------
{"b": {"b1": "b1", "b2": null, "b3": {"b3c": "cccc"}}, "c": null, "d": "ddd"}
(1 row)
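
As an aside on the "is it empty" check: jsonb supports equality, so one possible simplification (a suggestion, not part of the function above) is to compare the recursive result against an empty object directly instead of counting its keys:

SELECT '{}'::jsonb = '{}'::jsonb        AS empty_object,      -- true
       '{"a": 1}'::jsonb = '{}'::jsonb  AS non_empty_object;  -- false

-- inside the function, this would replace the count(*) detour:
-- IF object_result = '{}'::jsonb THEN
--     result = result - v.key;
-- ELSE
--     result = result || jsonb_build_object(v.key, object_result);
-- END IF;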

Here is a solution without creating a new function:
SELECT json_object_agg(COALESCE(old.key, new.key), old.value)
FROM json_each_text('{"a":"aaa", "b":"bbb", "c":"ccc"}') old
FULL OUTER JOIN json_each_text('{"a":"aaa", "b":"jjj", "d":"ddd"}') new ON new.key = old.key
WHERE new.value IS DISTINCT FROM old.value;
The result is:
{"b" : "bbb", "c" : "ccc", "d" : null}
This method only compares the first level of the JSON documents; it does NOT traverse the whole object tree.
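
For jsonb inputs, the same idea can be wrapped in a small reusable function (a sketch; the name jsonb_diff_shallow is mine, and like the query above it only looks at the top level):

CREATE OR REPLACE FUNCTION jsonb_diff_shallow(val1 jsonb, val2 jsonb)
RETURNS jsonb
LANGUAGE sql AS $$
    SELECT COALESCE(jsonb_object_agg(COALESCE(old.key, new.key), old.value), '{}'::jsonb)
    FROM jsonb_each(val1) old
    FULL OUTER JOIN jsonb_each(val2) new ON new.key = old.key
    WHERE new.value IS DISTINCT FROM old.value;
$$;

-- SELECT jsonb_diff_shallow('{"a":"aaa", "b":"bbb", "c":"ccc"}'::jsonb,
--                           '{"a":"aaa", "b":"jjj", "d":"ddd"}'::jsonb);
-- returns {"b": "bbb", "c": "ccc", "d": null}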

My solution is not recursive, but you can also use it to detect common keys/values:
-- Diff two jsonb objects
CREATE TYPE jsonb_object_diff_result AS (
  old  jsonb,
  new  jsonb,
  same jsonb
);

CREATE OR REPLACE FUNCTION jsonb_object_diff(in_old jsonb, in_new jsonb)
RETURNS jsonb_object_diff_result AS
$jsonb_object_diff$
DECLARE
  _key   text;
  _value jsonb;
  _old   jsonb;
  _new   jsonb;
  _same  jsonb;
BEGIN
  _old := in_old;
  _new := in_new;
  FOR _key, _value IN SELECT * FROM jsonb_each(_old) LOOP
    IF (_new -> _key) = _value THEN
      _old := _old - _key;
      _new := _new - _key;
      IF _same IS NULL THEN
        _same := jsonb_build_object(_key, _value);
      ELSE
        _same := _same || jsonb_build_object(_key, _value);
      END IF;
    END IF;
  END LOOP;
  RETURN (_old, _new, _same);
END;
$jsonb_object_diff$
LANGUAGE plpgsql;
Result can look like this:
SELECT * FROM jsonb_object_diff(
'{"a": 1, "b": 5, "extra1": "woo", "old_null": null, "just_null": null}'::jsonb,
'{"a": 1, "b": 4, "extra2": "ahoj", "new_null": null, "just_null": null}'::jsonb);
-[ RECORD 1 ]--------------------------------------
old | {"b": 5, "extra1": "woo", "old_null": null}
new | {"b": 4, "extra2": "ahoj", "new_null": null}
same | {"a": 1, "just_null": null}

This does basically the same as what other folks have done, but reports changes in a right/left "delta" format.
Example:
SELECT jsonb_delta(
'{"a":"aaa", "b":"bbb", "c":"ccc"}'::jsonb,
'{"a":"aaa", "b":"jjj", "d":"ddd"}'::jsonb
);
Resolves to:
{"b": {"left": "bbb", "right": "jjj"}}
code:
CREATE OR REPLACE FUNCTION jsonb_delta(
    IN  json_left  JSONB,
    IN  json_right JSONB,
    OUT json_out   JSONB
) AS
$$
BEGIN
    IF json_left IS NULL OR json_right IS NULL THEN
        RAISE EXCEPTION 'Non-null inputs required';
    END IF;

    WITH base AS (
        SELECT
            key,
            CASE
                WHEN a.value IS DISTINCT FROM b.value
                    THEN jsonb_build_object('left', a.value, 'right', b.value)
                ELSE NULL
            END AS changes
        FROM jsonb_each_text(json_left) a
        FULL OUTER JOIN jsonb_each_text(json_right) b USING (key)
    )
    SELECT jsonb_object_agg(key, changes)
    INTO json_out
    FROM base
    WHERE changes IS NOT NULL;

    json_out := coalesce(json_out, '{}');
END;
$$
LANGUAGE plpgsql
IMMUTABLE
PARALLEL SAFE;

I achieved this by using the hstore extension.
CREATE EXTENSION hstore;
CREATE OR REPLACE FUNCTION log_history() RETURNS trigger
    LANGUAGE plpgsql
AS $$
DECLARE
    hs_new hstore;
    hs_old hstore;
BEGIN
    IF (TG_OP = 'DELETE') THEN
        INSERT INTO history(item_id, old_values, new_values)
        VALUES (OLD.id, row_to_json(OLD)::jsonb, NULL);
    ELSIF (TG_OP = 'INSERT') THEN
        INSERT INTO history(item_id, old_values, new_values)
        VALUES (NEW.id, NULL, row_to_json(NEW)::jsonb);
    ELSIF (TG_OP = 'UPDATE' AND NEW.* IS DISTINCT FROM OLD.*) THEN
        hs_new := hstore(NEW);
        hs_old := hstore(OLD);
        INSERT INTO history(item_id, old_values, new_values)
        VALUES (NEW.id,
                (hs_old - hs_new - 'updated_at'::text)::jsonb,
                (hs_new - hs_old - 'updated_at'::text)::jsonb);
    END IF;
    RETURN NULL;
END;
$$;
Notice that I use it to log the history of any change that happens on a specific table, and I also remove updated_at from the diff object.
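For reference, a minimal sketch of the objects the trigger function assumes (the history columns item_id/old_values/new_values come from the INSERTs above; the surrogate key, the changed_at column and the audited table name items are illustrative):

CREATE TABLE history (
    id         bigserial PRIMARY KEY,
    item_id    integer,
    old_values jsonb,
    new_values jsonb,
    changed_at timestamptz NOT NULL DEFAULT now()
);

CREATE TRIGGER items_log_history
AFTER INSERT OR UPDATE OR DELETE ON items
FOR EACH ROW EXECUTE PROCEDURE log_history();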

Related

How to create a helper function for queries in postgresql?

I have a query similar to this:
SELECT COALESCE(
(SELECT jsonb_agg(s) FROM ((SELECT column FROM my_table)) AS s),
'[]'::jsonb
);
which returns all rows in json format:
[
  {
    "column": 1
  },
  {
    "column": 2
  }
]
And if there are no rows, it returns an empty array instead of null.
I want to re-use this, but it would quickly become a mess to type out the whole thing everywhere. That's why I'm trying to create this as_json function:
-- DOESNT WORK
CREATE FUNCTION as_json(query ???)
RETURNS jsonb
LANGUAGE sql
AS $$
SELECT COALESCE((SELECT jsonb_agg(s) FROM query AS s), '[]'::jsonb);
$$;
If I could make it work, using it would be as simple as
as_json(SELECT column FROM my_table)
Can I achieve something like that in PostgreSQL?
It's actually quite easy to write something similar to query_to_xml():
create or replace function query_to_jsonb(p_query text, p_include_nulls boolean default false)
  returns jsonb
as
$$
declare
  l_result jsonb;
  l_sql    text;
begin
  if p_include_nulls then
    l_sql := 'select jsonb_agg(to_jsonb(dt)) from ('||p_query||') as dt';
  else
    l_sql := 'select jsonb_agg(jsonb_strip_nulls(to_jsonb(dt))) from ('||p_query||') as dt';
  end if;

  execute l_sql
     into l_result;

  return coalesce(l_result, '[]'::jsonb);  -- no rows: return an empty array instead of NULL
end;
$$
language plpgsql;
Then you can return the result of a query (string) as a JSON value, e.g.
select query_to_jsonb('SELECT column FROM my_table');
NULL values are stripped by default; to keep them in the result, pass true for p_include_nulls:
select query_to_jsonb('select col1, col2 from some_table', true);
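With no matching rows the aggregate yields NULL, which the COALESCE turns into an empty array, e.g. (a sketch against the asker's hypothetical my_table):

select query_to_jsonb('select "column" from my_table where false');
-- returns []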

Push result of each query into array

I've created a function:
CREATE FUNCTION citiesById(integer[]) RETURNS text[] AS
$$
DECLARE
element int;
result text[];
BEGIN
FOREACH element IN ARRAY $1
LOOP
WITH t1 as (SELECT city FROM cities WHERE id = element)
SELECT city FROM t1 INTO result;
END LOOP;
RETURN result;
END
$$
LANGUAGE plpgsql;
I'm trying to execute a query in a loop and push the result of each query into an array, to end up with something like ['London', 'Paris', 'Moscow']. But I'm getting an error. Is there a correct way to do that?
CREATE FUNCTION citiesById(integer[]) RETURNS text[] AS
$$
DECLARE
element int;
result text[];
BEGIN
FOREACH element IN ARRAY $1
LOOP
result := array_append(result, (SELECT city FROM cities WHERE id = element)::text);
END LOOP;
RETURN result;
END
$$
LANGUAGE plpgsql;
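Usage, plus a loop-free alternative that does the same thing in a single statement (a sketch; it preserves the input order via WITH ORDINALITY):

SELECT citiesById(ARRAY[1, 2, 3]);
-- e.g. {London,Paris,Moscow}

-- set-based alternative without a PL/pgSQL loop:
SELECT array_agg(c.city ORDER BY t.ord) AS cities
FROM unnest(ARRAY[1, 2, 3]) WITH ORDINALITY AS t(id, ord)
JOIN cities c ON c.id = t.id;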

Postgres 10 and malformatted array literal when using EXECUTE

I am trying to create a function on Postgres 10 that would check whether all the given ids exist
in the target table.
CREATE OR REPLACE FUNCTION "public"."has_mismatching_ids"(
_ids TEXT[],
_column_name TEXT,
_table_name TEXT,
_schema_name TEXT DEFAULT 'public'
)
RETURNS BOOLEAN AS
$$
DECLARE
result TEXT[] := '{}';
_query TEXT := '';
BEGIN
IF _ids IS NULL THEN
RETURN false;
END IF;
_query := CONCAT('SELECT UNNEST($1) EXCEPT SELECT "', _column_name, '" FROM "', _schema_name ,'"."', _table_name ,'"');
EXECUTE _query INTO result USING $1;
RAISE NOTICE 'Executed';
IF array_length(result, 1) > 0 THEN
RETURN true;
END IF;
RETURN false;
END;
$$
LANGUAGE plpgsql
STABLE
SECURITY INVOKER;
This is my function. It correctly returns false when all the ids exist, but it throws an exception if any of
the ids does not exist.
CREATE TABLE "public"."categories" ("category_id" TEXT NOT NULL PRIMARY KEY);
INSERT INTO "public"."categories" ("category_id") VALUES ('foo');
INSERT INTO "public"."categories" ("category_id") VALUES ('bar');
This works
SELECT FROM "public"."has_mismatching_ids"(ARRAY['foo'], 'category_id', 'categories', 'public');
NOTICE: Executed
has_mismatching_ids
---------------------
f
(1 row)
but this fails:
SELECT FROM "public"."has_mismatching_ids"(ARRAY['foo2'], 'category_id', 'categories', 'public');
ERROR: malformed array literal: "foo2"
DETAIL: Array value must start with "{" or dimension information.
CONTEXT: PL/pgSQL function public.has_mismatching_ids(text[],text,text,text) line 14 at EXECUTE
and, strangely, this also works if I write the query with explicit names and without EXECUTE:
SELECT UNNEST(ARRAY['foo', 'foo3']) EXCEPT SELECT "category_id" FROM "public"."categories";
unnest
--------
foo3
(1 row)
Why doesn't my dynamic function work when comparing to values that do not exist in the target table?
You can use a simple SQL statement to check whether all the array elements exist. (The error in your version comes from EXECUTE ... INTO result: the dynamic query returns one text row per missing id, and EXECUTE assigns the first row's text value, e.g. 'foo2', to the text[] variable, which then fails to parse as an array literal.)
CREATE OR REPLACE FUNCTION "public"."has_mismatching_ids"(
    _ids text[],
    _column_name text,
    _table_name text,
    _schema_name text DEFAULT 'public' )
RETURNS boolean AS $$
DECLARE
    res boolean;
BEGIN
    IF _ids IS NULL THEN
        RETURN false;
    END IF;
    -- true when at least one of the given ids is missing from the target table
    EXECUTE format('SELECT COUNT(*) <> cardinality($1)
                      FROM %I.%I WHERE %I = ANY($2)',
                   _schema_name, _table_name, _column_name)
       INTO res USING _ids, _ids;
    RETURN res;
END;
$$ LANGUAGE plpgsql STABLE SECURITY INVOKER;
Demo
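For example, against the sample categories table from the question (expected results follow from the COUNT(*) <> cardinality($1) comparison above):

SELECT has_mismatching_ids(ARRAY['foo', 'bar'], 'category_id', 'categories');  -- false: both ids exist
SELECT has_mismatching_ids(ARRAY['foo2'], 'category_id', 'categories');        -- true: foo2 is missing
SELECT has_mismatching_ids(NULL, 'category_id', 'categories');                 -- false by definition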

PostgreSQL log trigger optimalization

I spent a lot of time trying to optimize our PL/pgSQL log trigger, which had started to become a problem. I made huge progress (from 18 min down to 2.5 min for inserting 3M rows), but I would like to know whether some PL/pgSQL masters can do even better.
CREATE OR REPLACE FUNCTION table_log_trig()
RETURNS trigger AS
$BODY$
DECLARE
col TEXT; -- Single column name to save
newVal TEXT; -- New value for column
oldVal TEXT; -- Old value for column
colLimit TEXT[]; -- Columns that should be logged
BEGIN
IF TG_ARGV[0] IS NOT NULL THEN
-- Trigger specifies columns to log
SELECT array_agg(unnest)
FROM unnest(string_to_array(TG_ARGV[0], ','))
INTO colLimit;
ELSE
-- Trigger with no params. Log all columns
SELECT array_agg(json_object_keys)
FROM json_object_keys(row_to_json(NEW))
WHERE json_object_keys NOT IN ('id', 'created_at', 'updated_at') -- Exceptions
INTO colLimit;
END IF;
-- Loop over columns that should be saved in log
FOREACH col IN ARRAY colLimit
LOOP
-- INSERT & UPDATE
EXECUTE 'SELECT ($1).' || col || '::text' INTO newVal USING NEW;
-- UPDATE
IF TG_OP = 'UPDATE' THEN
EXECUTE 'SELECT ($1).' || col || '::text' INTO oldVal USING OLD;
END IF;
-- Add only new or changed data
IF
newVal != oldVal OR
(oldVal IS NULL AND newVal IS NOT NULL) OR
(oldVal IS NOT NULL AND newVal IS NULL)
THEN
INSERT INTO tab_logs (record_id, field_name, old_value, new_value, created_at, created_by, action)
VALUES (NEW.id, col, oldVal, newVal, NOW(), 999, 'O');
END IF;
END LOOP;
RETURN NEW;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
row_to_json() returns both column names and values; you may as well make use of these values, rather than extracting them later via dynamic SQL.
I haven't thoroughly tested this, let alone benchmarked it, but here's the gist of it:
CREATE OR REPLACE FUNCTION table_log_trig() RETURNS trigger AS
$$
DECLARE
    OldJson JSONB = NULL;
BEGIN
    IF TG_OP <> 'INSERT' THEN
        OldJson := to_jsonb(old);
    END IF;

    INSERT INTO tab_logs (record_id, field_name, old_value, new_value, created_at, created_by, action)
    SELECT new.id, key, OldValues.value, NewValues.value, now(), 999, 'O'
    FROM jsonb_each(to_jsonb(new)) NewValues
    LEFT JOIN jsonb_each(OldJson) OldValues USING (key)
    WHERE
        (
            (TG_ARGV[0] IS NULL AND key NOT IN ('id', 'created_at', 'updated_at')) OR
            (TG_ARGV[0] IS NOT NULL AND key = ANY(string_to_array(TG_ARGV[0], ',')))
        ) AND
        OldValues.value::text IS DISTINCT FROM NewValues.value::text;

    -- NULL is fine for an AFTER trigger; return NEW instead if you attach this as a BEFORE trigger
    RETURN NULL;
END
$$
LANGUAGE plpgsql VOLATILE;
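
A sketch of how this might be attached (the table name items and the column list are illustrative; since the function returns NULL it is meant as an AFTER trigger):

-- log every column except id / created_at / updated_at:
CREATE TRIGGER items_log_all
AFTER INSERT OR UPDATE ON items
FOR EACH ROW EXECUTE PROCEDURE table_log_trig();

-- or: log only the listed columns
-- CREATE TRIGGER items_log_some
-- AFTER INSERT OR UPDATE ON items
-- FOR EACH ROW EXECUTE PROCEDURE table_log_trig('name,price');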

PostgreSQL update trigger Comparing Hstore values

I am creating a trigger in PostgreSQL. On update, I would like to compare all of the values in an hstore representation of the row and apply the changes to my mirror table. I managed to get the names of my columns into the variable k, but I am not able to use it to get the values from NEW and OLD.
CREATE OR REPLACE FUNCTION function_replication() RETURNS TRIGGER AS
$BODY$
DECLARE
k text;
BEGIN
FOR k IN SELECT key FROM EACH(hstore(NEW)) LOOP
IF NEW.k != OLD.k THEN
EXECUTE 'UPDATE ' || TG_TABLE_NAME || '_2' || 'SET ' || k || '=' || new.k || ' WHERE ID=$1.ID;' USING OLD;
END IF;
END LOOP;
RETURN NEW;
END;
$BODY$
language plpgsql;
You should operate on hstore representations of the records new and old. Also, use the format() function for better control and readability.
create or replace function function_replication()
returns trigger as
$body$
declare
newh hstore = hstore(new);
oldh hstore = hstore(old);
key text;
begin
foreach key in array akeys(newh) loop
if newh->key is distinct from oldh->key then -- IS DISTINCT FROM also catches changes to or from NULL
execute format(
'update %s_2 set %s = %L where id = %s',
tg_table_name, key, newh->key, oldh->'id');
end if;
end loop;
return new;
end;
$body$
language plpgsql;
Another version, with a minimal number of UPDATEs, in a partially functional style (where possible).
This should be an AFTER trigger to ensure correct behavior.
CREATE OR REPLACE FUNCTION function_replication()
RETURNS trigger AS $$
DECLARE
newh hstore;
oldh hstore;
update_vec text[];
pair text[];
BEGIN
IF new IS DISTINCT FROM old THEN
IF new.id <> old.id THEN
RAISE EXCEPTION 'id should be immutable';
END IF;
newh := hstore(new); oldh := hstore(old); update_vec := '{}';
FOREACH pair SLICE 1 IN ARRAY hstore_to_matrix(newh - oldh)
LOOP
update_vec := update_vec || format('%I = %L', pair[1], pair[2]);
END LOOP;
EXECUTE
format('UPDATE %I SET %s WHERE id = $1',
tg_table_name || '_2',
array_to_string(update_vec, ', '))
USING old.id;
END IF;
RETURN NEW; -- the value is not important in AFTER trg
END;
$$ LANGUAGE plpgsql;
CREATE TABLE foo(id int PRIMARY KEY, a int, b int);
CREATE TABLE foo_2(LIKE foo INCLUDING ALL);
CREATE TRIGGER xxx AFTER UPDATE ON foo
FOR EACH ROW EXECUTE PROCEDURE function_replication();
INSERT INTO foo VALUES(1, NULL, NULL);
INSERT INTO foo VALUES(2, 1,1);
INSERT INTO foo_2 VALUES(1, NULL, NULL);
INSERT INTO foo_2 VALUES(2, 1,1);
UPDATE foo SET a = 20, b = 30 WHERE id = 1;
UPDATE foo SET a = NULL WHERE id = 1;
This code is a little bit more complex, but everything that should be escaped is escaped, and it reduces the number of executed UPDATE commands. An UPDATE is a full SQL command, and its overhead is significantly higher than that of the extra code needed to reduce the number of such commands.
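
The core trick is hstore subtraction: newh - oldh keeps only the pairs whose values changed. A quick illustration against the foo table defined above, using the values from the first UPDATE in the example:

SELECT hstore(ROW(1, 20, 30)::foo) - hstore(ROW(1, NULL, NULL)::foo) AS changed_pairs;
--     changed_pairs
-- ----------------------
--  "a"=>"20", "b"=>"30"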