perform delete or update on unique violation in postgres - postgresql

On Unique_violation exception how to update or delete the row which raised the exception
table code and insert
create table test
(
id serial not null,
str character varying NOT NULL,
is_dup boolean DEFAULT false,
CONSTRAINT test_str_unq UNIQUE (str)
);
INSERT INTO test(str) VALUES ('apple'),('giant'),('company'),('ap*p*le');
Function
CREATE OR REPLACE FUNCTION rem_chars()
RETURNS void AS
$BODY$
BEGIN
begin
update test set str=replace(str,'*','');
EXCEPTION WHEN unique_violation THEN
--what to do here to delete the row which raised exception or
--to update the is_dup=true to that row
end;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
ALTER FUNCTION rem_chars() OWNER TO postgres;

-- this will show you all the potential key collisions
SELECT a.id, a.str, b.id , b.str
FROM test a, test b
WHERE a.str = replace(b.str,'*','')
AND a.id < b.id;
-- this will delete them
DELETE FROM test WHERE id IN (
SELECT b.id
FROM test a, test b
WHERE a.str = replace(b.str,'*','')
AND a.id < b.id
);

I think the only solution is to do this in two steps:
UPDATE test
SET str = replace(str,'*','')
WHERE str NOT IN (SELECT replace(str,'*','') FROM test);
UPDATE test
SET is_dup = true
WHERE str IN (SELECT replace(str,'*','') FROM test);
At least I can't think of a more efficient way.

Related

Unique constraint across multiple postgres tables

Let's say I have the following two postgres tables with the same field:
CREATE TABLE table1 (
label VARCHAR(50)
);
CREATE TABLE table2 (
label VARCHAR(50)
);
I want label to be unique across both tables. That is, the following data should raise an error:
INSERT INTO table1 (label) VALUES ('hello');
INSERT INTO table2 (label) VALUES ('hello');
Is there any way to enforce this at the database level?
You cannot create a unique constraint across table, but a trigger on both tables can. One way: (see demo)
create or replace function table1_table2_cross_check()
returns trigger
language plpgsql
as $$
begin
if tg_table_name = 'table1'
then
if exists (select null
from table2
where label = new.label
)
then
raise exception 'Executing: % table1, Label Value: ''%'', already exists in table2',tg_op,new.label;
end if;
else
if exists (select null
from table1
where label = new.label
)
then
raise exception 'Executing: % table2, Label Value: ''%'', already exists in table1',tg_op,new.label;
end if;
end if;
return new;
end;
$$;
create trigger table1_biur
before insert or update
on table1
for each row
execute procedure table1_table2_cross_check();
create trigger table2_biur
before insert or update
on table2
for each row
execute procedure table1_table2_cross_check();

Postgres query with variable in loop and condition on variable

I have a query which updates the records based on variables old_id and new_id. But condition is I need to fetch the variables dynamically. Here is simple query which I am using.
do
$$
declare
old_id bigint = 1561049391647687270;
declare new_id bigint = 2068236279446765699;
begin
update songs set poet_id = new_id where poet_id = old_id;
update poets set active = true where id = new_id;
update poets set deleted = true where id = old_id;
end
$$;
I need to assign the old_id and new_id dynamically
do
$$
declare
su record;
pc record;
old_id bigint;
new_id bigint;
begin
for pc in select name, count(name)
from poets
where deleted = false
group by name
having count(name) > 1
order by name
loop
for su in select * from poets where name ilike pc.name
loop
-- old_id could be null where I have 2 continue the flow without update
for old_id in (select id from su where su.link is null)
loop
raise notice 'old: %', old_id;
end loop;
-- new_id could be more than 2 skip this condition as well
for new_id in (select id from su where su.link is not null)
loop
raise notice 'new: %', new_id;
end loop;
end loop;
-- run the statement_1 example if new_id and old_id is not null
end loop;
end
$$;
The expected problem statement (to assign variable and use it in further execution) is with in comment.
(a) In your first "simple query", the update of the table poets could be automatically executed by a trigger function defined on the table songs :
CREATE OR REPLACE FUNCTION songs_update_id ()
RETURNS trigger LANGUAGE plpgsql AS
$$
BEGIN
UPDATE poets SET active = true WHERE id = NEW.poet_id ;
UPDATE poets SET deleted = true WHERE id = OLD.poet_id ; -- SET active = false to be added ?
END ;
$$ ;
CREATE OR REPLACE TRIGGER songs_update_id AFTER UPDATE OF id ON songs
FOR EACH ROW EXECUTE songs_update_id () ;
Your first query can then be reduced as :
do
$$
declare
old_id bigint = 1561049391647687270;
declare new_id bigint = 2068236279446765699;
begin
update songs set poet_id = new_id where poet_id = old_id;
end
$$;
(b) The tables update could be performed with a sql query instead of a plpgsql loop and with better performances :
do
$$
BEGIN
UPDATE songs
SET poet_id = list.new_id[1]
FROM
( SELECT b.name
, array_agg(b.id) FILTER (WHERE b.link IS NULL) AS old_id
, array_agg(b.id) FILTER (WHERE b.link IS NOT NULL) AS new_id
FROM
( SELECT name
FROM poets
WHERE deleted = false
GROUP BY name
HAVING COUNT(*) > 1
-- ORDER BY name -- this ORDER BY sounds like useless and resource-intensive
) AS a
INNER JOIN poets AS b
ON b.name ilike a.name
GROUP BY b.name
HAVING array_length(old_id, 1) = 1
AND array_length(new_id, 1) = 1
) AS list
WHERE poet_id = list.old_id[1] ;
END ;
$$;
This solution is not tested yet and could have to be adjusted in order to work correctly. Please provide the tables definition of songs and poets and a sample of data in dbfiddle so that I can test and adjust the proposed solution.

PostgreSQL Trigger on last deleted line

I created a trigger that updates the number of species counted from the "effectif" table to the "citation" table.
It works well except when I delete the last row, the calculation does not perform and remains at the last state. For example, if I delete the last row on the "effectif" table that represents 6 species, I would still have 6 on my "citation" table where I'm suppose to find 0 or null.
Here is my trigger :
CREATE OR REPLACE FUNCTION data.del_eff_tot()
RETURNS trigger AS
$BODY$
BEGIN
IF OLD.effectif IS NOT NULL THEN
UPDATE data.citation
SET effectif =
(SELECT sum(a.effectif) FROM data.effectif a, data.citation b WHERE OLD.id_cit = a.id_cit AND OLD.id_cit = b.id)+COALESCE((SELECT a.effectif FROM data.effectif a, data.citation b WHERE OLD.id_cit = a.id_cit AND OLD.id_cit = b.id AND sexe = 'sexe_4_1'),0)
WHERE id IN (SELECT id_cit FROM data.effectif a, data.citation b WHERE OLD.id_cit = b.id AND b.id = a.id_cit);
END IF;
RETURN OLD;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
ALTER FUNCTION data.del_eff_tot()
OWNER TO postgres;
------------------------------
CREATE TRIGGER del_eff_tot
AFTER DELETE
ON data.effectif
FOR EACH ROW
EXECUTE PROCEDURE data.del_eff_tot();

How to return id after insert from updated (created from trigger) table in postgres?

I am creating an insert query to a table (t1 -Which creates the returning id by increment) which adds the same row to another table (t2) by a trigger and creates a new id. How do I get this new ID?
I tried the following approach but it returns null (I think the row in table t2 is not available yet to perform join on).
In t2 I have an identifier 'type' which detects the type of row through trigger
Any help would be appreciated. Thanks in advance.
WITH inserted AS (
INSERT INTO t1 (entity_name, entity_type, entity_country)
VALUES
('sss', 'list', 'a',)
RETURNING id, entity_country
)
select id, entity_reference_id, entity_country, t2.entity_id from inserted
join t2 on t1.id = t2.entity_reference_id
where t2.type LIKE '%manual%'
This returns me null, However I want it to return me new t2.entity_id
CREATE OR REPLACE FUNCTION view_t2_id() RETURNS TRIGGER AS $BODY$
BEGIN
IF (NEW."type" LIKE '%manual%') THEN
RETURN NEW."entity_id";
ELSE
RAISE EXCEPTION 'Wrong "type"="%"', NEW."type";
END IF;
END;
$BODY$ LANGUAGE plpgsql;
CREATE TRIGGER view_t2_id_trigger
AFTER INSERT ON t2
FOR EACH ROW EXECUTE PROCEDURE view_t2_id();

Prevent and/or detect cycles in postgres

Assuming a schema like the following:
CREATE TABLE node (
id SERIAL PRIMARY KEY,
name VARCHAR,
parentid INT REFERENCES node(id)
);
Further, let's assume the following data is present:
INSERT INTO node (name,parentid) VALUES
('A',NULL),
('B',1),
('C',1);
Is there a way to prevent cycles from being created? Example:
UPDATE node SET parentid = 2 WHERE id = 1;
This would create a cycle of 1->2->1->...
Your trigger simplified and optimized, should be considerably faster:
CREATE OR REPLACE FUNCTION detect_cycle()
RETURNS TRIGGER
LANGUAGE plpgsql AS
$func$
BEGIN
IF EXISTS (
WITH RECURSIVE search_graph(parentid, path, cycle) AS ( -- relevant columns
-- check ahead, makes 1 step less
SELECT g.parentid, ARRAY[g.id, g.parentid], (g.id = g.parentid)
FROM node g
WHERE g.id = NEW.id -- only test starting from new row
UNION ALL
SELECT g.parentid, sg.path || g.parentid, g.parentid = ANY(sg.path)
FROM search_graph sg
JOIN node g ON g.id = sg.parentid
WHERE NOT sg.cycle
)
SELECT FROM search_graph
WHERE cycle
LIMIT 1 -- stop evaluation at first find
)
THEN
RAISE EXCEPTION 'Loop detected!';
ELSE
RETURN NEW;
END IF;
END
$func$;
You don't need dynamic SQL, you don't need to count, you don't need all the columns and you don't need to test the whole table for every single row.
CREATE TRIGGER detect_cycle_after_update
AFTER INSERT OR UPDATE ON node
FOR EACH ROW EXECUTE PROCEDURE detect_cycle();
An INSERT like this has to be prohibited, too:
INSERT INTO node (id, name,parentid) VALUES (8,'D',9), (9,'E',8);
To answer my own question, I came up with a trigger that prevents this:
CREATE OR REPLACE FUNCTION detect_cycle() RETURNS TRIGGER AS
$func$
DECLARE
loops INTEGER;
BEGIN
EXECUTE 'WITH RECURSIVE search_graph(id, parentid, name, depth, path, cycle) AS (
SELECT g.id, g.parentid, g.name, 1,
ARRAY[g.id],
false
FROM node g
UNION ALL
SELECT g.id, g.parentid, g.name, sg.depth + 1,
path || g.id,
g.id = ANY(path)
FROM node g, search_graph sg
WHERE g.id = sg.parentid AND NOT cycle
)
SELECT count(*) FROM search_graph where cycle = TRUE' INTO loops;
IF loops > 0 THEN
RAISE EXCEPTION 'Loop detected!';
ELSE
RETURN NEW;
END IF;
END
$func$ LANGUAGE plpgsql;
CREATE TRIGGER detect_cycle_after_update
AFTER UPDATE ON node
FOR EACH ROW EXECUTE PROCEDURE detect_cycle();
So, if you try to create a loop, like in the question:
UPDATE node SET parentid = 2 WHERE id = 1;
You get an EXCEPTION:
ERROR: Loop detected!
CREATE OR REPLACE FUNCTION detect_cycle()
RETURNS TRIGGER AS
$func$
DECLARE
cycle int[];
BEGIN
EXECUTE format('WITH RECURSIVE search_graph(%4$I, path, cycle) AS (
SELECT g.%4$I, ARRAY[g.%3$I, g.%4$I], (g.%3$I = g.%4$I)
FROM %1$I.%2$I g
WHERE g.%3$I = $1.%3$I
UNION ALL
SELECT g.%4$I, sg.path || g.%4$I, g.%4$I = ANY(sg.path)
FROM search_graph sg
JOIN %1$I.%2$I g ON g.%3$I = sg.%4$I
WHERE NOT sg.cycle)
SELECT path
FROM search_graph
WHERE cycle
LIMIT 1', TG_TABLE_SCHEMA, TG_TABLE_NAME, quote_ident(TG_ARGV[0]), quote_ident(TG_ARGV[1]))
INTO cycle
USING NEW;
IF cycle IS NULL
THEN
RETURN NEW;
ELSE
RAISE EXCEPTION 'Loop in %.% detected: %', TG_TABLE_SCHEMA, TG_TABLE_NAME, array_to_string(cycle, ' -> ');
END IF;
END
$func$ LANGUAGE plpgsql;
CREATE TRIGGER detect_cycle_after_update
AFTER INSERT OR UPDATE ON node
FOR EACH ROW EXECUTE PROCEDURE detect_cycle('id', 'parent_id');
While the current accepted answer by #Erwin Brandstetter is ok when you process one update/insert at a time, it still can fail when considering concurrent execution.
Assume the table content defined by
INSERT INTO node VALUES
(1, 'A', NULL),
(2, 'B', 1),
(3, 'C', NULL),
(4, 'D', 3);
and then in one transaction, execute
-- transaction A
UPDATE node SET parentid = 2 where id = 3;
and in another
-- transaction B
UPDATE node SET parentid = 4 where id = 1;
Both UPDATE commands will succeed, and you can afterwards commit both transactions.
-- transaction A
COMMIT;
-- transaction B
COMMIT;
You will then have a cycle 1->4->3->2->1 in the table.
To make it work, you will either have to use isolation level SERIALIZABLE or use explicit locking in the trigger.
slightly different from Erwin's
CREATE OR REPLACE FUNCTION detect_cycle ()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $func$
BEGIN
IF EXISTS ( WITH RECURSIVE search_graph (
id,
name,
parentid,
is_cycle,
path
) AS (
SELECT *, FALSE,ARRAY[ROW (n.id,n.parentid)]
FROM
node n
WHERE
n.id = NEW.id
UNION ALL
SELECT
n.*,
ROW (n.id,n.parentid) = ANY (path),
path || ROW (n.id,n.parentid)
FROM
node n,
search_graph sg
WHERE
n.id = sg.parentid
AND NOT is_cycle
)
SELECT *
FROM
search_graph
WHERE
is_cycle
LIMIT 1) THEN
RAISE EXCEPTION 'Loop detected!';
ELSE
RETURN new;
END IF;
END
$func$;