Partial update on a Postgres upsert violates constraint - postgresql

I want to be able to upsert partially inside Postgres (9.5), but it seems that a partial upsert fails when not all of the constraints are fulfilled (such as the NOT NULL constraints)
Here is an example of the scenario and error
-- One row per job, keyed by id; name and address are mandatory,
-- the phone number is optional.
CREATE TABLE jobs (
    id            integer PRIMARY KEY,
    employee_name TEXT    NOT NULL,
    address       TEXT    NOT NULL,
    phone_number  TEXT
);
-- Upsert a job row from a JSONB document. On a conflicting id, only keys
-- present (non-null) in the document overwrite stored values; absent keys
-- keep the existing row's values via COALESCE.
-- NOTE: the INSERT path still requires employee_name and address, because
-- those columns are NOT NULL — a partial document for a new id will fail.
CREATE OR REPLACE FUNCTION upsert_job(job JSONB)
RETURNS VOID AS $$
BEGIN
    -- Explicit column list: an INSERT without one silently depends on
    -- table column order.
    INSERT INTO jobs AS origin (id, employee_name, address, phone_number)
    VALUES (
        (job->>'id')::INTEGER,
        job->>'employee_name',  -- ->> already yields TEXT; the original's
        job->>'address',        -- ::TEXT bound to the key literal, not the value
        job->>'phone_number'
    )
    ON CONFLICT (id) DO UPDATE SET
        employee_name = COALESCE(EXCLUDED.employee_name, origin.employee_name),
        address       = COALESCE(EXCLUDED.address, origin.address),
        phone_number  = COALESCE(EXCLUDED.phone_number, origin.phone_number);
END;
$$ LANGUAGE PLPGSQL SECURITY DEFINER;
--Full insert (OK)
SELECT upsert_job('{"id" : 1, "employee_name" : "AAA", "address" : "City, x street no.y", "phone_number" : "123456789"}'::jsonb);
--Partial update that fulfills constraint (Ok)
SELECT upsert_job('{"id" : 1, "employee_name" : "BBB", "address" : "City, x street no.y"}'::jsonb);
--Partial update that doesn't fulfill constraint (FAILS)
SELECT upsert_job('{"id" : 1, "phone_number" : "12345"}'::jsonb);
--ERROR: null value in column "employee_name" violates not-null constraint
--DETAIL: Failing row contains (1, null, null, 12345).
How do I go around approaching this ?

To think of it another way, what if the id didn't already exist? You can't insert just a phone number as it would have no name/address but that's exactly what you are telling it to do. So the constraint gets mad and it fails because an upsert tries to insert first and then updates if the insert fails. But your insert didn't get past the constraint check to see if it already existed.
What you can do instead if you want partials is tell it how to handle partials that would violate the constraints. Something like this (this is NOT complete and doesn't handle all partial data scenarios):
-- Answer's version: branch on which keys the document supplies, so partial
-- documents that would violate NOT NULL never reach the INSERT attempt.
-- NOTE: deliberately incomplete — it does not handle every partial-data
-- combination (e.g. name-only or address-only updates).
CREATE OR REPLACE FUNCTION upsert_job(job JSONB)
RETURNS VOID AS $$
BEGIN
    -- Full payload: safe to take INSERT ... ON CONFLICT, since the NOT NULL
    -- columns (employee_name, address) are all supplied.
    IF (job->>'phone_number' IS NOT NULL
        AND job->>'employee_name' IS NOT NULL
        AND job->>'address' IS NOT NULL) THEN
        INSERT INTO jobs AS origin (id, employee_name, address, phone_number)
        VALUES (
            (job->>'id')::INTEGER,
            job->>'employee_name',  -- ->> already yields TEXT; no cast needed
            job->>'address',
            job->>'phone_number'
        )
        ON CONFLICT (id) DO UPDATE SET
            employee_name = COALESCE(EXCLUDED.employee_name, origin.employee_name),
            address       = COALESCE(EXCLUDED.address, origin.address),
            phone_number  = COALESCE(EXCLUDED.phone_number, origin.phone_number);
    -- Phone-number-only payload: a plain UPDATE sidesteps the NOT NULL
    -- constraints an INSERT attempt would trip on.
    ELSIF (job->>'phone_number' IS NOT NULL
           AND (job->>'employee_name' IS NULL AND job->>'address' IS NULL)) THEN
        UPDATE jobs
        SET phone_number = job->>'phone_number'
        WHERE id = (job->>'id')::INTEGER;
    END IF;
END;
$$ LANGUAGE PLPGSQL SECURITY DEFINER;

Related

Postgres SQL Table Partitioning by Range Timestamp not Unique key Collision

I have an issue when trying to modify an existing PostgreSQL (version 13.3) table to support partitioning: it gets stuck when inserting the new data from the old table, because the inserted timestamp in some cases may not be unique, so it fails on execution.
The partition forces me to create the primary to be the range (timestamp) value. You can see the new table definition below:
-- Partitioned parent table. The PRIMARY KEY is placed on "CreationDate"
-- because any unique constraint on a partitioned table must include the
-- partition key — which is exactly what causes the duplicate-key failures
-- described below, since CreationDate values are not actually unique.
CREATE TABLE "UserFavorites_master" (
"Id" int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
"UserId" int4 NOT NULL,
"CardId" int4 NOT NULL,
"CreationDate" timestamp NOT NULL,
CONSTRAINT "PK_UserFavorites_CreationDate" PRIMARY KEY ("CreationDate")
) partition by range ("CreationDate");
The original table didn't have a constraint on timestamp to either be unique or a primary key nor would we particularly want that but that seems to be a requirement of partitioning. Looking for alternatives or good ideas to solve the issue.
You can see the full code below:
-- Step 1: keep the old data around under a new name.
alter table "UserFavorites" rename to "UserFavorites_old";
-- Step 2: recreate the table as a range-partitioned parent
-- (same definition as shown above).
CREATE TABLE "UserFavorites_master" (
"Id" int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
"UserId" int4 NOT NULL,
"CardId" int4 NOT NULL,
"CreationDate" timestamp NOT NULL,
CONSTRAINT "PK_UserFavorites_CreationDate" PRIMARY KEY ("CreationDate")
) partition by range ("CreationDate");
-- From reference: https://stackoverflow.com/a/53600145/1190540
-- Create the yearly partition covering forDate, if it does not exist yet.
create or replace function createPartitionIfNotExists(forDate timestamp) returns void
as $body$
declare
    yearStart date := date_trunc('year', forDate);
    yearEndExclusive date := yearStart + interval '1 year';
    tableName text := 'UserFavorites_Partition_' || to_char(forDate, 'YYYY');
begin
    -- The table is created with %I (quoted, case-preserved), so the
    -- existence check must quote the name too: a bare
    -- to_regclass(tableName) case-folds it and never finds the table.
    if to_regclass(format('%I', tableName)) is null then
        execute format(
            'create table %I partition of "UserFavorites_master" for values from (%L) to (%L)',
            tableName, yearStart, yearEndExclusive);
        -- Unfortunately Postgres forces us to define indexes for each
        -- partition individually (pre-v11):
        --execute format('create unique index on %I (%I)', tableName, 'UserId'::text);
    end if;
end;
$body$ language plpgsql;
-- Pre-create one yearly partition for each year 2015..2030.
-- The original block could not parse: a stray outer LOOP was never closed
-- with END LOOP and the final END lacked a semicolon; the "rec record"
-- declaration was also unnecessary (an integer FOR loop implicitly
-- declares its own integer loop variable).
do
$$
begin
    for yr in 2015..2030 loop
        perform createPartitionIfNotExists(to_date(yr::varchar, 'yyyy'));
    end loop;
end;
$$;
-- Expose the partitioned parent under the old table name for callers.
create or replace view "UserFavorites" as select * from "UserFavorites_master";
-- Copy historical rows. NOTE(review): SELECT * relies on "UserFavorites_old"
-- having exactly the column order (Id, UserId, CardId, CreationDate) —
-- confirm before running. This is also the statement that fails when
-- CreationDate values collide under the forced primary key.
insert into "UserFavorites" ("Id", "UserId", "CardId", "CreationDate") select * from "UserFavorites_old";
It fails on the Last line with the following error:
SQL Error [23505]: ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
No, partitioning doesn't force you to create a primary key. Just omit that line, and your example should work.
However, you definitely always should have a primary key on your tables. Otherwise, you can end up with identical rows, which is a major headache in a relational database. You might have to clean up your data.
#Laurenz Albe is correct, it seems I also have the ability to specify multiple keys though it may affect performance as referenced here Multiple Keys Performance, even indexing the creation date of the partition seemed to make the performance worse.
You can see a reference to multiple keys below; your mileage may vary.
-- Alternative: a composite primary key ("Id", "CreationDate") satisfies the
-- partitioning rule (the partition key must be part of any unique
-- constraint) while still keeping "Id" in the key.
CREATE TABLE "UserFavorites_master" (
"Id" int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
"UserId" int4 NOT NULL,
"CardId" int4 NOT NULL,
"CreationDate" timestamp NOT NULL,
CONSTRAINT "PK_UserFavorites" PRIMARY KEY ("Id", "CreationDate")
) partition by range ("CreationDate");

Bulk insert/update from Json param in Postgresql using ON CONFLICT (Id), but Id property must be acquired within the function as not included in param

Table definition:
-- Feature-toggle master table. "TextProp" is used as the logical key by the
-- function below, so a UNIQUE constraint on it is worth considering.
CREATE TABLE public."FeatureToggles"
(
    "Id" integer NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 2147483647 CACHE 1 ),
    "IsDeleted" boolean NOT NULL,
    "IsImported" boolean NOT NULL,
    "TextProp" character varying(35),
    CONSTRAINT "PK_FeatureToggles" PRIMARY KEY ("Id")
);  -- terminating semicolon was missing in the original
-- Child table: one row per addition URL, cascading with its parent toggle.
CREATE TABLE public."Additions"
(
    "Id" integer NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 2147483647 CACHE 1 ),
    "FeatureToggleId" int NOT NULL,
    "IsDeleted" boolean NOT NULL,
    "Url" character varying(35) NULL,
    -- Renamed from the duplicated "PK_FeatureToggles": Postgres backs a PK
    -- with an index and index names are unique per schema, so reusing the
    -- first table's name would fail. A comma was also missing here.
    CONSTRAINT "PK_Additions" PRIMARY KEY ("Id"),
    CONSTRAINT "FK_Additions_FeatureToggles_FeatureToggleId" FOREIGN KEY ("FeatureToggleId")
        REFERENCES public."FeatureToggles" ("Id") MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE CASCADE
);
Insert one record into table:
INSERT INTO public."FeatureToggles" ("IsDeleted", "TextProp", "IsImported") VALUES(false, 'X', true);
Function:
-- NOTE(review): this is the question's broken attempt, kept as posted; the
-- corrected version is in the answer below. Known defects (per the answer):
--   * json_array_elements() is a set-returning function and needs a FROM clause
--   * the three statements are not separated by semicolons
--   * "IS NOT IN" is not valid SQL ("NOT IN" was intended)
--   * stray "szi" token after the DISTINCT expression
CREATE OR REPLACE FUNCTION testfunctionname(jsonparam json)
RETURNS void AS
$BODY$
INSERT INTO "FeatureToggles" ("Id", "IsDeleted", "IsImported", "TextProp")
SELECT (COALESCE(SELECT "Id" FROM "FeatureToggles" WHERE "TextProp" = (prop->>'TextProp')::character varying(35)), 0),
(prop->>'IsDeleted')::boolean,
true,
(prop->>'TextProp')::character varying(35)
json_array_elements(jsonparam) prop
ON CONFLICT ("Id") DO
UPDATE SET
"IsDeleted" = EXCLUDED."IsDeleted"
INSERT INTO "Additions" ("FeatureToggleId", "IsDeleted", "Url")
SELECT (SELECT "Id" FROM "FeatureToggles" WHERE "TextProp" = (prop->>'TextProp')::character varying(35)),
(prop->>'IsDeleted')::boolean,
(prop->>'Additions')::character varying(35)
json_array_elements(jsonparam) prop
DELETE FROM "FeatureToggles" WHERE "IsImported" = true AND "TextProp" IS NOT IN (SELECT DISTINCT (prop->>'TextProp')::character varying(35)szi
json_array_elements(jsonparam) prop)
$BODY$
LANGUAGE sql
Sample JSON:
[
{
"IsDeleted": true,
"TextProp": "X",
"Additions":
[
"Test1",
"Test2"
]
},
{
"IsDeleted": false,
"TextProp": "Y",
"Additions":
[
"Test3",
"Test4"
]
}
]
Calling the function with this JSON param should update the one and only row in the FeatureToggles table to IsDeleted true and insert a new row into the FeatureToggles table with Id equals to 2, IsDeleted false and TextProp is Y. Also it should insert all Additions given in the JSON param into the corresponding table and with the correct foreign keys.
I ran into problems with populating the Id properties from the existing table and also inserting Additions into the other table.
It would be great if the function would also delete any rows in the FeatureToggles table (and the corresponding Additions rows) when the row already exists in the table, its IsImported property is true, but it is not present in the JSON param.
Example if we change the insert script to:
INSERT INTO public."FeatureToggles" ("IsDeleted", "TextProp", "IsImported") VALUES(false, 'X', true);
INSERT INTO public."FeatureToggles" ("IsDeleted", "TextProp", "IsImported") VALUES(false, 'X222', true);
After calling the function with the same JSON param, the row with X222 should be deleted because it is marked as imported, but has no matching item (matched by TextProp property) within the new param list.
Any help would be much appreciated as this function needs to handle tens of thousands of records as parameter on each call.
You have several errors in your function (and your DDL)
Most importantly, json_array_elements() is a set returning function, so you need a FROM clause in order to generate multiple rows.
You also need to terminate each SQL statement in the function with ; and IS NOT IN is invalid - you need NOT IN
So the function should be something like this:
-- Sync FeatureToggles/Additions from the JSON array, keyed logically by
-- "TextProp". Three statements: upsert toggles, insert additions, then
-- delete imported toggles that are absent from the payload (their
-- Additions rows follow via ON DELETE CASCADE).
-- NOTE(review): coalescing a missing match to "Id" = 0 bypasses identity
-- generation; a UNIQUE index on "TextProp" with ON CONFLICT ("TextProp")
-- would likely express the intent better — confirm with the caller.
CREATE OR REPLACE FUNCTION testfunctionname(jsonparam json)
RETURNS void AS
$BODY$
INSERT INTO "FeatureToggles" ("Id", "IsDeleted", "IsImported", "TextProp")
SELECT coalesce(ft."Id", 0),
       (prop->>'IsDeleted')::boolean,
       true,
       prop->>'TextProp'
FROM json_array_elements(jsonparam) prop
LEFT JOIN "FeatureToggles" ft on ft."TextProp" = (prop->>'TextProp')
ON CONFLICT ("Id") DO
UPDATE SET
    "IsDeleted" = EXCLUDED."IsDeleted";

INSERT INTO "Additions" ("FeatureToggleId", "IsDeleted", "Url")
SELECT coalesce(ft."Id", 0),
       (prop->>'IsDeleted')::boolean,
       prop->>'Additions'
FROM json_array_elements(jsonparam) prop
JOIN "FeatureToggles" ft on ft."TextProp" = (prop->>'TextProp');

-- Stray "szi" alias (left over from the question's code) removed below.
DELETE FROM "FeatureToggles"
WHERE "IsImported" = true
  AND "TextProp" NOT IN (SELECT DISTINCT prop->>'TextProp'
                         FROM json_array_elements(jsonparam) prop);
$BODY$
LANGUAGE sql;
Note that ->> returns a text value, so there is no need to cast the result of those expression if the target column is text or varchar.
I also changed the scalar sub-queries to JOINs. The first insert is equivalent to an outer join - although I think that is wrong (but that's what your current code tries to do). Because if the join doesn't return anything, the INSERT will try to create a row with "Id" = 0 - bypassing the sequence generation. Using on conflict() with an auto-generated ID rarely makes sense. Maybe you want a unique index on TextProp?
I would probably implement that as a procedure rather than a function though.
Online example

How to create a before insert trigger on SQL Server 2012 to make sure the data i'm adding doesn't already exists in the table?

How to create a before insert trigger on SQL Server 2012 to make sure the data i'm adding doesn't already exists in the table?
The table design is :
[id] [int] IDENTITY(1,1) NOT NULL,
[column_one] [varchar](50) NOT NULL,
[column_two] [varchar](50) NOT NULL,
[column_three] [varchar](50) NOT NULL
I need to create a method to add data to the table and make sure the "couple" column_one, column_two is unique and not duplicated.
example :
id : 1, column_one : 'Stack', column_two : 'OverFlow', column_three :
'is great'
id : 2, column_one : 'Hello', column_two : 'World',
column_three : 'you good?'
id : 3, column_one : 'Help', column_two :
'me', column_three : 'please'
I have to make sure, no user can add 'Stack'+'Overflow' or 'Help'+'me', but can enter 'Stack'+'me' or 'Help'+'OverFlow' if he wants to.
I thought about creating a trigger (Before insert or instead of insert) but I don't know what to set as a condition.
-- Sketch from the question: "do something" / "something something" are
-- placeholders, not valid T-SQL; a working version appears in the answer.
CREATE TRIGGER VerifySomething
ON my_table
INSTEAD OF INSERT
AS
BEGIN
SET NOCOUNT ON
INSERT INTO my_table
do something
WHERE something something
END
GO
EDIT : I tried #TheGameiswar solution and I got some problems :
"An explicit value for the identity column in table 'my_table' can only be specified when a column list is used and IDENTITY_INSERT is ON".
After some brainstorming, I decided to create a constraint on both columns instead of creating a trigger on insert.
the final result looks like :
ALTER TABLE my_table
-- The stated requirement is that the pair (column_one, column_two) be
-- unique; the original constraint mistakenly listed column_two/column_three.
ADD CONSTRAINT CheckUnicity UNIQUE (column_one, column_two)
-- Swallow duplicate (column_one, column_two) pairs instead of inserting them.
CREATE TRIGGER VerifySomething
ON my_table
INSTEAD OF INSERT
AS
BEGIN
    SET NOCOUNT ON;
    -- Explicit column list: the original's "select * from inserted" tried to
    -- write the IDENTITY column [id], producing the IDENTITY_INSERT error
    -- quoted in the question; it also referenced mismatched table names
    -- ("yourtable"/"table") and left the subquery parenthesis unclosed.
    INSERT INTO my_table (column_one, column_two, column_three)
    SELECT i.column_one, i.column_two, i.column_three
    FROM inserted i
    WHERE NOT EXISTS
        (SELECT 1
         FROM my_table t
         WHERE t.column_one = i.column_one
           AND t.column_two = i.column_two)
END
GO

Trigger after insert, update a related model Postgresql

This is the first time I'm creating a trigger, so I've got a little bit confused. I'm following this guide.
Here is what I've done so far:
-- Recreate the trigger idempotently: after each new post row,
-- run update_post_count() to bump the author's cached counter.
DROP TRIGGER IF EXISTS "update_metas" ON "post";
CREATE TRIGGER "update_metas"
AFTER INSERT ON "post"
FOR EACH ROW EXECUTE PROCEDURE update_post_count();
I have two tables: user and post. What I need to do is to increment the column user.postCount for each new post created. The foreign key is post.user_id
The procedure I'm creating is the following:
-- Skeleton from the question: intentionally incomplete — it returns NEW
-- without updating anything. The working body is in the answer below.
CREATE FUNCTION update_post_count() RETURNS TRIGGER AS $updates_user_postCount$
BEGIN
-- I know that NEW contains the new post info, so I
-- can gather the user_id by doing NEW.post_id.
--
-- What exactly should I do here?
RETURN NEW;
END;
$updates_user_postCount$ LANGUAGE plpgsql;
How should I structure this procedure? Can I just use a direct SQL query, something like:
-- NOTE(review): invalid as written — Postgres rejects a table-qualified
-- target column in SET ("user"."post_count"); this is exactly the error
-- the question reports, and the answer below removes the qualification.
UPDATE "user"
SET "user"."post_count" = "user"."post_count" + 1
WHERE "user"."_id" = NEW.idol_id;
UPDATE
I've tried using that SQL statement inside the procedure, but it returns the error error: column "user" of relation "user" does not exist.
Here is the SQL statement that I used to create both user and post tables:
-- Account table. Quoted name because "user" is a reserved word.
CREATE TABLE IF NOT EXISTS "user" (
_id BIGSERIAL UNIQUE,   -- numeric id; FK target of post.idol_id
__id TEXT UNIQUE,       -- secondary textual id
fbid VARCHAR(100),
name VARCHAR(100) NOT NULL,
email VARCHAR(100) NOT NULL UNIQUE,
password VARCHAR(512) NOT NULL,
profile_picture VARCHAR(512),
profile_cover VARCHAR(512),
profile_about TEXT,
profile_birth_date BIGINT,
social_facebook VARCHAR(256),
social_twitter VARCHAR(256),
social_instagram VARCHAR(256),
post_count BIGINT,      -- denormalized counter maintained by the trigger
highlighted BOOLEAN,
idol BOOLEAN,
free BOOLEAN,
blocked BOOLEAN
);
-- Posts table; idol_id links each post to its author in "user".
CREATE TABLE IF NOT EXISTS "post" (
_id BIGSERIAL UNIQUE,
__id TEXT UNIQUE,
idol_id BIGINT,         -- author; used by the trigger as NEW.idol_id
removed BOOLEAN,
free BOOLEAN,
created_at BIGINT,
hashtags VARCHAR(1024),
audio_src VARCHAR(512),
audio_size INTEGER,
audio_length INTEGER,
FOREIGN KEY ("idol_id") REFERENCES "user"("_id")
);
Your trigger function is largely correct. The only problem is that an UPDATE statement cannot use the table.column notation.
From the documentation: Do not include the table's name in the specification of a target column — for example, UPDATE tab SET tab.col = 1 is invalid.
-- Trigger body: one more post for this author, so bump the cached counter.
-- The target column is deliberately unqualified — "SET user.post_count"
-- would be rejected by Postgres.
CREATE FUNCTION update_post_count() RETURNS TRIGGER AS $$
BEGIN
    UPDATE "user"
       SET post_count = post_count + 1
     WHERE _id = NEW.idol_id;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

Need foreign key as array

-- "name" had no data type in the original, which is a syntax error.
CREATE TABLE test ( id int PRIMARY KEY , name text );
-- Kept as posted: this statement is the question's failing example — an
-- integer[] column cannot REFERENCE an integer key (error shown below).
CREATE TABLE test1 ( id integer[] REFERENCES test , rollid int );
ERROR: foreign key constraint "test3_id_fkey" cannot be implemented
DETAIL: Key columns "id" and "id" are of incompatible types: integer[] and integer.
after that I try to another way also
-- Second attempt from the question: adding the FK afterwards fails with
-- the same type-incompatibility error.
CREATE TABLE test1 ( id integer[] , rollid int);
ALTER TABLE test1 ADD CONSTRAINT foreignkeyarray FOREIGN KEY (id) REFERENCES test;
ERROR: foreign key constraint "fkarray" cannot be implemented
DETAIL: Key columns "id" and "id" are of incompatible types: integer[] and integer.
So when I try to create a foreign key on an array column, it raises an error. Can anyone tell me why?
postgresql version is 9.1.
What you're trying to do simply can't be done. At all. No ifs, no buts.
Create a new table, test1_test, containing two fields, test1_id, test_id. Put the foreign keys as needed on that one, and make test1's id an integer.
Using arrays of foreign element keys is usually a sign of incorrect design. You need a separate table with a one-to-many relationship.
But technically it is possible. Example of checking array values without triggers. One reusable function with paramethers and dynamic sql. Tested on PostgreSQL 10.5
create schema if not exists test;
-- Reusable CHECK helper: returns true when every non-null element of "data"
-- exists in ref_schema.ref_table.ref_column, false (with a NOTICE naming the
-- first offender) otherwise. NULL input yields NULL via RETURNS NULL ON NULL
-- INPUT. Dynamic SQL notes: %1$I/%2$I/%3$I are format() positional
-- identifier arguments (schema/table/column), while $1 inside the inner
-- dollar-quoted string is the EXECUTE ... USING parameter (the array).
CREATE OR REPLACE FUNCTION test.check_foreign_key_array(data anyarray, ref_schema text, ref_table text, ref_column text)
RETURNS BOOL
RETURNS NULL ON NULL INPUT
LANGUAGE plpgsql
AS
$body$
DECLARE
-- first array element missing from the referenced column, as text; NULL if none
fake_id text;
sql text default format($$
select id::text
from unnest($1) as x(id)
where id is not null
and id not in (select %3$I
from %1$I.%2$I
where %3$I = any($1))
limit 1;
$$, ref_schema, ref_table, ref_column);
BEGIN
EXECUTE sql USING data INTO fake_id;
IF (fake_id IS NOT NULL) THEN
RAISE NOTICE 'Array element value % does not exist in column %.%.%', fake_id, ref_schema, ref_table, ref_column;
RETURN false;
END IF;
RETURN true;
END
$body$;
-- Demo: t2.t1_ids is an integer[] whose elements are validated against
-- t1.id by the CHECK constraint calling check_foreign_key_array().
drop table if exists test.t1, test.t2;
create table test.t1 (
id integer generated by default as identity primary key
);
create table test.t2 (
id integer generated by default as identity primary key,
t1_ids integer[] not null check (test.check_foreign_key_array(t1_ids, 'test', 't1', 'id'))
);
insert into test.t1 (id) values (default), (default), (default); --ok
insert into test.t2 (id, t1_ids) values (default, array[1,2,3]); --ok
insert into test.t2 (id, t1_ids) values (default, array[1,2,3,555]); --error: 555 not in t1
If you are able to put there just values from test.id, then you can try this:
-- Reverse-direction guard: before a test.id is deleted/updated, scan every
-- element of every test1.id array; if the old id is still referenced,
-- abort with a 23000 integrity error, otherwise allow the operation.
-- NOTE(review): this unnests all of test1 on every fire — presumably fine
-- for small tables, but confirm scale before relying on it.
CREATE OR REPLACE FUNCTION test_trigger() RETURNS trigger
LANGUAGE plpgsql AS $BODY$
DECLARE
val integer;
BEGIN
SELECT id INTO val
FROM (
SELECT UNNEST(id) AS id
FROM test1
) AS q
WHERE id = OLD.id;
IF val IS NULL THEN RETURN OLD;
ELSE
RAISE 'Integrity Constraint Violation: ID "%" in Test1', val USING ERRCODE = '23000';
RETURN NULL;
END IF;
END; $BODY$;
-- DROP TRIGGER test_delete_trigger ON test;
-- Fire the guard above for deletes and for updates that change test.id.
CREATE TRIGGER test_delete_trigger BEFORE DELETE OR UPDATE OF id ON test
FOR EACH ROW EXECUTE PROCEDURE test_trigger();