I have 2 tables like this
-- Two structurally identical tables holding per-user date periods.
-- NOTE: combining the scalar "=" operator with a range operator in a GiST
-- exclusion constraint relies on the btree_gist extension being installed.
DROP TABLE IF EXISTS public.table_1;
DROP TABLE IF EXISTS public.table_2;

CREATE TABLE public.table_1 (
    id         serial       NOT NULL,
    user_id    bigint       NOT NULL,
    status     varchar(255) NOT NULL,
    date_start date         NOT NULL,
    date_end   date         NULL      -- NULL leaves the period open-ended
);

CREATE TABLE public.table_2 (
    id         serial       NOT NULL,
    user_id    bigint       NOT NULL,
    status     varchar(255) NOT NULL,
    date_start date         NOT NULL,
    date_end   date         NULL      -- NULL leaves the period open-ended
);

-- Within one table, rows of the same user whose status is not 'deleted'
-- may not have overlapping periods (both end points inclusive).
ALTER TABLE public.table_1
    ADD CONSTRAINT my_constraint_1
    EXCLUDE USING gist (
        user_id WITH =,
        daterange(date_start, date_end, '[]') WITH &&
    )
    WHERE (status != 'deleted');

ALTER TABLE public.table_2
    ADD CONSTRAINT my_constraint_2
    EXCLUDE USING gist (
        user_id WITH =,
        daterange(date_start, date_end, '[]') WITH &&
    )
    WHERE (status != 'deleted');
Every table contains rows which are related to a user, and all the rows of the same user cannot overlap in range. In addition, some rows may be logically deleted, so I added a where condition.
So far it's working w/o problems, but the 2 constraints work separately for each table.
I need to create a constraint that covers both tables, so that a given date range (of the same user and not deleted) may appear only once across the 2 tables.
Can the EXCLUDE notation be extended to work across different tables, or do I need to check this with a trigger? If a trigger is the answer, what is the simplest way to do it? Create a temporary table with the union of the two, add the constraint on it, and check whether it fails?
Starting from @Laurenz Albe's suggestion, this is what I made:
-- #################### SETUP SAMPLE TABLES ####################
-- Creates two structurally identical period tables plus, per table, an
-- exclusion constraint that forbids overlapping periods for the same user
-- among rows that are not logically deleted.

-- "user_id WITH =" inside a GiST exclusion constraint needs GiST operator
-- support for bigint, which is provided by btree_gist. Without it the
-- ALTER TABLE statements below fail on a fresh database.
create extension if not exists btree_gist;

drop table if exists public.table_1;
drop table if exists public.table_2;

create table public.table_1 (
    id         serial       not null,
    user_id    bigint       not null,
    status     varchar(255) not null,
    date_start date         not null,
    date_end   date         null      -- null = open-ended period
);

create table public.table_2 (
    id         serial       not null,
    user_id    bigint       not null,
    status     varchar(255) not null,
    date_start date         not null,
    date_end   date         null      -- null = open-ended period
);

-- '[]' makes both date_start and date_end part of the period.
alter table public.table_1
    add constraint my_constraint_1
    exclude using gist (
        user_id with =,
        daterange(date_start, date_end, '[]') with &&
    )
    where (status != 'deleted');

alter table public.table_2
    add constraint my_constraint_2
    exclude using gist (
        user_id with =,
        daterange(date_start, date_end, '[]') with &&
    )
    where (status != 'deleted');
-- #################### SETUP TRIGGER ####################
-- Row-level trigger function enforcing the "no overlapping active period per
-- user" rule against ANOTHER table.
--
-- Trigger argument:
--   TG_ARGV[0]  name of the table in schema public to check NEW against.
-- Returns:
--   NEW unchanged when no conflict exists.
-- Raises:
--   an exception when an active (status != 'deleted') row of the same user
--   in the other table overlaps NEW's period.
--
-- NOTE: as with any trigger-based cross-table check, two concurrent
-- transactions can each pass the check and then both commit, unless they run
-- at SERIALIZABLE isolation level.
create or replace function check_date_overlap_trigger_hook()
returns trigger as
$body$
declare
    l_table    text := TG_ARGV[0];
    l_conflict boolean;
begin
    if l_table is null then
        raise exception 'check_date_overlap_trigger_hook() needs the table to check as its first trigger argument';
    end if;

    -- Logically deleted rows never take part in the overlap rule, exactly as
    -- in the per-table exclusion constraints (WHERE status != 'deleted').
    if new.status = 'deleted' then
        return new;
    end if;

    -- %I quotes the table name as an identifier; the row values are passed as
    -- parameters instead of being spliced into the SQL text.
    -- The ranges use '[]' (both bounds inclusive) to match the exclusion
    -- constraints; a NULL date_end yields an unbounded range.
    execute format(
        'select exists (
             select 1
             from public.%I as t
             where t.user_id = $1               -- only records of the same user
               and t.status != ''deleted''      -- only records that are active
               and daterange(t.date_start, t.date_end, ''[]'')
                   && daterange($2, $3, ''[]'')
         )',
        l_table)
    into l_conflict
    using new.user_id, new.date_start, new.date_end;

    if l_conflict then
        raise exception 'Date interval is overlapping with another one in table %', l_table
            using hint = 'You can''t have the same interval across table1 AND table2';
    end if;

    return new;
end
$body$
language plpgsql;
-- #################### INSTALL TRIGGER ####################
-- Each table gets a row-level BEFORE trigger; the single argument names the
-- opposite table, which the trigger function reads through TG_ARGV[0].
CREATE TRIGGER check_date_overlap
    BEFORE INSERT OR UPDATE ON public.table_1
    FOR EACH ROW
    EXECUTE PROCEDURE check_date_overlap_trigger_hook('table_2');

CREATE TRIGGER check_date_overlap
    BEFORE INSERT OR UPDATE ON public.table_2
    FOR EACH ROW
    EXECUTE PROCEDURE check_date_overlap_trigger_hook('table_1');
-- #################### INSERT DEMO ROWS ####################
-- Seed table_1 with one active and one logically deleted period for each of
-- users 1 and 2.
INSERT INTO public.table_1 (user_id, status, date_start, date_end)
VALUES
    (1, 'active',  '2020-12-10', '2020-12-20'),
    (1, 'deleted', '2020-12-15', '2020-12-25'),
    (2, 'active',  '2020-12-10', '2020-12-20'),
    (2, 'deleted', '2020-12-15', '2020-12-25');

-- This will fail for overlap on the same table
-- insert into public.table_1 (user_id, status, date_start, date_end) values (1, 'active', '2020-12-15', '2020-12-25');

-- This will fail as the user 1 already has an overlapping period on table 1
-- insert into public.table_2 (user_id, status, date_start, date_end) values (1, 'active', '2020-12-15', '2020-12-25');

-- This will fail as the user 1 already has an overlapping period on table 1
INSERT INTO public.table_2 (user_id, status, date_start, date_end)
VALUES (1, 'deleted', '2020-12-15', '2020-12-25');

UPDATE public.table_2
SET status = 'active'
WHERE id = 1;

-- Show the contents of both tables, tagged with the table each row came from.
SELECT 'table_1' AS src_table, * FROM public.table_1
UNION
SELECT 'table_2', * FROM public.table_2;
You can probably use a trigger, but triggers are always vulnerable to race conditions (unless you are using SERIALIZABLE isolation).
If your tables really have the same columns, why don't you use a single table (and perhaps add a type column to disambiguate)?
So I'm setting up a schema in which I can input transactions of a journal entry independent of each other but also that rely on each other (mainly to ensure that debits = credits). I set up the tables, function, and trigger. Then, when I try to input values into the transactions table, I get the error below. I'm doing all of this in pgAdmin4.
-- Individual journal lines. uuid_generate_v1() is not built in; it comes
-- from the uuid-ossp extension.
CREATE TABLE transactions (
    transactions_id uuid          PRIMARY KEY DEFAULT uuid_generate_v1(),
    entry_id        integer       NOT NULL,
    post_date       date          NOT NULL,
    account_id      integer       NOT NULL,
    contact_id      integer       NULL,
    description     text          NOT NULL,
    reference_id    uuid          NULL,
    document_id     uuid          NULL,
    amount          numeric(12,2) NOT NULL
);

-- One row per journal entry; the CHECK only admits a balance of exactly zero
-- (a NULL balance also passes, because a CHECK rejects only FALSE).
CREATE TABLE entries (
    id        uuid          PRIMARY KEY,
    test_date date          NOT NULL,
    balance   numeric(12,2)
        CHECK (balance = 0.00)
);
-- Trigger function: on every call, re-aggregates the WHOLE transactions table
-- (one sum per entry_id) and inserts the result into entries via dynamic SQL.
-- NOTE(review): the SELECT lists post_date, but post_date is neither
--   aggregated nor part of GROUP BY, so the query itself is not valid.
-- NOTE(review): transactions.entry_id is INTEGER while entries.id is UUID, so
--   the inserted value does not match the target column type.
-- NOTE(review): nothing here needs dynamic SQL; the statement text is constant.
CREATE OR REPLACE FUNCTION transactions_biut()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
EXECUTE 'INSERT INTO entries (id,test_date,balance)
SELECT
entry_id,
post_date,
SUM(amount) AS ''balance''
FROM
transactions
GROUP BY
entry_id;';
-- NOTE(review): control reaches END without a RETURN; a PL/pgSQL trigger
-- function has to return NEW, OLD or NULL.
END;
$$;
-- Fires the function once per row, BEFORE the row is stored.
-- NOTE(review): because this is a BEFORE ... FOR EACH ROW trigger, the
-- aggregate above runs before the triggering row is in the table, and runs
-- again for every single row of a multi-row statement.
CREATE TRIGGER transactions_biut
BEFORE INSERT OR UPDATE ON transactions
FOR EACH ROW EXECUTE PROCEDURE transactions_biut();
-- Sample journal entry 1: two lines whose amounts (-200.00 and 200.00) sum
-- to zero.
INSERT INTO transactions (entry_id, post_date, account_id, description, amount)
VALUES
    ('1', '2019-10-01', '101', 'MISC DEBIT: PAID FOR FACEBOOK ADS', -200.00),
    ('1', '2019-10-01', '505', 'MISC DEBIT: PAID FOR FACEBOOK ADS',  200.00);
After I execute this input, I get the following error:
ERROR: column "id" of relation "entries" does not exist
LINE 1: INSERT INTO entries (id,test_date,balance)
^
QUERY: INSERT INTO entries (id,test_date,balance)
SELECT
entry_id,
post_date,
SUM(amount) AS "balance"
FROM
transactions
GROUP BY
entry_id;
CONTEXT: PL/pgSQL function transactions_biut() line 2 at EXECUTE
SQL state: 42703
There are a few problems here:
You're not returning anything from the trigger function => should probably be return NEW or return OLD since you're not modifying anything
Since you're executing the trigger before each row, it's bound to fail for any transaction that isn't 0 => maybe you want a deferred constraint trigger?
You're not grouping by post_date, so your select should fail
You've defined entry_id as INTEGER, but entries.id is of type UUID
Also note that this isn't really going to scale (you're summing up all transactions of all days, so this will get slower and slower...)
@chirs I was able to figure out how to create a working solution using statement-level triggers:
-- Individual journal lines. uuid_generate_v1() is not built in; it comes
-- from the uuid-ossp extension.
CREATE TABLE transactions (
    transactions_id uuid          PRIMARY KEY DEFAULT uuid_generate_v1(),
    entry_id        integer       NOT NULL,
    post_date       date          NOT NULL,
    account_id      integer       NOT NULL,
    contact_id      integer       NULL,
    description     text          NOT NULL,
    reference_id    uuid          NULL,
    document_id     uuid          NULL,
    amount          numeric(12,2) NOT NULL
);

-- Balance sheet per entry: inserting a row whose balance is not exactly zero
-- violates the CHECK and therefore aborts the statement that caused it.
CREATE TABLE entries (
    entry_id  integer       PRIMARY KEY,
    post_date date          NOT NULL,
    balance   numeric(12,2),
    CHECK (balance = 0.00)
);
-- Statement-level AFTER trigger function for transactions.
--
-- Sums the amounts of the rows touched by the triggering statement, grouped
-- by (entry_id, post_date), and inserts one row per group into entries. The
-- CHECK (balance = 0.00) on entries then rejects any statement whose rows do
-- not balance.
--
-- Expects the transition tables to be exposed by the trigger definition as
-- old_table (DELETE) and new_table (INSERT, UPDATE).
-- Returns NULL; the return value of an AFTER trigger is ignored.
CREATE OR REPLACE FUNCTION transactions_entries() RETURNS TRIGGER AS $$
BEGIN
    IF (TG_OP = 'DELETE') THEN
        -- Only the removed rows are available.
        INSERT INTO entries (entry_id, post_date, balance)
        SELECT o.entry_id, o.post_date, SUM(o.amount)
        FROM old_table o
        GROUP BY o.entry_id, o.post_date;
    ELSIF (TG_OP IN ('INSERT', 'UPDATE')) THEN
        -- For UPDATE the balance is taken from the new row images only.
        -- Joining new_table to old_table without a join condition (as an
        -- implicit cross join) would multiply every amount by the number of
        -- updated rows and pair each entry_id with every other entry's rows.
        INSERT INTO entries (entry_id, post_date, balance)
        SELECT n.entry_id, n.post_date, SUM(n.amount)
        FROM new_table n
        GROUP BY n.entry_id, n.post_date;
    END IF;
    RETURN NULL; -- result is ignored since this is an AFTER trigger
END;
$$ LANGUAGE plpgsql;
-- One statement-level AFTER trigger per operation. Each exposes the
-- transition tables available for that operation under the names
-- (old_table / new_table) that transactions_entries() queries.
CREATE TRIGGER transactions_ins
    AFTER INSERT ON transactions
    REFERENCING NEW TABLE AS new_table
    FOR EACH STATEMENT
    EXECUTE PROCEDURE transactions_entries();

CREATE TRIGGER transactions_upd
    AFTER UPDATE ON transactions
    REFERENCING OLD TABLE AS old_table
                NEW TABLE AS new_table
    FOR EACH STATEMENT
    EXECUTE PROCEDURE transactions_entries();

CREATE TRIGGER transactions_del
    AFTER DELETE ON transactions
    REFERENCING OLD TABLE AS old_table
    FOR EACH STATEMENT
    EXECUTE PROCEDURE transactions_entries();
Any thoughts on optimization?
I've created a trigger on a table called Project, the query successfully compiled however when I go to insert data into the table I get an error stating:
Msg 14636, Level 16, State 1, Procedure sp_send_dbmail, Line 112 [Batch Start Line 139]
No global profile is configured. Specify a profile name in the @profile_name parameter.
As well as:
Msg 3930, Level 16, State 1, Procedure sp_send_dbmail, Line 64 [Batch Start Line 139]
The current transaction cannot be committed and cannot support operations that write to the log file. Roll back the transaction.
I'm not completely familiar with the sp_send_dbmail procedure, but I tried to set it up so that whenever the trigger fires it sends an email to xxxxx@gmail.com.
-- Audit table for Project: a copy of the project columns plus when the audit
-- row was written, which operation produced it and which login performed it.
CREATE TABLE [dbo].[ProjectAudit] (
    projectId        char(4)       NOT NULL,
    projectName      varchar(50)   NULL,
    fundedbudget     decimal(16,2) NULL,
    firmFedID        char(9)       NULL,
    statusID         varchar(25)   NULL,
    projectTypeID    char(4)       NULL,
    startDate        date          NULL,
    projectedEndDate date          NULL,
    projectManager   char(8)       NULL,
    dateTimeCreated  smalldatetime NULL,
    operation        varchar(50),
    userName         varchar(50)
)
GO
-- Project trigger
-- Audits every INSERT / UPDATE / DELETE on Project into ProjectAudit and
-- sends a notification through Database Mail.
--
--  * Rows in "inserted" are audited with operation 'INSERT', rows in
--    "deleted" with operation 'DELETE'. An UPDATE fills both pseudo-tables,
--    so it is recorded as one 'DELETE' row (old image) and one 'INSERT' row
--    (new image).
--  * The mail text names a single project. For a multi-row statement it is
--    whichever row the variable assignment happens to see last.
--  * NOTE(review): sp_send_dbmail runs inside the triggering transaction, so
--    a mail failure rolls the data change back. Consider queueing
--    notifications instead if that is not wanted.
CREATE TRIGGER trg_ProjectAudit
ON Project
AFTER INSERT, DELETE, UPDATE
AS
BEGIN
    SET NOCOUNT ON;

    -- sp_send_dbmail needs an explicit profile unless a default (global)
    -- profile is configured, which is what error 14636 complains about.
    -- TODO(review): replace with a Database Mail profile that exists on this
    -- instance.
    DECLARE @mailProfile sysname = N'ProjectAuditMail';

    -- Wide enough for a 50-character projectName plus the fixed suffix.
    DECLARE @message varchar(100);

    DECLARE @hasInserted bit = 0;
    DECLARE @hasDeleted  bit = 0;

    IF EXISTS (SELECT projectId FROM inserted) SET @hasInserted = 1;
    IF EXISTS (SELECT projectId FROM deleted)  SET @hasDeleted  = 1;

    IF @hasInserted = 1
    BEGIN
        SELECT @message = projectName
        FROM inserted;

        -- projectName is nullable; NULL + text would make the body NULL.
        SET @message = ISNULL(@message, '(unnamed project)')
                     + ' has been inputted into the project table.';

        INSERT INTO ProjectAudit
            (projectId, projectName, fundedbudget, firmFedID, statusID, projectTypeID,
             startDate, projectedEndDate, projectManager, operation, dateTimeCreated, userName)
        SELECT projectId, projectName, fundedbudget, firmFedID, statusID, projectTypeID,
               startDate, projectedEndDate, projectManager, 'INSERT', getdate(), System_user
        FROM inserted;

        EXEC msdb.dbo.sp_send_dbmail
             @profile_name = @mailProfile,
             @recipients   = 'xxxxx@gmail.com',
             @body         = @message;
    END

    IF @hasDeleted = 1
    BEGIN
        SELECT @message = projectName
        FROM deleted;

        SET @message = ISNULL(@message, '(unnamed project)')
                     + ' has been deleted from the project table.';

        INSERT INTO ProjectAudit
            (projectId, projectName, fundedbudget, firmFedID, statusID, projectTypeID,
             startDate, projectedEndDate, projectManager, operation, dateTimeCreated, userName)
        SELECT projectId, projectName, fundedbudget, firmFedID, statusID, projectTypeID,
               startDate, projectedEndDate, projectManager, 'DELETE', getdate(), System_user
        FROM deleted;

        EXEC msdb.dbo.sp_send_dbmail
             @profile_name = @mailProfile,
             @recipients   = 'xxxxx@gmail.com',
             @body         = @message;
    END

    --check if a column update
    -- UPDATE(column) is also true for every column of an INSERT, so require
    -- both pseudo-tables to be populated (i.e. a real UPDATE statement).
    IF @hasInserted = 1 AND @hasDeleted = 1
       AND (UPDATE(projectid) OR UPDATE(projectTypeID))
    BEGIN
        PRINT 'ProjectID or ProjectTypeID column was updated';

        EXEC msdb.dbo.sp_send_dbmail
             @profile_name = @mailProfile,
             @recipients   = 'xxx@gmail.com',
             @body         = 'Data has been updated in the project table.';
    END
END
GO
-- Rebuilds dbo.Templates with an additional isHidden column, but only when
-- Templates does not have that column yet: create Tmp_Templates, copy the
-- rows (keeping their ID values), drop the old table, rename the new one and
-- re-create the primary key.
-- NOTE(review): GO is a batch separator handled by the client tool, not a
-- T-SQL statement. The first GO below ends the batch right after
-- BEGIN TRANSACTION, so this IF ... BEGIN is sent without its END, which is
-- the "Incorrect syntax near 'TRANSACTION'" error reported for this script.
IF NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
BEGIN TRANSACTION
GO
CREATE TABLE dbo.Tmp_Templates
(
ID int NOT NULL IDENTITY (1, 1),
isHidden bit NULL,
FileName nvarchar(255) NOT NULL,
Name nvarchar(255) NOT NULL,
Description nvarchar(1024) NULL,
UploadedByTVDBUsersID int NOT NULL,
Created datetime NOT NULL
)
GO
-- Allow explicit values for the IDENTITY column while the rows are copied.
SET IDENTITY_INSERT dbo.Tmp_Templates ON
GO
-- Copy the existing rows; isHidden is not listed and therefore stays NULL.
IF EXISTS(SELECT * FROM dbo.Templates)
EXEC('INSERT INTO dbo.Tmp_Templates (ID, FileName, Name, Description, UploadedByTVDBUsersID, Created)
SELECT ID, FileName, Name, Description, UploadedByTVDBUsersID, Created FROM dbo.Templates WITH (HOLDLOCK TABLOCKX)')
GO
SET IDENTITY_INSERT dbo.Tmp_Templates OFF
GO
DROP TABLE dbo.Templates
GO
-- sp_rename prints the "Caution: Changing any part of an object name ..."
-- message seen in the output; that part is informational.
EXECUTE sp_rename N'dbo.Tmp_Templates', N'Templates', 'OBJECT'
GO
ALTER TABLE dbo.Templates ADD CONSTRAINT
PK__Templates__499219E9 PRIMARY KEY CLUSTERED
(
ID
)
GO
PRINT N' Templates ADD isHidden'
COMMIT
END
Results in error:
Msg 102, Level 15, State 1, Line 7 Incorrect syntax near
'TRANSACTION'. Caution: Changing any part of an object name could
break scripts and stored procedures.
Update:
Excluding the IF statement wrapping the transaction this SQL is generated by Microsoft SQL Server Management Studio.
If I remove the wrapping IF statement then everything works, but I only need the change to happen if the field isn't already there. How can I make the IF statement work properly?
Um... why the -1 and the vote to close?
I had to wrap each part of the transaction in an IF statement so the GOs were not embedded in an IF statement. The following TSQL works just fine. The transaction updates the schema as expected.
-- Adds the isHidden column to dbo.Templates by rebuilding the table, but only
-- when the column is not there yet. Every step sits in its own batch and
-- repeats the "column missing" test, so no GO ends up inside an IF block.
--
-- Failure safety: each batch runs independently, so without precautions an
-- error while creating or filling Tmp_Templates would NOT stop the later
-- batches from dropping dbo.Templates and committing. Therefore:
--   * XACT_ABORT ON rolls the whole transaction back on any run-time error;
--   * every later batch also requires @@TRANCOUNT > 0, so once the
--     transaction is gone the remaining steps (including DROP TABLE) are
--     skipped instead of running outside the transaction.
SET XACT_ABORT ON
GO
BEGIN TRANSACTION
GO
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    CREATE TABLE dbo.Tmp_Templates
    (
        ID int NOT NULL IDENTITY (1, 1),
        isHidden bit NULL,
        FileName nvarchar(255) NOT NULL,
        Name nvarchar(255) NOT NULL,
        Description nvarchar(1024) NULL,
        UploadedByTVDBUsersID int NOT NULL,
        Created datetime NOT NULL
    )
    ALTER TABLE dbo.Tmp_Templates ADD PRIMARY KEY (ID)
END
GO
-- Allow explicit ID values so the copied rows keep their identities.
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    SET IDENTITY_INSERT dbo.Tmp_Templates ON
END
GO
-- Copy the existing rows; isHidden is not listed and therefore stays NULL.
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    IF EXISTS(SELECT * FROM dbo.Templates)
        EXEC('INSERT INTO dbo.Tmp_Templates (ID, FileName, Name, Description, UploadedByTVDBUsersID, Created)
SELECT ID, FileName, Name, Description, UploadedByTVDBUsersID, Created FROM dbo.Templates WITH (HOLDLOCK TABLOCKX)')
END
GO
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    SET IDENTITY_INSERT dbo.Tmp_Templates OFF
END
GO
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    DROP TABLE dbo.Templates
END
GO
-- After the DROP, Object_ID(N'Templates') is NULL, so the column test still
-- reports "missing" and the rename goes ahead.
IF @@TRANCOUNT > 0
   AND NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
    EXECUTE sp_rename N'dbo.Tmp_Templates', N'Templates', 'OBJECT'
    PRINT N' Templates ADD isHidden'
END
GO
-- Nothing to commit if an earlier error already rolled the transaction back.
IF @@TRANCOUNT > 0
    COMMIT
The first GO statement splits your script, so the first batch that is sent to the server is only:
IF NOT EXISTS(SELECT * FROM SYS.COLUMNS WHERE Name=N'isHidden' AND Object_ID=Object_ID(N'Templates'))
BEGIN
BEGIN TRANSACTION
-- error: the END that closes this BEGIN is missing
So there is a BEGIN keyword without its END.
You need to remove the GO statements from inside the IF block.
UPDATE:
IF 1 = 1
BEGIN
SELECT * FROM someTable
GO
END
-- Minimal reproduction: the GO above ends the batch before END is reached,
-- so the IF ... BEGIN block arrives at the server unterminated.
also generate Msg 102, Level 15, State 1, Line 3
Incorrect syntax near 'someTable'.
SqlServer won't allow you to use sp_rename inside the transaction since it could break things badly.
You can drop and add the table again, or, in your case, you can also use a temporary table to do the query work, truncate the old table, and move the rows from the temp table into Templates.
Sample temp table
-- Sample temporary table (the single # prefix is what makes it a local
-- temporary table in SQL Server).
CREATE TABLE #myTempTable (
    DummyField1 int,
    DummyField2 varchar(20)
)
Reference
http://msdn.microsoft.com/en-us/library/ms188351.aspx
This isn't your standard "how do I find duplicates" question; I know how to find duplicates (see below). The question is: how do I update those records when they also have child items with matching records?
Alright, I'm going to give you whole scenario so that you can work with this problem.
Duplicate records could be inserted as a result of critical system failure.
Finding later duplicates and marking the parent commission_import_commission_junction "is_processed = True" solves this problem.
The complication is that the commission_import_commission_junction and its children commission_import_commission_junction_line_items must be identical on the columns to compare.
the tables are:
commission_import_commission_junction
- id
- created_date
- some columns that are checked for duplication
- some columns that are not checked for duplication
commission_import_commission_junction_line_items
- id
- some columns that are checked for duplication
- some columns that are not checked for duplication
(For the full table spec, check out the CREATE TABLE statements in the bottom-most block of code.)
The query to mark duplicates on just the parent table commission_import_commission_junction:
-- Marks later duplicates in the PARENT table as processed.
--
-- "dups" holds one row per set of parent rows that agree on every compared
-- column, together with the earliest created_date of the set. Every
-- unprocessed row of such a set that is not the earliest one is flagged.
--
-- The line-item table is deliberately not joined here: it also has
-- posting_date and sale_amount columns, so joining it makes the unqualified
-- column references ambiguous, and it would count line items instead of
-- parent rows. Comparing the children needs a separate step.
UPDATE commission_import_commission_junction cicj
SET is_processed = TRUE
FROM (
    SELECT
        MIN(created_date) AS first_date,
        member_id, site_id, action_status, action_type, ad_id, commission_id,
        country, event_date, locking_date, order_id, original, original_action_id,
        posting_date, website_id, advertiser_name, commission_amount, sale_amount,
        aggregator_affiliate_id
    FROM commission_import_commission_junction inner_imports
    GROUP BY
        member_id, site_id, action_status, action_type, ad_id, commission_id,
        country, event_date, locking_date, order_id, original, original_action_id,
        posting_date, website_id, advertiser_name, commission_amount, sale_amount,
        aggregator_affiliate_id
    HAVING (COUNT(*) > 1)
) AS dups
WHERE
    -- MAIN TABLE COLUMN LIST
    (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
    -- NULL-safe comparison: several of these columns are nullable
    IS NOT DISTINCT FROM
    -- OTHER TABLE COLUMN LIST
    (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
    AND cicj.created_date <> dups.first_date  -- keep the earliest row untouched
    AND cicj.is_processed = FALSE;
Somewhere and somehow I need to check that the line_items are also duplicates.
THE CODE BELOW IS TO SETUP THE DATABASE, remember this is postgres specific.
-- "commission_import_build" is a record that keeps information about the process of collecting the commission information. Duplicate commission_import_commission_junction records will not exist with the same commission_import_build_id
-- "commission_import_commission_junction" is a record describing commission information from a customer's purchase.
-- "commission_import_commission_junction_line_items" are records describing items in that purchase.
-- Drop children before parents so the foreign keys never block a DROP.
DROP TABLE IF EXISTS commission_import_commission_junction_line_items;
DROP TABLE IF EXISTS commission_import_commission_junction;
DROP TABLE IF EXISTS commission_import_builds;

-- One row per import run; build_date is unique.
CREATE TABLE commission_import_builds (
    commission_import_build_id serial                   NOT NULL,
    build_date                 timestamp with time zone NOT NULL,
    CONSTRAINT pkey_commission_import_build_id
        PRIMARY KEY (commission_import_build_id),
    CONSTRAINT commission_import_builds_build_date_key
        UNIQUE (build_date)
);

-- Three builds on consecutive days, with explicit ids.
INSERT INTO commission_import_builds (commission_import_build_id, build_date)
VALUES
    (1, '2011-01-01'),
    (2, '2011-01-02'),
    (3, '2011-01-03');
-- Parent table: one row per imported commission record.
CREATE TABLE commission_import_commission_junction (
    commission_import_commission_junction_id serial                   NOT NULL,
    member_id                                integer,
    site_id                                  integer,
    action_status                            character varying        NOT NULL,
    action_type                              character varying        NOT NULL,
    ad_id                                    bigint,
    commission_id                            bigint                   NOT NULL,
    country                                  character varying,
    event_date                               timestamp with time zone NOT NULL,
    locking_date                             timestamp with time zone,
    order_id                                 character varying        NOT NULL,
    original                                 boolean,
    original_action_id                       bigint                   NOT NULL,
    posting_date                             timestamp with time zone NOT NULL,
    website_id                               bigint                   NOT NULL,
    advertiser_name                          character varying,
    commission_amount                        numeric(19,2)            NOT NULL,
    sale_amount                              numeric(19,2)            NOT NULL,
    aggregator_affiliate_id                  integer                  NOT NULL,
    is_processed                             boolean                  NOT NULL DEFAULT false,
    created_date                             timestamp with time zone NOT NULL DEFAULT now(),
    member_transaction_id                    integer,
    commission_import_build_id               integer                  NOT NULL,
    CONSTRAINT pkey_commission_import_commission_junction_commission_import_co
        PRIMARY KEY (commission_import_commission_junction_id),
    CONSTRAINT fk_commission_import_commission_junction_commission_import_buil
        FOREIGN KEY (commission_import_build_id)
        REFERENCES commission_import_builds (commission_import_build_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

CREATE INDEX idx_commission_import_commission_junction_is_processed
    ON commission_import_commission_junction
    USING btree (is_processed);

-- Three rows that are identical in every compared column; they differ only
-- in id, build and created_date (one per build, one day apart).
INSERT INTO commission_import_commission_junction
    (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date)
VALUES
    (1, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 1, '2011-02-05'),
    (2, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 2, '2011-02-06'),
    (3, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 3, '2011-02-07');

SELECT * FROM commission_import_commission_junction;
-- Child table: the line items belonging to one commission record.
CREATE TABLE commission_import_commission_junction_line_items (
    commission_import_commission_junction_line_item_id serial  NOT NULL,
    commission_import_commission_junction_id           integer NOT NULL,
    sku                                                character varying,
    quantity                                           integer,
    posting_date                                       timestamp with time zone,
    sale_amount                                        numeric(19,2),
    discount                                           numeric(19,2),
    CONSTRAINT pkey_commission_import_commission_junction_link_items_commissio
        PRIMARY KEY (commission_import_commission_junction_line_item_id),
    CONSTRAINT fkey_commission_import_commission_junction_line_items_commissio
        FOREIGN KEY (commission_import_commission_junction_id)
        REFERENCES commission_import_commission_junction (commission_import_commission_junction_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

-- The same three line items for each of the three parent rows.
INSERT INTO commission_import_commission_junction_line_items
    (commission_import_commission_junction_id, sku, quantity, sale_amount)
VALUES
    (1, 'test1', 3, 23.45),
    (1, 'test2', 3, 67.50),
    (1, 'test3', 3, 32.50),
    (2, 'test1', 3, 23.45),
    (2, 'test2', 3, 67.50),
    (2, 'test3', 3, 32.50),
    (3, 'test1', 3, 23.45),
    (3, 'test2', 3, 67.50),
    (3, 'test3', 3, 32.50);
Reminds me of duplicate elimination in direct marketing mailing lists
Regardless of the details of your tables, a parent-child dupe elimination algorithm follows these steps:
1) Get duplicates into a list that matches old key to new key (temp table)
2) Update the foreign key in the child table
3) Delete the dupes from the parent
I admire the detail in your post, but I'm going to keep it simple and easier to read with some example table/column names:
-- step 1, get the list
-- Warning: t-sql syntax, adjust for Postgres
-- if it doesn't like placement of "into..." clause
--
-- Result: one row per duplicate, pairing the key of the row to keep (the
-- lowest primaryKey of its group) with the key of a row to eliminate.
select keep.primaryKey as keepKey
     , dupe.primaryKey as dupeKey
into #DupeList
from (
    -- one survivor per group of rows sharing the dupe criteria
    select min(primaryKey) as primaryKey
         , dupeCriteria1
         , dupeCriteria2
    from theTable
    group by dupeCriteria1, dupeCriteria2
    having count(*) > 1
) keep
join theTable dupe
    on  keep.dupeCriteria1 = dupe.dupeCriteria1
    and keep.dupeCriteria2 = dupe.dupeCriteria2
    and keep.primaryKey   <> dupe.primaryKey
Once you have that, update the foreign key in the child table:
-- step 2: re-point child rows from each duplicate parent to the parent that
-- is kept. The list built in step 1 is named #DupeList.
update childTable
set foreignKey = #DupeList.keepKey
from #DupeList
where foreignKey = #DupeList.dupeKey
Then just delete everything out of the parent table:
-- step 3: remove the duplicate parents; their children were re-pointed in
-- step 2. The list built in step 1 is named #DupeList.
delete from parentTable
where primaryKey in (select dupeKey from #DupeList)
-- Marks duplicated commission records as processed.
--
-- A record counts as a duplicate when
--   1. it is unprocessed and agrees with an earlier record (the "original",
--      i.e. the one with the smallest created_date of its group) on every
--      compared parent column, and
--   2. its line items are the same multiset as the original's line items,
--      compared on (sku, quantity, posting_date, sale_amount, discount).
-- All comparisons are NULL-safe. Two records that both have no line items
-- at all satisfy rule 2.
--
-- Returns the number of records that were marked is_processed = TRUE.
CREATE FUNCTION removeCommissionImportCommissionJunctionDuplicates() RETURNS INT AS $BODY$
DECLARE
    duplicate      RECORD;         -- candidate duplicate parent plus first_date of its group
    original_id    INTEGER;        -- id of the earliest record of the group
    children_match BOOLEAN;
    num_updates    INT := 0;
BEGIN
    FOR duplicate IN
        SELECT cicj.*, dups.first_date
        FROM commission_import_commission_junction cicj
        JOIN (
            SELECT MIN(created_date) AS first_date, member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            FROM commission_import_commission_junction inner_imports
            GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            HAVING (COUNT(*) > 1)
        ) AS dups
          ON (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
             IS NOT DISTINCT FROM
             (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
        WHERE cicj.created_date != dups.first_date
          AND cicj.is_processed = FALSE
    LOOP
        -- Locate the original record of this duplicate's group. Should
        -- several rows share the earliest created_date, the lowest id is
        -- used so that the choice is deterministic.
        SELECT cicj.commission_import_commission_junction_id
        INTO original_id
        FROM commission_import_commission_junction cicj
        WHERE (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
              IS NOT DISTINCT FROM
              (duplicate.member_id, duplicate.site_id, duplicate.action_status, duplicate.action_type, duplicate.ad_id, duplicate.commission_id, duplicate.country, duplicate.event_date, duplicate.locking_date, duplicate.order_id, duplicate.original, duplicate.original_action_id, duplicate.posting_date, duplicate.website_id, duplicate.advertiser_name, duplicate.commission_amount, duplicate.sale_amount, duplicate.aggregator_affiliate_id)
          AND cicj.created_date = duplicate.first_date
        ORDER BY cicj.commission_import_commission_junction_id
        LIMIT 1;

        CONTINUE WHEN original_id IS NULL;

        -- Multiset comparison of the two sets of line items: they match when
        -- neither side has a row left over after removing the other side's
        -- rows one for one. EXCEPT ALL respects multiplicities and treats
        -- NULLs as equal. A per-child "is there any equal row" test would
        -- accept [A, A, B] against [A, B, B], and an array-bound comparison
        -- yields NULL (never TRUE) when there are no line items.
        SELECT NOT EXISTS (
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = original_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
            )
            UNION ALL
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = original_id
            )
        )
        INTO children_match;

        IF children_match THEN
            -- Update the duplicate record as processed.
            UPDATE commission_import_commission_junction cicj
            SET is_processed = TRUE
            WHERE cicj.commission_import_commission_junction_id
                  = duplicate.commission_import_commission_junction_id;

            num_updates := num_updates + 1;
        END IF;
    END LOOP;

    RETURN num_updates;
END;
$BODY$ LANGUAGE plpgsql;