Postgres function error: column 't001' does not exist - postgresql

Here is my code:
-- Team: one row per franchise, identified by a 4-character team id.
DROP TABLE IF EXISTS team;
CREATE TABLE team (
    tid       CHAR(4)     NOT NULL PRIMARY KEY,
    name      VARCHAR(20),
    city      VARCHAR(25),
    headcoach VARCHAR(12)
);

-- Players: one row per player, identified by a 4-character player id.
DROP TABLE IF EXISTS players;
CREATE TABLE players (
    pid      CHAR(4)       NOT NULL PRIMARY KEY,
    players  VARCHAR(9),
    name     VARCHAR(15),
    priceUSD NUMERIC(8, 0),
    position VARCHAR(2)
);

-- Equipment: catalogue of items and their prices.
DROP TABLE IF EXISTS equipment;
CREATE TABLE equipment (
    eid      CHAR(4)        NOT NULL PRIMARY KEY,
    item     VARCHAR(18),
    priceUSD NUMERIC(10, 0)
);
-- Team Equipment: quantity and total price of each equipment item held by each team.
DROP TABLE IF EXISTS teamequipment;
CREATE TABLE teamequipment (
    tid      char(4) not null,
    eid      char(4) not null,
    qty      integer,
    priceUSD numeric(10,0),
    -- A team holds several items, so the key is the (team, item) pair.
    -- Keying on tid alone rejects the second row of any team with a
    -- duplicate-key error.
    primary key (tid, eid)
);
-- Team --
INSERT INTO team (tid, name, city, headcoach)
VALUES
    ('t001', 'Giants',   'New York',   'Coughlin'),
    ('t002', 'Jets',     'New York',   'Ryan'),
    ('t003', 'Patriots', 'Boston',     'Belichick'),
    ('t004', 'Steelers', 'Pittsburgh', 'Tomlin'),
    ('t005', 'Dolphins', 'Miami',      'Philbin'),
    ('t006', 'Panthers', 'Charlotte',  'Rivera');

-- Players: the "players" column holds the surname, "name" holds the team name.
INSERT INTO players (pid, players, name, priceUSD, position)
VALUES
    ('p001', 'Manning',  'Giants',   10000000.00, 'QB'),
    ('p002', 'Cruz',     'Giants',    5000000.00, 'WR'),
    ('p003', 'Smith',    'Jets',      3000000.00, 'QB'),
    ('p004', 'Pace',     'Jets',      6000000.00, 'LB'),
    ('p005', 'Brady',    'Patriots', 12000000.00, 'QB'),
    ('p006', 'Ridley',   'Patriots',  5000000.00, 'RB'),
    ('p007', 'Polamalu', 'Steelers',  7000000.00, 'SS'),
    ('p008', 'Miller',   'Steelers',  4000000.00, 'TE'),
    ('p009', 'Pouncey',  'Dolphins',  6000000.00, 'C'),
    ('p010', 'Wake',     'Dolphins', 11000000.00, 'DE'),
    ('p011', 'Newton',   'Panthers',  9000000.00, 'QB'),
    ('p012', 'Hardy',    'Panthers', 13000000.00, 'DE');

-- Equipment --
INSERT INTO equipment (eid, item, priceUSD)
VALUES
    ('e01', 'shoulder pads', 225.00),
    ('e02', 'gloves',         40.00),
    ('e03', 'cleats',        115.00),
    ('e04', 'helmets',       300.00),
    ('e05', 'socks',          10.00),
    ('e06', 'knee pads',       6.00),
    ('e07', 'jerseys',       150.00),
    ('e08', 'visors',         50.00),
    ('e09', 'pants',          30.00);
-- Team Equipment
-- NOTE(review): the tid/eid values below are written without single quotes, so
-- PostgreSQL parses t001, e01, ... as column references instead of string
-- literals; this is what raises 'column "t001" does not exist'. Writing them as
-- 't001', 'e01', ... makes them literals for the char(4) columns.
INSERT INTO teamequipment ( tid, eid, qty, priceUSD)
VALUES(t001, e01, 25, 5625.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t001, e04, 33, 9900.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t001, e08, 15, 750.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t002, e01, 37, 8325.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t002, e02, 45, 1800.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t002, e04, 20, 6000.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t002, e07, 55, 8250.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t003, e01, 10, 2250.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t003, e03, 25, 2875.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t004, e05, 50, 500.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t004, e07, 55, 8250.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t004, e09, 11, 330.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t005, e01, 22, 4950.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t005, e06, 40, 240.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t005, e08, 20, 1000.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t006, e02, 13, 520.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t006, e05, 15, 150.00);
INSERT INTO teamequipment( tid, eid, qty, priceUSD)
VALUES(t006, e08, 3, 150.00);
The error I get is:
****** Error ******
ERROR: column "t001" does not exist
SQL state: 42703
Character: 3637
How do I fix this?

Character varying or char values must be enclosed in single quotes; an unquoted t001 is parsed as a column name.
Change your query like below:
-- String values for the char(4) columns are written as quoted literals.
INSERT INTO teamequipment (tid, eid, qty, priceUSD)
VALUES ('t001', 'e04', 33, 9900.00);

When inserting use quotes with t00x eg 't001'. You have declared the column as char(4).

Related

Postgres CTE Insert and get count

Here's a CTE query. After the insert, I want to get the updated count. The insert happens fine but the count returns the count before the INSERT and does not include the new row. Can you please let me know if I am doing something wrong here?
-- Inserts one reply (and its user_replies link) and then counts the replies of
-- the same comment, all in a single statement.
WITH reply_data(id, threadid, commentid, userid, description, created, updated) AS (
VALUES ('27c12e17-b105-48fd-897b-82e5965ab15a'::uuid,
'bbe04e77-0e53-4716-b001-81e7dbf40d70'::uuid,
'fd2513fb-5e92-4a40-a295-6c122c325166'::uuid,
'5b3a6120-233e-4b77-9160-c08c484db31b'::uuid,
'Manual Reply to comment from SQL',
now(),
now())
),
reply_insert AS (
INSERT INTO replies (id, threadid, commentid, userid, description, created, updated)
SELECT rd.id, rd.threadid, rd.commentid, rd.userid, rd.description, rd.created, rd.updated
FROM reply_data rd
RETURNING id, commentid
),
user_reply_insert as (
INSERT INTO user_replies (userid, replyid)
SELECT rd.userid, rd.id FROM reply_data rd
RETURNING userid
),
-- NOTE(review): this reads replies directly, so it sees the table as it was
-- before the statement started; the row added by reply_insert is only visible
-- through that CTE's RETURNING output, which is why the count excludes it.
replyCount as (
select count(*) as repliescount
from replies r,
reply_data rd
where r.commentid = rd.commentid
)
SELECT repliescount FROM replyCount;
According to the PostgreSQL documentation, all sub-statements of a query with CTEs are executed with the same snapshot of the database, so they cannot see one another's effects on the target tables; in effect, they happen at the same time.
You would need two statements (in a single transaction) for what you are trying to do, or you can compute the total inside the same query by adding the number of rows returned by the inserting CTE to the pre-insert count:
WITH reply_data (id, threadid, commentid, userid, description, created, updated) AS (
    -- The single reply being added.
    VALUES (
        '27c12e17-b105-48fd-897b-82e5965ab15a'::uuid,
        'bbe04e77-0e53-4716-b001-81e7dbf40d70'::uuid,
        'fd2513fb-5e92-4a40-a295-6c122c325166'::uuid,
        '5b3a6120-233e-4b77-9160-c08c484db31b'::uuid,
        'Manual Reply to comment from SQL',
        now(),
        now()
    )
),
reply_insert AS (
    INSERT INTO replies (id, threadid, commentid, userid, description, created, updated)
    SELECT src.id, src.threadid, src.commentid, src.userid, src.description, src.created, src.updated
    FROM reply_data AS src
    RETURNING id, commentid
),
user_reply_insert AS (
    INSERT INTO user_replies (userid, replyid)
    SELECT src.userid, src.id
    FROM reply_data AS src
    RETURNING userid
),
replyCount AS (
    -- replies is read as of the start of the statement, so the rows returned
    -- by reply_insert are added on top of the pre-insert count.
    SELECT count(*) + (SELECT count(*) FROM reply_insert) AS repliescount
    FROM replies AS existing
    INNER JOIN reply_data AS src
        ON existing.commentid = src.commentid
)
SELECT repliescount
FROM replyCount;

PGSQL CTE recursive INSERT RETURNING autoincrement

What I have:
-- Self-referencing menu tree: parent_id points at the id of the parent node
-- (NULL for a root node).
CREATE TABLE public.treeview_menu_node (
    id        bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY,
    parent_id bigint,
    data      jsonb,
    name      varchar,
    caption   varchar,
    CONSTRAINT treeview_menu_node_pk PRIMARY KEY (id)
);

-- Seed rows are inserted one at a time, in this order, so the generated ids
-- line up with the parent_id values used below (1 and 4).
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (NULL, 'node 1');
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (1, 'node 1.1');
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (1, 'node 1.2');
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (NULL, 'node 2');
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (4, 'node 2.1');
INSERT INTO public.treeview_menu_node (parent_id, "name") VALUES (4, 'node 2.2');
Structure:
node 1
sub node 1.1
sub node 1.2
node 2
sub node 2.1
sub node 2.2
What I need:
Copy node 1 into node 2 recursively
node 1
sub node 1.1
sub node 1.2
node 2
sub node 2.1
sub node 2.2
NODE 1
SUB NODE 1.1
SUB NODE 1.2
What I try:
-- Attempt to copy a subtree by inserting inside both terms of a recursive CTE.
-- NOTE(review): this statement is rejected with a syntax error at UNION.
-- INSERT ... RETURNING cannot be used as an operand of UNION ALL, so neither
-- term of the recursive CTE can be a data-modifying statement.
WITH RECURSIVE r AS (
INSERT INTO public.treeview_menu_node (parent_id, name, caption, data)
SELECT new_parent_id, name, caption, data
FROM (
SELECT tmn.id, tmn.parent_id, :parent_id::BIGINT new_parent_id, tmn.name, tmn.caption, tmn.data
FROM public.treeview_menu_node tmn
WHERE id IN (:ids)
) t
RETURNING id, parent_id, name, caption, data
UNION ALL
INSERT INTO public.treeview_menu_node (parent_id, name, caption, data)
SELECT new_parent_id, name, caption, data
FROM (
SELECT tmn.id, tmn.parent_id, r.id new_parent_id, tmn.name, tmn.caption, tmn.data
FROM public.treeview_menu_node tmn
JOIN r r ON r.id = tmn.parent_id
) t
RETURNING id, parent_id, name, caption, data
)
SELECT id, parent_id, name, caption, data
FROM r;
Where:
:parent_id is destination node id
:ids is a list or one node to copy
What I get:
SQL Error [42601]: syntax error (Near: "UNION") Position: 357
http://sqlfiddle.com/#!17/1e6fa/3
I found a solution.
Multiple inserts in recursive CTE are not allowed. Instead of this, use the function:
NEXTVAL('table_sequensor_of_autoincrement')
The function returns a new id, like an INSERT command. Thanks to this, you can prepare the entire array for insertion using a single INSERT command.
-- Copies the nodes listed in :ids, with all of their descendants, under the
-- node :parent_id. New ids are drawn from the table's own identity sequence
-- while the tree is walked, so each copied child can point at the new id of
-- its copied parent and everything is written by one INSERT.
--
-- The sequence is looked up with pg_get_serial_sequence() instead of being
-- hard-coded: the sequence behind treeview_menu_node.id is created implicitly
-- and is named after the table and column, so a hand-typed name is easy to
-- get wrong.
WITH RECURSIVE r AS (
    -- Anchor: the selected nodes, re-parented under :parent_id.
    SELECT
        tmn.id,
        nextval(pg_get_serial_sequence('public.treeview_menu_node', 'id')) AS new_id,
        tmn.parent_id,
        :parent_id::BIGINT AS new_parent_id,
        tmn.name,
        tmn.caption,
        tmn.data
    FROM public.treeview_menu_node AS tmn
    WHERE tmn.id IN (:ids)

    UNION ALL

    -- Recursive step: children of an already-visited node, re-parented under
    -- the new id of that node.
    SELECT
        tmn.id,
        nextval(pg_get_serial_sequence('public.treeview_menu_node', 'id')) AS new_id,
        tmn.parent_id,
        r.new_id AS new_parent_id,
        tmn.name,
        tmn.caption,
        tmn.data
    FROM public.treeview_menu_node AS tmn
    INNER JOIN r
        ON r.id = tmn.parent_id
)
INSERT INTO public.treeview_menu_node (id, parent_id, name, caption, data)
SELECT new_id, new_parent_id, name, caption, data
FROM r;
http://sqlfiddle.com/#!17/1e6fa/10

Postgres Dynamic Query

I have a scenario where a master table stores database table names and column names, and I need to build a dynamic query based on that.
-- Catalogue of (caption, column name, table name) triples that drive the dynamic query.
CREATE TABLE MasterTable (
    Id           INT PRIMARY KEY,
    caption      VARCHAR(100),
    dbcolumnname VARCHAR(100),
    dbtablename  VARCHAR(100)
);

-- Three staff tables with the same shape.
CREATE TABLE Engineers (
    Id     INT PRIMARY KEY,
    Name   VARCHAR(100),
    Salary BIGINT
);

CREATE TABLE Executives (
    Id     INT PRIMARY KEY,
    Name   VARCHAR(100),
    Salary BIGINT
);

CREATE TABLE Manager (
    Id     INT PRIMARY KEY,
    Name   VARCHAR(100),
    Salary BIGINT
);

-- One sample row per staff table.
INSERT INTO Manager (Id, Name, Salary) VALUES (1, 'Manager 1', 6000000);
INSERT INTO Executives (Id, Name, Salary) VALUES (1, 'Executive 1', 6000000);
INSERT INTO Engineers (Id, Name, Salary) VALUES (1, 'Engineer 1', 6000000);

-- Each caption maps to one column in each of the three staff tables.
INSERT INTO MasterTable (Id, caption, dbcolumnname, dbtablename)
VALUES
    (1, 'Name',   'name',   'Engineers'),
    (2, 'Name',   'name',   'Manager'),
    (3, 'Name',   'name',   'Executives'),
    (4, 'Salary', 'Salary', 'Engineers'),
    (5, 'Salary', 'Salary', 'Manager'),
    (6, 'Salary', 'Salary', 'Executives');
I want to build a stored procedure which accepts caption and Id and give result back based on dbcolumnname and dbtablename. For example if I pass Salary,Name as caption and Id as 1, stored procedure should be query of dbcolumn and dbtable, something like below.
-- Target shape of the generated query: one (ID, Value) branch per
-- (column, table) pair. Name is varchar and Salary is bigint, and UNION
-- requires every branch to resolve to the same column type, so Salary is cast
-- to varchar.
SELECT Id AS ID, Name AS Value FROM Engineers
UNION
SELECT Id AS ID, Name AS Value FROM Manager
UNION
SELECT Id AS ID, Name AS Value FROM Executives
UNION
SELECT Id AS ID, CAST(Salary AS varchar(100)) AS Value FROM Executives
UNION
SELECT Id AS ID, CAST(Salary AS varchar(100)) AS Value FROM Engineers
UNION
SELECT Id AS ID, CAST(Salary AS varchar(100)) AS Value FROM Manager;
I have heard of dynamic sql, can that be used here?
Fiddle
EDIT :: I got one dynamic query which builds union statement to get the output, however problem is i am not able to escape double quotes. Below is the query and Error
Query :
-- Builds one "SELECT <column> FROM <table>" per MasterTable row, joins them
-- with UNION, and executes the resulting statement.
-- NOTE(review): %I double-quotes any value that is not already a plain
-- lower-case identifier, so the stored name 'Engineers' is emitted as
-- "Engineers". The table was created unquoted and is therefore stored as
-- engineers, hence the error shown below.
-- NOTE(review): EXECUTE without INTO discards the rows the query returns.
DO
$BODY$
BEGIN
EXECUTE string_agg(
format('SELECT %I FROM %I', dbcolumnname, dbtablename),
' UNION ')
FROM MasterTable;
END;
$BODY$;
Error:
ERROR: relation "Engineers" does not exist
LINE 1: SELECT name FROM "Engineers" UNION SELECT name FROM "Manager...
I'd like to suggest an alternative way of achieving what you want. That is, using PostgreSQL inheritance mechanism.
For instance:
-- Parent table exposing the columns shared by the three staff tables; once
-- the children inherit from it, selecting from ParentTable reads all of them.
CREATE TABLE ParentTable (
    Id int,
    Name varchar(100),
    Salary BigInt
);
ALTER TABLE Engineers INHERIT ParentTable;
ALTER TABLE Executives INHERIT ParentTable;
ALTER TABLE Manager INHERIT ParentTable;

-- Salary (bigint) is cast to varchar so both UNION branches have the same
-- type; without the cast PostgreSQL rejects the query because bigint and
-- character varying cannot be matched.
SELECT Id, CAST(Salary AS varchar(100)) AS value FROM ParentTable
UNION
SELECT Id, Name AS value FROM ParentTable;
Now if you want to use MasterTable in order to restrict the set of tables used, you can do it as follows:
-- Same query, limited to child tables that are listed in MasterTable.
-- tableoid identifies the child table each row physically comes from;
-- pg_class turns it into a table name that is compared case-insensitively
-- with dbtablename. ParentTable.Id is qualified because MasterTable also has
-- an Id column, and Salary is cast so that both UNION branches are varchar.
SELECT ParentTable.Id, ParentTable.Name AS value
FROM ParentTable
INNER JOIN pg_class ON ParentTable.tableoid = pg_class.oid
INNER JOIN MasterTable ON LOWER(MasterTable.dbtablename) = LOWER(pg_class.relname)
UNION
SELECT ParentTable.Id, CAST(ParentTable.Salary AS varchar(100)) AS value
FROM ParentTable
INNER JOIN pg_class ON ParentTable.tableoid = pg_class.oid
INNER JOIN MasterTable ON LOWER(MasterTable.dbtablename) = LOWER(pg_class.relname);
However, you can not arbitrarily restrict the set of columns to retrieve from one table to another with this technique.
Table names and column names are case insensitive in SQL, unless they are quoted in double quotes. Postgres does this by folding unquoted identifiers to lower case.
So, your DDL:
-- The DDL as written: MasterTable and Id are unquoted MixedCase identifiers.
CREATE TABLE MasterTable
(
Id int primary key,
caption varchar(100),
dbcolumnname varchar(100),
dbtablename varchar(100)
);
Will be interpreted by Postgres as
-- The same DDL after case folding: the unquoted names are stored in lower case.
CREATE TABLE mastertable
(
id int primary key,
caption varchar(100),
dbcolumnname varchar(100),
dbtablename varchar(100)
);
You can avoid case folding by quoting the names:
-- Double-quoted names keep their exact case; they must then be written with
-- the same quotes and case in every query that refers to them.
CREATE TABLE "MasterTable"
(
"Id" int primary key,
caption varchar(100),
dbcolumnname varchar(100),
dbtablename varchar(100)
);
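The %I format specifier (which uses quote_ident() internally) adds double quotes to its argument whenever they are needed, for example when the argument contains upper-case letters,
so the generated query asks for "Engineers" when only engineers is present in the schema.
It is easier to just avoid MixedCase identifiers, or to store the table and column names in lower case in the master table.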

TSQL Case WHEN LIKE REPLACE

Newbie question... looking for the fastest way to update a new column based on the existence of a value from another table, while replacing values.
Example, below, taking the words 'Bought a car' with 'car' into another table. The problem is 'Bought a car' is into another table.
I did a hack to reselect the value and do a replace, but with more rows, the performance is horrible, taking up to 3 to 5 minutes to perform.
Oh SQL Gurus, what is the best way to do this?
Example
-- Sets up a staging table and a history table, then fills HISTORY.Item from
-- the matching purchase text with the leading phrase removed.
-- NOTE(review): T-SQL table variables are declared with an @ prefix
-- (DECLARE @Staging_Table TABLE ...); a # name denotes a temp table created
-- with CREATE TABLE, so the DECLARE statements below do not parse as written.
DECLARE #Staging_Table TABLE
(
ACCTID INT IDENTITY(1,1),
NAME VARCHAR(50),
PURCHASES VARCHAR(255)
)
INSERT INTO #Staging_Table (Name, Purchases)
VALUES ('John','Bought a table')
INSERT INTO #Staging_Table (Name, Purchases)
VALUES ('Jack','Sold a car')
INSERT INTO #Staging_Table (Name, Purchases)
VALUES ('Mary','Returned a chair')
DECLARE #HISTORY TABLE
(
ACCTID INT IDENTITY(1,1),
NAME VARCHAR(50),
Item VARCHAR(255)
)
INSERT INTO #HISTORY (Name, Item)
VALUES ('John','')
INSERT INTO #HISTORY (Name, Item)
VALUES ('Jack','')
INSERT INTO #HISTORY (Name, Item)
VALUES ('Mary','')
-- NOTE(review): inside each subquery the unqualified Name resolves to the
-- subquery's own table (ts / ts2), not to the row being updated, so
-- "ts.NAME = Name" does not correlate the subquery with HISTORY.
-- NOTE(review): only 'Bought a ' is stripped, in both branches; the sample car
-- row reads 'Sold a car'.
-- The four subqueries are evaluated for every HISTORY row.
UPDATE #HISTORY
Set ITEM = CASE WHEN EXISTS(
Select ts.Purchases as Output from #Staging_Table ts
where ts.NAME = Name AND ts.PURCHASES LIKE '%table%')
THEN REPLACE((Select ts2.PURCHASES Output
from #Staging_Table ts2 where ts2.NAME = Name AND ts2.PURCHASES LIKE '%table%'),'Bought a ','')
WHEN EXISTS(
Select ts.Purchases as Output from #Staging_Table ts
where ts.NAME = Name AND ts.PURCHASES LIKE '%car%')
THEN REPLACE((Select ts2.PURCHASES Output
from #Staging_Table ts2 where ts2.NAME = Name AND ts2.PURCHASES LIKE '%car%'),'Bought a ','')
End
SELECT * FROM #HISTORY
-- Set-based version: HISTORY is joined to the staging rows once, instead of
-- running correlated subqueries for every row.
-- Table variables must be declared with an @ prefix; "DECLARE #name TABLE" is
-- not valid T-SQL (# names are temp tables created with CREATE TABLE).
DECLARE @Staging_Table TABLE
    (
      ACCTID INT IDENTITY(1, 1) ,
      NAME VARCHAR(50) ,
      PURCHASES VARCHAR(255)
    );

INSERT  INTO @Staging_Table
        ( Name, Purchases )
VALUES  ( 'John', 'Bought a table' ),
        ( 'Jack', 'Sold a car' ),
        ( 'Mary', 'Returned a chair' );

DECLARE @HISTORY TABLE
    (
      ACCTID INT IDENTITY(1, 1) ,
      NAME VARCHAR(50) ,
      Item VARCHAR(255)
    );

INSERT  INTO @HISTORY
        ( Name, Item )
VALUES  ( 'John', '' ),
        ( 'Jack', '' ),
        ( 'Mary', '' );

-- Each HISTORY row is matched to its staging row by NAME. The WHERE clause
-- limits the update to purchases mentioning a table or a car, so other rows
-- (e.g. 'Returned a chair') keep their current Item.
UPDATE  L
SET     L.ITEM = ( CASE WHEN R.PURCHASES LIKE '%table%'
                        THEN REPLACE(R.PURCHASES, 'Bought a ', '')
                        WHEN R.PURCHASES LIKE '%car%'
                        THEN REPLACE(R.PURCHASES, 'Sold a ', '')
                   END )
FROM    @HISTORY AS L
        INNER JOIN @Staging_Table AS R ON L.NAME = R.NAME
WHERE   ( R.PURCHASES LIKE '%table%'
          OR R.PURCHASES LIKE '%car%'
        );

SELECT  ACCTID ,
        NAME ,
        Item
FROM    @HISTORY;

Updating duplicates from one-to-many relationships.

This isn't your standard "how do I find duplicates" question; I know how to find duplicates (see below). The question is: how do I update those records when they also have child items that must match?
Alright, I'm going to give you whole scenario so that you can work with this problem.
Duplicate records could be inserted as a result of critical system failure.
Finding later duplicates and marking the parent commission_import_commission_junction "is_processed = True" solves this problem.
The complication is that the commission_import_commission_junction and its children commission_import_commission_junction_line_items must be identical on the columns to compare.
the tables are:
commission_import_commission_junction
- id
- created_date
- some columns that are checked for duplication
- some columns that are not checked for duplication
commission_import_commission_junction_line_items
- id
- some columns that are checked for duplication
- some columns that are not checked for duplication
(For the full table spec, check out the CREATE TABLE statements in the bottom-most block of code.)
The query to mark duplicates on just the parent table commission_import_commission_junction:
-- Marks every later copy of a duplicated parent row as processed. Rows are
-- duplicates when they agree on all compared columns (NULLs compare equal via
-- IS NOT DISTINCT FROM); the row with the earliest created_date in each group
-- is left untouched.
UPDATE commission_import_commission_junction cicj
SET is_processed = TRUE
FROM (
    -- One row per duplicated group, carrying the earliest created_date.
    -- Only the parent table is read here: joining the line items in would
    -- make posting_date and sale_amount ambiguous (both tables have them) and
    -- would multiply COUNT(*) by the number of line items.
    SELECT MIN(created_date) AS first_date,
           member_id, site_id, action_status, action_type, ad_id, commission_id,
           country, event_date, locking_date, order_id, original, original_action_id,
           posting_date, website_id, advertiser_name, commission_amount, sale_amount,
           aggregator_affiliate_id
    FROM commission_import_commission_junction inner_imports
    GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id,
             country, event_date, locking_date, order_id, original, original_action_id,
             posting_date, website_id, advertiser_name, commission_amount, sale_amount,
             aggregator_affiliate_id
    HAVING (COUNT(*) > 1)
) AS dups
WHERE
    -- MAIN TABLE COLUMN LIST
    (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id,
     cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id,
     cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount,
     cicj.aggregator_affiliate_id)
    IS NOT DISTINCT FROM
    -- OTHER TABLE COLUMN LIST
    (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id,
     dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id,
     dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount,
     dups.aggregator_affiliate_id)
    -- Skip the earliest row of each group and anything already processed.
    AND cicj.created_date <> dups.first_date
    AND cicj.is_processed = FALSE;
Somewhere and somehow I need to check that the line_items are also duplicates.
THE CODE BELOW IS TO SETUP THE DATABASE, remember this is postgres specific.
-- "commission_import_build" records one run of the process that collects commission information.
-- Duplicate commission_import_commission_junction records never share a commission_import_build_id.
-- "commission_import_commission_junction" describes the commission information from a customer's purchase.
-- "commission_import_commission_junction_line_items" describe the items in that purchase.

-- Drop children before parents so the foreign keys do not block the drops.
DROP TABLE IF EXISTS commission_import_commission_junction_line_items;
DROP TABLE IF EXISTS commission_import_commission_junction;
DROP TABLE IF EXISTS commission_import_builds;

CREATE TABLE commission_import_builds (
    commission_import_build_id serial      NOT NULL,
    build_date                 timestamptz NOT NULL,
    CONSTRAINT pkey_commission_import_build_id PRIMARY KEY (commission_import_build_id),
    CONSTRAINT commission_import_builds_build_date_key UNIQUE (build_date)
);

-- Three builds on consecutive days.
INSERT INTO commission_import_builds (commission_import_build_id, build_date)
VALUES
    (1, '2011-01-01'),
    (2, '2011-01-02'),
    (3, '2011-01-03');
-- Parent table: one row per imported commission record.
CREATE TABLE commission_import_commission_junction (
    commission_import_commission_junction_id serial        NOT NULL,
    member_id                                integer,
    site_id                                  integer,
    action_status                            varchar       NOT NULL,
    action_type                              varchar       NOT NULL,
    ad_id                                    bigint,
    commission_id                            bigint        NOT NULL,
    country                                  varchar,
    event_date                               timestamptz   NOT NULL,
    locking_date                             timestamptz,
    order_id                                 varchar       NOT NULL,
    original                                 boolean,
    original_action_id                       bigint        NOT NULL,
    posting_date                             timestamptz   NOT NULL,
    website_id                               bigint        NOT NULL,
    advertiser_name                          varchar,
    commission_amount                        numeric(19,2) NOT NULL,
    sale_amount                              numeric(19,2) NOT NULL,
    aggregator_affiliate_id                  integer       NOT NULL,
    is_processed                             boolean       NOT NULL DEFAULT false,
    created_date                             timestamptz   NOT NULL DEFAULT now(),
    member_transaction_id                    integer,
    commission_import_build_id               integer       NOT NULL,
    CONSTRAINT pkey_commission_import_commission_junction_commission_import_co
        PRIMARY KEY (commission_import_commission_junction_id),
    CONSTRAINT fk_commission_import_commission_junction_commission_import_buil
        FOREIGN KEY (commission_import_build_id)
        REFERENCES commission_import_builds (commission_import_build_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

CREATE INDEX idx_commission_import_commission_junction_is_processed
    ON commission_import_commission_junction USING btree (is_processed);

-- Three records that are identical on every compared column; they differ only
-- in id, build and created_date.
INSERT INTO commission_import_commission_junction (
    commission_import_commission_junction_id, action_status, action_type, commission_id,
    event_date, order_id, original_action_id, posting_date, website_id,
    commission_amount, sale_amount, aggregator_affiliate_id,
    commission_import_build_id, created_date
)
VALUES
    (1, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 1, '2011-02-05'),
    (2, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 2, '2011-02-06'),
    (3, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 3, '2011-02-07');

SELECT * FROM commission_import_commission_junction;
-- Child table: the line items that belong to one parent commission record.
CREATE TABLE commission_import_commission_junction_line_items (
    commission_import_commission_junction_line_item_id serial  NOT NULL,
    commission_import_commission_junction_id           integer NOT NULL,
    sku                                                varchar,
    quantity                                           integer,
    posting_date                                       timestamptz,
    sale_amount                                        numeric(19,2),
    discount                                           numeric(19,2),
    CONSTRAINT pkey_commission_import_commission_junction_link_items_commissio
        PRIMARY KEY (commission_import_commission_junction_line_item_id),
    CONSTRAINT fkey_commission_import_commission_junction_line_items_commissio
        FOREIGN KEY (commission_import_commission_junction_id)
        REFERENCES commission_import_commission_junction (commission_import_commission_junction_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

-- The same three line items for each of the three parent records.
INSERT INTO commission_import_commission_junction_line_items (
    commission_import_commission_junction_id, sku, quantity, sale_amount
)
VALUES
    (1, 'test1', 3, 23.45),
    (1, 'test2', 3, 67.50),
    (1, 'test3', 3, 32.50),
    (2, 'test1', 3, 23.45),
    (2, 'test2', 3, 67.50),
    (2, 'test3', 3, 32.50),
    (3, 'test1', 3, 23.45),
    (3, 'test2', 3, 67.50),
    (3, 'test3', 3, 32.50);
Reminds me of duplicate elimination in direct marketing mailing lists
Regardless of the details of your tables, a parent-child dupe elimination algorithm follows these steps:
1) Get duplicates into a list that matches old key to new key (temp table)
2) Update the foreign key in the child table
3) Delete the dupes from the parent
I admire the detail in your post, but I'm going to keep it simple and easier to read with some example table/column names:
-- step 1, get the list
-- Warning: t-sql syntax, adjust for Postgres
-- if it doesn't like placement of "into..." clause
-- Result: one (keepKey, dupeKey) row per duplicate, where keepKey is the
-- lowest primary key of the group and dupeKey is any other row in it.
select keep.primaryKey as keepKey
     , dupe.primaryKey as dupeKey
into #DupeList
from (
       -- one row per duplicated group, with the key of the row to keep
       select min(primaryKey) as primaryKey
            , dupeCriteria1
            , dupeCriteria2
       from theTable
       group by dupeCriteria1, dupeCriteria2
       having count(*) > 1
     ) keep
join theTable dupe
  on keep.dupeCriteria1 = dupe.dupeCriteria1
 and keep.dupeCriteria2 = dupe.dupeCriteria2
 and keep.primaryKey <> dupe.primaryKey
Once you have that, update the foreign key in the child table:
-- step 2: re-point child rows from each duplicate parent to the parent that
-- is kept. Reads #DupeList, the temp table filled by the "into #DupeList"
-- query in step 1.
update child
set foreignKey = d.keepKey
from childTable as child
join #DupeList as d
  on child.foreignKey = d.dupeKey
Then just delete everything out of the parent table:
-- step 3: remove the duplicate parents recorded in #DupeList (the temp table
-- filled in step 1).
delete from parentTable
where primaryKey in (select dupeKey from #DupeList)
-- Marks duplicate commission records as processed and returns how many rows
-- were marked.
--
-- A row is a duplicate when
--   * it is unprocessed and is not the earliest row (by created_date) of a
--     group of parent rows that agree on every compared column, and
--   * its line items are the same multiset as the line items of the earliest
--     row, compared on sku, quantity, posting_date, sale_amount and discount.
-- NULLs compare equal in both checks (IS NOT DISTINCT FROM / EXCEPT ALL).
--
-- The line items are compared with EXCEPT ALL in both directions rather than
-- with array loops, so that
--   * two records that both have no line items count as identical
--     (array_upper() of an empty array is NULL, which never equals anything), and
--   * repeated line items must match in number, not merely exist on the other side.
CREATE FUNCTION removeCommissionImportCommissionJunctionDuplicates() RETURNS INT AS $BODY$
DECLARE
    duplicate       RECORD;    -- an unprocessed later copy, plus first_date of its group
    original_id     INT;       -- id of the earliest row of the same group
    children_differ BOOLEAN;   -- TRUE when the two sets of line items are not identical
    num_updates     INT := 0;
BEGIN
    FOR duplicate IN
        SELECT cicj.*, dups.first_date
        FROM commission_import_commission_junction cicj
        JOIN (SELECT MIN(created_date) AS first_date,
                     member_id, site_id, action_status, action_type, ad_id, commission_id,
                     country, event_date, locking_date, order_id, original, original_action_id,
                     posting_date, website_id, advertiser_name, commission_amount, sale_amount,
                     aggregator_affiliate_id
              FROM commission_import_commission_junction inner_imports
              GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id,
                       country, event_date, locking_date, order_id, original, original_action_id,
                       posting_date, website_id, advertiser_name, commission_amount, sale_amount,
                       aggregator_affiliate_id
              HAVING (COUNT(*) > 1)) AS dups
          ON (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id,
              cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id,
              cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount,
              cicj.aggregator_affiliate_id)
             IS NOT DISTINCT FROM
             (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id,
              dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id,
              dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount,
              dups.aggregator_affiliate_id)
        WHERE cicj.created_date != dups.first_date
          AND cicj.is_processed = FALSE
    LOOP
        -- Find the original: the row of the same group created at first_date.
        -- ORDER BY makes the choice deterministic if several rows share that date.
        SELECT cicj.commission_import_commission_junction_id
        INTO original_id
        FROM commission_import_commission_junction cicj
        WHERE (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id,
               cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id,
               cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount,
               cicj.aggregator_affiliate_id)
              IS NOT DISTINCT FROM
              (duplicate.member_id, duplicate.site_id, duplicate.action_status, duplicate.action_type, duplicate.ad_id,
               duplicate.commission_id, duplicate.country, duplicate.event_date, duplicate.locking_date,
               duplicate.order_id, duplicate.original, duplicate.original_action_id, duplicate.posting_date,
               duplicate.website_id, duplicate.advertiser_name, duplicate.commission_amount, duplicate.sale_amount,
               duplicate.aggregator_affiliate_id)
          AND cicj.created_date = duplicate.first_date
        ORDER BY cicj.commission_import_commission_junction_id
        LIMIT 1;

        -- Defensive: without an original there is nothing to compare against.
        CONTINUE WHEN original_id IS NULL;

        -- The line items differ when either side has a row the other lacks.
        SELECT EXISTS (
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = original_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
            )
            UNION ALL
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = original_id
            )
        )
        INTO children_differ;

        IF NOT children_differ THEN
            -- Parent and line items both match: mark the later copy as processed.
            UPDATE commission_import_commission_junction cicj
            SET is_processed = TRUE
            WHERE cicj.commission_import_commission_junction_id
                  = duplicate.commission_import_commission_junction_id;
            num_updates := num_updates + 1;
        END IF;
    END LOOP;

    RETURN num_updates;
END;
$BODY$ LANGUAGE plpgsql;