postgresql: INSERT INTO statement with select query - postgresql

I have to insert new rows into my table, one for each existing value of its client_id column, so I am using a SELECT inside the INSERT:
INSERT into 'my_table' (column1, client_id, column3) VALUES (val1,select distinct client_id from 'my_table', val3)
I need the client_id values to come from the same table, my_table, and I need them in the INSERT statement.
SELECT DISTINCT client_id FROM my_table returns 113 client_ids, so I want to insert one row for each of those 113 clients using the above approach.
I did this query
INSERT INTO client_notification_preferences (client_id, object_type , frequency,created_at,updated_at) SELECT DISTINCT client_id, 'ClientShipment',1, CURRENT_TIMESTAMP , CURRENT_TIMESTAMP FROM client_notification_preferences;
but this gives me this error
create_table "client_notification_preferences", id: :uuid, default: "uuid_generate_v4()", force: :cascade do |t|
t.uuid "client_id"
t.string "object_type"
t.integer "frequency"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

If val1 and val3 are variables (or constants) and client_id is a column of my_table, you can use the code below.
-- Template: val1 and val3 stand for the values to store in column1 and column3.
-- Inserts one row per distinct client_id already present in my_table.
-- The table name must not be single-quoted: 'my_table' is a string literal,
-- not an identifier.
INSERT INTO my_table (column1, client_id, column3)
SELECT DISTINCT val1, client_id, val3
FROM my_table
for example
-- Example with constants: one new row (1, client_id, 2) per distinct client_id.
-- The table name is written unquoted; 'my_table' in single quotes would be a
-- string literal, not an identifier.
INSERT INTO my_table (column1, client_id, column3)
SELECT DISTINCT 1, client_id, 2
FROM my_table;

Related

plpgsql one input get all the related fields/columns to json failed

Try to answer this question then found out, I cannot solve it.
Basic idea: propagate twice, first from country_id to state_id, then from state_id to city_id. The country_id then needs to be joined twice: when we do array_agg at the state level we need to join on country_id explicitly, and at the city level we also need to join on country_id.
Reference link: https://github.com/hettie-d/NORM/tree/master/sql
Basic idea, input one country_id, all the relevant country, state, city level information will be transformed to json format.
Prepare. I use country_id, state_id, city_id, since they are more descriptive.
-- Fixture: a three-level hierarchy (country -> states -> cities) plus sample rows,
-- created and loaded inside one transaction.
begin;

create table public.country (
    country_id bigint primary key,
    name       text,
    leader     text
);

create table public.states (
    state_id   bigint primary key,
    name       text,
    population bigint,
    country_id bigint references public.country (country_id)
);

-- cities has no primary key; it only points at its state.
create table public.cities (
    city_id  bigint,
    name     text,
    state_id bigint references public.states (state_id)
);

insert into public.country (country_id, name, leader) values
    (1, 'India',     'Narendra Modi'),
    (2, 'USA',       'Joe Biden'),
    (3, 'Australia', 'Scott Morrison');

insert into public.states (state_id, name, population, country_id) values
    (1, 'California',   39500000, 2),
    (2, 'Washington',    7610000, 2),
    (4, 'Karnataka',    64100000, 1),
    (5, 'Rajasthan',    68900000, 1),
    (6, 'Maharashtra', 125700000, 1);

insert into public.cities (city_id, name, state_id) values
    (1, 'Mumbai',        6),
    (2, 'Pune',          6),
    (3, 'San Francisco', 1);

commit;
-- Composite types that mirror the nesting of the output:
-- a country holds an array of states, a state holds an array of cities.
begin;

create type city_record as (
    city_name text
);

create type state_record as (
    state_name text,
    population bigint,
    cities     city_record[]
);

create type country_record as (
    country_name text,
    leader       text,
    states       state_record[]
);

commit;
array transport
-- array_transport: returns one row of JSON text per element of the given array.
--   all_items  array whose elements are read into a record loop variable
--              (presumably meant for arrays of composite values - confirm).
-- Declared STRICT, i.e. RETURNS NULL ON NULL INPUT.
create or replace function array_transport (all_items anyarray)
returns setof text
language plpgsql
strict
as
$body$
declare
    element record;
begin
    foreach element in array all_items loop
        return next to_json(element)::text;
    end loop;
    return;
end;
$body$;
-- country_select_json: builds the nested country -> states -> cities record
-- for one country.
--   _country_id  key of the country to export.
-- Returns a one-dimensional country_record[] with one element per matching
-- country, or NULL when no country matches.
create or replace function country_select_json (_country_id bigint)
returns country_record[]
as
$$
declare
    -- Same type as the declared return type, so no text round-trip is needed.
    _result country_record[];
begin
    select array_agg(
               row(
                   co.name,
                   co.leader,
                   (select array_agg(
                               row(
                                   s.name,
                                   s.population,
                                   -- Correlate on the enclosing state only. Re-joining
                                   -- states here (and re-using the alias s) pulled in
                                   -- every city of the whole country for each state.
                                   (select array_agg(row(c.name)::city_record)
                                    from cities c
                                    where c.state_id = s.state_id)
                               )::state_record)
                    from states s
                    where s.country_id = co.country_id)
               )::country_record)
    into _result
    from country co
    where co.country_id = _country_id;

    return _result;
end
$$ language plpgsql;
run
select * from array_transport(country_select_json(1));
{"country_name":"India","leader":"Narendra Modi","states":[{"state_name":"Karnataka","population":64100000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]},{"state_name":"Rajasthan","population":68900000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]},{"state_name":"Maharashtra","population":125700000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]}]}
(1 row)
country level, state level ok, but the city level is wrong. How to solve this problem.
Expected Result:
{"country_name":"India","leader":"Narendra Modi","states":[{"state_name":"Karnataka","population":64100000,"cities":[NULL]},{"state_name":"Rajasthan","population":68900000,"cities":[NULL]},{"state_name":"Maharashtra","population":125700000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]}]}
update 2022-03-04.
(select array_agg
(c.name) as city_name
from cities c
join states s using (state_id)
where s.country_id = co.country_id)
Now I know the problem: the propagation goes first from city, then to state, then to country. Once the function receives the country_id, all city names related to that country are pulled together, regardless of which state they belong to.
Does this query answer your needs ? (Result here)
-- Builds one pretty-printed JSON document for country 1.
-- ci: cities pre-aggregated per state, so each state only receives its own cities.
with ci as (
    select
        cities.state_id,
        jsonb_agg(jsonb_build_object('city_name', cities.name)) as cities
    from cities
    group by cities.state_id
)
select jsonb_pretty(jsonb_build_object(
    'country_name', c.name,
    'leader', c.leader,
    -- FILTER drops the all-NULL row the LEFT JOIN produces for a country without
    -- states; such a country then gets an empty array instead of one bogus state.
    'states', coalesce(
        jsonb_agg(jsonb_build_object(
            'state_name', s.name,
            'population', s.population,
            'cities', ci.cities
        )) filter (where s.state_id is not null),
        '[]'::jsonb
    )
))
from country c
left join states s on s.country_id = c.country_id
left join ci on ci.state_id = s.state_id
where c.country_id = 1
-- Group by the key as well, so two countries sharing name and leader never merge.
group by c.country_id, c.name, c.leader;
// Result
{
"country_name": "India",
"leader": "Narendra Modi",
"states": [
{
"state_name": "Maharashtra",
"population": 125700000,
"cities": [
{
"city_name": "Mumbai"
},
{
"city_name": "Pune"
}
]
},
{
"state_name": "Rajasthan",
"population": 68900000,
"cities": null
},
{
"state_name": "Karnataka",
"population": 64100000,
"cities": null
}
]
}

How can I filter the columns I get from information_schema.columns in PostgreSQL?

I have this request :
INSERT INTO colonnes ("colonnesName") SELECT "column_name" FROM information_schema.columns WHERE "table_name" = tableName;
but I would like to get only the columns that can be set to NULL. How can I do that?
Check the value of the IS_NULLABLE column for YES or NO:
-- Copies the names of the nullable columns of one table into colonnes.
-- "colonnesName" stays double-quoted as in the question: unquoted, PostgreSQL
-- folds it to colonnesname, which is a different identifier.
-- NOTE(review): add a table_schema filter if the same table name can exist in
-- more than one schema.
INSERT INTO colonnes ("colonnesName")
SELECT column_name
FROM information_schema.columns
WHERE table_name = 'tableName'
  AND is_nullable = 'YES'; -- 'YES' = nullable, 'NO' = not nullable

How to insert a single row in the parent table and then multiple rows in the child table in single SQL in PostgreSQL?

Please, find below my schema:
-- Lookup table: sales reps, unique by rep text.
CREATE TABLE reps (
id SERIAL PRIMARY KEY,
rep TEXT NOT NULL UNIQUE
);
-- Lookup table: payment terms, unique by terms text.
CREATE TABLE terms (
id SERIAL PRIMARY KEY,
terms TEXT NOT NULL UNIQUE
);
-- Lookup table: shipping methods, unique by ship_via text.
CREATE TABLE shipVia (
id SERIAL PRIMARY KEY,
ship_via TEXT NOT NULL UNIQUE
);
-- Parent table: one row per invoice, referencing the three lookup tables.
-- Only rep_id is mandatory; term_id and ship_via_id may be NULL.
CREATE TABLE invoices (
id SERIAL PRIMARY KEY,
customer TEXT NOT NULL CONSTRAINT customerNotEmpty CHECK(customer <> ''),
term_id INT REFERENCES terms,
rep_id INT NOT NULL REFERENCES reps,
ship_via_id INT REFERENCES shipVia,
-- "..." marks columns elided in the original post (not valid SQL as written).
...
item_count INT NOT NULL,
modified TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
version INT NOT NULL DEFAULT 0
);
-- Child table: line items of an invoice; rows are removed together with their
-- invoice (ON DELETE CASCADE).
CREATE TABLE invoiceItems (
id SERIAL PRIMARY KEY,
invoice_id INT NOT NULL REFERENCES invoices ON DELETE CASCADE,
name TEXT NOT NULL CONSTRAINT nameNotEmpty CHECK(name <> ''),
description TEXT,
qty INT NOT NULL CONSTRAINT validQty CHECK (qty > 0),
price DOUBLE PRECISION NOT NULL
);
I am trying to insert an invoice along with its invoice items in one SQL using writable CTE. I am currently stuck with the following SQL statement:
WITH new_invoice AS (
INSERT INTO invoices (id, customer, term_id, ship_via_id, rep_id, ..., item_count)
SELECT $1, $2, t.id, s.id, r.id, ..., $26
FROM reps r
JOIN terms t ON t.terms = $3
JOIN shipVia s ON s.ship_via = $4
WHERE r.rep = $5
RETURNING id
) INSERT INTO invoiceItems (invoice_id, name, qty, price, description) VALUES
(new_invoice.id,$27,$28,$29,$30)
,(new_invoice.id,$31,$32,$33,$34)
,(new_invoice.id,$35,$36,$37,$38);
Of course, this SQL is wrong, here is what PostgreSQL 9.2 has to say about it:
ERROR: missing FROM-clause entry for table "new_invoice"
LINE 13: (new_invoice.id,$27,$28,$29,$30)
^
********** Error **********
ERROR: missing FROM-clause entry for table "new_invoice"
SQL state: 42P01
Character: 704
Is it possible at all?
EDIT 1
I am trying the following version:
PREPARE insert_invoice_3 AS WITH
new_invoice AS (
INSERT INTO invoices (id, customer, term_id, ship_via_id, rep_id, ..., item_count)
SELECT $1, $2, t.id, s.id, r.id, ..., $26
FROM reps r
JOIN terms t ON t.terms = $3
JOIN shipVia s ON s.ship_via = $4
WHERE r.rep = $5
RETURNING id
),
v (name, qty, price, description) AS (
VALUES ($27,$28,$29,$30)
,($31,$32,$33,$34)
,($35,$36,$37,$38)
)
INSERT INTO invoiceItems (invoice_id, name, qty, price, description)
SELECT new_invoice.id, v.name, v.qty, v.price, v.description
FROM v, new_invoice;
And here is what I get in return:
ERROR: column "qty" is of type integer but expression is of type text
LINE 19: SELECT new_invoice.id, v.name, v.qty, v.price, v.descriptio...
^
HINT: You will need to rewrite or cast the expression.
********** Error **********
ERROR: column "qty" is of type integer but expression is of type text
SQL state: 42804
Hint: You will need to rewrite or cast the expression.
Character: 899
I guess v (name, qty, price, description) is not enough, the data types must be specified as well. However, v (name, qty INT, price, description) does not work - syntax error.
EDIT 2
Next, I have just tried the second version:
PREPARE insert_invoice_3 AS WITH
new_invoice AS (
INSERT INTO invoices (id, customer, term_id, ship_via_id, rep_id, ..., item_count)
SELECT $1, $2, t.id, s.id, r.id, ..., $26
FROM reps r
JOIN terms t ON t.terms = $3
JOIN shipVia s ON s.ship_via = $4
WHERE r.rep = $5
RETURNING id
)
INSERT INTO invoiceItems (invoice_id, name, qty, price, description)
(
SELECT i.id, $27, $28, $29, $30 FROM new_invoice i
UNION ALL
SELECT i.id, $31, $32, $33, $34 FROM new_invoice i
UNION ALL
SELECT i.id, $35, $36, $37, $38 FROM new_invoice i
);
Here is what I get:
ERROR: column "qty" is of type integer but expression is of type text
LINE 15: SELECT i.id, $27, $28, $29, $30 FROM new_invoice i
^
HINT: You will need to rewrite or cast the expression.
********** Error **********
ERROR: column "qty" is of type integer but expression is of type text
SQL state: 42804
Hint: You will need to rewrite or cast the expression.
Character: 759
Seems like the same error. It is interesting that if I remove all the UNION ALL and leave just one SELECT statement - it works!
EDIT 3
Why do I have to cast the parameters? Is it possible to specify the type of columns in the CTE?
PostgreSQL has such an extended interpretation of the VALUES clause that it may be used as a subquery by itself.
So you may express your query in this form:
-- Sketch ("..." marks elided parts): insert the invoice, then one item row per
-- VALUES row, each paired with the freshly returned invoice id.
WITH new_invoice AS (
INSERT INTO ...
RETURNING id
),
-- a = name, b = qty, c = price, d = description.
-- Untyped parameters in a standalone VALUES list resolve to text, which causes
-- 'column "qty" is of type integer but expression is of type text'.
-- The casts give b and c the types of the target columns.
v(a,b,c,d) AS (values
($27,$28::int,$29::double precision,$30),
($31,$32::int,$33::double precision,$34),
...
)
INSERT INTO invoiceItems (invoice_id, name, qty, price, description)
SELECT new_invoice.id, a,b,c,d FROM new_invoice CROSS JOIN v;
That assumes you want to insert the cartesian product of new_invoice and the values, which mostly makes sense if new_invoice is actually a single-row value.
-- Variant that keeps INSERT ... VALUES: every row reads the new invoice id
-- through a scalar subquery on the new_invoice CTE.
-- "..." marks parts elided in the original post.
-- NOTE(review): presumably relies on new_invoice returning exactly one row - confirm.
WITH new_invoice AS (
INSERT INTO invoices ...
RETURNING id
)
INSERT INTO invoiceItems (invoice_id, name, qty, price, description)
VALUES ((select id from new_invoice), $27 , $28, $29, $30),
((select id from new_invoice), $31 , $32, $33, $34),
((select id from new_invoice), $35 , $36, $37, $38);
Instead of insert ... values ...., use insert ... select ...:
) INSERT INTO invoiceItems (invoice_id, name, qty, price, description)
SELECT new_invoice.id,$27,$28,$29,$30 FROM new_invoice
UNION ALL
...

in T-SQL, is it possible to find names of columns containing NULL in a given row (without knowing all column names)?

Is it possible in T-SQL to write a proper query reflecting this pseudo-code:
SELECT {primary_key}, {column_name}
FROM {table}
WHERE {any column_name value} is NULL
i.e. without referencing each column-name explicitly.
Sounds simple enough but I've searched pretty extensively and found nothing.
You have to use dynamic sql to solve that problem. I have demonstrated how it could be done.
With this SQL you can pick a table and check the row with id = 1 for columns that are NULL and for primary-key columns. I included a test table at the bottom of the script. The code will not display anything if there are no primary keys and no columns that are NULL.
-- For one chosen row of one table, prints a single result row whose columns are
-- the key columns (value 'PRIMARY KEY') followed by the non-key columns that
-- are NULL in that row (value 'NULL').
-- Variables use the T-SQL @ sigil; # would denote a temporary table.
-- Buffers are sized so that long table or column names are not truncated.
-- NOTE(review): @table_name and @chosencolumn are concatenated into dynamic SQL;
-- never feed them from untrusted input.
DECLARE @table_name VARCHAR(128)
DECLARE @chosencolumn VARCHAR(200)
DECLARE @sqlstring VARCHAR(MAX)
DECLARE @sqlstring2 VARCHAR(MAX)
DECLARE @text VARCHAR(MAX)
DECLARE @t TABLE (col1 VARCHAR(200), dummy INT)
SET @table_name = 'test_table' -- replace with your tablename if you want
SET @chosencolumn = 'ID=1' -- replace with criteria for selected row
-- One "UNION ALL SELECT ..." branch per non-key column; a branch yields a row
-- only when that column is NULL in the chosen row. 1000 sorts them after the keys.
SELECT @sqlstring = COALESCE(@sqlstring, '') + 'UNION ALL SELECT '',''''NULL '''' '' + '''+t1.column_name+''', 1000 ordinal_position FROM ['+@table_name+'] WHERE [' +t1.column_name+ '] is null and ' +@chosencolumn+ ' '
FROM INFORMATION_SCHEMA.COLUMNS t1
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE t2
ON t1.column_name = t2.column_name
AND t1.table_name = t2.table_name
AND t1.table_schema = t2.table_schema
WHERE t1.table_name = @table_name
AND t2.column_name is null
-- Prepend the key-column branch; STUFF removes its leading 'UNION ALL ' (10 chars).
SET @sqlstring = stuff('UNION ALL SELECT '',''''PRIMARY KEY'''' ''+ column_name + '' '' col1, ordinal_position
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
WHERE table_name = ''' + @table_name+ '''' + @sqlstring, 1, 10, '') + 'order by 2'
INSERT @t
EXEC( @sqlstring)
-- Glue the fragments into one select list; STUFF drops the leading comma.
SELECT @text = COALESCE(@text, '') + col1
FROM @t
SET @sqlstring2 ='select '+stuff(@text,1,1,'')
EXEC( @sqlstring2)
Result:
id host_id date col1
PRIMARY KEY PRIMARY KEY PRIMARY KEY NULL
Test table
-- Test fixture: composite clustered primary key (id, host_id, date) and two
-- nullable varchar columns.
CREATE TABLE [dbo].[test_table] (
    [id]      [int]         NOT NULL,
    [host_id] [int]         NOT NULL,
    [date]    [datetime]    NOT NULL,
    [col1]    [varchar](20) NULL,
    [col2]    [varchar](20) NULL,
    CONSTRAINT [PK_test_table]
        PRIMARY KEY CLUSTERED ([id] ASC, [host_id] ASC, [date] ASC)
)
Test data
-- One sample row: col1 is NULL, col2 is set. The explicit column list keeps the
-- insert valid if columns are later added to or reordered in test_table.
INSERT INTO test_table (id, host_id, [date], col1, col2)
VALUES (1, 1, getdate(), null, 'somevalue')

Updating duplicates from one-to-many relationships.

This isn't your standard "how do I find duplicates" question; I know how to find duplicates, see below. The question is: how do I update those records when they also have child items that must match?
Alright, I'm going to give you whole scenario so that you can work with this problem.
Duplicate records could be inserted as a result of critical system failure.
Finding later duplicates and marking the parent commission_import_commission_junction "is_processed = True" solves this problem.
The complication is that the commission_import_commission_junction and its children commission_import_commission_junction_line_items must be identical on the columns to compare.
the tables are:
commission_import_commission_junction
- id
- created_date
- some columns that are checked for duplication
- some columns that are not checked for duplication
commission_import_commission_junction_line_items
- id
- some columns that are checked for duplication
- some columns that are not checked for duplication
(For the full table spec, check out the CREATE TABLE statements in the bottom-most block of code.)
The query to mark duplicates on just the parent table commission_import_commission_junction:
-- Marks every later copy of a duplicated commission_import_commission_junction
-- row as processed; the earliest copy (smallest created_date) stays untouched.
-- Only parent columns are compared. The line-item table is deliberately not
-- joined: it also has posting_date and sale_amount, which made those column
-- references ambiguous, and it made COUNT(*) count line items instead of parents.
UPDATE commission_import_commission_junction cicj
SET is_processed = TRUE
FROM (
    SELECT
        MIN(created_date) AS first_date,
        member_id, site_id, action_status, action_type, ad_id, commission_id,
        country, event_date, locking_date, order_id, original, original_action_id,
        posting_date, website_id, advertiser_name, commission_amount, sale_amount,
        aggregator_affiliate_id
    FROM commission_import_commission_junction inner_imports
    GROUP BY
        member_id, site_id, action_status, action_type, ad_id, commission_id,
        country, event_date, locking_date, order_id, original, original_action_id,
        posting_date, website_id, advertiser_name, commission_amount, sale_amount,
        aggregator_affiliate_id
    HAVING COUNT(*) > 1
) AS dups
WHERE
    -- MAIN TABLE COLUMN LIST (IS NOT DISTINCT FROM treats NULLs as equal)
    (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id,
     cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date,
     cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date,
     cicj.website_id, cicj.advertiser_name, cicj.commission_amount,
     cicj.sale_amount, cicj.aggregator_affiliate_id)
    IS NOT DISTINCT FROM
    -- OTHER TABLE COLUMN LIST
    (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id,
     dups.commission_id, dups.country, dups.event_date, dups.locking_date,
     dups.order_id, dups.original, dups.original_action_id, dups.posting_date,
     dups.website_id, dups.advertiser_name, dups.commission_amount,
     dups.sale_amount, dups.aggregator_affiliate_id)
    AND cicj.created_date <> dups.first_date
    AND cicj.is_processed = FALSE;
Somewhere and somehow I need to check that the line_items are also duplicates.
THE CODE BELOW IS TO SETUP THE DATABASE, remember this is postgres specific.
-- "commission_import_build" is a record that keeps information about the process of collecting the commission information. Duplicate commission_import_commission_junction records will not exist with the same commission_import_build_id
-- "commission_import_commission_junction" is a record description commission information from a customers purchase.
-- "commission_import_commission_junction_line_items" are records describing items in that purchase.
-- Reset: drop child tables before the tables they reference.
DROP TABLE IF EXISTS commission_import_commission_junction_line_items;
DROP TABLE IF EXISTS commission_import_commission_junction;
DROP TABLE IF EXISTS commission_import_builds;

-- One row per import run; build_date is unique.
CREATE TABLE commission_import_builds (
    commission_import_build_id SERIAL      NOT NULL,
    build_date                 TIMESTAMPTZ NOT NULL,
    CONSTRAINT pkey_commission_import_build_id
        PRIMARY KEY (commission_import_build_id),
    CONSTRAINT commission_import_builds_build_date_key
        UNIQUE (build_date)
);

INSERT INTO commission_import_builds (commission_import_build_id, build_date)
VALUES
    (1, '2011-01-01'),
    (2, '2011-01-02'),
    (3, '2011-01-03');
-- Parent table: one row per imported commission, linked to the build that
-- imported it. The sample data is the same commission imported by three builds.
CREATE TABLE commission_import_commission_junction (
    commission_import_commission_junction_id SERIAL NOT NULL,
    member_id                  INTEGER,
    site_id                    INTEGER,
    action_status              VARCHAR       NOT NULL,
    action_type                VARCHAR       NOT NULL,
    ad_id                      BIGINT,
    commission_id              BIGINT        NOT NULL,
    country                    VARCHAR,
    event_date                 TIMESTAMPTZ   NOT NULL,
    locking_date               TIMESTAMPTZ,
    order_id                   VARCHAR       NOT NULL,
    original                   BOOLEAN,
    original_action_id         BIGINT        NOT NULL,
    posting_date               TIMESTAMPTZ   NOT NULL,
    website_id                 BIGINT        NOT NULL,
    advertiser_name            VARCHAR,
    commission_amount          NUMERIC(19,2) NOT NULL,
    sale_amount                NUMERIC(19,2) NOT NULL,
    aggregator_affiliate_id    INTEGER       NOT NULL,
    is_processed               BOOLEAN       NOT NULL DEFAULT false,
    created_date               TIMESTAMPTZ   NOT NULL DEFAULT now(),
    member_transaction_id      INTEGER,
    commission_import_build_id INTEGER       NOT NULL,
    CONSTRAINT pkey_commission_import_commission_junction_commission_import_co
        PRIMARY KEY (commission_import_commission_junction_id),
    CONSTRAINT fk_commission_import_commission_junction_commission_import_buil
        FOREIGN KEY (commission_import_build_id)
        REFERENCES commission_import_builds (commission_import_build_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

CREATE INDEX idx_commission_import_commission_junction_is_processed
    ON commission_import_commission_junction USING btree (is_processed);

-- Three rows that differ only in id, build and created_date.
INSERT INTO commission_import_commission_junction (
    commission_import_commission_junction_id, action_status, action_type,
    commission_id, event_date, order_id, original_action_id, posting_date,
    website_id, commission_amount, sale_amount, aggregator_affiliate_id,
    commission_import_build_id, created_date
)
VALUES
    (1, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 1, '2011-02-05'),
    (2, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 2, '2011-02-06'),
    (3, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 3, '2011-02-07');

SELECT * FROM commission_import_commission_junction;
-- Child table: the items of one imported commission.
CREATE TABLE commission_import_commission_junction_line_items (
    commission_import_commission_junction_line_item_id SERIAL  NOT NULL,
    commission_import_commission_junction_id           INTEGER NOT NULL,
    sku          VARCHAR,
    quantity     INTEGER,
    posting_date TIMESTAMPTZ,
    sale_amount  NUMERIC(19,2),
    discount     NUMERIC(19,2),
    CONSTRAINT pkey_commission_import_commission_junction_link_items_commissio
        PRIMARY KEY (commission_import_commission_junction_line_item_id),
    CONSTRAINT fkey_commission_import_commission_junction_line_items_commissio
        FOREIGN KEY (commission_import_commission_junction_id)
        REFERENCES commission_import_commission_junction (commission_import_commission_junction_id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
);

-- The same three items for each of the three parent rows.
INSERT INTO commission_import_commission_junction_line_items
    (commission_import_commission_junction_id, sku, quantity, sale_amount)
VALUES
    (1, 'test1', 3, 23.45),
    (1, 'test2', 3, 67.50),
    (1, 'test3', 3, 32.50),
    (2, 'test1', 3, 23.45),
    (2, 'test2', 3, 67.50),
    (2, 'test3', 3, 32.50),
    (3, 'test1', 3, 23.45),
    (3, 'test2', 3, 67.50),
    (3, 'test3', 3, 32.50);
Reminds me of duplicate elimination in direct marketing mailing lists
Regardless of the details of your tables, a parent-child dupe elimination algorithm follows these steps:
1) Get duplicates into a list that matches old key to new key (temp table)
2) Update the foreign key in the child table
3) Delete the dupes from the parent
I admire the detail in your post, but I'm going to keep it simple and easier to read with some example table/column names:
-- step 1, get the list
-- Warning: t-sql syntax, adjust for Postgres
-- if it doesn't like placement of "into..." clause
-- #DupeList maps each duplicate row (dupeKey) to the row kept for its group
-- (keepKey = smallest primaryKey among the rows sharing the dupe criteria).
select keep.primaryKey as keepKey
     , dupe.primaryKey as dupeKey
into #DupeList
from (
    select min(primaryKey) as primaryKey
         , dupeCriteria1
         , dupeCriteria2
    from theTable
    group by dupeCriteria1, dupeCriteria2
    having count(*) > 1
) keep
join theTable dupe
    on keep.dupeCriteria1 = dupe.dupeCriteria1
   and keep.dupeCriteria2 = dupe.dupeCriteria2
   and keep.primaryKey <> dupe.primaryKey
Once you have that, update the foreign key in the child table:
-- step 2: repoint child rows from each duplicate parent to the parent being kept.
-- Reads #DupeList, the temp table filled in step 1.
update childTable
set foreignKey = #DupeList.keepKey
from #DupeList
where childTable.foreignKey = #DupeList.dupeKey
Then just delete everything out of the parent table:
-- step 3: remove the duplicate parents listed in #DupeList (filled in step 1).
delete from parentTable
where primaryKey in (select dupeKey from #DupeList)
-- removeCommissionImportCommissionJunctionDuplicates: marks later copies of a
-- commission as processed when both the parent columns and the complete set of
-- line items match the earliest copy.
-- Returns the number of rows that were marked.
--
-- Line items are compared as multisets with EXCEPT ALL in both directions, so:
--   * duplicates without any line items are handled (array_upper() of an empty
--     array is NULL, which made the old size check never succeed);
--   * [A, A] no longer matches [A, B] (the old loop counted both A's as matched).
CREATE FUNCTION removeCommissionImportCommissionJunctionDuplicates() RETURNS INT AS $BODY$
DECLARE
    duplicate      RECORD;   -- a later, unprocessed copy plus its group's first_date
    v_original_id  INT;      -- id of the earliest copy the duplicate is compared with
    v_items_differ BOOLEAN;  -- TRUE when the two line-item multisets are not equal
    num_updates    INT := 0;
BEGIN
    FOR duplicate IN
        SELECT cicj.*, dups.first_date
        FROM commission_import_commission_junction cicj
        JOIN (
            SELECT
                MIN(created_date) AS first_date,
                member_id, site_id, action_status, action_type, ad_id, commission_id,
                country, event_date, locking_date, order_id, original,
                original_action_id, posting_date, website_id, advertiser_name,
                commission_amount, sale_amount, aggregator_affiliate_id
            FROM commission_import_commission_junction inner_imports
            GROUP BY
                member_id, site_id, action_status, action_type, ad_id, commission_id,
                country, event_date, locking_date, order_id, original,
                original_action_id, posting_date, website_id, advertiser_name,
                commission_amount, sale_amount, aggregator_affiliate_id
            HAVING COUNT(*) > 1
        ) AS dups
            ON (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type,
                cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date,
                cicj.locking_date, cicj.order_id, cicj.original,
                cicj.original_action_id, cicj.posting_date, cicj.website_id,
                cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount,
                cicj.aggregator_affiliate_id)
               IS NOT DISTINCT FROM
               (dups.member_id, dups.site_id, dups.action_status, dups.action_type,
                dups.ad_id, dups.commission_id, dups.country, dups.event_date,
                dups.locking_date, dups.order_id, dups.original,
                dups.original_action_id, dups.posting_date, dups.website_id,
                dups.advertiser_name, dups.commission_amount, dups.sale_amount,
                dups.aggregator_affiliate_id)
        WHERE cicj.created_date != dups.first_date
          AND cicj.is_processed = FALSE
    LOOP
        -- Earliest copy of this group. ORDER BY + LIMIT makes the choice
        -- deterministic if several rows share the earliest created_date.
        SELECT orig.commission_import_commission_junction_id
        INTO v_original_id
        FROM commission_import_commission_junction orig
        WHERE (orig.member_id, orig.site_id, orig.action_status, orig.action_type,
               orig.ad_id, orig.commission_id, orig.country, orig.event_date,
               orig.locking_date, orig.order_id, orig.original,
               orig.original_action_id, orig.posting_date, orig.website_id,
               orig.advertiser_name, orig.commission_amount, orig.sale_amount,
               orig.aggregator_affiliate_id)
              IS NOT DISTINCT FROM
              (duplicate.member_id, duplicate.site_id, duplicate.action_status,
               duplicate.action_type, duplicate.ad_id, duplicate.commission_id,
               duplicate.country, duplicate.event_date, duplicate.locking_date,
               duplicate.order_id, duplicate.original, duplicate.original_action_id,
               duplicate.posting_date, duplicate.website_id, duplicate.advertiser_name,
               duplicate.commission_amount, duplicate.sale_amount,
               duplicate.aggregator_affiliate_id)
          AND orig.created_date = duplicate.first_date
        ORDER BY orig.commission_import_commission_junction_id
        LIMIT 1;

        -- Any row left over by either EXCEPT ALL means the line items differ.
        SELECT EXISTS (
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = v_original_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
            )
            UNION ALL
            (
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id
                      = duplicate.commission_import_commission_junction_id
                EXCEPT ALL
                SELECT li.sku, li.quantity, li.posting_date, li.sale_amount, li.discount
                FROM commission_import_commission_junction_line_items li
                WHERE li.commission_import_commission_junction_id = v_original_id
            )
        )
        INTO v_items_differ;

        IF NOT v_items_differ THEN
            -- Parent and all line items match: mark the later copy as processed.
            UPDATE commission_import_commission_junction cicj
            SET is_processed = TRUE
            WHERE cicj.commission_import_commission_junction_id
                  = duplicate.commission_import_commission_junction_id;
            num_updates := num_updates + 1;
        END IF;
    END LOOP;

    RETURN num_updates;
END;
$BODY$ LANGUAGE plpgsql;