I am trying to create a PGSQL function that uses HTTP to get some JSON, format it, and insert it into a table, using the donation_id key as a row constraint to prevent duplicates.
I have tried this function:
BEGIN
INSERT INTO donations(
donation_id, amount, avatar_image_url, created_date_utc, display_name, donor_id, event_id, incentive_id, message, participant_id, team_id)
ON CONFLICT (donation_id) DO NOTHING
SELECT elms::jsonb->>'donationID' AS donation_id ,
(elms::jsonb->>'amount')::float8 AS amount ,
elms::jsonb->>'avatarImageURL' AS avatar_image_url ,
(elms::jsonb->>'createdDateUTC')::timestamptz AS created_date_utc ,
elms::jsonb->>'displayName' AS display_name ,
elms::jsonb->>'donorID' AS donor_id ,
(elms::jsonb->>'eventID')::int AS event_id ,
elms::jsonb->>'incentiveID' AS incentive_id ,
elms::jsonb->>'message' AS message ,
(elms::jsonb->>'participantID')::int AS participant_id ,
(elms::jsonb->>'teamID')::int AS team_id
FROM (
select jsonb_array_elements(content::jsonb) AS elms
from http_get('https://extralife.donordrive.com/api/teams/59881/donations/')) as alias;
END;
I'm not quite understanding what I am doing wrong with the ON CONFLICT part of the query, only that it is apparently not valid syntax. I'd appreciate some insight, as I'm not quite grasping the explanation in the docs.
Assuming a test table:
drop table if exists donations;
create table donations (
donation_id text primary key,
amount float8,
avatar_image_url text,
created_date_utc timestamptz,
display_name text,
donor_id text,
event_id int,
incentive_id text,
message text,
participant_id int,
team_id int);
It will work once you move the ON CONFLICT (donation_id) DO NOTHING to the end of the query:
INSERT INTO donations(donation_id, amount, avatar_image_url, created_date_utc,
display_name, donor_id, event_id, incentive_id, message, participant_id,
team_id)
SELECT elms::jsonb->>'donationID' AS donation_id ,
(elms::jsonb->>'amount')::float8 AS amount ,
elms::jsonb->>'avatarImageURL' AS avatar_image_url ,
(elms::jsonb->>'createdDateUTC')::timestamptz AS created_date_utc ,
elms::jsonb->>'displayName' AS display_name ,
elms::jsonb->>'donorID' AS donor_id ,
(elms::jsonb->>'eventID')::int AS event_id ,
elms::jsonb->>'incentiveID' AS incentive_id ,
elms::jsonb->>'message' AS message ,
(elms::jsonb->>'participantID')::int AS participant_id ,
(elms::jsonb->>'teamID')::int AS team_id
FROM ( select jsonb_array_elements('[
{
"displayName": "Christine",
"donorID": "A05C2C1E5DE15CDC",
"links": {
"recipient": "https://assets.yourdomain.com/somelink"
},
"eventID": 552,
"createdDateUTC": "2022-09-18T14:08:35.227+0000",
"recipientName": "Have A Drink Show",
"participantID": 494574,
"amount": 50,
"avatarImageURL": "https://assets.yourdomain.com/asset.gif",
"teamID": 59881,
"donationID": "FDBB61C5C8FFB3AE"
}
]'::jsonb) AS elms) as alias
ON CONFLICT (donation_id) DO NOTHING;
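For completeness, folding the fix back into your original function body gives the following. This is a sketch assuming the http extension's http_get() exactly as used in the question:
BEGIN
INSERT INTO donations(
    donation_id, amount, avatar_image_url, created_date_utc, display_name,
    donor_id, event_id, incentive_id, message, participant_id, team_id)
SELECT elms::jsonb->>'donationID' AS donation_id,
       (elms::jsonb->>'amount')::float8 AS amount,
       elms::jsonb->>'avatarImageURL' AS avatar_image_url,
       (elms::jsonb->>'createdDateUTC')::timestamptz AS created_date_utc,
       elms::jsonb->>'displayName' AS display_name,
       elms::jsonb->>'donorID' AS donor_id,
       (elms::jsonb->>'eventID')::int AS event_id,
       elms::jsonb->>'incentiveID' AS incentive_id,
       elms::jsonb->>'message' AS message,
       (elms::jsonb->>'participantID')::int AS participant_id,
       (elms::jsonb->>'teamID')::int AS team_id
FROM (
    SELECT jsonb_array_elements(content::jsonb) AS elms
    FROM http_get('https://extralife.donordrive.com/api/teams/59881/donations/')) AS alias
ON CONFLICT (donation_id) DO NOTHING;
END;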
Create OR REPLACE PROCEDURE adduser(
fname varchar , lname varchar , pass varchar , uname varchar ,
cnumber integer, cname varchar
)
language SQL
BEGIN ATOMIC
insert into users(firstname , lastname , password , username ) Values (fname , lname , pass , uname);
select * from users AS uids;
insert into users_card (cardnumber , cardname , userid) values (cnumber , cname , uids);
END ;
If I understand you correctly, you first want to insert data into the users table, then get the userid (uuid) of the inserted row and insert it into another table.
Sample:
do
$body$
declare
vuserid uuid;
begin
insert into users (first_name, last_name)
values ('Jon', 'Smith')
returning uids into vuserid;
insert into users_card(card_number, card_name, userid)
values (123, 'test', vuserid);
end;
$body$
language plpgsql;
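Folding that back into the procedure from the question, a sketch in plpgsql (switching from LANGUAGE SQL / BEGIN ATOMIC to plpgsql so RETURNING ... INTO is available; it assumes users has a generated uuid primary key, and the column name userid here is hypothetical):
create or replace procedure adduser(
    fname varchar, lname varchar, pass varchar, uname varchar,
    cnumber integer, cname varchar)
language plpgsql
as $$
declare
    vuserid uuid;
begin
    -- insert the user and capture the generated id
    insert into users (firstname, lastname, password, username)
    values (fname, lname, pass, uname)
    returning userid into vuserid;
    -- use the captured id for the dependent row
    insert into users_card (cardnumber, cardname, userid)
    values (cnumber, cname, vuserid);
end;
$$;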
I tried to answer this question, then found out I cannot solve it.
Basic idea: propagate twice, from country_id to state_id, and from state_id to city_id. The country_id then needs to be joined twice: when we do array_agg at the state level we need to explicitly join on country_id, and at the city level we also need to join on country_id.
Reference link: https://github.com/hettie-d/NORM/tree/master/sql
Basic idea: given one country_id as input, all the relevant country-, state-, and city-level information will be transformed to JSON format.
Preparation. I use country_id, state_id, city_id, since they are more descriptive.
begin;
create table public.country(country_id bigint primary key , name text, leader text);
create table public.states(state_id bigint primary key, name text, population bigint,country_id bigint REFERENCES public.country (country_id));
create table public.cities(city_id bigint,name text,state_id bigint REFERENCES public.states (state_id));
insert into public.country values ( 1, 'India', 'Narendra Modi');
insert into public.country values ( 2 , 'USA', 'Joe Biden');
insert into public.country values ( 3 , 'Australia', 'Scott Morrison');
insert into public.states values( 1 ,'California' , 39500000 , 2);
insert into public.states values( 2 , 'Washington' , 7610000 ,2 );
insert into public.states values( 4 , 'Karnataka' , 64100000,1);
insert into public.states values( 5 , 'Rajasthan' , 68900000,1 );
insert into public.states values( 6 , 'Maharashtra' , 125700000,1 );
insert into public.cities values( 1 , 'Mumbai' , 6 );
insert into public.cities values( 2 , 'Pune' , 6 );
insert into public.cities values( 3 , 'San Francisco' , 1 );
commit;
-- create composite types
begin;
create type city_record as(city_name text);
create type state_record as (state_name text, population bigint,cities city_record[]);
create type country_record as (country_name text, leader text, states state_record[]);
commit;
The array_transport helper:
create or replace
function array_transport (all_items anyarray) returns setof text
returns null on null input
language plpgsql as
$body$
declare
item record;
begin
foreach item in array all_items
loop
return next(to_json(item)::text);
end loop;
end;
$body$;
-- the main function, country_select_json
create or replace function country_select_json (_country_id bigint)
returns country_record[]
as
$$
declare
_result text;
begin
select array_agg(single_item)
from (select
array_agg(row(
co.name,
co.leader,
(select array_agg(row
(s.name,
s.population,
(select array_agg
(row
(c.name)::city_record)
from cities c
join states s using (state_id)
where s.country_id = co.country_id)
)::state_record) from states s where s.country_id = co.country_id
)
)::country_record)
as single_item
from country co
where co.country_id = _country_id)y into _result;
-- raise info 'state_record test: %', _result;
return (_result);
end
$$ language plpgsql;
Run:
select * from array_transport(country_select_json(1));
{"country_name":"India","leader":"Narendra Modi","states":[{"state_name":"Karnataka","population":64100000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]},{"state_name":"Rajasthan","population":68900000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]},{"state_name":"Maharashtra","population":125700000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]}]}
(1 row)
The country level and state level are OK, but the city level is wrong. How can I solve this problem?
Expected Result:
{"country_name":"India","leader":"Narendra Modi","states":[{"state_name":"Karnataka","population":64100000,"cities":[NULL]},{"state_name":"Rajasthan","population":68900000,"cities":[NULL]},{"state_name":"Maharashtra","population":125700000,"cities":[{"city_name":"Mumbai"},{"city_name":"Pune"}]}]}
Update 2022-03-04.
(select array_agg
(c.name) as city_name
from cities c
join states s using (state_id)
where s.country_id = co.country_id)
Now I know the problem: the aggregation runs first from city, then to state, then to country. Once the function receives the country_id, every city name related to that country gets pulled together for each state.
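With that diagnosis, a minimal sketch of the fix inside the original function would be to correlate the innermost aggregate on the current state's state_id instead of re-joining on country_id (assuming the tables and composite types above):
(select array_agg(row(c.name)::city_record)
 from cities c
 where c.state_id = s.state_id)  -- correlate on the outer states alias s
This way each state aggregates only its own cities, and states with no cities get a NULL cities array, matching the expected result.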
Does this query answer your needs? (Result below.)
with ci as (
    select cities.state_id,
           jsonb_agg(jsonb_build_object('city_name', cities.name)) as cities
    from cities
    group by state_id
)
select jsonb_pretty(jsonb_build_object(
    'country_name', c.name,
    'leader', c.leader,
    'states', jsonb_agg(jsonb_build_object(
        'state_name', s.name,
        'population', s.population,
        'cities', ci.cities
    ))
))
from country c
left join states s on s.country_id = c.country_id
left join ci on ci.state_id = s.state_id
where c.country_id = 1
group by c.name, c.leader;
-- Result
{
    "country_name": "India",
    "leader": "Narendra Modi",
    "states": [
        {
            "state_name": "Maharashtra",
            "population": 125700000,
            "cities": [
                {
                    "city_name": "Mumbai"
                },
                {
                    "city_name": "Pune"
                }
            ]
        },
        {
            "state_name": "Rajasthan",
            "population": 68900000,
            "cities": null
        },
        {
            "state_name": "Karnataka",
            "population": 64100000,
            "cities": null
        }
    ]
}
Given the following table:
CREATE TABLE event_partitioned (
customer_id varchar(50) NOT NULL,
user_id varchar(50) NOT NULL,
event_id varchar(50) NOT NULL,
comment varchar(50) NOT NULL,
event_timestamp timestamp with time zone DEFAULT NOW()
)
PARTITION BY RANGE (event_timestamp);
And partitioning by calendar week [one example]:
CREATE TABLE event_partitioned_2020_51 PARTITION OF event_partitioned
FOR VALUES FROM ('2020-12-14') TO ('2020-12-20');
And the unique constraint [event_timestamp is necessary since it is the partition key]:
ALTER TABLE event_partitioned
ADD UNIQUE (customer_id, user_id, event_id, event_timestamp);
I would like to update if customer_id, user_id, event_id exist, otherwise insert:
INSERT INTO event_partitioned (customer_id, user_id, event_id)
VALUES ('9', '99', '999')
ON CONFLICT (customer_id, user_id, event_id, event_timestamp) DO UPDATE
SET comment = 'I got updated';
But I cannot add a unique constraint on customer_id, user_id, event_id alone; event_timestamp has to be part of it.
So this will insert duplicates of customer_id, user_id, event_id. That remains true even when adding now() as a fourth value, unless now() precisely matches what's already in event_timestamp.
Is there a way that ON CONFLICT could be less 'granular' here and update if now() falls in the week of the partition, rather than matching precisely on '2020-12-14 09:13:04.543256', for example?
Basically I am trying to avoid duplication of customer_id, user_id, event_id, at least within a week, but still benefit from partitioning by week (so that data retrieval can be narrowed to a date range and not scan the entire partitioned table).
I don't think you can do this with on conflict in a partitioned table. You can, however, express the logic with CTEs:
with
data as ( -- data
select '9' as customer_id, '99' as user_id, '999' as event_id
),
ins as ( -- insert if not exists
insert into event_partitioned (customer_id, user_id, event_id)
select * from data d
where not exists (
select 1
from event_partitioned ep
where
ep.customer_id = d.customer_id
and ep.user_id = d.user_id
and ep.event_id = d.event_id
)
returning *
)
update event_partitioned ep -- update if insert did not happen
set comment = 'I got updated'
from data d
where
ep.customer_id = d.customer_id
and ep.user_id = d.user_id
and ep.event_id = d.event_id
and not exists (select 1 from ins)
@GMB's answer is great and works well. Since enforcing a unique constraint on a partitioned table (the parent table) partitioned by time range is usually not that useful, why not just place a unique constraint/index on the partition itself?
In your case, event_partitioned_2020_51 can have a unique constraint:
ALTER TABLE event_partitioned_2020_51
ADD UNIQUE (customer_id, user_id, event_id, event_timestamp);
And a subsequent query can just use
INSERT INTO event_partitioned_2020_51 ... ON CONFLICT (customer_id, user_id, event_id, event_timestamp)
as long as this is the intended partition, which is usually the case.
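For example, a sketch of the per-partition upsert (values are illustrative; comment is included since it is NOT NULL, and the timestamp must fall inside this partition's range):
INSERT INTO event_partitioned_2020_51
    (customer_id, user_id, event_id, comment, event_timestamp)
VALUES ('9', '99', '999', 'first comment', '2020-12-15')
ON CONFLICT (customer_id, user_id, event_id, event_timestamp)
DO UPDATE SET comment = 'I got updated';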
I have created a stream and I am writing KSQL queries against that stream.
When I run a query while data is arriving I can see records, but when data is not arriving and I run the query, I do not see any of the older records.
So this is my KSQL:
LOG_DIR=./ksql_logs /usr/local/confluent/bin/ksql http://localhost:8088
CREATE STREAM AUDIT_EVENT (
  ID VARCHAR, VERSION VARCHAR, ACTION_TYPE VARCHAR, EVENT_TYPE VARCHAR,
  CLIENT_ID VARCHAR, DETAILS VARCHAR, OBJECT_TYPE VARCHAR, UTC_DATE_TIME VARCHAR,
  POINT_IN_TIME_PRECISION VARCHAR, TIME_ZONE VARCHAR, TIMELINE_PRECISION VARCHAR,
  GROUP_ID VARCHAR, OBJECT_DISPLAY_NAME VARCHAR, OBJECT_ID VARCHAR,
  USER_DISPLAY_NAME VARCHAR, USER_ID VARCHAR, PARENT_EVENT_ID VARCHAR,
  NOTES VARCHAR, SUMMARY VARCHAR, AUDIT_EVENT_TO_UTC_DT VARCHAR,
  AUDIT_EVENT_TO_DATE_PITP VARCHAR, AUDIT_EVENT_TO_DATE_TZ VARCHAR,
  AUDIT_EVENT_TO_DATE_TP VARCHAR
) WITH (KAFKA_TOPIC='AVRO-AUDIT_EVENT', VALUE_FORMAT='AVRO');
SELECT * FROM "AUDIT_EVENT" WHERE CLIENT_ID='fgh-5d1e-17a2-9749-0e4d00';
I have also tried creating a table, but there too I cannot see my older records.
Is there any way I can see the records whenever I run this query?
Run SET 'auto.offset.reset' = 'earliest'; before your SELECT query statement, so the query reads the topic from the beginning instead of only new records.
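For example, with the stream and filter from the question:
SET 'auto.offset.reset' = 'earliest';
SELECT * FROM AUDIT_EVENT WHERE CLIENT_ID='fgh-5d1e-17a2-9749-0e4d00';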
CREATE TABLE s_etpta.sfphierg (
hierar VARCHAR(10) NOT NULL,
libelle VARCHAR(40),
typfct VARCHAR(1),
utilcre VARCHAR(10),
datcre DATE,
utilmod VARCHAR(10),
datmod DATE,
CONSTRAINT i_sfphierg PRIMARY KEY(hierar)
);
CREATE TABLE s_etpta.hopsech (
horsect VARCHAR(40) NOT NULL,
libelle VARCHAR(40),
libcourt VARCHAR(20),
horcode VARCHAR(10),
CONSTRAINT i_hopsech PRIMARY KEY(horsect)
);
BEGIN
delete from SFPHIERG;
INSERT INTO SFPHIERG ("hierar", "libelle", "typfct", "utilcre", "datcre",
"utilmod", "datmod")
select '01'||horcode, E'Hircuit standard'||horcode, E'1', E'HQS', E'2007-01-29', E' ', E'1900-01-01'
FROM HOPSECH where HOPSECH.IJIGHSUPPM like '1'
and not exists (select hierar from SFPHIERG where hierar like '01'||horcode);
INSERT INTO SFPHIERG ("hierar", "libelle", "typfct", "utilcre", "datcre",
"utilmod", "datmod")
select '00'||horcode, E'Circuit cascade'||horcode, E'1', E'HQS', E'2007-01-29', E' ', E'1900-01-01'
FROM HOPSECH where HOPSECH.IJIGHSUPPR like '1'
and not exists (select hierar from SFPHIERG where hierar like '00'||horcode);
END;
In my function's body I have two insert queries executed after a delete query.
The rows are correctly inserted when the key in my first column doesn't already exist; when it does, I get a unique constraint violation, and when this error occurs nothing at all is added to my table.
Is there a way I can stop this error from blocking all the inserts, and skip only the rows whose first column already exists?
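One option, in the spirit of the ON CONFLICT discussion above: since hierar is the primary key, adding ON CONFLICT ("hierar") DO NOTHING to each insert makes PostgreSQL skip just the conflicting rows instead of aborting the whole statement. A sketch of the first insert:
INSERT INTO SFPHIERG ("hierar", "libelle", "typfct", "utilcre", "datcre",
"utilmod", "datmod")
select '01'||horcode, E'Hircuit standard'||horcode, E'1', E'HQS', E'2007-01-29', E' ', E'1900-01-01'
FROM HOPSECH where HOPSECH.IJIGHSUPPM like '1'
ON CONFLICT ("hierar") DO NOTHING;
With ON CONFLICT in place, the not exists (...) guard in each select becomes redundant. Note that ON CONFLICT requires PostgreSQL 9.5 or later.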