Postgres INSERT ON CONFLICT DO UPDATE: "cannot affect row a second time" error (dupes) only occurs in a function, not as a plain query - postgresql

I have a query in a function that does not seem to return any duplicates according to my checks, and if run as a separate query... it works! If run within a stored function, it gives the error ON CONFLICT DO UPDATE command cannot affect row a second time.
This makes no sense to me.
CREATE OR REPLACE FUNCTION rollups.compute_daily_rollups_every_hour(
    start_time timestamp without time zone,
    end_time timestamp without time zone)
RETURNS void
LANGUAGE 'plpgsql'
COST 100
VOLATILE
AS $BODY$
BEGIN
    RAISE NOTICE 'Computing daily rollups from % to % (excluded)', start_time, end_time;
    RAISE NOTICE 'Aggregating data into daily_rollup';
    EXECUTE $$
        INSERT INTO rollups.daily_rollup
        SELECT
            COALESCE(visitors, 0) AS visitors,
            COALESCE(total_dwell_time, 0) AS total_dwell_time,
            d.datestamp::date,
            doorway_id, customer_id, centre_id, postcode, gender, age_group,
            house_income, no_children, no_cars, shopping_frequency,
            marital_status, employment_status
        FROM (
            SELECT date_trunc('day', (current_date - offs)) AS datestamp
            FROM generate_series(0, 365, 1) AS offs
        ) AS d
        LEFT OUTER JOIN (
            SELECT cv.datestamp,
                   round((date_part('epoch'::text, sum(cv.dwell_time)) / 60::double precision)::numeric, 2) AS total_dwell_time,
                   count(cv.sensor_id) AS visitors,
                   cv.doorway_id,
                   cv.customer_id,
                   cv.centre_id,
                   cv.gender,
                   cv.postcode,
                   cv.age_group,
                   cv.no_children,
                   cv.no_cars,
                   cv.marital_status,
                   cv.employment_status,
                   cv.shopping_frequency,
                   cv.house_income
            FROM rollups.some_rollup cv
            WHERE cv.dwell_time > '00:00:30'::interval
            GROUP BY cv.datestamp, cv.doorway_id, cv.customer_id, cv.centre_id,
                     cv.gender, cv.postcode, cv.age_group, cv.no_children, cv.no_cars,
                     cv.marital_status, cv.employment_status, cv.shopping_frequency,
                     cv.house_income
        ) AS t1 ON d.datestamp::date = t1.datestamp::date
        WHERE d.datestamp >= $1 AND d.datestamp < $2
        ORDER BY d.datestamp
        ON CONFLICT (datestamp, doorway_id, customer_id, centre_id, gender, postcode,
                     age_group, no_children, no_cars, marital_status, employment_status,
                     shopping_frequency, house_income)
        DO UPDATE SET visitors = excluded.visitors,
                      total_dwell_time = excluded.total_dwell_time;
    $$
    USING start_time, end_time;
END;
$BODY$;
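For what it's worth, this error means the rows produced by the INSERT's SELECT contain at least two rows with the same conflict-target key, so a single statement would have to update the same target row twice. One plausible source of such duplicates here, though only an assumption since the table definitions aren't shown, is rollups.some_rollup carrying a time component in datestamp: t1 groups by the raw datestamp, so two distinct group keys can collapse onto the same daily key after the ::date cast. A diagnostic sketch along those lines:

-- Sketch: look for daily conflict keys that are fed by more than one raw datestamp.
-- Assumes rollups.some_rollup.datestamp may contain a time-of-day component.
SELECT cv.datestamp::date AS day_key, cv.doorway_id, cv.customer_id, cv.centre_id,
       cv.gender, cv.postcode, cv.age_group, cv.no_children, cv.no_cars,
       cv.marital_status, cv.employment_status, cv.shopping_frequency, cv.house_income,
       count(DISTINCT cv.datestamp) AS raw_datestamps
FROM rollups.some_rollup cv
WHERE cv.dwell_time > '00:00:30'::interval
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
HAVING count(DISTINCT cv.datestamp) > 1;

Any rows returned identify conflict keys that the INSERT would try to affect more than once within the chosen date range.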


Why am I getting a "More than one row returned" error inside this postgres function?

I have a game table and a child gameeffect table. I'm trying to create a function that will find all games for which the most recent gameeffect row has a created_at date older than a certain number of minutes (passed in as a parameter). Here's the code:
create or replace function end_idle_games(idle_time int)
returns table(game_id integer)
language plpgsql
as $$
begin
create temp table temp_game_ids(game_id int);
with open_game_effects as ( -- gets all the game effects for any open games
select ge.*
from gameeffect ge
join game g on g.game_id = ge.game_id
join gamestatus gs on gs.game_status_id = g.game_status_id
where gs.game_status = 'open'
),
latest_game_effects as ( -- gets the latest game effect for each game and calculates the time since it was created in minutes
select oge.*, extract(epoch from (now() at time zone 'utc' - oge.created_at))/60 as idle_minutes
from open_game_effects oge
where oge.game_effect_id = (select max(oge1.game_effect_id) from open_game_effects oge1 group by oge1.game_id)
),
idle_games as ( -- gets all game ids with an idle minutes greater than the passed in idle time
select lge.game_id
from latest_game_effects lge
where lge.idle_minutes > idle_time
)
insert into temp_game_ids (game_id) select i.game_id from idle_games i;
return query
select t.game_id from temp_game_ids t;
end;
$$;
When I call the function, e.g. select * from end_idle_games(120);, I get a "more than one row returned by a subquery used as an expression" error. I know that the line it's complaining about is:
insert into temp_game_ids (game_id) select i.game_id from idle_games i
because when I replace it with:
insert into temp_game_ids (game_id) select 99
the function works. What I don't understand is why it's throwing the error.
Stupid error on my part. The error was actually occurring on this line:
where oge.game_effect_id = (select max(oge1.game_effect_id) from open_game_effects oge1 group by oge1.game_id)
because the subquery does return multiple rows. I should have been using the 'in' operator instead of '='. Also, @a_horse_with_no_name helped me see that I could ditch the temp table by wrapping the entire expression in the return statement, like so:
create or replace function end_idle_games(idle_time int)
returns table(game_id integer)
language plpgsql
as $$
begin
return query
with open_game_effects as ( -- gets all the game effects for any open games
select ge.*
from gameeffect ge
join game g on g.game_id = ge.game_id
join gamestatus gs on gs.game_status_id = g.game_status_id
where gs.game_status = 'open'
),
latest_game_effects as ( -- gets the latest game effect for each game and calculates the time since it was created in minutes
select oge.*, extract(epoch from (now() at time zone 'utc' - oge.created_at))/60 as idle_minutes
from open_game_effects oge
where oge.game_effect_id in (select max(oge1.game_effect_id) from open_game_effects oge1 group by oge1.game_id)
),
idle_games as ( -- gets all game ids with an idle minutes greater than the passed in idle time
select lge.game_id
from latest_game_effects lge
where lge.idle_minutes > idle_time
)
select i.game_id from idle_games i;
end;
$$;

stored function in postgres causing a timeout

I have written a stored function in Postgres. When I run it, it runs forever.
I am not sure why it takes so much time even with just one day of data, and I don't know what is causing the issue.
Could anyone have a look and give some feedback on what the issue is here?
CREATE FUNCTION schema.delete_allHistoryRecord()
RETURNS text
LANGUAGE 'plpgsql'
VOLATILE
AS $BODY$
DECLARE
-- get the 15 old days transaction_history
transaction_object_cursor CURSOR FOR SELECT * from
(SELECT * FROM schema.transaction
WHERE start_time < (NOW() - INTERVAL '15 days')
UNION SELECT * FROM schema.transaction_history
WHERE start_time < (NOW() - INTERVAL '15 days')) trans
join schema.transaction_object_history hist
on hist.transaction_row_id = trans.row_id limit 500;
transaction_older_than_15_days schema.transaction%ROWTYPE;
-- get the 90 old days project_history
project_history_cursor CURSOR FOR SELECT * FROM schema.project_history
WHERE created_at < (now() - INTERVAL '90 Days');
project_history_older_than_90_days schema.project_history%ROWTYPE;
BEGIN
OPEN transaction_object_cursor;
LOOP
FETCH transaction_object_cursor INTO transaction_older_than_15_days;
EXIT WHEN NOT FOUND;
DELETE FROM schema.transaction_object_history
WHERE transaction_row_id = transaction_older_than_15_days.row_id;
END LOOP;
CLOSE transaction_object_cursor;
OPEN project_history_cursor;
LOOP
FETCH project_history_cursor INTO project_history_older_than_90_days;
EXIT WHEN NOT FOUND;
-- delete old data from project_user_history
DELETE FROM schema.project_user_history
WHERE project_id = project_history_older_than_90_days.project_id;
DELETE FROM schema.project_object_history
WHERE project_row_id = project_history_older_than_90_days.row_id;
DELETE FROM schema.transaction_history
WHERE project_id = project_history_older_than_90_days.project_id;
-- delete old data from project_history
DELETE FROM schema.project_history
WHERE project_id = project_history_older_than_90_days.project_id;
END LOOP;
CLOSE project_history_cursor;
RETURN 'Done';
END;
$BODY$;
The query on its own returns the result very fast:
SELECT * from
(SELECT * FROM schema.transaction
WHERE start_time < (NOW() - INTERVAL '15 days')
UNION SELECT * FROM schema.transaction_history
WHERE start_time < (NOW() - INTERVAL '15 days')) trans
join schema.transaction_object_history hist
on hist.transaction_row_id = trans.row_id limit 500;
and so does this loop with a counter added:
OPEN transaction_object_cursor;
LOOP
FETCH transaction_object_cursor INTO transaction_older_than_15_days;
EXIT WHEN NOT FOUND;
cnt:=cnt+1;
RAISE NOTICE '%', transaction_older_than_15_days.row_id;
END LOOP;
CLOSE transaction_object_cursor;
The count returns 500, which means the cursor is fetching the data. I still can't understand why it loops forever while deleting the data.
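For what it's worth, row-by-row cursor deletes like these can usually be collapsed into set-based DELETE ... USING statements, which avoids running one DELETE per fetched row. The following is only a sketch against the same tables and retention rules as the function above (and it drops the LIMIT 500 batching of the cursor query), not a tested fix:

-- Sketch: set-based equivalents of the cursor loops (untested, same schema assumed).

-- 15-day rule: remove transaction_object_history rows tied to old transactions.
DELETE FROM schema.transaction_object_history hist
USING (
    SELECT row_id FROM schema.transaction
    WHERE start_time < now() - INTERVAL '15 days'
    UNION
    SELECT row_id FROM schema.transaction_history
    WHERE start_time < now() - INTERVAL '15 days'
) trans
WHERE hist.transaction_row_id = trans.row_id;

-- 90-day rule: mirror the loop, which removes all rows for any project that has
-- at least one project_history row older than 90 days.
DELETE FROM schema.project_user_history pu
USING schema.project_history oph
WHERE oph.created_at < now() - INTERVAL '90 days'
  AND pu.project_id = oph.project_id;

DELETE FROM schema.project_object_history po
USING schema.project_history oph
WHERE oph.created_at < now() - INTERVAL '90 days'
  AND po.project_row_id = oph.row_id;

DELETE FROM schema.transaction_history th
USING schema.project_history oph
WHERE oph.created_at < now() - INTERVAL '90 days'
  AND th.project_id = oph.project_id;

DELETE FROM schema.project_history ph
USING schema.project_history oph
WHERE oph.created_at < now() - INTERVAL '90 days'
  AND ph.project_id = oph.project_id;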

postgresql: use a timestamp variable within PL/pgSQL

I'm new to PL/pgSQL. I want to concatenate two variables, but I always get the same error: the time_ variable is not known.
Let's say that date_ is of type date and time_ is of type time. The error comes from this line:
sum(extract(epoch from (least(s.end, gs.date_+time_) - greatest(s.beg, gs.date_))) / 60) as Timing
My code is below
delcare
time_ time;
Begin
execute $$SELECT CURRENT_TIMESTAMP::time FROM $$||result_table INTO time_;
execute $$SELECT MAX(date_) FROM $$||result_table INTO max_date;
IF max_date is not NULL THEN
execute $$DELETE FROM $$||result_table||$$ WHERE date_ >= $$||quote_literal(max_date);
ELSE
max_date := 'XXXXXXX';
end if;
execute $$
INSERT INTO $$result_table$$
(Id, gs.date_, TIME, timing)
SELECT * from (
select
Id, gs.date_,
(case
When TRIM(set) ~ '^OPT[0-9]{3}/MINUTE/$'
Then 'minute'
When TRIM(set) ~ '^OPT[0-9]{3}/SECOND/$'
Then 'second' as TIME,
sum(extract(epoch from (least(s.end, gs.date_+time_) -
greatest(s.beg, gs.date_)
)
) / 60) as Timing
from source s cross join lateral
generate_series(date_trunc('day', s.beg), date_trunc('day',
least(s.end,
CASE WHEN $$||quote_literal(max_date)||$$ = 'XXXXXXX'
THEN (current_date)
ELSE $$||quote_literal(max_date)||$$
END)
), interval '1 day') gs(date_)
where ( (beg, end) overlaps ($$||quote_literal(max_date)||$$'00:00:00', $$||quote_literal(max_date)||$$'23:59:59'))
group by id, gs.date_, TIME
) as X
where ($$||quote_literal(max_date)||$$ = X.date_ and $$||quote_literal(max_date)||$$ != 'XXXXXXX')
OR ($$||quote_literal(max_date)||$$ ='XXXXXXX')
Dynamic SQL should be generated through format(), and parameters should not be passed as string literals but through placeholders and USING.
Your code is really hard to read, incomplete, and there are some substantial syntax errors which stem from e.g. a missing END for the CASE and parentheses that are not properly paired. So the following code might still contain some errors, as I have no way of testing it.
But as your main SELECT does not seem to use dynamic SQL at all, all the quote_literal() and string concatenation is unnecessary, just use the variables directly.
As max_date is supposed to be a date value you can't assign the string 'XXXXXXX' to it; but if you use max_date directly, you can get rid of that check as far as I can tell.
declare
time_ time;
max_date date;
result_table text := 'contract_frequency';
table_schema text := 'public';
Begin
time_ := localtime;
execute format('SELECT MAX(date_) FROM %I.%I', table_schema, result_table) INTO max_date;
IF max_date is not NULL THEN
execute format('DELETE FROM %I.%I WHERE date_ >= $1', table_schema, result_table) using max_date;
ELSE
-- you replace XXXX with current_date in the CASE expression
-- later on, so using current_date here seems the right thing to do
max_date := current_date;
end if;
SELECT *
from (
select
Id, gs.date_,
case
    When TRIM(set) ~ '^OPT[0-9]{3}/MINUTE/$' Then 'minute'
    When TRIM(set) ~ '^OPT[0-9]{3}/SECOND/$' Then 'second'
end as TIME,
sum(extract(epoch from (least(s.end, gs.date_+time_) - greatest(s.beg, gs.date_) ) ) / 60) as Timing
from source s
cross join lateral
generate_series(date_trunc('day', s.beg), date_trunc('day', least(s.end, max_date)), interval '1 day') gs(date_)
where (beg, end) overlaps (max_date::timestamp, max_date + time '23:59:59')
group by id, gs.date_, TIME
) as X
where (max_date = X.date_ and max_date <> current_date)
OR (max_date = current_date)
end;

PostgreSQL 10 function creation not working

I have the following query, which I've modified for my database tables/columns, but it is not working upon execution:
CREATE OR REPLACE FUNCTION delete_data_antique(resourceid integer)
RETURNS TABLE(metrics_values_id int4) AS $$
BEGIN
RETURN QUERY
delete from metrics_values
where resource_id = $1
and time < (current_timestamp - interval '38 day')
and id not in
(select id
from (select distinct on (time_week)
id, time, date_trunc('week', time) time_week
from metrics_values
where resource_id = $1
and time < (current_timestamp - interval '38 day')
order by time_week, time desc)
as first_in_week_versions)
returning id;
END;
$$ LANGUAGE 'plpgsql';
Error message is as follows
I'm new to creating functions in SQL and have been reading the docs, but I'm not sure where or how it's going wrong.

PL/pgSQL: Comparing Successive Rows

I have the function get_untracked_moves below. My goal is, for all data between two dates, to find successive events which are farther than p_separation_distance apart.
E.g.:
If event 1 and event 2 are 140 m apart when p_separation_distance is 100 m, a record would be returned with event 1's associated cont_name as the source_name and event 2's cont_name as the target_name.
CREATE FUNCTION get_untracked_moves(IN p_since_date TIMESTAMP WITHOUT TIME ZONE, IN p_before_date TIMESTAMP WITHOUT TIME ZONE, IN p_separation_distance INTEGER)
RETURNS TABLE ( id INTEGER,
asset_name CHARACTER VARYING,
source_name CHARACTER VARYING,
target_name CHARACTER VARYING,
source_time TIMESTAMP WITHOUT TIME ZONE,
target_time TIMESTAMP WITHOUT TIME ZONE,
source_lat DOUBLE PRECISION,
source_lon DOUBLE PRECISION,
target_lat DOUBLE PRECISION,
target_lon DOUBLE PRECISION ) AS $$
DECLARE
d_previous_location GEOMETRY;
d_previous_name CHARACTER VARYING;
d_previous_time TIMESTAMP WITHOUT TIME ZONE;
d_cur record;
BEGIN
-- Begin # 0,0
d_previous_location := st_setsrid(st_makepoint(0,0), 4326);
d_previous_name := '';
d_previous_time := NULL;
FOR d_cur
IN
SELECT
rank() OVER (PARTITION BY events.asset_id ORDER BY events.event_time) AS idx,
tags.id asset_id,
tags.name asset_name,
d_previous_name,
conts.name cont_name,
events.position,
events.event_time evt_time
FROM
events
JOIN
assets tags ON tags.id = events.asset_id
JOIN
assets conts ON conts.id = events.container_asset_id
WHERE
events.event_time >= p_since_date
AND
events.event_time <= p_before_date
LOOP
IF (d_previous_time = NULL) THEN
d_previous_time := events.event_time;
END IF;
IF (st_distancesphere(events.position, d_previous_location)>=p_separation_distance) THEN
RETURN NEXT;
END IF;
d_previous_location := events.position;
d_previous_name := conts.name;
d_previous_time := events.event_time;
END LOOP;
END;
$$
LANGUAGE plpgsql VOLATILE;
The function creates fine, but when I go to run it with:
select * from get_untracked_moves('2015-11-1', '2015-12-1', 10000);
I get:
ERROR: missing FROM-clause entry for table "events"
LINE 1: SELECT (st_distancesphere(events.position, d_previous_locati...
^
QUERY: SELECT (st_distancesphere(events.position, d_previous_location)>=p_separation_distance)
CONTEXT: PL/pgSQL function "get_untracked_moves" line 41 at IF
********** Error **********
ERROR: missing FROM-clause entry for table "events"
SQL state: 42P01
Context: PL/pgSQL function "get_untracked_moves" line 41 at IF
What am I missing here? I thought the inclusion of FROM events in my SELECT statement was enough.
Each pass of the loop assigns d_cur the record containing the corresponding row of the SELECT result set, so events is not visible inside the loop. Instead, use d_cur.position to refer to that column.
BTW, as commented on your question, you should really use the lag window function and get rid of the messy loop.
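For example, keeping the original loop structure, the references inside the loop would become something like this (a sketch; it also switches the d_previous_time check to IS NULL, since = NULL never evaluates to true):

-- Inside FOR d_cur IN SELECT ... LOOP, refer to the record's columns, not the tables:
IF d_previous_time IS NULL THEN
    d_previous_time := d_cur.evt_time;
END IF;
IF st_distancesphere(d_cur.position, d_previous_location) >= p_separation_distance THEN
    RETURN NEXT;
END IF;
d_previous_location := d_cur.position;
d_previous_name     := d_cur.cont_name;
d_previous_time     := d_cur.evt_time;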
As a suggestion, check this query:
select idx, asset_id, asset_name, previous_name, cont_name, position, evt_time
from (
select
rank() over (partition by e.asset_id order by e.event_time) as idx,
st_distancesphere(
e.position,
lag(e.position, 1, e.position) over (order by e.event_time)
) >= p_separation_distance as b,
t.id as asset_id,
t.name as asset_name,
lag(c.name, 1) over (order by e.event_time) as previous_name,
c.name as cont_name,
e.position,
e.event_time as evt_time
from
events e
inner join
assets t on t.id = e.asset_id
inner join
assets c on c.id = e.container_asset_id
where
e.event_time >= p_since_date
and
e.event_time <= p_before_date
) s
where b
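If the lag-based query is adopted, the whole loop can collapse into a single RETURN QUERY. The following is only a sketch under assumptions the question doesn't state (assets.id is an integer, the name columns are character varying, position is a PostGIS geometry with longitude in X and latitude in Y, and successive events are compared per asset); note it also skips each asset's first event rather than comparing it to point (0,0) as the original loop does:

CREATE OR REPLACE FUNCTION get_untracked_moves(
    p_since_date timestamp without time zone,
    p_before_date timestamp without time zone,
    p_separation_distance integer)
RETURNS TABLE (id integer,
               asset_name character varying,
               source_name character varying,
               target_name character varying,
               source_time timestamp without time zone,
               target_time timestamp without time zone,
               source_lat double precision,
               source_lon double precision,
               target_lat double precision,
               target_lon double precision) AS $$
BEGIN
    RETURN QUERY
    SELECT s.asset_id,
           s.asset_name,
           s.previous_name,
           s.cont_name,
           s.previous_time,
           s.evt_time,
           st_y(s.previous_position),  -- assumed: latitude stored in Y
           st_x(s.previous_position),  -- assumed: longitude stored in X
           st_y(s.position),
           st_x(s.position)
    FROM (
        SELECT t.id   AS asset_id,
               t.name AS asset_name,
               lag(c.name)       OVER w AS previous_name,
               c.name            AS cont_name,
               lag(e.event_time) OVER w AS previous_time,
               e.event_time      AS evt_time,
               lag(e.position)   OVER w AS previous_position,
               e.position,
               -- flag successive events per asset that are too far apart
               st_distancesphere(e.position,
                                 lag(e.position, 1, e.position) OVER w
               ) >= p_separation_distance AS far_apart
        FROM events e
        JOIN assets t ON t.id = e.asset_id
        JOIN assets c ON c.id = e.container_asset_id
        WHERE e.event_time >= p_since_date
          AND e.event_time <= p_before_date
        WINDOW w AS (PARTITION BY e.asset_id ORDER BY e.event_time)
    ) s
    WHERE s.far_apart;
END;
$$ LANGUAGE plpgsql;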