Redshift Stored Procedures not passing dates properly - date

I notice Redshift changes my date to '2001'. Run the below for a very clear simple example
Result3 is correctly today's date (which is 2019-11-07). However Result2 (the stored procedure) simply returns '2001'. I've tried casting as well as passing the date in differently but the result stays the same. Any recommendations?
CREATE OR REPLACE PROCEDURE pub.test_sp1(IN param date,INOUT tmp_name varchar(256))
AS $$
DECLARE
row record;
BEGIN
EXECUTE 'drop table if exists ' || tmp_name;
EXECUTE 'create temp table ' || tmp_name || ' as
select ' || param;
END;
$$ LANGUAGE plpgsql;
CALL pub.test_sp1(current_date, 'myresult');
SELECT * from myresult;
select current_date

EXECUTE 'create temp table ' || tmp_name || ' as select ' || param;
is transformed (after concatenating the strings and substituting the values) to
EXECUTE 'create temp table myresult as select 2019-11-07'; -- => select 2001
Don't concatenated values this way, use quote_ident() and quote_literal() for proper quoting:
EXECUTE 'create temp table ' || quote_ident(tmp_name) || ' as select ' || quote_literal(param);
or with the format() function:
EXECUTE format('create %I as select %L', tmp_date, param);
As an example:
#= SELECT 'test: ' || current_date as incorrect,
'test: ' || quote_literal(current_date) as correct;
┌──────────────────┬────────────────────┐
│ incorrect │ correct │
├──────────────────┼────────────────────┤
│ test: 2019-11-07 │ test: '2019-11-07' │
└──────────────────┴────────────────────┘

Related

Update Null columns to Zero dynamically in Redshift

Here is the code in SAS, It finds the numeric columns with blank and replace with 0's
DATA dummy_table;
SET dummy_table;
ARRAY DUMMY _NUMERIC_;
DO OVER DUMMY;
IF DUMMY=. THEN DUMMY=0;
END;
RUN;
I am trying to replicate this in Redshift, here is what I tried
create or replace procedure sp_replace_null_to_zero(IN tbl_nm varchar) as $$
Begin
Execute 'declare ' ||
'tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');' ||
'init_loop int := 0; ' ||
'cn_nm varchar; '
Begin
While init_loop <= tot_cnt
Loop
Raise info 'init_loop = %', Init_loop;
Raise info 'tot_cnt = %', tot_cnt;
Execute 'Select column_name into cn_nm from information_schema.columns ' ||
'where table_name ='|| tbl_nm || ' and ordinal_position = init_loop ' ||
'and data_type not in (''character varying'',''date'',''text''); '
Raise info 'cn_nm = %', cn_nm;
if cn_nm is not null then
Execute 'Update ' || tbl_nm ||
'Set ' || cn_nm = 0 ||
'Where ' || cn_nm is null or cn_nm =' ';
end if;
init_loop = init_loop + 1;
end loop;
End;
End;
$$ language plpgsql;
Issues I am facing
When I pass the Input parameter here, I am getting 0 count
tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');'
For testing purpose I tried hardcode the table name inside proc, I am getting the error amazon invalid operation: value for domain information_schema.cardinal_number violates check constraint "cardinal_number_domain_check"
Is this even possible in redshift, How can I do this logic or any other workaround.
Need Expertise advise here!!
You can simply run an UPDATE over the table(s) using the NVL(cn_nm,0) function
UPDATE tbl_raw
SET col2 = NVL(col2,0);
However UPDATE is a fairly expensive operation. Consider just using a view over your table that wraps the columns in NVL(cn_nm,0)
CREATE VIEW tbl_clean
AS
SELECT col1
, NVL(col2,0) col2
FROM tbl_raw;

How to convert a PL/PgSQL procedure into a dynamic one?

I am trying to write a plpgsql procedure to perform spatial tiling of a postGIS table. I can perform the operation successfully using the following procedure in which the table names are hardcoded. The procedure loops through the tiles in tile_table and for each tile clips the area_table and inserts it into split_table.
CREATE OR REPLACE PROCEDURE splitbytile()
AS $$
DECLARE
tile RECORD;
BEGIN
FOR tile IN
SELECT tid, geom FROM test_tiles ORDER BY tid
LOOP
INSERT INTO split_table (id, areaname, ttid, geom)
SELECT id, areaname, tile.tid,
CASE WHEN st_within(base.geom, tile.geom) THEN st_multi(base.geom)
ELSE st_multi(st_intersection(base.geom, tile.geom)) END as geom
FROM area_table as base
WHERE st_intersects(base.geom, tile.geom);
COMMIT;
END LOOP;
END;
$$ LANGUAGE 'plpgsql';
Having tested this successfully, now I need to convert it to a dynamic procedure where I can provide the table names as parameters. I tried the following partial conversion, using format() for inside of loop:
CREATE OR REPLACE PROCEDURE splitbytile(in_table text, grid_table text, split_table text)
AS $$
DECLARE
tile RECORD;
BEGIN
FOR tile IN
EXECUTE format('SELECT tid, geom FROM %I ORDER BY tid', grid_table)
LOOP
EXECUTE
FORMAT(
'INSERT INTO %1$I (id, areaname, ttid, geom)
SELECT id, areaname, tile.tid,
CASE WHEN st_within(base.geom, tile.geom) THEN st_multi(base.geom)
ELSE st_multi(st_intersection(base.geom, tile.geom)) END as geom
FROM %2$I as base
WHERE st_intersects(base.geom, tile.geom)', split_table, in_table
);
COMMIT;
END LOOP;
END;
$$ LANGUAGE 'plpgsql';
But it throws an error
missing FROM-clause entry for table "tile"
So, how can I convert the procedure to a dynamic one? More specifically, how can I use the record data type (tile) returned by the for loop inside the loop? Note that it works when format is not used.
You can use EXECUTE ... USING to supply parameters to a dynamic query:
EXECUTE
format(
'SELECT r FROM %I WHERE c = $1.val',
table_name
)
INTO result_var
USING record_var;
The first argument to USING will be used for $1, the second for $2 and so on.
See the documentation for details.
Personally I use somehow different way to create dynamic functions. By concatination and execute function. You can also do like this.
CREATE OR REPLACE FUNCTION splitbytile()
RETURNS void AS $$
declare
result1 text;
table_name text := 'test_tiles';
msi text := '+7 9912 231';
msi text := 'Hello world';
code text := 'code_name';
_operator_id integer := 2;
begin
query1 := 'SELECT msisdn from ' || table_name || ' where msisdn = ''' || msi::text ||''';';
query2 := 'INSERT INTO ' || table_name || '(msisdn,usage,body,pr_code,status,sent_date,code_type,operator_id)
VALUES( ''' || msi::text || ''',' || true || ',''' || _body::text || ''',''' || code::text || ''',' || false || ',''' || time_now || ''',' || kod_type || ',' || _operator_id ||');';
execute query1 into result1;
execute query2;
END;
$function$
You just make your query as text then anywhere you want you can execute it. Maybe by checking result1 value inside If statement or smth like that.

EXTRACT INTO with multiple rows (PostgreSQL)

this is my function:
CREATE OR REPLACE FUNCTION SANDBOX.DAILYVERIFY_DATE(TABLE_NAME regclass, DATE_DIFF INTEGER)
RETURNS void AS $$
DECLARE
RESULT BOOLEAN;
DATE DATE;
BEGIN
EXECUTE 'SELECT VORHANDENES_DATUM AS DATE, CASE WHEN DATUM IS NULL THEN FALSE ELSE TRUE END AS UPDATED FROM
(SELECT DISTINCT DATE VORHANDENES_DATUM FROM ' || TABLE_NAME ||
' WHERE DATE > CURRENT_DATE -14-'||DATE_DIFF|| '
) A
RIGHT JOIN
(
WITH compras AS (
SELECT ( NOW() + (s::TEXT || '' day'')::INTERVAL )::TIMESTAMP(0) AS DATUM
FROM generate_series(-14, -1, 1) AS s
)
SELECT DATUM::DATE
FROM compras)
B
ON DATUM = VORHANDENES_DATUM'
INTO date,result;
RAISE NOTICE '%', result;
INSERT INTO SANDBOX.UPDATED_TODAY VALUES (TABLE_NAME, DATE, RESULT);
END;
$$ LANGUAGE plpgsql;
It is supposed to upload rows into the table SANDBOX.UPDATED_TODAY, which contains table name, a date and a boolean.
The boolean shows, whether there was an entry for that date in the table. The whole part, which is inside of EXECUTE ... INTO works fine and gives me those days.
However, this code only inserts the first row of the query's result. What I want is that all 14 rows get inserted. Obviously, I need to change it into something like a loop or something completely different, but how exactly would that work?
Side note: I removed some unnecessary parts regarding those 2 parameters you can see. It does not have to do with that at all.
Put the INSERT statement inside the EXECUTE. You don't need the result of the SELECT for anything other than inserting it into that table, right? So just insert it directly as part of the same query:
CREATE OR REPLACE FUNCTION SANDBOX.DAILYVERIFY_DATE(TABLE_NAME regclass, DATE_DIFF INTEGER)
RETURNS void AS
$$
BEGIN
EXECUTE
'INSERT INTO SANDBOX.UPDATED_TODAY
SELECT ' || QUOTE_LITERAL(TABLE_NAME) || ', VORHANDENES_DATUM, CASE WHEN DATUM IS NULL THEN FALSE ELSE TRUE END
FROM (
SELECT DISTINCT DATE VORHANDENES_DATUM FROM ' || TABLE_NAME ||
' WHERE DATE > CURRENT_DATE -14-'||DATE_DIFF|| '
) A
RIGHT JOIN (
WITH compras AS (
SELECT ( NOW() + (s::TEXT || '' day'')::INTERVAL )::TIMESTAMP(0) AS DATUM
FROM generate_series(-14, -1, 1) AS s
)
SELECT DATUM::DATE
FROM compras
) B
ON DATUM = VORHANDENES_DATUM';
END;
$$ LANGUAGE plpgsql;
The idiomatic way to loop through dynamic query results would be
FOR date, result IN
EXECUTE 'SELECT ...'
LOOP
INSERT INTO ...
END LOOP;

Apply function to temp table inside another function

I am new to creating functions in postgresql. The version that I'm using is rather old. It's 8.2.15 (not my choice, but my org's). The following example is trying to apply one function to a temp table in another function.
-- First function
create or replace function inner_func(_tbl anyelement)
RETURNS void AS
$$
BEGIN
EXECUTE 'ALTER TABLE ' || _tbl || ' ADD COLUMN d_amount INTEGER';
EXECUTE 'UPDATE ' || _tbl || ' SET d_amount = 2* amount';
RETURN;
END;
$$
LANGUAGE plpgsql volatile;
-- Second function
CREATE OR REPLACE FUNCTION outer_func()
RETURNS void AS
$$
BEGIN
DROP TABLE IF EXISTS my_temp;
CREATE TEMP TABLE my_temp
(id serial primary key,
amount integer
);
INSERT into my_temp (amount) values (10),(20);
-- now apply the inner_func right here
EXECUTE 'SELECT inner_func(' || quote_ident('my_temp') || ')';
RETURN;
END;
LANGUAGE plpgsql volatile;
When I run
SELECT outer_func();
It spits out an ERROR:
column "my_temp" does not exist
But the inner_func works if I use it on its own like the following:
create temp table my_temp2
(id serial primary key,
amount integer
);
INSERT INTO my_temp2 (amount) values (10),(20);
SELECT inner_func(quote_ident('my_temp2'));
SELECT * from my_temp2;
id amount d_amount
1 10 20
2 20 40
How can I make this inner_func work inside outer_func? Any idea?
It looks like the problem is here:
EXECUTE 'SELECT inner_func(' || quote_ident('my_temp') || ')';
=>
EXECUTE 'SELECT inner_func(quote_ident(' || quote_literal('my_temp') || '));';
DBFiddle Demo

Self-managing PostgreSQL partition tables

I am trying to make a self-managing partition table setup with Postgres. It all revolves around this function but I can't seem to get Postgres to accept my table names. Any ideas or examples of self-managing partition table trigger functions?
My current function:
DECLARE
day integer;
year integer;
tablename text;
startdate text;
enddate text;
BEGIN
day:=date_part('doy',to_timestamp(NEW.date));
year:=date_part('year',to_timestamp(NEW.date));
tablename:='pings_'||year||'_'||day||'_'||NEW.id;
-- RAISE EXCEPTION 'tablename=%',tablename;
PERFORM 'tablename' FROM pg_tables WHERE 'schemaname'=tablename;
-- RAISE EXCEPTION 'found=%',FOUND;
IF FOUND <> TRUE THEN
startdate:=date_part('year',to_timestamp(NEW.date))||'-'||date_part('month',to_timestamp(NEW.date))||'-'||date_part('day',to_timestamp(NEW.date));
enddate:=startdate::timestamp + INTERVAL '1 day';
EXECUTE 'CREATE TABLE $1 (
CHECK ( date >= DATE $2 AND date < DATE $3 )
) INHERITS (pings)' USING quote_ident(tablename),startdate,enddate;
END IF;
EXECUTE 'INSERT INTO $1 VALUES (NEW.*)' USING quote_ident(tablename);
RETURN NULL;
END;
I want it to auto-create a table called pings_YEAR_DOY_ID but it always fails with:
2011-10-24 13:39:04 CDT [15804]: [1-1] ERROR: invalid input syntax for type double precision: "-" at character 45
2011-10-24 13:39:04 CDT [15804]: [2-1] QUERY: SELECT date_part('year',to_timestamp( $1 ))+'-'+date_part('month',to_timestamp( $2 ))+'-'+date_part('day',to_timestamp( $3 ))
2011-10-24 13:39:04 CDT [15804]: [3-1] CONTEXT: PL/pgSQL function "ping_partition" line 15 at assignment
2011-10-24 13:39:04 CDT [15804]: [4-1] STATEMENT: INSERT INTO pings VALUES (0,0,5);
TRY 2
After applying the changes and modifying it some more (date is a unixtimestamp column, my thinking being that an integer column is faster than a timestamp column when selecting). I get the below error, not sure if I am using the proper syntax for USING NEW?
Updated function:
CREATE FUNCTION ping_partition() RETURNS trigger
LANGUAGE plpgsql
AS $_$DECLARE
day integer;
year integer;
tablename text;
startdate text;
enddate text;
BEGIN
day:=date_part('doy',to_timestamp(NEW.date));
year:=date_part('year',to_timestamp(NEW.date));
tablename:='pings_'||year||'_'||day||'_'||NEW.id;
-- RAISE EXCEPTION 'tablename=%',tablename;
PERFORM 'tablename' FROM pg_tables WHERE 'schemaname'=tablename;
-- RAISE EXCEPTION 'found=%',FOUND;
IF FOUND <> TRUE THEN
startdate := to_char(to_timestamp(NEW.date), 'YYYY-MM-DD');
enddate:=startdate::timestamp + INTERVAL '1 day';
EXECUTE 'CREATE TABLE ' || quote_ident(tablename) || ' (
CHECK ( date >= EXTRACT(EPOCH FROM DATE ' || quote_literal(startdate) || ')
AND date < EXTRACT(EPOCH FROM DATE ' || quote_literal(enddate) || ') )
) INHERITS (pings)';
END IF;
EXECUTE 'INSERT INTO ' || quote_ident(tablename) || ' SELECT $1' USING NEW;
RETURN NULL;
END;
$_$;
My statement:
INSERT INTO pings VALUES (0,0,5);
SQL error:
ERROR: column "date" is of type integer but expression is of type pings
LINE 1: INSERT INTO pings_1969_365_0 SELECT $1
^
HINT: You will need to rewrite or cast the expression.
QUERY: INSERT INTO pings_1969_365_0 SELECT $1
CONTEXT: PL/pgSQL function "ping_partition" line 22 at EXECUTE statement
Note: Since Postgres 10 declarative partitioning is typically superior to partitioning by inheritance as used in this case.
You are mixing double precision output of date_part() with text '-'. That doesn't make sense to PostgreSQL. You would need an explicit cast to text. But there is a much simpler way to do all of this:
startdate:=date_part('year',to_timestamp(NEW.date))
||'-'||date_part('month',to_timestamp(NEW.date))
||'-'||date_part('day',to_timestamp(NEW.date));
Use instead:
startdate := to_char(NEW.date, 'YYYY-MM-DD');
This makes no sense either:
EXECUTE 'CREATE TABLE $1 (
CHECK (date >= DATE $2 AND date < DATE $3 )
) INHERITS (pings)' USING quote_ident(tablename),startdate,enddate;
You can only supply values with the USING clause. Read the manual here. Try instead:
EXECUTE 'CREATE TABLE ' || quote_ident(tablename) || ' (
CHECK ("date" >= ''' || startdate || ''' AND
"date" < ''' || enddate || '''))
INHERITS (ping)';
Or better yet, use format(). See below.
Also, like #a_horse answered: You need to put your text values in single quotes.
Similar here:
EXECUTE 'INSERT INTO $1 VALUES (NEW.*)' USING quote_ident(tablename);
Instead:
EXECUTE 'INSERT INTO ' || quote_ident(tablename) || ' VALUES ($1.*)'
USING NEW;
Related answer:
How to dynamically use TG_TABLE_NAME in PostgreSQL 8.2?
Aside: While "date" is allowed for a column name in PostgreSQL it is a reserved word in every SQL standard. Don't name your column "date", it leads to confusing syntax errors.
Complete working demo
CREATE TABLE ping (ping_id integer, the_date date);
CREATE OR REPLACE FUNCTION trg_ping_partition()
RETURNS trigger
LANGUAGE plpgsql SET client_min_messages = 'WARNING' AS
$func$
DECLARE
_schema text := 'public'; -- double-quoted if necessary
_tbl text := to_char(NEW.the_date, '"ping_"YYYY_DDD_') || NEW.ping_id;
BEGIN
EXECUTE format('CREATE TABLE IF NOT EXISTS %1$s.%2$s
(CHECK (the_date >= %3$L
AND the_date < %4$L)) INHERITS (%1$s.ping)'
, _schema -- %1$s
, _tbl -- %2$s -- legal(!) name needs no quotes
, to_char(NEW.the_date, 'YYYY-MM-DD') -- %3$L
, to_char(NEW.the_date + 1, 'YYYY-MM-DD') -- %4$L
);
EXECUTE 'INSERT INTO ' || _tbl || ' VALUES ($1.*)'
USING NEW;
RETURN NULL;
END
$func$;
CREATE TRIGGER insbef
BEFORE INSERT ON ping
FOR EACH ROW EXECUTE FUNCTION trg_ping_partition();
Postgres 9.1 added the clause IF NOT EXISTS for CREATE TABLE. See:
PostgreSQL create table if not exists
Postgres 11 added the more appropriate syntax variant EXECUTE FUNCTION for triggers. Use EXECUTE PROCEDURE in older versions.
See:
Trigger uses a procedure or a function?
to_char() can take a date as $1. That's converted to timestamp automatically. See:
The manual on date / time functions.
I SET client_min_messages = 'WARNING' for the scope of the function to silence the flood of notices that would otherwise be raised on conflict by IF NOT EXISTS.
Multiple other simplifications and improvements. Compare the code.
Tests:
INSERT INTO ping VALUES (1, now()::date);
INSERT INTO ping VALUES (2, now()::date);
INSERT INTO ping VALUES (2, now()::date + 1);
INSERT INTO ping VALUES (2, now()::date + 1);
fiddle
OLD sqlfiddle
Dynamic partitioning in PostgreSQL is just a bad idea. Your code is not safe in a multi-user environment. For it to be safe you would have to use locks, which slows down execution. The optimal number of partitions is about one hundred. You can easily create that many well in advance to dramatically simplify the logic necessary for partitioning.
You need to put your date literals in single quotes. Currently you are executing something like this:
CHECK ( date >= DATE 2011-10-25 AND date < DATE 2011-11-25 )
which is invalid. In this case 2011-10-25 is interpreted as 2011 minus 10 minus 25
Your code needs to create the SQL using single quotes around the date literal:
CHECK ( date >= DATE '2011-10-25' AND date < DATE '2011-11-25' )
I figured out the entirety and it works great, even have an auto-delete after 30 days. I hope this helps out future people looking for an autopartition trigger function.
CREATE FUNCTION ping_partition() RETURNS trigger
LANGUAGE plpgsql
AS $_$
DECLARE
_keepdate text;
_tablename text;
_startdate text;
_enddate text;
_result record;
BEGIN
_keepdate:=to_char(to_timestamp(NEW.date) - interval '30 days', 'YYYY-MM-DD');
_startdate := to_char(to_timestamp(NEW.date), 'YYYY-MM-DD');
_tablename:='pings_'||NEW.id||'_'||_startdate;
PERFORM 1
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
AND c.relname = _tablename
AND n.nspname = 'pinglog';
IF NOT FOUND THEN
_enddate:=_startdate::timestamp + INTERVAL '1 day';
EXECUTE 'CREATE TABLE pinglog.' || quote_ident(_tablename) || ' (
CHECK ( date >= EXTRACT(EPOCH FROM DATE ' || quote_literal(_startdate) || ')
AND date < EXTRACT(EPOCH FROM DATE ' || quote_literal(_enddate) || ')
AND id = ' || quote_literal(NEW.id) || '
)
) INHERITS (pinglog.pings)';
EXECUTE 'CREATE INDEX ' || quote_ident(_tablename||'_indx1') || ' ON pinglog.' || quote_ident(_tablename) || ' USING btree (microseconds) WHERE microseconds IS NULL';
EXECUTE 'CREATE INDEX ' || quote_ident(_tablename||'_indx2') || ' ON pinglog.' || quote_ident(_tablename) || ' USING btree (date, id)';
EXECUTE 'CREATE INDEX ' || quote_ident(_tablename||'_indx3') || ' ON pinglog.' || quote_ident(_tablename) || ' USING btree (date, id, microseconds) WHERE microseconds IS NULL';
END IF;
EXECUTE 'INSERT INTO ' || quote_ident(_tablename) || ' VALUES ($1.*)' USING NEW;
FOR _result IN SELECT * FROM pg_tables WHERE schemaname='pinglog' LOOP
IF char_length(substring(_result.tablename from '[0-9-]*$')) <> 0 AND (to_timestamp(NEW.date) - interval '30 days') > to_timestamp(substring(_result.tablename from '[0-9-]*$'),'YYYY-MM-DD') THEN
-- RAISE EXCEPTION 'timestamp=%,table=%,found=%',to_timestamp(substring(_result.tablename from '[0-9-]*$'),'YYYY-MM-DD'),_result.tablename,char_length(substring(_result.tablename from '[0-9-]*$'));
-- could have it check for non-existant ids as well, or for archive bit and only delete if the archive bit is not set
EXECUTE 'DROP TABLE ' || quote_ident(_result.tablename);
END IF;
END LOOP;
RETURN NULL;
END;
$_$;