Running PostgreSQL stored procedures in SQL console - postgresql

I have the following stored procedure:
-- First attempt at the mortality model: intended to pick the persons of one
-- user whose random draw falls inside [condprobmin, condprobmax], delete them
-- and return how many were deleted.
-- NOTE(review): this version does not run; see the inline notes.
CREATE FUNCTION runMortalityModel(a_user_id integer) RETURNS integer AS $$
DECLARE
-- RECORD variables hold a single row; they are not tables.
t1 RECORD;
t2 RECORD;
numberOfDeaths integer;
BEGIN
-- SELECT ... INTO a RECORD keeps only the first row of the join result.
SELECT person.id personId, person.age, condprobmin, condprobmax, random() experiment
INTO t1
FROM person, mortality_cond_prob
WHERE (user_id = a_user_id) and
(person.age = mortality_cond_prob.age);
-- NOTE(review): "FROM t1" treats the RECORD variable as a relation, which is
-- what raises the "relation t1 does not exist" error. The alias "tmp" used in
-- the WHERE clause is not defined anywhere in this query either.
SELECT personId
INTO t2
FROM t1
WHERE (tmp.condprobmin <= experiment) and (experiment <= tmp.condprobmax);
-- NOTE(review): same problem, t2 is a RECORD and cannot appear in FROM.
SELECT COUNT(*)
INTO numberOfDeaths
FROM t2;
-- NOTE(review): RAISE without a level defaults to EXCEPTION in PL/pgSQL, so
-- this would abort the function before the DELETE; RAISE NOTICE was probably
-- intended. Confirm.
RAISE 'numberOfDeaths=%', numberOfDeaths;
-- Concatenating t2 appends the text form of the record, not a list of ids.
EXECUTE
'DELETE '
|| 'FROM person '
|| 'WHERE person.id IN '
|| t2;
RETURN numberOfDeaths;
END
$$ LANGUAGE plpgsql;
When I try to run this stored procedure using
SELECT runMortalityModel(1);
I get the error Relation »t1« doesn't exist.
How can I fix it?
Update 1: Changed the stored procedure declaration to
-- Second attempt: the same logic as before, rewritten with dynamic SQL.
-- NOTE(review): still treats RECORD variables as table names; see the inline
-- notes.
CREATE OR REPLACE FUNCTION runMortalityModel(a_user_id integer) RETURNS integer AS $$
DECLARE
t1 RECORD;
t2 RECORD;
numberOfDeaths integer;
BEGIN
-- Builds the join as text; a_user_id is spliced into the string as its
-- text representation. Only the first result row is stored in t1.
EXECUTE 'SELECT person.id personId, person.age, condprobmin, condprobmax, random() experiment '
|| 'FROM person, mortality_cond_prob '
|| 'WHERE (user_id = ' || a_user_id || ') and '
|| '(person.age = mortality_cond_prob.age)'
INTO t1;
-- NOTE(review): "|| t1" appends the text form of the record (its row
-- literal), so the generated FROM clause is not a valid relation. The alias
-- "tmp" is also never defined.
EXECUTE 'SELECT personId '
|| 'FROM ' || t1
|| ' WHERE (tmp.condprobmin <= experiment) and (experiment <= tmp.condprobmax)'
INTO t2;
-- NOTE(review): same issue with t2 used as a relation name.
EXECUTE 'SELECT COUNT(*) '
|| 'FROM ' || t2
INTO numberOfDeaths;
-- NOTE(review): RAISE without a level defaults to EXCEPTION in PL/pgSQL;
-- RAISE NOTICE was probably intended. Confirm.
RAISE 'numberOfDeaths=%', numberOfDeaths;
-- Appends the text form of t2 after IN; it is not a parenthesised id list.
EXECUTE
'DELETE '
|| 'FROM person '
|| 'WHERE person.id IN '
|| t2;
RETURN numberOfDeaths;
END
$$ LANGUAGE plpgsql;

I see several issues with original code:
You're trying to use RECORD variable as a relation, you should do ... FROM (SELECT t1.*) s instead;
I see no point to select 1 record, then do a query on that record and then perform count(*), you will always have either 0 or 1 as a result.
Your second version looks much better; go for it.

This one seems to work. If you have better ideas, please tell them.
-- Runs one round of the mortality model for a single user.
--
-- For every person of the given user, joined to mortality_cond_prob on age,
-- one random() draw is taken. The person is deleted when the draw lies in
-- [condprobmin, condprobmax].
--
-- Parameters:
--   a_user_id  value compared against user_id to select the persons.
-- Returns:
--   the number of rows actually deleted from person.
--
-- The work is done in one set-based statement instead of a row-by-row loop
-- with a dynamic DELETE per person. Because the count comes from
-- DELETE ... RETURNING, a person that matches several mortality_cond_prob
-- rows is counted once, not once per matching row.
CREATE FUNCTION runMortalityModel(a_user_id integer) RETURNS integer AS $$
DECLARE
    numberOfDeaths integer;
BEGIN
    WITH candidate AS (
        -- One random draw per (person, probability row). The CTE contains a
        -- volatile function, so it is evaluated once and both bounds below
        -- are compared against the same draw.
        SELECT
            person.id AS personId,
            condprobmin,
            condprobmax,
            random() AS experiment
        FROM person
        INNER JOIN mortality_cond_prob
            ON person.age = mortality_cond_prob.age
        WHERE user_id = a_user_id
    ),
    deceased AS (
        DELETE FROM person
        WHERE person.id IN (
            SELECT candidate.personId
            FROM candidate
            WHERE candidate.condprobmin <= candidate.experiment
              AND candidate.experiment <= candidate.condprobmax
        )
        RETURNING person.id
    )
    SELECT COUNT(*)
    INTO numberOfDeaths
    FROM deceased;

    RETURN numberOfDeaths;
END
$$ LANGUAGE plpgsql;

Related

Update Null columns to Zero dynamically in Redshift

Here is the code in SAS. It finds the numeric columns that contain missing (blank) values and replaces those values with 0:
/* Rewrites dummy_table in place, setting every missing numeric value to 0. */
DATA dummy_table;
SET dummy_table;
/* Array over all numeric variables of the data set (_NUMERIC_). */
ARRAY DUMMY _NUMERIC_;
/* DO OVER visits each array element in turn. */
DO OVER DUMMY;
/* "." is the numeric missing value. */
IF DUMMY=. THEN DUMMY=0;
END;
RUN;
I am trying to replicate this in Redshift, here is what I tried
-- Attempt to replace NULLs with 0 in every non-text, non-date column of the
-- table named by tbl_nm, by looping over information_schema.columns.
-- NOTE(review): this procedure is not valid as written; see the inline notes.
create or replace procedure sp_replace_null_to_zero(IN tbl_nm varchar) as $$
Begin
-- NOTE(review): variables are declared inside a string passed to Execute, so
-- they are not declared in this block. The statement also has no terminating
-- semicolon. tbl_nm is concatenated without quotes, so the generated predicate
-- compares table_name against an identifier, not a string literal.
Execute 'declare ' ||
'tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');' ||
'init_loop int := 0; ' ||
'cn_nm varchar; '
Begin
-- The loop counter starts at 0 and runs up to and including tot_cnt.
-- NOTE(review): presumably ordinal_position starts at 1; confirm.
While init_loop <= tot_cnt
Loop
Raise info 'init_loop = %', Init_loop;
Raise info 'tot_cnt = %', tot_cnt;
-- NOTE(review): "init_loop" and "into cn_nm" sit inside the string literal,
-- so the executed text refers to those names literally rather than to the
-- PL/pgSQL variables. tbl_nm is again unquoted, and the statement lacks a
-- terminating semicolon.
Execute 'Select column_name into cn_nm from information_schema.columns ' ||
'where table_name ='|| tbl_nm || ' and ordinal_position = init_loop ' ||
'and data_type not in (''character varying'',''date'',''text''); '
Raise info 'cn_nm = %', cn_nm;
if cn_nm is not null then
-- NOTE(review): there is no space before 'Set ' or 'Where ', so the table
-- name runs into the keyword. "= 0", "is null" and "=' '" are evaluated as
-- PL/pgSQL expressions here instead of being part of the generated SQL text.
Execute 'Update ' || tbl_nm ||
'Set ' || cn_nm = 0 ||
'Where ' || cn_nm is null or cn_nm =' ';
end if;
init_loop = init_loop + 1;
end loop;
End;
End;
$$ language plpgsql;
Issues I am facing
When I pass the Input parameter here, I am getting 0 count
tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');'
For testing purposes I tried hardcoding the table name inside the procedure, and I get the error: Amazon invalid operation: value for domain information_schema.cardinal_number violates check constraint "cardinal_number_domain_check"
Is this even possible in Redshift? How can I implement this logic, or is there another workaround?
I need expert advice here.
You can simply run an UPDATE over the table(s) using the NVL(cn_nm,0) function
-- Same end result as SET col2 = NVL(col2, 0), but only the rows that are
-- actually NULL are rewritten instead of every row in the table.
UPDATE tbl_raw
SET col2 = 0
WHERE col2 IS NULL;
However UPDATE is a fairly expensive operation. Consider just using a view over your table that wraps the columns in NVL(cn_nm,0)
-- View over tbl_raw that exposes col2 with NULLs shown as 0, so the base
-- table does not have to be updated.
CREATE VIEW tbl_clean AS
SELECT
    col1,
    NVL(col2, 0) AS col2
FROM tbl_raw;

How to use a Function Parameter in a Cursor that's incorporated with Dynamic SQL in Postgres Functions?

Created this Postgres Function which is working fine, but the actual requirement is to pass the input parameter in the function to the Cursor which uses the dynamic SQL as follows,
The below is the Function
-- Counts, for each date column selected by cursor C1, the rows matching a
-- date derived from date_to_find, and writes one (table, start count, end
-- count, timestamp) row per table into SHIFT_DATES_COUNT.
--
-- Parameters:
--   date_to_find  the date the generated COUNT(*) queries are meant to use.
CREATE OR REPLACE FUNCTION ssp2_pcat.find_shift_dates (date_to_find date)
RETURNS void
LANGUAGE 'plpgsql'
COST 100
VOLATILE
AS $BODY$
DECLARE
-- C1 yields one row per candidate column: the table name and the text of a
-- COUNT(*) query (SQL_TEXT) to run against that column. Candidate columns are
-- DATE columns of tables flagged RESET_IT = 'Y' in RESET_DATES whose name
-- contains START or END, minus the excluded name patterns.
-- NOTE(review): "date_to_find" is written inside the string literal, so
-- SQL_TEXT contains the bare word date_to_find. Text run through EXECUTE does
-- not see PL/pgSQL variables; the value has to be passed with USING or
-- embedded with format().
-- NOTE(review): the generated CASE applies LIKE '%START%' to the column
-- itself (its date values), not to the column name; confirm that is intended.
C1 CURSOR FOR
SELECT TABLE_NAME, 'SELECT COUNT(*) FROM ' || TABLE_NAME || ' WHERE ' ||
COLUMN_NAME || ' = '||
'CASE WHEN ' || COLUMN_NAME || ' LIKE ' || '''%START%'''||' THEN
date_to_find ELSE date_to_find-1 END;' SQL_TEXT
FROM (
SELECT TABLE_NAME, COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME IN (SELECT TABLE_NAME FROM RESET_DATES WHERE RESET_IT =
'Y') AND
UPPER(DATA_TYPE) = 'DATE'
AND (COLUMN_NAME LIKE '%START%' OR COLUMN_NAME LIKE '%END%')
AND (COLUMN_NAME NOT LIKE '%TEST%'
AND COLUMN_NAME NOT LIKE '%PCAT%'
AND COLUMN_NAME NOT LIKE '%ORDER%'
AND COLUMN_NAME NOT LIKE '%SEASON%'
AND COLUMN_NAME NOT LIKE '%_AT')
ORDER BY 1, 2) A;
-- Running counts for the table currently being processed.
END_COUNT INTEGER := 0;
START_COUNT INTEGER := 0;
-- Name of the table currently being accumulated.
-- NOTE(review): if the first table returned by C1 is not 'ALFU', a row for
-- 'ALFU' with zero counts is inserted first; confirm that is intended.
TABLENAME VARCHAR(32) := 'ALFU';
l_start TIMESTAMP;
l_end TIMESTAMP;
Time_Taken VARCHAR(20);
BEGIN
l_start := clock_timestamp();
-- Clears the results of any previous run.
DELETE FROM SHIFT_DATES_COUNT;
FOR I IN C1 LOOP
-- On a change of table, flush the counts of the previous table and reset.
IF I.TABLE_NAME <> TABLENAME THEN
INSERT INTO SHIFT_DATES_COUNT VALUES (TABLENAME, START_COUNT,
END_COUNT, current_timestamp::timestamp(0));
TABLENAME := I.TABLE_NAME;
END_COUNT := 0;
START_COUNT := 0;
END IF;
-- NOTE(review): every SQL_TEXT ends with "END;" from the generated CASE
-- expression, so STRPOS(..., 'END') > 0 is always true and the ELSE branch is
-- never taken. The test presumably needs to look at the column name instead.
IF STRPOS(I.SQL_TEXT, 'END') > 0 THEN
EXECUTE I.SQL_TEXT INTO END_COUNT;
RAISE NOTICE '% ', ('END: ' || I.SQL_TEXT);
ELSE
EXECUTE I.SQL_TEXT INTO START_COUNT;
RAISE NOTICE '% ', ('START: ' || I.SQL_TEXT);
END IF;
END LOOP;
-- Flush the counts of the last table processed.
INSERT INTO SHIFT_DATES_COUNT VALUES (TABLENAME, START_COUNT, END_COUNT,
current_timestamp::timestamp(0));
RAISE NOTICE '% ', ('INSERT INTO SHIFT_DATES_COUNT Done...');
-- Report the elapsed wall-clock time.
l_end := clock_timestamp();
Time_Taken := (l_end-l_start);
RAISE NOTICE '% ', ('FIND_SHIFT_DATES Took: ' || Time_Taken );
END;
$BODY$;
Please let me know how can I use the date_to_find input parameter in the Dynamic SQL in the Cursor in the above Function.
You can use an unbound cursor: open it with OPEN ... FOR EXECUTE, read rows from it with FETCH, and leave the loop with EXIT WHEN NOT FOUND, like this:
-- Demonstrates an unbound cursor opened over dynamic SQL that uses a function
-- parameter.
--
-- Parameters:
--   p_name  substring to look for in pg_database.datname.
-- Emits one NOTICE per matching database name; returns nothing.
CREATE OR REPLACE FUNCTION example (p_name text) RETURNS void LANGUAGE 'plpgsql' AS $$
DECLARE
    C1 refcursor;
    res record;
BEGIN
    -- The search value is bound through USING instead of being concatenated
    -- into the query text, so quotes in p_name cannot change the statement.
    OPEN c1 FOR EXECUTE 'SELECT datname FROM pg_database WHERE datname LIKE $1'
        USING '%' || p_name || '%';
    LOOP
        FETCH c1 INTO res;
        -- FOUND is false once FETCH returns no row.
        EXIT WHEN NOT FOUND;
        RAISE NOTICE 'value datname: %', res.datname;
    END LOOP;
    CLOSE c1;
    RETURN;
END; $$;
--in my case
select example ('test')
NOTICE: value datname: test
NOTICE: value datname: test_msmov
NOTICE: value datname: test_resources
NOTICE: value datname: test_load_table
NOTICE: value datname: test_resources2
Total query runtime: 63 msec
1 row retrieved.
You can use the EXECUTE clause when opening a cursor; see the PostgreSQL documentation:
https://www.postgresql.org/docs/10/plpgsql-cursors.html#PLPGSQL-CURSOR-OPENING
Example:
OPEN curs1 FOR EXECUTE format('SELECT * FROM %I WHERE col1 = $1',tabname) USING keyvalue;

Reusing a declared variable in postgres function

I am writing a postgresql function and my construct is as follows:
-- Outline only: "argument_list" and the body are placeholders.
CREATE OR REPLACE FUNCTION function_name (argument_list) RETURNS INTEGER []
AS $$
DECLARE
-- ...along with other declarations
_tablename text;
BEGIN
-- 1. Dynamically build the name of an intermediate table, store it in
--    _tablename, and populate that table.
-- 2. Reuse _tablename in later queries, for example:
--      - loop over a SELECT from the table named by _tablename
--      - build an array by selecting one column from that table
--      - return that array
END
How should I do this? I want to reuse the declared variable name in my further queries in the function.
My complete postgres function is as follows:
DROP FUNCTION get_value_histogram(BIGINT,BIGINT,BIGINT,INTEGER);
-- Builds a per-timestamp sum of "outlet" collector values for one customer in
-- a scratch table, buckets those sums with width_bucket, and returns the
-- per-bucket counts as an array.
--
-- Parameters:
--   customer_id           customer whose data is aggregated.
--   start_time, end_time  inclusive t_stamp range (BETWEEN).
--   bucket_size           NOTE(review): not referenced in the body; the
--                         width_bucket arguments are hard-coded.
-- Returns:
--   INTEGER[] of row counts per bucket.
CREATE OR REPLACE FUNCTION get_value_histogram(customer_id BIGINT,
start_time BIGINT, end_time BIGINT, bucket_size INTEGER)
RETURNS INTEGER[] AS
$$
DECLARE
_tablename text;
-- NOTE(review): _curr_timestamp, _min_value and _max_value are never used.
_curr_timestamp BIGINT;
_var1 text;
_min_value INTEGER;
_max_value INTEGER;
_return_array INTEGER[];
BEGIN
-- Create an intermediate table holding the aggregated values; these are
-- then fed to the histogram (width_bucket) query.
-- The table name is made unique from the current epoch time, with the
-- decimal point replaced by an underscore.
_var1 := EXTRACT (EPOCH FROM now());
_var1 := replace(_var1, '.','_');
_tablename := 'thing_data_' || _var1;
EXECUTE 'CREATE TABLE ' || _tablename || ' (t_stamp BIGINT, sum_of_values INTEGER)';
-- Insert all the values into this intermediate table.
EXECUTE ' INSERT INTO ' || _tablename || ' ( select t_stamp , sum(data) from thing_data td, collector_tb ct where td.thingname =
ct.collector_name and td.t_stamp BETWEEN ' || quote_literal(start_time) || ' AND ' || quote_literal(end_time) || ' and
ct.type like ' || quote_literal('%outlet%') ||' AND customer_id = ' || customer_id || ' GROUP BY t_stamp)' ;
-- NOTE(review): this EXECUTE has no INTO, so its result is discarded.
EXECUTE 'select width_bucket(sum_of_values,500, 1000 , 100), count(*) as cnt from ' || _tablename || ' GROUP BY 1 ORDER BY 1' ;
-- NOTE(review): this is static SQL, so _tablename is read as the name of a
-- relation rather than as the variable; this is the statement that fails with
-- 'relation "_tablename" does not exist'. It has to go through EXECUTE as
-- well. The inner subquery also has no alias, which some PostgreSQL versions
-- require.
_return_array := array (select cnt from (select width_bucket(sum_of_values,500, 1000 , 100), count(*) as cnt from _tablename GROUP BY 1 ORDER BY 1));
-- Remove the scratch table again.
EXECUTE 'DROP TABLE ' || _tablename;
RETURN _return_array;
END $$ LANGUAGE plpgsql;
When I run this, I get an error saying relation "_tablename" does not exist
just replace :
_return_array := array (select cnt from (select width_bucket(sum_of_values,500, 1000 , 100), count(*) as cnt from _tablename GROUP BY 1 ORDER BY 1) a);
by :
-- The table name is only known at run time, so the query goes through EXECUTE.
-- format() with %I quotes the name as an identifier, and the counts are
-- ordered explicitly by bucket number.
EXECUTE format(
    'SELECT array('
    || 'SELECT a.cnt '
    || 'FROM ('
    || 'SELECT width_bucket(sum_of_values, 500, 1000, 100) AS bucket, count(*) AS cnt '
    || 'FROM %I '
    || 'GROUP BY width_bucket(sum_of_values, 500, 1000, 100)'
    || ') AS a '
    || 'ORDER BY a.bucket)',
    _tablename
) INTO _return_array;
I assume the error is in the last part:
_return_array := array (select cnt from (select width_bucket(sum_of_values,500, 1000 , 100), count(*) as cnt
from _tablename GROUP BY 1 ORDER BY 1));
Here you're using _tablename as an actual literal table name and not as a variable.

How to return a newly created id from a Postgres function?

I am working on creating partitions for a table in Postgres and have the following function:
-- Trigger function: routes each inserted row into a per-day child table named
-- <parent>_<YYYY_MM_DD>, creating that child table on first use.
-- Returns NULL, so the row is not stored in the table the trigger fired on.
CREATE OR REPLACE FUNCTION create_partition_and_insert() RETURNS trigger AS
$BODY$
DECLARE
    day_suffix  TEXT;
    child_table TEXT;
    create_sql  TEXT;
    insert_sql  TEXT;
BEGIN
    day_suffix  := to_char(NEW.date,'YYYY_MM_DD');
    child_table := TG_RELNAME || '_' || day_suffix;

    -- Create the child table only when no relation of that name exists yet.
    PERFORM relname FROM pg_class WHERE relname = child_table;
    IF NOT FOUND THEN
        RAISE NOTICE 'A partition has been created %',child_table;
        create_sql := 'CREATE TABLE ' || child_table
            || ' (check (date = ''' || NEW.date || ''')) INHERITS ('
            || TG_RELNAME || ');';
        EXECUTE create_sql;
    END IF;

    -- Re-insert the incoming row into the child table via its row literal.
    insert_sql := 'INSERT INTO ' || child_table
        || ' SELECT(' || TG_RELNAME || ' ' || quote_literal(NEW) || ').*;';
    EXECUTE insert_sql;

    RETURN NULL;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
I am using this function with a trigger:
-- Fires create_partition_and_insert() once for each row, before it is
-- inserted into testing_partition.
CREATE TRIGGER testing_partition_insert_trigger
    BEFORE INSERT
    ON testing_partition
    FOR EACH ROW
    EXECUTE PROCEDURE create_partition_and_insert();
The table for testing:
-- Parent table used to test the partitioning trigger.
CREATE TABLE testing_partition (
    patent_id BIGINT,
    date      DATE
)
WITH (OIDS = FALSE);
Is there a way to return the patent_id from the function? Right now it returns null. I was trying to extend the function with:
RETURNING patent_id INTO newid;
It seems Postgres does not support returning values this way:
ERROR: syntax error at or near "INTO"
LINE 1: ...rtition '(111,2018-01-11)').* RETURNING patent_id INTO newid...
^
QUERY: INSERT INTO testing_partition_2018_01_11 SELECT(testing_partition '(111,2018-01-11)').* RETURNING patent_id INTO newid;
CONTEXT: PL/pgSQL function create_partition_and_insert() line 12 at EXECUTE statement
The solution is to add RETURNING patent_id without INTO at the end of the INSERT, and to do the same thing when issuing the actual INSERT.
-- Trigger function: routes each inserted row into a per-day child table named
-- <parent>_<YYYY_MM_DD>, creating that child table on first use.
--
-- Reads:   NEW.date (used for the partition name and its CHECK constraint).
-- Returns: NULL, so the row is not stored in the table the trigger fired on.
-- Raises:  an exception when NEW.date is NULL, because no partition name can
--          be derived from it.
CREATE OR REPLACE FUNCTION create_partition_and_insert() RETURNS trigger AS
$BODY$
DECLARE
    partition_date TEXT;
    partition TEXT;
BEGIN
    IF NEW.date IS NULL THEN
        RAISE EXCEPTION 'create_partition_and_insert: date must not be NULL';
    END IF;

    partition_date := to_char(NEW.date, 'YYYY_MM_DD');
    -- TG_TABLE_NAME replaces the deprecated TG_RELNAME.
    partition := TG_TABLE_NAME || '_' || partition_date;

    IF NOT EXISTS (SELECT 1 FROM pg_class WHERE relname = partition) THEN
        -- %I quotes identifiers and %L quotes the date literal, so unusual
        -- table names cannot break or alter the generated DDL.
        EXECUTE format(
            'CREATE TABLE %I (CHECK (date = %L)) INHERITS (%I)',
            partition, NEW.date, TG_TABLE_NAME
        );
        RAISE NOTICE 'A partition has been created %', partition;
    END IF;

    -- The row is passed as a bound parameter and expanded with ($1).*, which
    -- avoids converting NEW to text and parsing it back.
    -- RETURNING is kept without INTO: inside EXECUTE, INTO is not part of the
    -- SQL text, and the returned value is not captured here.
    EXECUTE format(
        'INSERT INTO %I SELECT ($1).* RETURNING patent_id',
        partition
    ) USING NEW;

    RETURN NULL;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
Using the extended function works the following way:
# insert into testing_partition values (1, '2011-01-11') returning patent_id ;
patent_id
-----------
1
(1 row)

Postgresql function return multiple select statements

Can any one of you tell me how to approach this:
-- Sketch of the goal: one function that returns the rows of two tables with
-- different structures. "?????" marks the return type the question asks about.
CREATE OR REPLACE FUNCTION name()
RETURNS ????? AS
$func$
BEGIN
-- NOTE(review): in PL/pgSQL a bare SELECT has no destination for its result;
-- these two lines only illustrate which result sets are wanted.
SELECT * FROM tbl_a a;
SELECT * FROM tbl_b b;
END
$func$ LANGUAGE plpgsql;
Both tables have different structures.
You can use cursors but I can hardly imagine why you need such a function.
-- Opens the two cursors passed in by the caller: the first over
-- information_schema.routines, the second over information_schema.sequences.
-- The caller then fetches from each cursor by name.
CREATE OR REPLACE FUNCTION my_multiselect(refcursor, refcursor) RETURNS VOID AS
$func$
DECLARE
    routines_cursor  ALIAS FOR $1;
    sequences_cursor ALIAS FOR $2;
BEGIN
    OPEN routines_cursor FOR
        SELECT * FROM information_schema.routines;

    OPEN sequences_cursor FOR
        SELECT * FROM information_schema.sequences;
END
$func$ LANGUAGE plpgsql;
-- The cursors opened by the function are used inside one transaction here.
START TRANSACTION;

-- Open both cursors under the names given here.
SELECT my_multiselect('first_cursor_to_routines', 'second_cursor_to_sequences');

-- Read each result set from its own cursor.
FETCH ALL FROM first_cursor_to_routines;
FETCH ALL FROM second_cursor_to_sequences;

COMMIT;
I'm not really sure what you're doing with this, but it sounds like you just want to return a union of these distinct result sets. You can do this with a dynamic query. I'm using Postgres 9.4.
-- Generates (or replaces) an SQL function called name() that returns the
-- UNION ALL of the given tables. The result has one output column per distinct
-- (column_name, data_type) pair found in information_schema.columns; a table
-- that lacks a column contributes NULL::<data_type> for it.
--
-- Parameters:
--   p_tables  unqualified table names to combine; must contain at least one.
-- Raises:
--   an exception when p_tables is NULL or empty, or when none of the tables
--   has any column in information_schema.columns.
--
-- NOTE(review): data_type is used verbatim as a type name. Values such as
-- ARRAY or USER-DEFINED are presumably not usable that way; confirm before
-- using this with such columns. A column name that occurs with two different
-- data types produces two output columns of the same name.
CREATE OR REPLACE FUNCTION make_query(IN p_tables text[])
RETURNS void AS
$BODY$
DECLARE
    v_qry text;
    v_cols text;
    v_types text;
    v_as text;
BEGIN
    IF p_tables IS NULL OR array_length(p_tables, 1) IS NULL THEN
        RAISE EXCEPTION 'make_query: p_tables must contain at least one table name';
    END IF;

    -- Column definitions for RETURNS TABLE(...). The explicit ORDER BY fixes
    -- the column order so that it matches the select lists built below.
    SELECT string_agg(
               quote_ident(col.column_name) || ' ' || col.data_type,
               ',' ORDER BY col.column_name, col.data_type
           )
    INTO v_types
    FROM (
        SELECT DISTINCT
            c.column_name::text AS column_name,
            c.data_type::text AS data_type
        FROM information_schema.columns AS c
        WHERE c.table_name::text = ANY (p_tables)
    ) AS col;

    IF v_types IS NULL THEN
        RAISE EXCEPTION 'make_query: no columns found for tables %', p_tables;
    END IF;

    v_qry := '
CREATE OR REPLACE FUNCTION name()
RETURNS TABLE(' || v_types || ') AS
$func$';

    FOR i IN array_lower(p_tables, 1)..array_upper(p_tables, 1)
    LOOP
        v_as := 'tbl' || i;

        -- Select list for table i, in the same column order as v_types.
        -- bool_or() checks every row of a (column_name, data_type) group, so a
        -- column shared by several tables is never replaced by NULL for a
        -- table that actually has it.
        SELECT string_agg(
                   CASE
                       WHEN col.in_this_table
                           THEN quote_ident(v_as) || '.' || quote_ident(col.column_name)
                       ELSE 'NULL::' || col.data_type
                   END,
                   ',' ORDER BY col.column_name, col.data_type
               )
        INTO v_cols
        FROM (
            SELECT
                c.column_name::text AS column_name,
                c.data_type::text AS data_type,
                bool_or(c.table_name::text = p_tables[i]) AS in_this_table
            FROM information_schema.columns AS c
            WHERE c.table_name::text = ANY (p_tables)
            GROUP BY c.column_name::text, c.data_type::text
        ) AS col;

        IF i > array_lower(p_tables, 1) THEN
            v_qry := v_qry || '
UNION ALL';
        END IF;

        -- %I quotes the table name and its alias as identifiers.
        v_qry := v_qry || format('
SELECT %s FROM %I AS %I', v_cols, p_tables[i], v_as);
    END LOOP;

    v_qry := v_qry || ';
$func$ LANGUAGE sql;
';

    EXECUTE v_qry;
END;
$BODY$
LANGUAGE plpgsql VOLATILE;
Sorry it looks ugly here, but this formatting helps the final product look nicer. If you're shy about executing a dynamic query like this off the bat, just replace EXECUTE v_qry; with RAISE INFO 'v_qry: %', v_qry; and it will simply print the dynamic query out in a message without executing it, so you can review what it will do once executed.
Then execute make_query() with a list of tables you want to display like this:
SELECT make_query(ARRAY['tbl_a', 'tbl_b']);
The result is that you will now have a function called name() which you can call in order to see the results of both tables at the same time, with all the union details already sorted out:
SELECT * FROM name();