create table in postgresql function does not work

I am a newbie to PostgreSQL and am trying to create a table inside a PostgreSQL function. Although the function is created without raising any error, it always throws an error saying that the table I created does not exist when the function tries to insert data into it.
I am using pgAdmin to create and test my function.
My code:
create or replace function f_produceMultiroleWorkload(sourceTable text, targetTable text) returns integer as $$
declare
mysql text;
record_cnt integer;
begin
record_cnt=0;
create temp table t_staff_job_division( staff_num varchar(30), cate_division varchar(30));
mysql:='insert into t_staff_job_division select staff_num, cate_division from (select staff_num, cate_division from bpc."' || $1 || '" group by 1,2) t1 where staff_num in (select distinct staff_num from bpc."' || $1 || '" where cate_division<>staff_division) and staff_num not in (select distinct staff_num from (select staff_num, count(distinct cate_division) as division_cnt from bpc."'|| $1 ||'" group by 1) t2 where division_cnt=1)' ;
execute mysql;
EXECUTE format(
'
CREATE TABLE IF NOT EXISTS %I.%I (
staff_num varchar(30) PRIMARY KEY,
cate_division varchar(30),
score numeric(18,7)
);
',
'bpc', $2
);
mysql:='insert into bpc.' || $2 ||' select t1.staff_num, t1.cate_division, sum(normalized_individual_gross_score) from bpc."' || $1 || '" t1 inner join t_staff_job_division t2 on t1.staff_num=t2.staff_num and t1.cate_division=t2.cate_division group by 1,2';
execute mysql;
if exists(select count(*) from bpc."' || $2 || '") then
mysql:='select count(*) from bpc."' || $2 || '"';
execute mysql into record_cnt;
else
record_cnt=0;
end if;
return record_cnt;
end;
$$ language plpgsql;
The error is thrown when the function executes
mysql:='insert into bpc.' || $2 ||' select t1.staff_num, t1.cate_division, sum(normalized_individual_gross_score) from bpc."' || $1 || '" t1 inner join t_staff_job_division t2 on t1.staff_num=t2.staff_num and t1.cate_division=t2.cate_division group by 1,2';
Since the error message is in Chinese, there is no point in posting it here, but generally speaking it says that the table named bpc.$2 does not exist.
I would appreciate any help in solving this problem.
Thanks in advance.

You are a victim of (accidental) SQL injection.
The second function argument (targetTable) probably looks like this: CamelCase.
Now the CREATE TABLE statement, which is correctly constructed using format, looks like this:
CREATE TABLE bpc."CamelCase" ...
while the incorrectly constructed INSERT statement looks like
INSERT INTO bpc.CamelCase ...
Now SQL identifiers are folded to lower case in PostgreSQL unless they are (double) quoted, so the second statement will try to insert into bpc.camelcase. But table names are case sensitive, so that fails.
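A quick demonstration of the folding rule:
CREATE TABLE "CamelCase" (x integer);
SELECT * FROM CamelCase;    -- fails: ERROR: relation "camelcase" does not exist
SELECT * FROM "CamelCase";  -- works: the quoted name keeps its case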
Recommendations:
Always use format to avoid SQL injection.
Avoid using anything but lower case ASCII letters, digits and _ in identifiers.
Unrelated, but the IF EXISTS in your function will also fail. You need dynamic SQL there too.
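To illustrate the first recommendation: the failing INSERT from the question could be built with format() along these lines (a sketch, using the question's own table and column names):
mysql := format(
    'INSERT INTO %I.%I
     SELECT t1.staff_num, t1.cate_division, sum(normalized_individual_gross_score)
     FROM %I.%I t1
     JOIN t_staff_job_division t2
       ON t1.staff_num = t2.staff_num
      AND t1.cate_division = t2.cate_division
     GROUP BY 1, 2',
    'bpc', $2, 'bpc', $1);
EXECUTE mysql;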

It seems that you need to make sure that your uppercase/lowercase naming is consistent. Here, you potentially run into a problem with uppercase/lowercase because $2 is inconsistently wrapped in double-quotes (").
Notice:
EXECUTE format(
'
CREATE TABLE IF NOT EXISTS %I.%I (
staff_num varchar(30) PRIMARY KEY,
cate_division varchar(30),
score numeric(18,7)
);
',
'bpc', $2
);
If $2 is "myTable", then the table will be created as bpc.mytable because when table/column names are not wrapped in double-quotes, PostgreSQL will automatically convert to lowercase. However, for this:
mysql:='select count(*) from bpc."' || $2 || '"';
The table name will get interpreted as bpc."myTable" and because it is in double-quotes, the uppercase T will be preserved. In Postgres mytable != "myTable", so you will get an error saying that bpc."myTable" does not exist.
Please consider building every statement that references the table with format() and %I, so that the quoting stays consistent:
EXECUTE format('INSERT INTO %I.%I SELECT ...', 'bpc', $2);
EXECUTE format('SELECT count(*) FROM %I.%I', 'bpc', $2) INTO record_cnt;
Do not wrap %I in literal double-quotes (as in %I."%I"): %I already adds the quotes when they are needed, so adding your own would produce a broken identifier.

Related

Getting A "Could Not Open Relation" Error On Simple Query

I have a function that creates a set of INSERT INTO ... VALUES scripts. If I uncomment the dvp.content line, the function fails with an "ERROR: could not open relation with OID ###", which refers to the temp table. The content column is a jsonb type. I'm not sure where to begin.
CREATE OR REPLACE FUNCTION export_docs_as_sql(doc_list uuid[], to_org_id uuid)
RETURNS table(id integer, sql text)
AS $$
BEGIN
...
-- use a temp table to gather all INSERT statements
CREATE TEMP TABLE IF NOT EXISTS doc_data_export(
id serial PRIMARY KEY,
sql text
);
...
-- get doc_version_pages
INSERT INTO doc_data_export(sql)
SELECT 'INSERT INTO doc_version_pages(id, doc_version_id, persona_id, care_category_id, patient_group_id, title, content, created_at, updated_at, is_guide, is_root) VALUES (' ||
quote_literal(dvp.id::TEXT) || ', ' ||
quote_literal(dvp.doc_version_id::TEXT) || ', ' ||
CASE WHEN p.name IS NOT NULL THEN '(SELECT px.id FROM personas px WHERE px.org_id = ' || quote_literal(dv.id::TEXT) || ' AND px.name = ' || quote_literal(p.name) || '), ' ELSE 'NULL, ' END ||
CASE WHEN c.name IS NOT NULL THEN '(SELECT cx.id FROM care_categories cx WHERE cx.org_id = ' || quote_literal(to_org_id) || ' AND cx.name = ' || quote_literal(c.name) || '), ' ELSE 'NULL, ' END ||
CASE WHEN g.name IS NOT NULL THEN '(SELECT gx.id FROM patient_groups gx WHERE gx.org_id = ' || quote_literal(to_org_id) || ' AND gx.name = ' || quote_literal(g.name) || '), ' ELSE 'NULL, ' END ||
quote_literal(dvp.title::TEXT) || ', ' ||
--dvp.content || ', ' ||
quote_literal(dvp.created_at::TEXT) || ', ' ||
quote_literal(now()::timestamp) || ', ' ||
quote_literal(dvp.is_guide::TEXT) || ', ' ||
quote_literal(dvp.is_root::TEXT) || ');'
FROM unnest(doc_list) l
INNER JOIN doc_versions dv ON l = dv.doc_id
INNER JOIN doc_version_pages dvp ON dv.id = dvp.doc_version_id
LEFT JOIN personas p ON dvp.persona_id = p.id
LEFT JOIN care_categories c ON dvp.care_category_id = c.id
LEFT JOIN patient_groups g ON dvp.patient_group_id = g.id;
...
-- output all inserts
RETURN QUERY SELECT * FROM doc_data_export;
-- drop temp table
DROP TABLE doc_data_export;
END;
$$ LANGUAGE plpgsql;
The "Could Not Open Relation" problem is occurring due to the bug described here, which remains an issue as of Postgres 14.0:
What seems to be happening is that if the strings are large enough to be
toasted, then the data returned out of the function with RETURN QUERY
contains toast pointers referencing the temp table's toast table.
If you drop the temp table then those pointers will fail upon use.
To explain further, when a column value is larger than the TOAST_TUPLE_THRESHOLD (a compile-time setting, usually 2KB) and cannot be compressed, or when the column is configured with a storage parameter of EXTERNAL, the value will be broken into chunks and stored in a special secondary table called a TOAST table. This table lives in the pg_toast schema and is named like pg_toast.pg_toast_<table OID>.
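As an aside, you can look up which TOAST table backs a given table yourself (replace my_table with your table name):
SELECT reltoastrelid::regclass
FROM pg_class
WHERE oid = 'my_table'::regclass;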
So when you add dvp.content to the SQL statement you insert into doc_data_export, some of those values are larger than the aforementioned threshold and are thus TOASTed. Your RETURN QUERY is only sending pointers to the values in the TOAST table. After the return is done, the temporary table and its corresponding TOAST table are dropped. Thus, when the outer query attempts to materialize the results, it can't find the TOAST table that these pointers reference, hence the cryptic error message you see.
You can avoid sending TOAST pointers for the temporary table (and thus safely DROP it after the RETURN QUERY) by performing an operation on the sql column that returns the same value:
RETURN QUERY SELECT id, sql || '' FROM doc_data_export;
The simple function below will reproduce a minimal example of the TOAST bug when you set fail to true and demonstrate the successful workaround when you set fail to false.
DROP FUNCTION IF EXISTS buttered_toast(boolean);
CREATE OR REPLACE FUNCTION buttered_toast(fail boolean)
RETURNS table(id integer, enormous_data text)
AS $$
BEGIN
CREATE TEMPORARY TABLE tbl_with_toasts (
id integer PRIMARY KEY,
enormous_data text
) ON COMMIT DROP;
--generate a giant string that is sure to generate a TOAST table.
INSERT INTO tbl_with_toasts(id,enormous_data) SELECT 1, string_agg(gen_random_uuid()::text,'-') FROM generate_series(1,10000) as ints(int);
IF buttered_toast.fail THEN
-- will return pointers to tbl_with_toast's TOAST table for the "enormous_data" column.
RETURN QUERY SELECT tbl_with_toasts.id, tbl_with_toasts.enormous_data FROM tbl_with_toasts ;
ELSE
-- will generate and return new values for the "enormous_data" column
RETURN QUERY SELECT tbl_with_toasts.id, tbl_with_toasts.enormous_data || '' FROM tbl_with_toasts ;
END IF;
DROP TABLE tbl_with_toasts;
END;
$$ LANGUAGE plpgsql;
-- fails with "Could Not Open Relation"
select * from buttered_toast(true);
--succeeds
select * from buttered_toast(false);

postgres lag and window to create cohort table [duplicate]

I am trying to create crosstab queries in PostgreSQL such that the crosstab columns are generated automatically instead of being hardcoded. I have written a function that dynamically generates the column list that I need for my crosstab query. The idea is to substitute the result of this function into the crosstab query using dynamic SQL.
I know how to do this easily in SQL Server, but my limited knowledge of PostgreSQL is hindering my progress here. I was thinking of storing the result of the function that generates the dynamic list of columns in a variable, and using that to dynamically build the SQL query. It would be great if someone could guide me on this.
-- Table which has be pivoted
CREATE TABLE test_db
(
kernel_id int,
key int,
value int
);
INSERT INTO test_db VALUES
(1,1,99),
(1,2,78),
(2,1,66),
(3,1,44),
(3,2,55),
(3,3,89);
-- This function dynamically returns the list of columns for crosstab
CREATE FUNCTION test() RETURNS TEXT AS '
DECLARE
key_id int;
text_op TEXT = '' kernel_id int, '';
BEGIN
FOR key_id IN SELECT DISTINCT key FROM test_db ORDER BY key LOOP
text_op := text_op || key_id || '' int , '' ;
END LOOP;
text_op := text_op || '' DUMMY text'';
RETURN text_op;
END;
' LANGUAGE 'plpgsql';
-- This query works. I just need to convert the static list
-- of crosstab columns to be generated dynamically.
SELECT * FROM
crosstab
(
'SELECT kernel_id, key, value FROM test_db ORDER BY 1,2',
'SELECT DISTINCT key FROM test_db ORDER BY 1'
)
AS x (kernel_id int, key1 int, key2 int, key3 int); -- How can I replace ..
-- .. this static list with a dynamically generated list of columns ?
You can use the provided C function crosstab_hash for this.
The manual is not very clear in this respect. It's mentioned at the end of the chapter on crosstab() with two parameters:
You can create predefined functions to avoid having to write out the
result column names and types in each query. See the examples in the
previous section. The underlying C function for this form of crosstab
is named crosstab_hash.
For your example:
CREATE OR REPLACE FUNCTION f_cross_test_db(text, text)
RETURNS TABLE (kernel_id int, key1 int, key2 int, key3 int)
AS '$libdir/tablefunc','crosstab_hash' LANGUAGE C STABLE STRICT;
Call:
SELECT * FROM f_cross_test_db(
'SELECT kernel_id, key, value FROM test_db ORDER BY 1,2'
,'SELECT DISTINCT key FROM test_db ORDER BY 1');
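With the sample data above, this returns:
 kernel_id | key1 | key2 | key3
-----------+------+------+------
         1 |   99 |   78 |
         2 |   66 |      |
         3 |   44 |   55 |   89
(3 rows)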
Note that you need to create a distinct crosstab_hash function for every crosstab function with a different return type.
Related:
PostgreSQL row to columns
Your function to generate the column list is rather convoluted, and the result is incorrect (int missing after kernel_id). It can be replaced with this SQL query:
SELECT 'kernel_id int, '
|| string_agg(DISTINCT key::text, ' int, ' ORDER BY key::text)
|| ' int, DUMMY text'
FROM test_db;
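With the sample data above, this produces:
kernel_id int, 1 int, 2 int, 3 int, DUMMY text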
And it cannot be used dynamically anyway.
@erwin-brandstetter: The return type of the function isn't an issue if you're always returning a JSON type with the converted results.
Here is the function I came up with:
CREATE OR REPLACE FUNCTION report.test(
i_start_date TIMESTAMPTZ,
i_end_date TIMESTAMPTZ,
i_interval INT
) RETURNS TABLE (
tab JSON
) AS $ab$
DECLARE
_key_id TEXT;
_text_op TEXT = '';
_ret JSON;
BEGIN
-- SELECT DISTINCT for query results
FOR _key_id IN
SELECT DISTINCT at_name
FROM report.company_data_date cd
JOIN report.company_data_amount cda ON cd.id = cda.company_data_date_id
JOIN report.amount_types at ON cda.amount_type_id = at.id
WHERE date_start BETWEEN i_start_date AND i_end_date
AND interval_type_id = i_interval
LOOP
-- build function_call with datatype of column
IF char_length(_text_op) > 1 THEN
_text_op := _text_op || ', ' || _key_id || ' NUMERIC(20,2)';
ELSE
_text_op := _text_op || _key_id || ' NUMERIC(20,2)';
END IF;
END LOOP;
-- build query with parameter filters
RETURN QUERY
EXECUTE '
SELECT array_to_json(array_agg(row_to_json(t)))
FROM (
SELECT * FROM crosstab(''SELECT date_start, at.at_name, cda.amount ct
FROM report.company_data_date cd
JOIN report.company_data_amount cda ON cd.id = cda.company_data_date_id
JOIN report.amount_types at ON cda.amount_type_id = at.id
WHERE date_start between $$' || i_start_date::TEXT || '$$ AND $$' || i_end_date::TEXT || '$$
AND interval_type_id = ' || i_interval::TEXT || ' ORDER BY date_start'')
AS ct (date_start timestamptz, ' || _text_op || ')
) t;';
END;
$ab$ LANGUAGE 'plpgsql';
So, when you run it, you get the dynamic results in JSON, and you don't need to know how many values were pivoted:
select * from report.test(now()- '1 week'::interval, now(), 1);
tab
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[{"date_start":"2015-07-27T08:40:01.277556-04:00","burn_rate":0.00,"monthly_revenue":5800.00,"cash_balance":0.00},{"date_start":"2015-07-27T08:50:02.458868-04:00","burn_rate":34000.00,"monthly_revenue":15800.00,"cash_balance":24000.00}]
(1 row)
Edit: If you have mixed data types in your crosstab, you can add logic to look up the type of each column with something like this:
SELECT a.attname as column_name, format_type(a.atttypid, a.atttypmod) AS data_type
FROM pg_attribute a
JOIN pg_class b ON (a.attrelid = b.oid)
JOIN pg_catalog.pg_namespace n ON n.oid = b.relnamespace
WHERE n.nspname = $$schema_name$$ AND b.relname = $$table_name$$ AND a.attstattarget = -1;
I realise this is an older post, but I struggled for a while with the same issue.
My Problem Statement:
I had a table with multiple values in a field and wanted to create a crosstab query with 40+ column headings per row.
My solution was to create a function that loops through the table column to grab the values I want to use as column headings within the crosstab query.
Within this function I could then create the crosstab query. In my use case I added the crosstab result to a separate table.
E.g.
CREATE OR REPLACE FUNCTION field_values_ct ()
RETURNS VOID AS $$
DECLARE rec RECORD;
DECLARE str text;
BEGIN
str := '"Issue ID" text,';
-- looping to get column heading string
FOR rec IN SELECT DISTINCT field_name
FROM issue_fields
ORDER BY field_name
LOOP
str := str || '"' || rec.field_name || '" text' ||',';
END LOOP;
str:= substring(str, 0, length(str));
EXECUTE 'CREATE EXTENSION IF NOT EXISTS tablefunc;
DROP TABLE IF EXISTS temp_issue_fields;
CREATE TABLE temp_issue_fields AS
SELECT *
FROM crosstab(''select issue_id, field_name, field_value from issue_fields order by 1'',
''SELECT DISTINCT field_name FROM issue_fields ORDER BY 1'')
AS final_result ('|| str ||')';
END;
$$ LANGUAGE plpgsql;
The approach described here worked well for me. Instead of retrieving the pivot table directly, the easier approach is to let the function generate a SQL query string, then dynamically execute the resulting SQL on demand.
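For example, here is a minimal sketch of that two-step approach against the test_db table from the question (the helper function make_crosstab_sql is a hypothetical name):
-- Step 1: build the finished crosstab statement as text
CREATE OR REPLACE FUNCTION make_crosstab_sql()
  RETURNS text AS
$$
SELECT 'SELECT * FROM crosstab('
    || quote_literal('SELECT kernel_id, key, value FROM test_db ORDER BY 1,2')
    || ', '
    || quote_literal('SELECT DISTINCT key FROM test_db ORDER BY 1')
    || ') AS x (kernel_id int, '
    || string_agg('key' || key || ' int', ', ' ORDER BY key)
    || ')'
FROM (SELECT DISTINCT key FROM test_db) d;
$$ LANGUAGE sql;
-- Step 2: execute the generated statement on demand, e.g. in psql 9.6+:
SELECT make_crosstab_sql() \gexec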

Performance of joining on multiple columns with potential NULL values

Let's say we have the following table
CREATE TABLE my_table
(
record_id SERIAL,
column_1 INTEGER,
column_2 INTEGER,
column_3 INTEGER,
price NUMERIC
);
With the following data
INSERT INTO my_table (column_1, column_2, column_3, price) VALUES
(1, NULL, 1, 54.99),
(1, NULL, 1, 69.50),
(NULL, 2, 2, 54.99),
(NULL, 2, 2, 69.50),
(3, 3, NULL, 54.99),
(3, 3, NULL, 69.50);
Now we do something like
CREATE TABLE my_table_aggregations AS
SELECT
ROW_NUMBER() OVER () AS aggregation_id,
column_1,
column_2,
column_3
FROM my_table
GROUP BY
column_1,
column_2,
column_3;
What I want to do now is assign an aggregation_id to each record_id in my_table. Because I have NULL values, I can't simply join on t1.column_1 = t2.column_1: NULL = NULL yields NULL, so the join would exclude these records.
Now I know that I should use something like this
SELECT
t.record_id,
agg.aggregation_id
FROM my_table t
JOIN my_table_aggregations agg ON
(
((t.column_1 IS NULL AND agg.column_1 IS NULL) OR t.column_1 = agg.column_1) AND
((t.column_2 IS NULL AND agg.column_2 IS NULL) OR t.column_2 = agg.column_2) AND
((t.column_3 IS NULL AND agg.column_3 IS NULL) OR t.column_3 = agg.column_3)
);
The problem here is that I am dealing with hundreds of millions of records and having an OR in the join seems to take forever to run.
There is an alternative, which is something like this
SELECT
t.record_id,
agg.aggregation_id
FROM my_table t
JOIN my_table_aggregations agg ON
(
COALESCE(t.column_1, -1) = COALESCE(agg.column_1, -1) AND
COALESCE(t.column_2, -1) = COALESCE(agg.column_2, -1) AND
COALESCE(t.column_3, -1) = COALESCE(agg.column_3, -1)
);
But the problem with this is that I am assuming there is no value in any of those columns which is -1.
Do note, this is an example; I am well aware that I could use DENSE_RANK to get the same result, so let's pretend that isn't an option.
Is there some crazy awesome way to get around having to use COALESCE while keeping its performance advantage over the correct way with OR? I ran tests, and the COALESCE is over 10 times faster than the OR.
I am running this on a Greenplum database so I am not sure if this performance difference is the same on a standard Postgres database.
Since my solution with NULLIF had performance problems, and your use of COALESCE was much faster, I wonder if you could try tweaking that solution to deal with the issue of -1. To do that, you could try casting to avoid false matches. I'm not sure what the performance hit would be, but it would look like:
SELECT
t.record_id,
agg.aggregation_id
FROM my_table t
JOIN my_table_aggregations agg ON
(
COALESCE(cast(t.column_1 as varchar), 'NA') =
COALESCE(cast(agg.column_1 as varchar), 'NA') AND
COALESCE(cast(t.column_2 as varchar), 'NA') =
COALESCE(cast(agg.column_2 as varchar), 'NA') AND
COALESCE(cast(t.column_3 as varchar), 'NA') =
COALESCE(cast(agg.column_3 as varchar), 'NA')
);
After doing some thinking, I decided the best approach to this is to dynamically find a value for each column that can be used as the second parameter in a COALESCE join. The function is rather long, but it does what I need and, more importantly, it keeps the COALESCE performance. The only downside is that getting the MIN values has an additional time cost, but we are talking about a minute.
Here is the function:
CREATE OR REPLACE FUNCTION pg_temp.get_null_join_int_value
(
left_table_schema TEXT,
left_table_name TEXT,
left_table_columns TEXT[],
right_table_schema TEXT,
right_table_name TEXT,
right_table_columns TEXT[],
output_table_schema TEXT,
output_table_name TEXT
) RETURNS TEXT AS
$$
DECLARE
colum_name TEXT;
sql TEXT;
complete_sql TEXT;
full_left_table TEXT;
full_right_table TEXT;
full_output_table TEXT;
BEGIN
/*****************************
VALIDATE PARAMS
******************************/
-- this section validates all of the function parameters ensuring that the values that cannot be NULL are not so
-- also checks for empty arrays which is not allowed and then ensures both arrays are of the same length
IF (left_table_name IS NULL) THEN
RAISE EXCEPTION 'left_table_name cannot be NULL';
ELSIF (left_table_columns IS NULL) THEN
RAISE EXCEPTION 'left_table_columns cannot be NULL';
ELSIF (right_table_name IS NULL) THEN
RAISE EXCEPTION 'right_table_name cannot be NULL';
ELSIF (right_table_columns IS NULL) THEN
RAISE EXCEPTION 'right_table_columns cannot be NULL';
ELSIF (output_table_name IS NULL) THEN
RAISE EXCEPTION 'output_table_name cannot be NULL';
ELSIF (array_upper(left_table_columns, 1) IS NULL) THEN
RAISE EXCEPTION 'left_table_columns cannot be an empty array';
ELSIF (array_upper(right_table_columns, 1) IS NULL) THEN
RAISE EXCEPTION 'right_table_columns cannot be an empty array';
ELSIF (array_upper(left_table_columns, 1) <> array_upper(right_table_columns, 1)) THEN
RAISE EXCEPTION 'left_table_columns and right_table_columns must have a matching array length';
END IF;
/************************
TABLE NAMES
*************************/
-- create the full name of the left table
-- the schema name can be NULL which means that the table is temporary
-- because of this, we need to detect if we should specify the schema
IF (left_table_schema IS NOT NULL) THEN
full_left_table = left_table_schema || '.' || left_table_name;
ELSE
full_left_table = left_table_name;
END IF;
-- create the full name of the right table
-- the schema name can be NULL which means that the table is temporary
-- because of this, we need to detect if we should specify the schema
IF (right_table_schema IS NOT NULL) THEN
full_right_table = right_table_schema || '.' || right_table_name;
ELSE
full_right_table = right_table_name;
END IF;
-- create the full name of the output table
-- the schema name can be NULL which means that the table is temporary
-- because of this, we need to detect if we should specify the schema
IF (output_table_schema IS NOT NULL) THEN
full_output_table = output_table_schema || '.' || output_table_name;
ELSE
full_output_table = output_table_name;
END IF;
/**********************
LEFT TABLE
***********************/
-- start to create the table which will store the min values from the left table
sql =
'DROP TABLE IF EXISTS temp_null_join_left_table;' || E'\n' ||
'CREATE TEMP TABLE temp_null_join_left_table AS' || E'\n' ||
'SELECT';
-- loop through each column name in the left table column names parameter
FOR colum_name IN SELECT UNNEST(left_table_columns) LOOP
-- find the minimum value in this column and subtract one
-- we will use this as a value we know is not in the column of this table
sql = sql || E'\n\t' || 'MIN("' || colum_name || '")-1 AS "' || colum_name || '",';
END LOOP;
-- remove the trailing comma from the SQL
sql = TRIM(TRAILING ',' FROM sql);
-- finish the SQL to create the left table min values
sql = sql || E'\n' ||
'FROM ' || full_left_table || ';';
-- run the query that creates the table which stores the minimum values for each column in the left table
EXECUTE sql;
-- store the sql which will be the return value of the function
complete_sql = sql;
/************************
RIGHT TABLE
*************************/
-- start to create the table which will store the min values from the right table
sql =
'DROP TABLE IF EXISTS temp_null_join_right_table;' || E'\n' ||
'CREATE TEMP TABLE temp_null_join_right_table AS' || E'\n' ||
'SELECT';
-- loop through each column name in the right table column names parameter
FOR colum_name IN SELECT UNNEST(right_table_columns) LOOP
-- find the minimum value in this column and subtract one
-- we will use this as a value we know is not in the column of this table
sql = sql || E'\n\t' || 'MIN("' || colum_name || '")-1 AS "' || colum_name || '",';
END LOOP;
-- remove the trailing comma from the SQL
sql = TRIM(TRAILING ',' FROM sql);
-- finish the SQL to create the right table min values
sql = sql || E'\n' ||
'FROM ' || full_right_table || ';';
-- run the query that creates the table which stores the minimum values for each column in the right table
EXECUTE sql;
-- store the sql which will be the return value of the function
complete_sql = complete_sql || E'\n\n' || sql;
-- start to create the final output table which will contain the column names defined in the left_table_columns parameter
-- each column will contain a negative value that is not present in both the left and right tables for the given column
sql =
'DROP TABLE IF EXISTS ' || full_output_table || ';' || E'\n' ||
'CREATE ' || (CASE WHEN output_table_schema IS NULL THEN 'TEMP ' ELSE '' END) || 'TABLE ' || full_output_table || ' AS' || E'\n' ||
'SELECT';
-- loop through each index of the left_table_columns array
FOR i IN coalesce(array_lower(left_table_columns, 1), 1)..coalesce(array_upper(left_table_columns, 1), 1) LOOP
-- add to the sql a call to the LEAST function
-- this function takes an infinite number of columns and returns the smallest value within those columns
-- we have -1 hardcoded because the smallest minimum value may be a positive integer and so we need to ensure the number used is negative
-- this way we will not confuse this value with a real ID from a table
sql = sql || E'\n\t' || 'LEAST(l."' || left_table_columns[i] || '", r."' || right_table_columns[i] || '", -1) AS "' || left_table_columns[i] || '",';
END LOOP;
-- remove the trailing comma from the SQL
sql = TRIM(TRAILING ',' FROM sql);
-- finish off the SQL which creates the final table
sql = sql || E'\n' ||
'FROM temp_null_join_left_table l' || E'\n' ||
'CROSS JOIN temp_null_join_right_table r' || ';';
-- create the final table
EXECUTE sql;
-- store the sql which will be the return value of the function
complete_sql = complete_sql || E'\n\n' || sql;
-- we no longer need these tables
sql =
'DROP TABLE IF EXISTS temp_null_join_left_table;' || E'\n' ||
'DROP TABLE IF EXISTS temp_null_join_right_table;';
EXECUTE sql;
-- store the sql which will be the return value of the function
complete_sql = complete_sql || E'\n\n' || sql;
-- return the SQL that has been run, good for debugging purposes or just understanding what the function does
RETURN complete_sql;
END;
$$
LANGUAGE plpgsql;
Below is an example usage of the function
SELECT pg_temp.get_null_join_int_value
(
-- left table
'public',
'my_table',
'{"column_1", "column_2", "column_3"}',
-- right table
'public',
'my_table_aggregations',
'{"column_1", "column_2", "column_3"}',
-- output table
NULL,
'temp_null_join_values'
);
Once the temp_null_join_values table is created you can do a sub select in the join for the COALESCE 2nd param.
DROP TABLE IF EXISTS temp_result_table;
CREATE TEMP TABLE temp_result_table AS
SELECT
t.record_id,
agg.aggregation_id
FROM public.my_table t
JOIN my_table_aggregations agg ON
(
COALESCE(t.column_1, (SELECT column_1 FROM temp_null_join_values)) = COALESCE(agg.column_1, (SELECT column_1 FROM temp_null_join_values)) AND
COALESCE(t.column_2, (SELECT column_2 FROM temp_null_join_values)) = COALESCE(agg.column_2, (SELECT column_2 FROM temp_null_join_values)) AND
COALESCE(t.column_3, (SELECT column_3 FROM temp_null_join_values)) = COALESCE(agg.column_3, (SELECT column_3 FROM temp_null_join_values))
);
I hope this helps someone.
How about:
SELECT
t.record_id,
agg.aggregation_id
FROM my_table t
JOIN my_table_aggregations agg ON
(
NULLIF(t.column_1, agg.column_1) IS NULL
AND
NULLIF(agg.column_1, t.column_1) IS NULL
AND
NULLIF(t.column_2, agg.column_2) IS NULL
AND
NULLIF(agg.column_2, t.column_2) IS NULL
AND
NULLIF(t.column_3, agg.column_3) IS NULL
AND
NULLIF(agg.column_3, t.column_3) IS NULL
);
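For what it's worth, each NULLIF pair above is equivalent to PostgreSQL's built-in NULL-safe comparison IS NOT DISTINCT FROM, which reads more clearly (though, like the OR version, it is not guaranteed to match the speed of the COALESCE trick):
SELECT
    t.record_id,
    agg.aggregation_id
FROM my_table t
JOIN my_table_aggregations agg ON
(
    t.column_1 IS NOT DISTINCT FROM agg.column_1 AND
    t.column_2 IS NOT DISTINCT FROM agg.column_2 AND
    t.column_3 IS NOT DISTINCT FROM agg.column_3
);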

Error: a column definition list is required for functions in dblink using PostgreSQL 9.3

I have the following function:
In it, I am updating one database table by joining it with a table from another database using dblink().
I have installed:
create extension dblink;
More details are shown below:
CREATE OR REPLACE FUNCTION Fun_test
(
Table_Name varchar
)
RETURNS void AS
$BODY$
DECLARE
dynamic_statement varchar;
BEGIN
perform dblink_connect('port=5234 dbname=testdb user=postgres password=****');
dynamic_statement := 'With CTE AS
(
Select HNumber,JoiningDate,Name,Address
From '|| Table_Name ||'c
)
, Test_A
AS
(
Select Row_Number() over ( Partition by PNumber order by Date1 Desc,Date2 Desc) AS roNum,
Name,PNumber,Date1,Address
From dblink(
''Select distinct PNumber,
(
case when fname is null then '' else fname end || '' ||
case when lname is null then '' else lname end
) as FullName,
Address,
Date1,Date2
From testdb_Table
inner join CTE on CTE.HNumber = PNumber''
) Num
)
Update CTE
Set
Name = Test_A.FullName
,SubAddress_A = Test_A.Address
,Date1 = Test_A.Date1
from CTE
left outer join Test_A on
CTE.HNumber= Test_A.PNumber
where roNum =1';
RAISE INFO '%',dynamic_statement;
EXECUTE dynamic_statement;
perform dblink_disconnect();
END;
$BODY$
LANGUAGE PLPGSQL;
Calling Function:
select fun_test('test1');
Getting an error:
ERROR: a column definition list is required for functions returning "record"
LINE 11: From dblink
^
You have to tell PostgreSQL what columns the dblink query will return.
See the manual for dblink for details.
This is the same as for any function returning a runtime-determined record type. You can't query it without telling PostgreSQL what the column layout of the results will be.
You use a column specifier list, e.g.
SELECT * FROM my_function_returning_record() f(col1 text, col2 integer);
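Applied to the query in the question, the dblink call needs an alias with a column definition list matching what the remote SELECT returns, roughly like this (the column types here are guesses and must match your actual schema):
FROM dblink('SELECT DISTINCT PNumber, ...')
     AS Num (PNumber varchar, FullName varchar, Address varchar, Date1 date, Date2 date)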
If you are on a current PostgreSQL version you may want to look at postgres_fdw as an alternative to dblink.

EXECUTE...INTO...USING statement in PL/pgSQL can't execute into a record?

I'm attempting to write an area of a function in PL/pgSQL that loops through an hstore and sets a record's column (the key of the hstore) to a specific value (the value of the hstore). I'm using Postgres 9.1.
The hstore will look like: ' "column1"=>"value1","column2"=>"value2" '
Generally, here is what I want from a function that takes in an hstore and has a record with values to modify:
FOR my_key, my_value IN
SELECT key,
value
FROM EACH( in_hstore )
LOOP
EXECUTE 'SELECT $1'
INTO my_row.my_key
USING my_value;
END LOOP;
The error which I am getting with this code:
"myrow" has no field "my_key". I've been searching for quite a while now for a solution, but everything else I've tried to achieve the same result hasn't worked.
Simpler alternative to your posted answer. Should perform much better.
This function retrieves a row from a given table (in_table_name) and primary key value (in_row_pk), and inserts it as new row into the same table, with some values replaced (in_override_values). The new primary key value as per default is returned (pk_new).
CREATE OR REPLACE FUNCTION f_clone_row(in_table_name regclass
, in_row_pk int
, in_override_values hstore
, OUT pk_new int)
LANGUAGE plpgsql AS
$func$
DECLARE
_pk text; -- name of PK column
_cols text; -- list of names of other columns
BEGIN
-- Get name of PK column
SELECT INTO _pk a.attname
FROM pg_catalog.pg_index i
JOIN pg_catalog.pg_attribute a ON a.attrelid = i.indrelid
AND a.attnum = i.indkey[0] -- single PK col!
WHERE i.indrelid = in_table_name
AND i.indisprimary;
-- Get list of columns excluding PK column
SELECT INTO _cols string_agg(quote_ident(attname), ',')
FROM pg_catalog.pg_attribute
WHERE attrelid = in_table_name -- regclass used as OID
AND attnum > 0 -- exclude system columns
AND attisdropped = FALSE -- exclude dropped columns
AND attname <> _pk; -- exclude PK column
-- INSERT cloned row with override values, returning new PK
EXECUTE format('
INSERT INTO %1$I (%2$s)
SELECT %2$s
FROM (SELECT (t #= $1).* FROM %1$I t WHERE %3$I = $2) x
RETURNING %3$I'
, in_table_name, _cols, _pk)
USING in_override_values, in_row_pk -- use override values directly
INTO pk_new; -- return new pk directly
END
$func$;
Call:
SELECT f_clone_row('tbl', 1, '"col1"=>"foo_new","col2"=>"bar_new"');
Use regclass as input parameter type, so only valid table names can be used to begin with and SQL injection is ruled out. The function also fails earlier and more gracefully if you should provide an illegal table name.
Use an OUT parameter (pk_new) to simplify the syntax.
No need to figure out the next value for the primary key manually. It is inserted automatically and returned after the fact. That's not only simpler and faster, you also avoid wasted or out-of-order sequence numbers.
Use format() to simplify the assembly of the dynamic query string and make it less error-prone. Note how I use positional parameters for identifiers and unquoted strings respectively.
I build on your implicit assumption that allowed tables have a single primary key column of type integer with a column default. Typically serial columns.
Key element of the function is the final INSERT:
Merge override values with the existing row using the #= operator in a subselect and decompose the resulting row immediately.
Then you can select only relevant columns in the main SELECT.
Let Postgres assign the default value for the PK and get it back with the RETURNING clause.
Write the returned value into the OUT parameter directly.
All done in a single SQL command, that is generally fastest.
Since I didn't want to depend on any external functions, for speed purposes, I created a solution using hstore to insert a record into a table:
CREATE OR REPLACE FUNCTION fn_clone_row(in_table_name character varying, in_row_pk integer, in_override_values hstore)
RETURNS integer
LANGUAGE plpgsql
AS $function$
DECLARE
my_table_pk_col_name varchar;
my_key text;
my_value text;
my_row record;
my_pk_default text;
my_pk_new integer;
my_pk_new_text text;
my_row_hstore hstore;
my_row_keys text[];
my_row_keys_list text;
my_row_values text[];
my_row_values_list text;
BEGIN
-- Get the next value of the pk column for the table.
SELECT ad.adsrc,
at.attname
INTO my_pk_default,
my_table_pk_col_name
FROM pg_attrdef ad
JOIN pg_attribute at
ON at.attnum = ad.adnum
AND at.attrelid = ad.adrelid
JOIN pg_class c
ON c.oid = at.attrelid
JOIN pg_constraint cn
ON cn.conrelid = c.oid
AND cn.contype = 'p'
AND cn.conkey[1] = at.attnum
JOIN pg_namespace n
ON n.oid = c.relnamespace
WHERE c.relname = in_table_name
AND n.nspname = 'public';
-- Get the next value of the pk in a local variable
EXECUTE ' SELECT ' || my_pk_default
INTO my_pk_new;
-- Set the integer value back to text for the hstore
my_pk_new_text := my_pk_new::text;
-- Add the next value statement to the hstore of changes to make.
in_override_values := in_override_values || hstore( my_table_pk_col_name, my_pk_new_text );
-- Copy over only the given row to the record.
EXECUTE ' SELECT * '
' FROM ' || quote_ident( in_table_name ) ||
' WHERE ' || quote_ident( my_table_pk_col_name ) ||
' = ' || quote_nullable( in_row_pk )
INTO my_row;
-- Replace the values that need to be changed in the column name array
my_row := my_row #= in_override_values;
-- Create an hstore of my record
my_row_hstore := hstore( my_row );
-- Create a string of comma-delimited, quote-enclosed column names
my_row_keys := akeys( my_row_hstore );
SELECT array_to_string( array_agg( quote_ident( x.colname ) ), ',' )
INTO my_row_keys_list
FROM ( SELECT unnest( my_row_keys ) AS colname ) x;
-- Create a string of comma-delimited, quote-enclosed column values
my_row_values := avals( my_row_hstore );
SELECT array_to_string( array_agg( quote_nullable( x.value ) ), ',' )
INTO my_row_values_list
FROM ( SELECT unnest( my_row_values ) AS value ) x;
-- Insert the values into the columns of a new row
EXECUTE 'INSERT INTO ' || in_table_name || '(' || my_row_keys_list || ')'
' VALUES (' || my_row_values_list || ')';
RETURN my_pk_new;
END
$function$;
It's quite a bit longer than what I had envisioned, but it works and is actually quite speedy.
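For reference, a call to this version looks like this (my_table and the override value are hypothetical):
SELECT fn_clone_row('my_table', 1, '"column_1"=>"42"');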