PostgreSQL: Pass value from an array and do union - postgresql

-- Each UNION branch carries its own WITH clause, so every branch has to be
-- wrapped in parentheses; a bare WITH directly after UNION is a syntax error.
(
    WITH temp AS (select * from t1 where c1 = 'string1')
    select 'string1' as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like 'string1%'
)
union
(
    WITH temp AS (select * from t1 where c1 = 'string2')
    select 'string2' as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like 'string2%'
)
...
Above is just an example of a PostgreSQL query I am trying to run. It's a union of two completely similar queries. They only use different values for matching string1 and string2.
I have about 20 such queries that I want to do a union on. They only differ by the variable I want to use for comparison such as string1
How can I use such array of values ['string1', 'string2', 'string3', .., 'string20'], run a query on each variable from this array and union them?

What about an old-fashioned PL/pgSQL function that builds the UNION query dynamically?
-- get_all_foo(arr): builds one parenthesized sub-query per element of arr,
-- glues them together with UNION and returns the combined rows.
--
--   arr     values compared with t1.c1 and used as a LIKE prefix on t2.some_col
--           (an element containing % or _ still acts as a LIKE wildcard)
--   returns (col1, col2, col3) = (array element, t2.col2, t1.col3);
--           no rows at all when arr is NULL or empty
CREATE OR REPLACE FUNCTION get_all_foo(arr varchar[]) RETURNS TABLE (col1 TEXT, col2 TEXT, col3 TEXT) AS
$BODY$
DECLARE
    arr_value varchar;
    generated_query text := '';
BEGIN
    -- FOREACH raises an error on a NULL array and an empty array would leave
    -- an empty string to EXECUTE, so both cases return an empty result early.
    IF arr IS NULL OR cardinality(arr) = 0 THEN
        RETURN;
    END IF;

    FOREACH arr_value IN ARRAY arr LOOP
        -- Separate branches by checking what has been generated so far; this
        -- stays correct even when arr has more than one dimension.
        IF generated_query <> '' THEN
            generated_query := generated_query || ' UNION ';
        END IF;

        -- Every value goes through %L so that quotes inside an element cannot
        -- break (or inject into) the generated statement. The LIKE pattern is
        -- built first and then quoted as a whole.
        generated_query := generated_query || format(' (WITH temp AS (select * from t1 where c1 = %L) '
            'select %L as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like %L)',
            arr_value, arr_value, arr_value || '%');
    END LOOP;

    RAISE DEBUG 'Generated query: %', generated_query;
    RETURN QUERY EXECUTE generated_query;
END
$BODY$
LANGUAGE plpgsql;
--Uncomment to see generated query
--SET client_min_messages = DEBUG;
SELECT * FROM get_all_foo(array['string1', 'string2', 'string3', 'string4', 'string5']);

-- Single-statement alternative to the per-string UNION.
-- The join also requires t2.some_col to start with the same string that
-- matched t1.c1; joining on c2 alone would pair a 'string1' row of t1 with a
-- 'string2%' row of t2. DISTINCT reproduces the de-duplication done by UNION.
select distinct temp.c1 as col1, t2.col2, temp.col3
from
(select col2, c2, some_col from t2 where
some_col like 'string1%' or some_col like 'string2%' or <other strings in the similar fashion>) t2
inner join
(select c1,c2,col3 from t1 where c1 in ('string1', 'string2', <other strings in the similar fashion>)) temp
on t2.c2 = temp.c2
and t2.some_col like temp.c1 || '%';

-- Pick the t1 rows whose c1 is one of the wanted strings, then keep the t2
-- rows that share c2 and whose some_col starts with that same string.
-- DISTINCT removes duplicate (col1, col2, col3) combinations.
WITH wanted AS (
    SELECT *
    FROM t1
    WHERE c1 = ANY (ARRAY['string1','string2','string3'])
)
SELECT DISTINCT
    wanted.c1 AS col1,
    t2.col2,
    wanted.col3
FROM wanted
INNER JOIN t2
    ON t2.c2 = wanted.c2
   AND t2.some_col LIKE wanted.c1 || '%'

Related

PgSQL function returning table and extra data computed in process

In PostgreSQL I run a huge SELECT, and then I want to count its size and apply some extra filters.
Executing it twice sounds dumb,
so I wrapped it in a function,
and then I "cache" its result and return the union of the filtered table and an extra row at the end whose "id" column stores the size:
-- Put the function result in CTE q, return the rows below the distance limit,
-- then append one extra row whose first column carries the row count of q
-- (the remaining three columns of that row are NULL).
WITH q AS (
    SELECT *
    FROM myFunc()
)
SELECT *
FROM q
WHERE q.distance < 400
UNION ALL
SELECT
    count(*) AS id,
    NULL,
    NULL,
    NULL
FROM q
but it also doesn't look like proper solution...
and so the question: is in pg something like "generator function" or any other stuff that can properly solve this ?
postgreSQL 13
myFunc aka "selectItemsByRootTag"
-- selectItemsByRootTag(tag_name): returns the points attached to the tag
-- called tag_name or to any tag below it in the hierarchy
-- (a child row points at its parent through tags.parent_tags = parent.id).
--
--   tag_name  exact name of the root tag
--   returns   (id, name, description, distance) taken from points
CREATE OR REPLACE FUNCTION selectItemsByRootTag(
    in tag_name VARCHAR(50)
)
RETURNS table(
    id BIGINT,
    name VARCHAR(50),
    description TEXT,
    /*info JSON,*/
    distance INTEGER
)
AS $$
BEGIN
    RETURN QUERY(
        WITH RECURSIVE prod AS (
            SELECT
                tags.name, tags.id, tags.parent_tags
            FROM
                tags
            -- The parameter is tag_name (the original referenced an undeclared
            -- "tags_name"). It is qualified with the function name so it can
            -- never be mistaken for a column.
            WHERE tags.name = selectItemsByRootTag.tag_name
            UNION
            SELECT c.name, c.id, c.parent_tags
            FROM
                tags AS c
                INNER JOIN prod AS p
                    ON c.parent_tags = p.id
        )
        SELECT
            points.id,
            points.name,
            points.description,
            /*points.info,*/
            points.distance
        FROM points
        -- The filter on tags.name below discards points without a tag anyway,
        -- so this is written as the INNER JOIN it effectively was.
        INNER JOIN tags ON points.tag_id = tags.id
        WHERE tags.name IN (SELECT prod.name FROM prod)
    );
END;
$$ LANGUAGE plpgsql;
As a result I would like to get perhaps a set of two tables, or a generator function that yields some intermediate result; I am not sure exactly how it should look.
demo
-- pg_temp.selectitemsbyroottag(tag_name, _distance): returns the points that
-- belong to any tag whose name contains tag_name (case-insensitive), or to a
-- descendant of such a tag, and whose distance is greater than _distance.
-- Every returned row also carries the total number of returned rows in "count".
--
-- The query text is fixed; both arguments are passed as bind parameters
-- ($1, $2) through EXECUTE ... USING instead of being concatenated into the
-- SQL string. That closes the injection hole and means a NULL argument yields
-- an empty result instead of a NULL query string.
CREATE OR REPLACE FUNCTION pg_temp.selectitemsbyroottag(tag_name text, _distance numeric)
RETURNS TABLE(id bigint, name text, description text, distance numeric, count bigint)
LANGUAGE plpgsql
AS $function$
DECLARE _sql text;
BEGIN
    _sql := $q$WITH RECURSIVE prod AS (
    SELECT
        tags.name, tags.id, tags.parent_tags
    FROM
        tags
    WHERE tags.name ilike '%' || $1 || '%'
    UNION
    SELECT c.name, c.id , c.parent_tags
    FROM
        tags as c
        INNER JOIN prod as p
            ON c.parent_tags = p.id
)
SELECT
    points.id,
    points.name,
    points.description,
    points.distance,
    count(*) over ()
from points
left join tags on points.tag_id = tags.id
where tags.name in (select prod.name from prod)
and points.distance > $2
$q$;
    -- The notice shows the statement with its $1/$2 placeholders.
    raise notice '_sql: %', _sql;
    return query execute _sql using tag_name, _distance;
END;
$function$;
You can call it in the following ways:
select * from pg_temp.selectItemsByRootTag('test',20);
select * from pg_temp.selectItemsByRootTag('test_8',20) with ORDINALITY;
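With the first call, every returned row carries the total number of rows in the extra count column. The second call returns the same rows plus an ordinality column, i.e. a serial number that increases by one for each row.
I also turned the distance filter (where q.distance < 400 in the question) into a function input argument; note that the function applies it as points.distance > _distance.
selectItemsByRootTag('test',20); means points.distance > 20 and tags.name ilike '%test%'.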

I am getting Dollar sign unterminated

I want to create a function like the one below, which inserts data according to the input given. But I keep getting an error about an unterminated dollar-quoted string.
-- test_generate(ref, _id): stages rows from test_2/raw_table in temp tables,
-- collects error rows, updates raw_table.processed and returns the cursor ref.
--
--   ref  cursor variable that is opened and returned to the caller
--   _id  test_2.id to process; when NULL, the unprocessed raw_table rows that
--        have no observe_test row are staged instead
--
-- NOTE(review): several statements below reference tables, aliases or columns
-- that are not in scope in the visible code; they are flagged inline. Such
-- references would only be reported when the statement is executed, so they
-- are separate from the "unterminated dollar-quoted string" error.
CREATE OR REPLACE FUNCTION test_generate
(
ref REFCURSOR,
_id INTEGER
)
RETURNS refcursor AS $$
DECLARE
BEGIN
-- Staging table: rebuilt from scratch on every call.
DROP TABLE IF EXISTS test_1;
CREATE TEMP TABLE test_1
(
id int,
request_id int,
code text
);
IF _id IS NULL THEN
-- No id given: stage raw_table rows that have no observe_test row (o.id IS NULL)
-- and whose processed flag is 0 or NULL.
INSERT INTO test_1
SELECT
rd.id,
r.id,
rd.code
FROM
test_2 r
INNER JOIN
raw_table rd
ON
rd.test_2_id = r.id
LEFT JOIN
observe_test o
ON
o.raw_table_id = rd.id
WHERE o.id IS NULL
AND COALESCE(rd.processed, 0) = 0;
ELSE
-- Id given: stage every raw_table row that belongs to that test_2 row.
INSERT INTO test_1
SELECT
rd.id,
r.id,
rd.code
FROM
test_2 r
INNER JOIN
raw_table rd
ON rd.test_2_id = r.id
WHERE r.id = _id;
END IF;
-- Error staging table: rebuilt from scratch on every call.
DROP TABLE IF EXISTS tmp_test_2_error;
CREATE TEMP TABLE tmp_test_2_error
(
raw_table_id int,
test_2_id int,
error text,
record_num int
);
-- NOTE(review): test_1 is created above with columns (id, request_id, code),
-- so test_1.test_2_id does not exist; request_id is probably meant - confirm.
-- NOTE(review): data_origin is not in this statement's FROM clause, so
-- data_origin.id cannot be resolved; a LEFT JOIN looks to be missing.
INSERT INTO tmp_test_2_error
(
raw_table_id,
test_2_id,
error,
record_num
)
SELECT DISTINCT
test_1.id,
test_1.test_2_id,
'Error found ' || test_1.code,
0
FROM
test_1
WHERE 1 = 1
AND data_origin.id IS NULL;
-- NOTE(review): the alias "dop" is not defined in this statement, and
-- test_1.test_2_id does not exist (see the note on the previous INSERT).
INSERT INTO tmp_test_2_error
SELECT DISTINCT
test_1.id,
test_1.test_2_id,
'Error found ' || test_1.code,
0
FROM
test_1
INNER JOIN
data_origin
ON
data_origin.code = test_1.code
WHERE dop.id IS NULL;
DROP table IF EXISTS test_latest;
-- WHERE 1 = 2 copies the column layout of observe_test without copying rows.
CREATE TEMP TABLE test_latest AS SELECT * FROM observe_test WHERE 1 = 2;
-- NOTE(review): the alias "ds" is not defined in this statement.
INSERT INTO test_latest
(
raw_table_id,
series_id,
timestamp
)
SELECT
test_1.id,
ds.id AS series_id,
now()
FROM
test_1
INNER JOIN data_origin ON data_origin.code = test_1.code
LEFT JOIN
observe_test o ON o.raw_table_id = test_1.id
WHERE o.id IS NULL;
-- NOTE(review): this is a permanent table and nothing visible here drops it,
-- so a second call would presumably fail with "relation already exists" -
-- confirm whether TEMP plus DROP TABLE IF EXISTS was intended.
CREATE TABLE latest_observe_test as Select * from test_latest where 1=0;
-- NOTE(review): "ds" is not defined and observe_test is not in the FROM clause
-- of this statement; t.id and t.time rely on observe_test having those columns.
INSERT INTO latest_observe_test
(
raw_table_id,
series_id,
timestamp,
time
)
SELECT
t.id,
ds.id AS series_id,
now(),
t.time
FROM
test_latest t
WHERE t.series_id IS DISTINCT FROM observe_test.series_id;
-- NOTE(review): "USING t" names a relation t that this function never creates;
-- presumably one of the staging tables above was meant - confirm.
DELETE FROM test_2_error re
USING t
WHERE t.test_2_id = re.test_2_id;
-- Publish the collected errors to the permanent error table.
INSERT INTO test_2_error (test_2_id, error, record_num)
SELECT DISTINCT test_2_id, error, record_num FROM tmp_test_2_error ORDER BY error;
-- Flag staged raw_table rows: 2 when no error row was recorded for them, 1 otherwise.
UPDATE raw_table AS rd1
SET processed = case WHEN tre.raw_table_id IS null THEN 2 ELSE 1 END
FROM test_1 tr
LEFT JOIN
tmp_test_2_error tre ON tre.raw_table_id = tr.id
WHERE rd1.id = tr.id;
OPEN ref FOR
SELECT 1;
RETURN ref;
-- NOTE(review): everything from here to END is unreachable because of the
-- RETURN above.
OPEN ref for
SELECT o.* from observe_test o
;
RETURN ref;
-- NOTE(review): besides being unreachable, this query uses the aliases "ds"
-- and "o", neither of which is defined in it.
OPEN ref FOR
SELECT
rd.id,
ds.id AS series_id,
now() AS timestamp,
rd.time
FROM test_2 r
INNER JOIN raw_table rd ON rd.test_2_id = r.id
INNER JOIN data_origin ON data_origin.code = rd.code
WHERE o.id IS NULL AND r.id = _id;
RETURN ref;
END;
$$ LANGUAGE plpgsql VOLATILE COST 100;
I am not able to run this procedure.
Can you please help me where I have done wrong?
I am using SQuirreL SQL and ran into the same problem as you,
until I found this:
-- Note that if you want to create the function under Squirrel SQL,
-- you must go to Sessions->Session Properties
-- then SQL tab and change the Statement Separator from ';' to something else
-- (for instance //). Otherwise Squirrel SQL sends one piece to the server
-- that stops at the first encountered ';', and the server cannot make
-- sense of it. With the separator changed as suggested, you type everything
-- as above and end with
-- ...
-- end;
-- $$ language plpgsql
-- //
--
-- You can then restore the default separator, or use the new one for
-- all queries ...
--

PostgreSQL structure of function that return a query

Given a PostgreSQL function that returns a query:
-- word_frequency(_max_tokens): takes up to _max_tokens rows of token whose
-- chartype is 'ALPHABETIC' and reports, per distinct txt, how often it occurs.
--
--   txt    the token text
--   cnt    number of occurrences within the sampled rows
--   ratio  cnt * 100 / _max_tokens (integer division)
-- Rows are returned with the most frequent txt first.
CREATE OR REPLACE FUNCTION word_frequency(_max_tokens int)
  RETURNS TABLE (
    txt   text
  , cnt   bigint
  , ratio bigint) AS
$func$
BEGIN
   RETURN QUERY
   WITH sampled AS (
      SELECT tok.txt
      FROM   token tok
      WHERE  tok.chartype = 'ALPHABETIC'
      LIMIT  _max_tokens
      )
   SELECT s.txt
        , count(*)
        , (count(*) * 100) / _max_tokens
   FROM   sampled s
   GROUP  BY s.txt
   -- sort on the aggregate itself rather than on a name shared with the OUT parameter cnt
   ORDER  BY count(*) DESC;
END
$func$  LANGUAGE plpgsql;
How can I retrieve the structure of this function? I mean, I know that this function will return the txt, cnt and ratio columns, but how can I make a query that returns these column names? I was trying to find these columns names on information_schema schema, but I couldn't.
The expected result of this hypothetical query would be something like this:
3 results found:
---------------------------------
?column_name? | ?function_name?
---------------------------------
txt word_frequency
cnt word_frequency
ratio word_frequency
This information is stored in pg_proc
-- One row per entry of pg_proc.proargnames for public.word_frequency.
-- proargnames holds the names of all declared arguments, so input parameters
-- are listed alongside the returned columns.
SELECT
    unnest(proc.proargnames) AS column_name,
    proc.proname AS function_name
FROM pg_namespace AS ns
INNER JOIN pg_proc AS proc
    ON proc.pronamespace = ns.oid
WHERE ns.nspname = 'public'
  AND proc.proname = 'word_frequency'
Based on the answer of a_horse_with_no_name, I came with this final version:
-- Names of the columns a set-returning function declares in RETURNS TABLE.
-- proargnames and proargmodes are expanded side by side, and only the entries
-- whose mode is 't' (table column) are kept, which drops the input arguments.
SELECT
    arg.column_name,
    p.proname AS function_name
FROM pg_proc AS p
INNER JOIN pg_namespace AS n
    ON p.pronamespace = n.oid
CROSS JOIN LATERAL unnest(p.proargnames, p.proargmodes) AS arg (column_name, column_type)
WHERE n.nspname = 'public'
  AND p.proname = 'my_function'
  AND arg.column_type = 't';
I simply omitted the arguments, returning only the columns that the function returns

Accessing columns when looping over record composed of result of 2 subqueries

I have a code block in which I am looping over a record that contains two joined subqueries that contain equally named columns in different tables.
Now I seem to be able to access sq1 and sq2 in the record, but not the contents and I always get "could not identify column 'c1' in record data type", even if I add explicit aliases to the columns:
-- Question's example (the "..." parts are elided in the original post).
-- Loops over pairs of rows from two derived tables, sq1 and sq2, and inserts
-- one value from each side into Table3.
DO $$
DECLARE
r record;
BEGIN
-- sq1 and sq2 are selected as whole-row values, so r has exactly two columns,
-- named sq1 and sq2, each holding a row of an anonymous record type.
FOR r IN SELECT sq1, sq2
FROM (SELECT t1.someColumn as c1, t2.someColumn as c2, ... FROM Table1 t1 JOIN Table2 t2 ...) sq1
JOIN (SELECT t1.someColumn as c1, t2.someColumn as c2, ... FROM Table1 t1 JOIN Table2 t2 ...) sq2
ON (sq1.joinColumn1 = sq2.joinColumn2 AND sq1.joinColumn2 = sq2.joinColumn1)
LOOP
-- Field access into the nested anonymous records is what the reported
-- "could not identify column" error refers to.
INSERT INTO Table3 (column1, column2)
VALUES ((r.sq1).c1, (r.sq2).c1);
--^ error occurs here
END LOOP;
END$$;
I am looking for a way to access the records similar to the following way:
r.sq1.t1.someColumn
To access the third level inside your variable, you must cast each whole-row value to a named composite type. That can be either a standalone type or the row type of a table:
For type:
CREATE TYPE my_type AS (c1 int4, c2 int4, joinColumn1 int4);
For table:
CREATE TABLE my_type (c1 int4, c2 int4, joinColumn1 int4);
And after that you can do something like this:
-- Demonstration: cast each whole-row value to the named composite type my_type
-- so that its fields can be addressed as (pair.sq1).c1 / (pair.sq2).c1.
-- The cast matches fields by position, which is why sq2 (whose third column is
-- called joinColumn2) can be cast to my_type as well.
DO $$
DECLARE
    pair record;
BEGIN
    FOR pair IN
        SELECT (sq1.*)::my_type AS sq1,
               (sq2.*)::my_type AS sq2
        FROM (SELECT 10 AS c1, 11 AS c2, 1 AS joinColumn1) AS sq1
        INNER JOIN (SELECT 20 AS c1, 21 AS c2, 1 AS joinColumn2) AS sq2
            ON sq1.joinColumn1 = sq2.joinColumn2
    LOOP
        -- Show the whole record, one nested row, then the two extracted fields.
        RAISE NOTICE '%', pair;
        RAISE NOTICE '%', pair.sq1;
        RAISE NOTICE '% %', (pair.sq1).c1, (pair.sq2).c1;

        INSERT INTO Table3 (column1, column2)
        VALUES ((pair.sq1).c1, (pair.sq2).c1);
    END LOOP;
END$$;

postgresql column names as variable in a subquery

table1
id col1 col2 col3...
table2
col_id col_name
3432 col1
5342 col2
6756 col3
Now I want to generate table 3 like this:
id col_name col_value col_id
Please note that col1, col2,col3... are not in order. Therefore I have to query table2 to obtain col_id ( I think pivot does not work here)
How can I do it in SQL?
It appears that you want a select like this:
-- Unpivot table1: one output row per (table1 row, table2 entry), producing
-- the requested layout id | col_name | col_value | col_id.
-- table2 only maps a column name to its col_id, so there is no key to join
-- on: every table1 row is paired with every table2 row and CASE picks the
-- value of the column that the table2 row names.
-- NOTE(review): all CASE branches must resolve to one type; cast the columns
-- to text if col1..colN have different types.
SELECT t1.id,
       t2.col_name,
       CASE t2.col_name
           WHEN 'col1' THEN t1.col1
           WHEN 'col2' THEN t1.col2
           WHEN 'col3' THEN t1.col3
           -- ... more columns
           ELSE NULL
       END AS col_value,
       t2.col_id
FROM table1 t1
CROSS JOIN table2 t2
You could also create a function, though this will be slower in practice:
-- table1_col(id, name): returns, as text, the value of column "name" in the
-- table1 row whose id equals the given id.
--
--   id    table1.id of the row to read
--   name  name of the table1 column to read; NULL yields NULL
--   returns the column value converted to text, or NULL when no row matches
--
-- The column name is spliced into the statement with %I, so it is always
-- treated as a single (quoted if necessary) identifier and can never smuggle
-- in an arbitrary SQL expression. The row id is passed as a bind parameter.
CREATE OR REPLACE FUNCTION table1_col(id integer, name text) RETURNS text as $$
DECLARE
    col_val text;
BEGIN
    -- format('%I', NULL) raises an error; treat "no column" as "no value".
    IF name IS NULL THEN
        RETURN NULL;
    END IF;

    EXECUTE format('SELECT %I FROM table1 WHERE id=$1', name)
    INTO col_val
    USING id;
    RETURN col_val;
END;
$$ LANGUAGE plpgsql;
SELECT table1_col(col_id,col_name) FROM table2;