postgresql column names as variable in a subquery - postgresql

table1
id col1 col2 col3...
table2
col_id col_name
3432 col1
5342 col2
6756 col3
Now I want to generate table 3 like this:
id col_name col_value col_id
Please note that col1, col2,col3... are not in order. Therefore I have to query table2 to obtain col_id ( I think pivot does not work here)
How can I do it in SQL?

It appears that you want a select like this:
-- Unpivot table1: emit one row per (table1 row, column listed in table2),
-- picking the value of whichever table1 column table2.col_name names.
-- All THEN branches must resolve to one common type; cast the columns
-- (for example to text) if col1, col2, col3 ... differ in type.
SELECT t1.id,
       t2.col_name,
       CASE t2.col_name
           WHEN 'col1' THEN t1.col1
           WHEN 'col2' THEN t1.col2
           WHEN 'col3' THEN t1.col3
           -- ... more columns
           ELSE NULL
       END AS col_value,
       t2.col_id
FROM table1 AS t1
-- table2 holds column metadata (col_id, col_name), not row keys, so every
-- table1 row is paired with every listed column rather than joined on an id.
CROSS JOIN table2 AS t2;
You could also create a function, though this will be slower in practice:
-- Returns, as text, the value of column "name" in the table1 row whose id
-- equals the given id. A column name cannot be passed as a bind parameter,
-- so the statement is assembled dynamically.
--   id:   key of the table1 row to read
--   name: exact (case-sensitive) name of the table1 column to read
-- Returns NULL when no row matches.
CREATE OR REPLACE FUNCTION table1_col(id integer, name text) RETURNS text AS $$
DECLARE
    col_val text;
BEGIN
    -- %I quotes "name" as an identifier, so arbitrary text cannot be injected
    -- into the statement; the row id is still passed as bind parameter $1.
    EXECUTE format('SELECT %I FROM table1 WHERE id = $1', name)
    INTO col_val
    USING id;
    RETURN col_val;
END;
$$ LANGUAGE plpgsql;

-- One output row per (table1 row, column listed in table2).
SELECT t1.id,
       t2.col_name,
       table1_col(t1.id, t2.col_name) AS col_value,
       t2.col_id
FROM table1 AS t1
CROSS JOIN table2 AS t2;

Related

Postgresql strange behavior with update trigger

I have a table1: id int, id_2 int, date timestamp, vec float[]
And table2 : id int, vec float[]
My goal is to create a trigger on update of table1 that takes the last 10 rows (by date) for the updated id_2, averages their vectors along the first axis (10 x N -> N), and writes the result to table2 under id = id_2.
My code:
-- Statement-level trigger function for updates of table1: for every id_2
-- touched by the UPDATE, average the vec arrays of its most recent rows
-- position by position and store the resulting array in table2 under
-- id = id_2. Reads the transition table new_table supplied by the trigger.
CREATE OR REPLACE FUNCTION public.foo()
 RETURNS trigger
 LANGUAGE plpgsql
AS $function$
BEGIN
    WITH recent_rows AS (
        -- Rank all table1 rows of each affected id_2 by recency. DISTINCT
        -- collapses the duplicates produced when new_table holds several
        -- rows with the same id_2.
        -- NOTE(review): DENSE_RANK gives rows with equal dates the same rank,
        -- so more than 10 rows can pass the filter below - confirm that this
        -- is acceptable.
        SELECT DISTINCT
               t1.id_2,
               t2.id,
               t2.vec,
               t2.date,
               DENSE_RANK() OVER (PARTITION BY t1.id_2 ORDER BY t2.date DESC) AS counter
        FROM new_table AS t1
        LEFT JOIN table1 AS t2 ON t1.id_2 = t2.id_2
    ),
    elements_average AS (
        -- Average each array position separately and keep the position so
        -- the array can be reassembled in the right order.
        SELECT r.id_2,
               elem.idx,
               AVG(elem.val::float) AS av
        FROM recent_rows AS r
        CROSS JOIN LATERAL unnest(r.vec) WITH ORDINALITY AS elem(val, idx)
        WHERE r.counter < 11
        GROUP BY r.id_2, elem.idx
    ),
    avr AS (
        -- The ORDER BY inside array_agg is what fixes the element order; an
        -- ORDER BY in the CTE above does not bind the aggregate's input order.
        SELECT id_2,
               array_agg(av::float ORDER BY idx) AS averages
        FROM elements_average
        GROUP BY id_2
    )
    UPDATE table2
    SET vec = avr.averages
    FROM avr
    WHERE table2.id = avr.id_2;

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END;
$function$
;
CREATE TRIGGER foo_trigger
AFTER UPDATE ON public.table1
REFERENCING NEW TABLE AS new_table
FOR EACH STATEMENT
EXECUTE FUNCTION foo();
The problem: when I update a few rows in table1 with different id_2 values in one transaction, the value written to table2 is wrong - it is not the average.
What's even more strange is that this code gives correct values in same situation:
...
-- Collects the per-position averages of each id_2 into one array.
-- array_agg has no ORDER BY here, so the element order is whatever order the
-- rows of elements_average happen to arrive in.
avr AS (
SELECT id_2, array_agg(av::float) AS averages
FROM elements_average
GROUP BY id_2
),
-- Never referenced by the UPDATE below; its only visible effect is that
-- elements_average is now referenced twice.
-- NOTE(review): PostgreSQL 12+ materializes a CTE that is referenced more
-- than once instead of inlining it, which presumably is why this variant
-- happens to produce the expected order - confirm on the server in use.
strange_thing AS (
SELECT * from elements_average
)
-- NOTE(review): user_av is not defined anywhere in the visible snippet; the
-- CTE holding the averages is named avr - verify.
UPDATE table2 SET vec = averages FROM avr WHERE table2.id = user_av.id_2;
RETURN NULL;
END;
$function$
;
So, small meaningless and unimportant SELECT changes the behavior of the function. Is it a bug of postgres or my fault?

PostgreSQL: Pass value from an array and do union

-- Each UNION branch carries its own WITH clause, so every branch is wrapped
-- in parentheses: a bare WITH is not accepted directly after UNION, and an
-- unparenthesized leading WITH would attach to the whole UNION.
(
WITH temp AS (select * from t1 where c1 = 'string1')
select 'string1' as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like 'string1%'
)
union
(
WITH temp AS (select * from t1 where c1 = 'string2')
select 'string2' as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like 'string2%'
)
...
Above is just an example of a PostgreSQL query I am trying to run. It's a union of two completely similar queries. They only use different values for matching string1 and string2.
I have about 20 such queries that I want to do a union on. They only differ by the variable I want to use for comparison such as string1
How can I use such array of values ['string1', 'string2', 'string3', .., 'string20'], run a query on each variable from this array and union them?
What about an old-fashioned PL/pgSQL function?
-- Runs the per-string query once for every element of arr and returns the
-- UNION (duplicates removed) of all results.
--   arr: values matched exactly against t1.c1 and used as a prefix pattern
--        for t2.some_col
-- Returns no rows when arr is NULL or empty.
CREATE OR REPLACE FUNCTION get_all_foo(arr varchar[]) RETURNS TABLE (col1 TEXT, col2 TEXT, col3 TEXT) AS
$BODY$
DECLARE
    generated_query text;
BEGIN
    -- Build one parenthesized branch per array element and glue the branches
    -- together with UNION. %L quotes every value as a literal - including the
    -- LIKE pattern - so a quote inside a value cannot break out of the
    -- generated statement.
    SELECT string_agg(
               format('(WITH temp AS (select * from t1 where c1 = %1$L) '
                      'select %1$L::text as col1, t2.col2, temp.col3 from t2 inner join temp on t2.c2 = temp.c2 where t2.some_col like %2$L)',
                      elem, elem || '%'),
               ' UNION ')
    INTO generated_query
    FROM unnest(arr) AS elem;

    -- string_agg over zero rows yields NULL: nothing to execute.
    IF generated_query IS NULL THEN
        RETURN;
    END IF;

    RAISE DEBUG 'Generated query: %', generated_query;
    RETURN QUERY EXECUTE generated_query;
END
$BODY$
LANGUAGE plpgsql;
--Uncomment to see generated query
--SET client_min_messages = DEBUG;
SELECT * FROM get_all_foo(array['string1', 'string2', 'string3', 'string4', 'string5']);
-- Single-statement alternative: filter both tables once, then join them.
-- The LIKE condition in the join pairs each t2 row only with the t1 rows
-- whose c1 is its own prefix (as each branch of the original UNION did), and
-- DISTINCT mirrors the duplicate removal performed by UNION.
select distinct temp.c1 as col1, t2.col2, temp.col3
from
(select col2, c2, some_col from t2 where
some_col like 'string1%' or some_col like 'string2%' or <other strings in the similar fashion>) t2
inner join
(select c1,c2,col3 from t1 where c1 in ('string1', 'string2', <other strings in the similar fashion>)) temp
on t2.c2 = temp.c2
and t2.some_col like temp.c1 || '%';
-- Keep the t1 rows whose c1 is one of the wanted strings, then attach the
-- t2 rows that share c2 and whose some_col starts with that same c1 value.
-- DISTINCT removes duplicate result rows.
WITH wanted AS (
    SELECT *
    FROM t1
    WHERE c1 IN ('string1', 'string2', 'string3')
)
SELECT DISTINCT
    wanted.c1 AS col1,
    t2.col2,
    wanted.col3
FROM t2
INNER JOIN wanted
    ON t2.c2 = wanted.c2
   AND t2.some_col LIKE wanted.c1 || '%'

Postgres find all rows in database tables matching criteria on a given column

I am trying to write sub-queries so that I search all tables for a column named id and since there are multiple tables with id column, I want to add the condition, so that id = 3119093.
My attempt was:
-- First attempt: filter the catalog view itself on "id".
-- information_schema.tables describes tables and exposes no "id" column, so
-- the reference to id cannot be resolved.
SELECT *
FROM information_schema.tables
WHERE id = '3119093'
  AND id IN (
      SELECT table_name
      FROM information_schema.columns
      WHERE column_name = 'id'
  );
This didn't work so I tried:
-- Second attempt: list the tables that have an "id" column and try to probe
-- each of them from inside the subquery.
-- Here 'id' in single quotes is a string literal, not a column reference, and
-- "from table_name" names a relation literally called table_name rather than
-- the value of the outer table_name column.
SELECT *
FROM information_schema.tables
WHERE table_name IN (
    SELECT table_name
    FROM information_schema.columns
    WHERE column_name = 'id'
      AND 'id' IN (
          SELECT *
          FROM table_name
          WHERE 'id' = 3119093
      )
);
This isn't the right way either. Any help would be appreciated. Thanks!
A harder attempt is:
-- Searches every column of the selected base tables for rows whose value,
-- cast to text, matches a LIKE pattern.
--   needle:          LIKE pattern to look for (e.g. '%3119093%')
--   haystack_tables: table names to search; an empty array means all tables
--   haystack_schema: schemas to search (default: public)
-- Returns one row per match: schema, table, column and the ctid of the
-- matching row.
CREATE OR REPLACE FUNCTION search_columns(
    needle text,
    haystack_tables name[] default '{}',
    haystack_schema name[] default '{public}'
)
RETURNS table(schemaname text, tablename text, columnname text, rowctid text)
AS $$
begin
  -- Outer loop: every column of every candidate base table. The loop targets
  -- are the function's own output columns, so RETURN NEXT emits them as-is.
  FOR schemaname,tablename,columnname IN
      SELECT c.table_schema,c.table_name,c.column_name
      FROM information_schema.columns c
      JOIN information_schema.tables t ON
        (t.table_name=c.table_name AND t.table_schema=c.table_schema)
      WHERE (c.table_name=ANY(haystack_tables) OR haystack_tables='{}')
        AND c.table_schema=ANY(haystack_schema)
        AND t.table_type='BASE TABLE'
        -- To restrict the search to one column, add:
        --   AND c.column_name = 'id'
  LOOP
    -- Inner loop: every matching row of that column. EXECUTE ... INTO would
    -- keep only the first match and silently drop the rest.
    FOR rowctid IN
      EXECUTE format('SELECT ctid FROM %I.%I WHERE cast(%I as text) like %L',
         schemaname,
         tablename,
         columnname,
         needle
      )
    LOOP
      RETURN NEXT;
    END LOOP;
  END LOOP;
END;
$$ language plpgsql;
select * from search_columns('%3119093%'::varchar,'{}'::name[]) ;
The only problem is this code displays the table name and column name. I have to then manually enter
Select * from table_name where id = 3119093
where I got the table name from the code above.
I want to automatically implement returning rows from a table but I don't know how to get the table name automatically.
I took the time to make it work for you.
For starters, some information on what is going on inside the code.
Explanation
function takes two input arguments: column name and column value
it requires a created type that it will be returning a set of
first loop identifies tables that have a column name specified as the input argument
then it forms a query which aggregates all rows that match the input condition inside every table taken from step 3 with comparison based on ILIKE - as per your example
function goes into the second loop only if there is at least one row in currently visited table that matches specified condition (then the array is not null)
second loop unnests the array of rows that match the condition and for every element it puts it in the function output with RETURN NEXT rec clause
Notes
Searching with LIKE is inefficient - I suggest adding another input argument "column type" and restrict it in the lookup by adding a join to pg_catalog.pg_type table.
The second loop is there so that if more than 1 row is found for a particular table, then every row gets returned.
If you are looking for something else, like you need key-value pairs, not just the values, then you need to extend the function. You could for example build json format from rows.
Now, to the code.
Test case
-- tbl1: has an "id" column and holds rows.
CREATE TABLE tbl1 (
    col1 int,
    id   int
);

-- tbl2: has no "id" column.
CREATE TABLE tbl2 (
    col1 int,
    col2 int
);

-- tbl3: has an "id" column but stays empty.
CREATE TABLE tbl3 (
    id   int,
    col5 int
);

INSERT INTO tbl1 (col1, id)
VALUES
    (1, 5),
    (1, 33),
    (1, 25);
Table stores data:
postgres=# select * From tbl1;
col1 | id
------+----
1 | 5
1 | 33
1 | 25
(3 rows)
Creating type
-- Row shape returned by the search function: where the match was found plus
-- the whole matching row rendered as text.
CREATE TYPE sometype AS (
    schemaname text,
    tablename  text,
    colname    text,
    entirerow  text
);
Function code
-- Finds every ordinary table that has a column named v_column_name and
-- returns each row of those tables whose value in that column, cast to text,
-- contains v_column_value (case-insensitive ILIKE match).
--   v_column_name:  exact name of the column to look for
--   v_column_value: substring searched for inside the column value
-- Returns one sometype row per matching table row; entirerow holds the whole
-- row rendered as text.
CREATE OR REPLACE FUNCTION search_tables_for_column (
    v_column_name text
  , v_column_value text
)
RETURNS SETOF sometype
LANGUAGE plpgsql
STABLE
AS
$$
DECLARE
    hit          sometype%rowtype;
    matched_rows text[];
    row_text     text;
BEGIN
    -- First loop: one iteration per (schema, table) that owns the column.
    FOR hit IN
        SELECT
            nam.nspname AS schemaname
          , cls.relname AS tablename
          , att.attname AS colname
          , null::text  AS entirerow
        FROM pg_attribute att
        JOIN pg_class cls     ON att.attrelid = cls.oid
        JOIN pg_namespace nam ON cls.relnamespace = nam.oid
        WHERE cls.relkind = 'r'
          AND att.attname = v_column_name
    LOOP
        -- Collect all matching rows of this table, as text, into one array.
        EXECUTE format('SELECT ARRAY_AGG(row(tablename.*)::text) FROM %I.%I AS tablename WHERE %I::text ILIKE %s',
                       hit.schemaname,
                       hit.tablename,
                       hit.colname,
                       quote_literal(concat('%', v_column_value, '%')))
        INTO matched_rows;

        -- ARRAY_AGG over zero rows is NULL: nothing matched in this table.
        CONTINUE WHEN matched_rows IS NULL;

        -- Second loop: emit one output row per collected table row.
        FOREACH row_text IN ARRAY matched_rows
        LOOP
            hit.entirerow := row_text;
            RETURN NEXT hit;
        END LOOP;
    END LOOP;
END
$$;
Exemplary call & output
postgres=# select * from search_tables_for_column('id','5');
schemaname | tablename | colname | entirerow
------------+-----------+---------+-----------
public | tbl1 | id | (1,5)
public | tbl1 | id | (1,25)
(2 rows)

How do I avoid listing all the table columns in a PostgreSQL returns statement?

I have a PostgreSQL function similar to this:
-- Looks up the dbo.MyTable row whose PersonID equals _ID and appends the
-- Name found in dbo.MySecondTable for that row.
-- NOTE(review): the declared result type is dbo.MyTable while the SELECT
-- produces one extra column; presumably this mismatch is what the question
-- is about.
CREATE OR REPLACE FUNCTION dbo.MyTestFunction(_ID INT)
RETURNS dbo.MyTable
AS
$$
    SELECT
        *,
        (
            SELECT Name
            FROM dbo.MySecondTable
            WHERE RecordID = PersonID
        )
    FROM dbo.MyTable
    WHERE PersonID = _ID
$$
LANGUAGE SQL
STABLE;
I would really like to NOT have to replace the RETURNS dbo.MyTable AS with something like:
RETURNS TABLE(
col1 INT,
col2 TEXT,
col3 BOOLEAN,
col4 TEXT
) AS
and list out all the columns of MyTable and Name of MySecondTable. Is this something that can be done? Thanks.
--EDIT--
To clarify I have to return ALL columns in MyTable and 1 column from MySecondTable. If MyTable has >15 columns, I don't want to have to list out all the columns in a RETURNS TABLE (col1.. coln).
You just list the columns that you want returned in the SELECT portion of your SQL statement:
-- Return only the listed dbo.MyTable columns plus the looked-up Name.
SELECT
    t1.column1,
    t1.column2,
    (
        SELECT Name
        FROM dbo.MySecondTable
        WHERE RecordID = PersonID
    )
FROM dbo.MyTable t1
WHERE PersonID = _ID
Now you'll just get column1, column2, and name returned.
Furthermore, you'll probably find better performance using a LEFT OUTER JOIN in your FROM portion of the SQL statement as opposed to the correlated subquery you have now:
-- Same result columns as the correlated-subquery form, with the Name fetched
-- through an outer join so rows without a match in dbo.MySecondTable are
-- still returned (with a NULL Name).
SELECT
    t1.column1,
    t1.column2,
    t2.Name
FROM dbo.MyTable AS t1
LEFT JOIN dbo.MySecondTable AS t2
    ON t2.RecordID = t1.PersonID
-- PersonID is qualified so the filter stays unambiguous even if
-- dbo.MySecondTable also has a column of that name.
WHERE t1.PersonID = _ID
Took a bit of a guess on where RecordID and PersonID were coming from, but that's the general idea.

Creating a view of frequencies from a table with unknown number of columns

I have a table with n columns and I need to create a view which contains the frequencies of every unique value in every column. n is unknown since, I need to apply the solution on numerous tables with different number of columns.
For example i have table:
column1 column2 column3
value1 value2 value3
value2 value2 value1
value1 value2 value2
The view should be something like this:
columnname value frequency
column1 value1 2
column1 value2 1
column2 value2 3
...
Since I have very little experience with sql any help would be extremely appreciated.
Many thanks in advance!
Thus far I have come up with this but am sort of stonewalled now.
-- Unfinished draft: loops over the column names of table123 as listed in
-- information_schema.columns; the loop body (???) is still to be written.
CREATE or REPLACE FUNCTION create_view () RETURNS setof record AS $$
DECLARE
-- Loop variable; each iteration holds one row with a single column_name field.
col RECORD;
BEGIN
-- The column list is fetched through dynamic SQL with the table name
-- hard-coded inside the query string.
for col in execute 'select column_name from information_schema.columns
where table_name = ''table123''' LOOP
-- Placeholder left by the author: the per-column work is missing.
???
END LOOP;
-- Ends the function without having emitted any rows.
return;
END;
$$
LANGUAGE 'plpgsql';
Simple SQL:
-- One branch per column: label each distinct value with its column name and
-- count how often it occurs. The first branch names the output columns.
SELECT
    'col1'   AS col,
    col1     AS val,
    count(*) AS ct
FROM tbl
GROUP BY col1

UNION ALL

SELECT
    'col2',
    col2,
    count(*)
FROM tbl
GROUP BY col2

UNION ALL

SELECT
    'col3',
    col3,
    count(*)
FROM tbl
GROUP BY col3
PL/pgSQL function executing dynamic SQL:
-- Returns the frequency of every distinct value in every column of a table.
--   _schema: schema of the table, or NULL to resolve _tbl via search_path
--   _tbl:    table name (exact, case-sensitive)
-- Result: one row per (column name, value cast to text, number of rows).
-- Raises an error if the table does not exist.
CREATE OR REPLACE FUNCTION f_demo(_schema text, _tbl text)
  RETURNS TABLE(col text, val text, ct bigint) AS
$xx$
DECLARE
   -- Resolve the table once. quote_ident is applied to both parts so the
   -- catalog lookup and the generated FROM clause address the same relation,
   -- including mixed-case or otherwise unusual names.
   _rel regclass := (COALESCE(quote_ident(_schema) || '.', '') || quote_ident(_tbl))::regclass;
   _fld text;
BEGIN
   FOR _fld IN
      SELECT a.attname
      FROM   pg_catalog.pg_attribute a
      WHERE  a.attrelid = _rel
      AND    a.attnum > 0            -- skip system columns
      AND    NOT a.attisdropped      -- skip dropped columns
      -- AND a.attname ~~ '%col%'    -- if you want to pick specific columns
      ORDER  BY a.attnum             -- report columns in table order
   LOOP
      -- %I quotes the column name as an identifier; a regclass value renders
      -- as a properly quoted (and, where needed, schema-qualified) name, so
      -- %s is safe for it. The column name is also passed as data via $1.
      RETURN QUERY EXECUTE format(
         'SELECT $1, %1$I::text, count(*) FROM %2$s GROUP BY %1$I::text',
         _fld, _rel)
      USING _fld;
   END LOOP;
END;
$xx$
LANGUAGE plpgsql;
Call:
SELECT * FROM f_demo('public', 'mytable');
Or, if you want to use the schema provided by search_path:
SELECT * FROM f_demo(NULL, 'mytable');
Major points
Works for any table and any number of columns with values of any type.
Values are cast to text to simplify my example. Could be done with a polymorphic type, too.
See this related answer for info on plpgsql techniques and links: https://stackoverflow.com/q/8146245/939860