Redshift - Cannot load query results into table - leader node issue - amazon-redshift

My goal is it to load query results to a table to store an ordered list of column names for a given table. (Then I will stuff all these column names, into a single column, using the listagg function which I will pass to dynamic sql.) The reason I cannot load this into a table is because system table queries compute on leader nodes, yet this query does not execute on a leader node, and there is no way to force it to execute on a leader node. Any ideas how to get this to execute successfully?
create table temp_columns
as
select
cast(t1.columnname as varchar) as columname
--,
--cast(t2.ordinal_position as integer) as ordinal_position
FROM
(
SELECT
cast(schemaname as varchar) as schemaname,
cast(tablename as varchar) as tablename,
cast("column" as varchar) as columnname
FROM PG_TABLE_DEF
WHERE
schemaname = 'schema1'
and tablename = 'table1'
)
t1
join information_schema.columns t2
on t1.schemaname = t2.table_schema
and t1.tablename = t2.table_name
and t1.columnname = t2.column_name
WHERE
t1.schemaname = 'schema1'
and t1.tablename = 'table1'
order by t2.ordinal_position;

We got around this by creating the table first, then doing a INSERT INTO.
so something like:
create table public.temp_columns
(
columname varchar(255),
ordinal_position int
);
insert into public.temp_columns
select
cast(t1.columnname as varchar) as columname,
cast(t2.ordinal_position as integer) as ordinal_position
FROM
(
SELECT
cast(schemaname as varchar) as schemaname,
cast(tablename as varchar) as tablename,
cast("column" as varchar) as columnname
FROM PG_TABLE_DEF
WHERE
schemaname = 'schema1'
and tablename = 'table1'
)
t1
join information_schema.columns t2
on t1.schemaname = t2.table_schema
and t1.tablename = t2.table_name
and t1.columnname = t2.column_name
WHERE
t1.schemaname = 'schema1'
and t1.tablename = 'table1'
order by t2.ordinal_position;

Related

Issue with PK violation on insert

I have a scenario where almost all of the tables have issues with the PK value as follows. This results is a database error or the violation of the PK insert.
When using the DBCC CheckIdent it displays an inconsistency between the next value and the current one.
Can anyone have a reason for the mismatch happening on several tables?
Since this database is then replicate, I'm afraid this error will propagate across the environment.
I adapted this script to fix it, but really trying to figure out the root of the problem.
/** Version 3.0 **/
if object_id('tempdb..#temp') is not null
drop table #temp
;
with cte as (
SELECT
distinct
A.TABLE_CATALOG AS CATALOG,
A.TABLE_SCHEMA AS "SCHEMA",
A.TABLE_NAME AS "TABLE",
B.COLUMN_NAME AS "COLUMN",
IDENT_SEED (A.TABLE_NAME) AS Seed,
IDENT_INCR (A.TABLE_NAME) AS Increment,
IDENT_CURRENT (A.TABLE_NAME) AS Curr_Value
, DBPS.row_count AS NumberOfRows
FROM INFORMATION_SCHEMA.TABLES A
inner join INFORMATION_SCHEMA.COLUMNS B on b.TABLE_NAME = a.TABLE_NAME and b.TABLE_SCHEMA = a.TABLE_SCHEMA
inner join sys.identity_columns IC on OBJECT_NAME (IC.object_id) = a.TABLE_NAME
inner join sys.dm_db_partition_stats DBPS ON DBPS.object_id =IC.object_id
inner join sys.indexes as IDX ON DBPS.index_id =IDX.index_id
WHERE A.TABLE_CATALOG = B.TABLE_CATALOG AND
A.TABLE_SCHEMA = B.TABLE_SCHEMA AND
A.TABLE_NAME = B.TABLE_NAME AND
COLUMNPROPERTY (OBJECT_ID (B.TABLE_NAME), B.COLUMN_NAME, 'IsIdentity') = 1 AND
OBJECTPROPERTY (OBJECT_ID (A.TABLE_NAME), 'TableHasIdentity') = 1 AND
A.TABLE_TYPE = 'BASE TABLE'
)
select 'DBCC CHECKIDENT ('''+A.[SCHEMA]+'.'+a.[TABLE]+''', reseed)' command
, ROW_NUMBER() OVER(ORDER BY a.[SCHEMA], a.[TABLE] asc) AS ID
, A.Curr_Value
, a.[TABLE]
into #temp
from cte A
ORDER BY A.[SCHEMA], A.[TABLE]
declare #i int = 1, #count int = (select max(ID) from #temp)
declare #text varchar(max) = ''
select #COUNT= count(1) FROM #temp
WHILE #I <= #COUNT
BEGIN
SET #text = (SELECT command from #temp where ID=#I)
EXEC (#text + ';')
print #text
select Curr_Value OldValue, ident_current([TABLE]) FixValue, [TABLE] from #temp where ID=#I
SET #I = #I + 1
SET #text='';
END
go
maybe someone or something with enough permissions made a mistake by reseeding?
As simple as this:
create table testid (
id int not null identity (1,1) primary key,
data varchar (3)
)
insert into testid (data) values ('abc'),('cde')
DBCC CHECKIDENT ('testid', RESEED, 1)
insert into testid (data) values ('bad')

I am getting Dollar sign unterminated

I want to create a function like below which inserts data as per the input given. But I keep on getting an error about undetermined dollar sign.
CREATE OR REPLACE FUNCTION test_generate
(
ref REFCURSOR,
_id INTEGER
)
RETURNS refcursor AS $$
DECLARE
BEGIN
DROP TABLE IF EXISTS test_1;
CREATE TEMP TABLE test_1
(
id int,
request_id int,
code text
);
IF _id IS NULL THEN
INSERT INTO test_1
SELECT
rd.id,
r.id,
rd.code
FROM
test_2 r
INNER JOIN
raw_table rd
ON
rd.test_2_id = r.id
LEFT JOIN
observe_test o
ON
o.raw_table_id = rd.id
WHERE o.id IS NULL
AND COALESCE(rd.processed, 0) = 0;
ELSE
INSERT INTO test_1
SELECT
rd.id,
r.id,
rd.code
FROM
test_2 r
INNER JOIN
raw_table rd
ON rd.test_2_id = r.id
WHERE r.id = _id;
END IF;
DROP TABLE IF EXISTS tmp_test_2_error;
CREATE TEMP TABLE tmp_test_2_error
(
raw_table_id int,
test_2_id int,
error text,
record_num int
);
INSERT INTO tmp_test_2_error
(
raw_table_id,
test_2_id,
error,
record_num
)
SELECT DISTINCT
test_1.id,
test_1.test_2_id,
'Error found ' || test_1.code,
0
FROM
test_1
WHERE 1 = 1
AND data_origin.id IS NULL;
INSERT INTO tmp_test_2_error
SELECT DISTINCT
test_1.id,
test_1.test_2_id,
'Error found ' || test_1.code,
0
FROM
test_1
INNER JOIN
data_origin
ON
data_origin.code = test_1.code
WHERE dop.id IS NULL;
DROP table IF EXISTS test_latest;
CREATE TEMP TABLE test_latest AS SELECT * FROM observe_test WHERE 1 = 2;
INSERT INTO test_latest
(
raw_table_id,
series_id,
timestamp
)
SELECT
test_1.id,
ds.id AS series_id,
now()
FROM
test_1
INNER JOIN data_origin ON data_origin.code = test_1.code
LEFT JOIN
observe_test o ON o.raw_table_id = test_1.id
WHERE o.id IS NULL;
CREATE TABLE latest_observe_test as Select * from test_latest where 1=0;
INSERT INTO latest_observe_test
(
raw_table_id,
series_id,
timestamp,
time
)
SELECT
t.id,
ds.id AS series_id,
now(),
t.time
FROM
test_latest t
WHERE t.series_id IS DISTINCT FROM observe_test.series_id;
DELETE FROM test_2_error re
USING t
WHERE t.test_2_id = re.test_2_id;
INSERT INTO test_2_error (test_2_id, error, record_num)
SELECT DISTINCT test_2_id, error, record_num FROM tmp_test_2_error ORDER BY error;
UPDATE raw_table AS rd1
SET processed = case WHEN tre.raw_table_id IS null THEN 2 ELSE 1 END
FROM test_1 tr
LEFT JOIN
tmp_test_2_error tre ON tre.raw_table_id = tr.id
WHERE rd1.id = tr.id;
OPEN ref FOR
SELECT 1;
RETURN ref;
OPEN ref for
SELECT o.* from observe_test o
;
RETURN ref;
OPEN ref FOR
SELECT
rd.id,
ds.id AS series_id,
now() AS timestamp,
rd.time
FROM test_2 r
INNER JOIN raw_table rd ON rd.test_2_id = r.id
INNER JOIN data_origin ON data_origin.code = rd.code
WHERE o.id IS NULL AND r.id = _id;
RETURN ref;
END;
$$ LANGUAGE plpgsql VOLATILE COST 100;
I am not able to run this procedure.
Can you please help me where I have done wrong?
I am using squirrel and face the same question as you.
until I found that:
-- Note that if you want to create the function under Squirrel SQL,
-- you must go to Sessions->Session Properties
-- then SQL tab and change the Statement Separator from ';' to something else
-- (for intance //). Otherwise Squirrel SQL sends one piece to the server
-- that stops at the first encountered ';', and the server cannot make
-- sense of it. With the separator changed as suggested, you type everything
-- as above and end with
-- ...
-- end;
-- $$ language plpgsql
-- //
--
-- You can then restore the default separator, or use the new one for
-- all queries ...
--

Postgres find all rows in database tables matching criteria on a given column

I am trying to write sub-queries so that I search all tables for a column named id and since there are multiple tables with id column, I want to add the condition, so that id = 3119093.
My attempt was:
Select *
from information_schema.tables
where id = '3119093' and id IN (
Select table_name
from information_schema.columns
where column_name = 'id' );
This didn't work so I tried:
Select *
from information_schema.tables
where table_name IN (
Select table_name
from information_schema.columns
where column_name = 'id' and 'id' IN (
Select * from table_name where 'id' = 3119093));
This isn't the right way either. Any help would be appreciated. Thanks!
A harder attempt is:
CREATE OR REPLACE FUNCTION search_columns(
needle text,
haystack_tables name[] default '{}',
haystack_schema name[] default '{public}'
)
RETURNS table(schemaname text, tablename text, columnname text, rowctid text)
AS $$
begin
FOR schemaname,tablename,columnname IN
SELECT c.table_schema,c.table_name,c.column_name
FROM information_schema.columns c
JOIN information_schema.tables t ON
(t.table_name=c.table_name AND t.table_schema=c.table_schema)
WHERE (c.table_name=ANY(haystack_tables) OR haystack_tables='{}')
AND c.table_schema=ANY(haystack_schema)
AND t.table_type='BASE TABLE'
--AND c.column_name = "id"
LOOP
EXECUTE format('SELECT ctid FROM %I.%I WHERE cast(%I as text) like %L',
schemaname,
tablename,
columnname,
needle
) INTO rowctid;
IF rowctid is not null THEN
RETURN NEXT;
END IF;
END LOOP;
END;
$$ language plpgsql;
select * from search_columns('%3119093%'::varchar,'{}'::name[]) ;
The only problem is this code displays the table name and column name. I have to then manually enter
Select * from table_name where id = 3119093
where I got the table name from the code above.
I want to automatically implement returning rows from a table but I don't know how to get the table name automatically.
I took the time to make it work for you.
For starters, some information on what is going on inside the code.
Explanation
function takes two input arguments: column name and column value
it requires a created type that it will be returning a set of
first loop identifies tables that have a column name specified as the input argument
then it forms a query which aggregates all rows that match the input condition inside every table taken from step 3 with comparison based on ILIKE - as per your example
function goes into the second loop only if there is at least one row in currently visited table that matches specified condition (then the array is not null)
second loop unnests the array of rows that match the condition and for every element it puts it in the function output with RETURN NEXT rec clause
Notes
Searching with LIKE is inefficient - I suggest adding another input argument "column type" and restrict it in the lookup by adding a join to pg_catalog.pg_type table.
The second loop is there so that if more than 1 row is found for a particular table, then every row gets returned.
If you are looking for something else, like you need key-value pairs, not just the values, then you need to extend the function. You could for example build json format from rows.
Now, to the code.
Test case
CREATE TABLE tbl1 (col1 int, id int); -- does contain values
CREATE TABLE tbl2 (col1 int, col2 int); -- doesn't contain column "id"
CREATE TABLE tbl3 (id int, col5 int); -- doesn't contain values
INSERT INTO tbl1 (col1, id)
VALUES (1, 5), (1, 33), (1, 25);
Table stores data:
postgres=# select * From tbl1;
col1 | id
------+----
1 | 5
1 | 33
1 | 25
(3 rows)
Creating type
CREATE TYPE sometype AS ( schemaname text, tablename text, colname text, entirerow text );
Function code
CREATE OR REPLACE FUNCTION search_tables_for_column (
v_column_name text
, v_column_value text
)
RETURNS SETOF sometype
LANGUAGE plpgsql
STABLE
AS
$$
DECLARE
rec sometype%rowtype;
v_row_array text[];
rec2 record;
arr_el text;
BEGIN
FOR rec IN
SELECT
nam.nspname AS schemaname
, cls.relname AS tablename
, att.attname AS colname
, null::text AS entirerow
FROM
pg_attribute att
JOIN pg_class cls ON att.attrelid = cls.oid
JOIN pg_namespace nam ON cls.relnamespace = nam.oid
WHERE
cls.relkind = 'r'
AND att.attname = v_column_name
LOOP
EXECUTE format('SELECT ARRAY_AGG(row(tablename.*)::text) FROM %I.%I AS tablename WHERE %I::text ILIKE %s',
rec.schemaname, rec.tablename, rec.colname, quote_literal(concat('%',v_column_value,'%'))) INTO v_row_array;
IF v_row_array is not null THEN
FOR rec2 IN
SELECT unnest(v_row_array) AS one_row
LOOP
rec.entirerow := rec2.one_row;
RETURN NEXT rec;
END LOOP;
END IF;
END LOOP;
END
$$;
Exemplary call & output
postgres=# select * from search_tables_for_column('id','5');
schemaname | tablename | colname | entirerow
------------+-----------+---------+-----------
public | tbl1 | id | (1,5)
public | tbl1 | id | (1,25)
(2 rows)

amazon redshift utility - view v_generate_tbl_ddl not returning primary key constraint in ddl after table has been populated

I'm using the amazon redshift utility v_generate_tbl_ddl to generate a table ddl, but have found that after my table has been populated the primary key constraint doesn't show (example below)
Structure table creation:
DROP TABLE IF EXISTS temp_table;
CREATE TABLE temp_table
(
id INTEGER NOT NULL,
text VARCHAR,
PRIMARY KEY (id)
)
DISTSTYLE ALL SORTKEY (id);
Checking the ddl after the table has been created shows the primary key constraint is present:
SELECT *
FROM admin.v_generate_tbl_ddl
WHERE tablename = 'temp_table'
AND schemaname = 'myschema';
Inserting some data:
COPY temp_table
FROM 's3://s3_bucket_location/manifest_file_temp_table.json' CREDENTIALS 'aws_access_key_id=...;aws_secret_access_key=...' DELIMITER AS '|' gzip manifest
Checking again the ddl and now the primary key constraint isn't there
SELECT *
FROM admin.v_generate_tbl_ddl
WHERE tablename = 'temp_table'
AND schemaname = 'myschema';
I'm sure the primary key is still there as I've doubled checked using:
SELECT pg_table_def.tablename,
pg_table_def.column
FROM pg_table_def
WHERE schemaname = 'myschema'
AND tablename = 'temp_table_pkey';
Digging into the sql of v_generate_tbl_ddl it is this section that isn't returning rows after the table has been populated, but I can't figure out why this stops returning rows? Any help here would be appreciated
SELECT n.nspname AS schemaname,
c.relname AS tablename,
200000000 + CAST(con.oid AS INT) AS seq,
('\t,' + pg_get_constraintdef(con.oid))::CHARACTER VARYING (500) AS ddl
FROM pg_constraint AS con
INNER JOIN pg_class AS c
ON c.relnamespace = con.connamespace
AND c.relfilenode = con.conrelid
INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
AND pg_get_constraintdef (con.oid) NOT LIKE 'FOREIGN KEY%'
AND c.relname = 'temp_table'
ORDER BY seq

tsearch2 add resultset to index

How can I add a resultset (more than one entry) to a tsvector? I use postgres 8.3.
I have an m-n relationship and I'd like to have all values from one column of the n-side in the tsvector of the m-side.
This statement will work if I have an limit to one the subselect. But not without the limit.
UPDATE mytable
SET mytsvector=to_tsvector('english',
coalesce(column_a, '') ||' '||
coalesce((SELECT item FROM other_table WHERE id = other_id LIMIT 1), '')
)
ERROR: more than one row returned by a subquery used as an expression
Under Postgres 8.3 at first I have to create an aggregate function to generate an array from an select.
CREATE AGGREGATE array_accum (
sfunc = array_append,
basetype = anyelement,
stype = anyarray,
initcond = '{}'
);
Since 8.4 there is the function array_agg().
The hole statement looks like this:
UPDATE mytable
SET mytsvector=to_tsvector('english',
coalesce(column_a, '') ||' '||
coalesce(
(SELECT array_to_string(array_accum(item), ' ')
FROM mytable m, other_table o
WHERE o.id = m.other_id AND m.id = id GROUP BY m.id), '')
)