List columns with indexes in Amazon Redshift - postgresql

I need to query Redshift metadata to get a list of table columns that includes information whether the column is part of primary key or not.
There is a post already List columns with indexes in PostgreSQL that has an answer for PostgreSQL, however unfortunately, it fails on Redshift with "ERROR: 42809: op ANY/ALL (array) requires array on right side"

I figured out how to do it with the help of this https://bitbucket.org/zzzeek/sqlalchemy/pull-request/6/sqlalchemy-to-support-postgresql-80/diff
SELECT attname column_name, attnotnull,
format_type(atttypid, atttypmod) as column_type, atttypmod,
i.indisprimary as primary_key,
col_description(attrelid, attnum) as description
FROM pg_attribute c
LEFT OUTER JOIN pg_index i
ON c.attrelid = i.indrelid AND i.indisprimary AND
c.attnum = ANY(string_to_array(textin(int2vectorout(i.indkey)), ' '))
where c.attnum > 0 AND NOT c.attisdropped AND c.attrelid = :tableOid
order by attnum

The following worked for me:
SELECT n.nspname as schema_name,
t.relname as table_name,
i.relname as index_name,
c.contype as index_type,
a.attname as column_name,
a.attnum AS column_position
FROM pg_class t
INNER JOIN pg_index AS ix ON t.oid = ix.indrelid
INNER JOIN pg_constraint AS c ON ix.indrelid = c.conrelid
INNER JOIN pg_class AS i ON i.oid = ix.indexrelid
INNER JOIN pg_attribute AS a ON a.attrelid = t.oid
AND a.attnum= ANY(string_to_array(textin(int2vectorout(ix.indkey)),' ')::int[])
INNER JOIN pg_namespace AS n ON n.oid = t.relnamespace;

You can leverage the table DDL view AWS published a few months ago (https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_generate_tbl_ddl.sql) by picking out the constraint component and parsing out the key columns:
select schemaname,tablename, substring(ddl,charindex('(',ddl)+1, charindex(')',ddl)-1-charindex('(',ddl))
from
(
SELECT
n.nspname AS schemaname
,c.relname AS tablename
,200000000 + CAST(con.oid AS INT) AS seq
,'\t,' + pg_get_constraintdef(con.oid) AS ddl
FROM
pg_constraint AS con
INNER JOIN pg_class AS c
ON c.relnamespace = con.connamespace
AND c.relfilenode = con.conrelid
INNER JOIN pg_namespace AS n
ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
ORDER BY seq
)
Note that this query also gives you foreign key columns. It's easy enough to filter those out by appending the query with
where ddl like '%PRIMARY KEY%'

Use below query:
select * from pg_table_def where tablename = 'mytablename'
This will give you all columns for table along with their data type , encoding and if it has sort key or dist key.

Related

Get information about table partitions

I have a partitioned table out of main table using range.
CREATE TABLE public.partition1 PARTITION OF public.maintable
FOR VALUES FROM ('2017-01-01 00:00:00') TO ('2050-01-01 00:00:00')
How can i get the Values range information using a query to postgres.
I have used a query that at least gives me information for the main and partitioned tables, but i cannot seem to find a way to access the value range
FROM ('2017-01-01 00:00:00') TO ('2050-01-01 00:00:00')
assigned to table partition1
Query used to get partition table information
WITH RECURSIVE partition_info
(relid,
relname,
relsize,
relispartition,
relkind) AS
(
(SELECT oid AS relid,
relname,
pg_relation_size(oid) AS relsize,
relispartition,
relkind
FROM pg_catalog.pg_class
WHERE relname = 'completedorders' AND
relkind = 'p')
UNION ALL
(SELECT
c.oid AS relid,
c.relname AS relname,
pg_relation_size(c.oid) AS relsize,
c.relispartition AS relispartition,
c.relkind AS relkind
FROM partition_info AS p,
pg_catalog.pg_inherits AS i,
pg_catalog.pg_class AS c
WHERE p.relid = i.inhparent AND
c.oid = i.inhrelid AND
c.relispartition = true)
)
SELECT * FROM partition_info;
The following query provides the information about partitions as well. From there on its just string manipulation in order to get further information.
Note: you will have to change the name of the table in the query.
with recursive inh as (
select i.inhrelid, null::text as parent
from pg_catalog.pg_inherits i
join pg_catalog.pg_class cl on i.inhparent = cl.oid
join pg_catalog.pg_namespace nsp on cl.relnamespace = nsp.oid
where nsp.nspname = 'public' ---<< change table schema here
and cl.relname = 'tablename' ---<< change table name here
union all
select i.inhrelid, (i.inhparent::regclass)::text
from inh
join pg_catalog.pg_inherits i on (inh.inhrelid = i.inhparent)
)
select c.relname as partition_name,
n.nspname as partition_schema,
pg_get_expr(c.relpartbound, c.oid, true) as partition_expression,
pg_get_expr(p.partexprs, c.oid, true) as sub_partition,
parent,
case p.partstrat
when 'l' then 'LIST'
when 'r' then 'RANGE'
end as sub_partition_strategy
from inh
join pg_catalog.pg_class c on inh.inhrelid = c.oid
join pg_catalog.pg_namespace n on c.relnamespace = n.oid
left join pg_partitioned_table p on p.partrelid = c.oid
order by n.nspname, c.relname

redshift - how to use listagg with information_schema.columns table

I'm using redshift and would like to create a comma separated list of columns. I'm trying to grab the column names from information schema using listagg:
SELECT
listagg(column_name,',') within group (order by ordinal_position)
FROM information_schema.columns
WHERE table_schema = 'my_schema'
AND table_name = 'my table';
I'm getting the following error:
[Amazon](500310) Invalid operation: Function (listagg(text,text)) must be applied on at least one user created tables;
Here is a work around I tested..
select listagg(column_name, ', ') within group (order by column_name)
from
(
select
a.attname::varchar as column_name, typname::varchar as data_type
from
pg_type t,
pg_attribute a,
pg_class c,
pg_namespace ns,
(select top 1 1 from my_schema.my_table)
where
t.oid=a.atttypid
and a.attrelid = c.oid
and c.relnamespace = ns.oid
and typname NOT IN ('oid','xid','tid','cid')
and attname not in ('deletexid', 'insertxid')
and trim(relname) = 'my_table'
and ns.nspname = 'my_schema'
)
Although this does not answer how to apply listagg on information_schema, I can recommend an alternative method of using listagg on the pg catalog tables instead.
Try this:
SELECT DISTINCT listagg(attname, ',') WITHIN GROUP (ORDER BY a.attsortkeyord) AS "columns"
FROM pg_attribute a, pg_namespace ns, pg_class c, pg_type t, stv_tbl_perm p, pg_database db
WHERE t.oid=a.atttypid AND a.attrelid=p.id AND ns.oid = c.relnamespace AND db.oid = p.db_id AND c.oid = a.attrelid
AND typname NOT IN ('oid','xid','tid','cid')
AND ns.nspname = 'my_schema'
AND RTRIM(name) = 'my table'

How to list all tables and their creators (or owners) in Redshift

I thought it is straightforward but I couldn't find a way to list all tables and their creators (or owners) in Redshift. Any help/insight is welcome.
It was pg_tables table and here is the SQL:
select tablename, tableowner From pg_tables
You can list Redshift tables, views and their owners by running this script:
SELECT n.nspname AS schema_name
, pg_get_userbyid(c.relowner) AS table_owner
, c.relname AS table_name
, CASE WHEN c.relkind = 'v' THEN 'view' ELSE 'table' END
AS table_type
, d.description AS table_description
FROM pg_class As c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_tablespace t ON t.oid = c.reltablespace
LEFT JOIN pg_description As d
ON (d.objoid = c.oid AND d.objsubid = 0)
WHERE c.relkind IN('r', 'v')
ORDER BY n.nspname, c.relname ;

Using default sql tables to find columns present in a table

I have a postgresql database set up with clients data split by schema.
Within sql I would like to identify for the table products which columns containing the string "id" exist
I'm trying to use the pg_... tables to identify these columns, but the below query appears to be bringing back results from across schemas despite the restriction on table_schema
SELECT *
FROM pg_class c
INNER JOIN pg_attribute a ON a.attrelid = c.oid
INNER JOIN pg_type t ON a.atttypid = t.oid
INNER JOIN information_schema.tables sch ON c.relname = sch.table_name
WHERE c.relname = 'products'
AND a.attnum > 0
AND a.attname LIKE '%id%'
AND table_schema = 'schema001'
I guess the schemas could be set up incorrectly or the where clause is incorrect - any help would be appreciated
Use pg_namespace and drop the information_schema:
SELECT nspname, relname, attname
FROM pg_class c
INNER JOIN pg_namespace n ON n.oid = c.relnamespace
INNER JOIN pg_attribute a ON a.attrelid = c.oid
INNER JOIN pg_type t ON a.atttypid = t.oid
WHERE c.relname = 'products'
AND a.attnum > 0
AND a.attname LIKE '%id%'
AND n.nspname = 'schema001';

List all index names, column names and its table name of a PostgreSQL database

What is the query to get the list all index names, its column name and its table name of a postgresql database?
I have tried to get the list of all indexes in a db by using this query but how to get the list of indexes, its column names and its table names?
SELECT *
FROM pg_class, pg_index
WHERE pg_class.oid = pg_index.indexrelid
AND pg_class.oid IN (
SELECT indexrelid
FROM pg_index, pg_class
WHERE pg_class.oid=pg_index.indrelid
AND indisunique != 't'
AND indisprimary != 't'
AND relname !~ '^pg_');
This will output all indexes with details (extracted from my view definitions):
SELECT i.relname as indname,
i.relowner as indowner,
idx.indrelid::regclass,
am.amname as indam,
idx.indkey,
ARRAY(
SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
FROM generate_subscripts(idx.indkey, 1) as k
ORDER BY k
) as indkey_names,
idx.indexprs IS NOT NULL as indexprs,
idx.indpred IS NOT NULL as indpred
FROM pg_index as idx
JOIN pg_class as i
ON i.oid = idx.indexrelid
JOIN pg_am as am
ON i.relam = am.oid;
Optionally add an extra join to the end so as to trim the namespaces:
SELECT i.relname as indname,
i.relowner as indowner,
idx.indrelid::regclass,
am.amname as indam,
idx.indkey,
ARRAY(
SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
FROM generate_subscripts(idx.indkey, 1) as k
ORDER BY k
) as indkey_names,
idx.indexprs IS NOT NULL as indexprs,
idx.indpred IS NOT NULL as indpred
FROM pg_index as idx
JOIN pg_class as i
ON i.oid = idx.indexrelid
JOIN pg_am as am
ON i.relam = am.oid
JOIN pg_namespace as ns
ON ns.oid = i.relnamespace
AND ns.nspname = ANY(current_schemas(false));
More human friendly version of #Denis solution:
SELECT
U.usename AS user_name,
ns.nspname AS schema_name,
idx.indrelid :: REGCLASS AS table_name,
i.relname AS index_name,
idx.indisunique AS is_unique,
idx.indisprimary AS is_primary,
am.amname AS index_type,
idx.indkey,
ARRAY(
SELECT pg_get_indexdef(idx.indexrelid, k + 1, TRUE)
FROM
generate_subscripts(idx.indkey, 1) AS k
ORDER BY k
) AS index_keys,
(idx.indexprs IS NOT NULL) OR (idx.indkey::int[] #> array[0]) AS is_functional,
idx.indpred IS NOT NULL AS is_partial
FROM pg_index AS idx
JOIN pg_class AS i
ON i.oid = idx.indexrelid
JOIN pg_am AS am
ON i.relam = am.oid
JOIN pg_namespace AS NS ON i.relnamespace = NS.OID
JOIN pg_user AS U ON i.relowner = U.usesysid
WHERE NOT nspname LIKE 'pg%'; -- Excluding system tables
The Query to list all the indexes of a database
SELECT
tablename,
indexes [1],
indexes [2],
indexes [3],
indexes [4],
indexes [5],
indexes [6],
indexes [7],
indexes [8],
indexes [9],
indexes [10]
FROM (SELECT
tablename,
array_agg(indexname) AS indexes
FROM pg_indexes
WHERE schemaname = 'public'
GROUP BY tablename) as sub;
Here's a version that simplifies things compared to other answers by
avoiding nested selects
avoiding built-in functions (maybe hard to remember)
using LATERAL and UNNEST(...) WITH ORDINALITY features available in later PostgreSQL versions (9.4+)
SELECT
tnsp.nspname AS schema_name,
trel.relname AS table_name,
irel.relname AS index_name,
array_agg (
a.attname
|| ' ' || CASE o.option & 1 WHEN 1 THEN 'DESC' ELSE 'ASC' END
|| ' ' || CASE o.option & 2 WHEN 2 THEN 'NULLS FIRST' ELSE 'NULLS LAST' END
ORDER BY c.ordinality
) AS columns
FROM pg_index AS i
JOIN pg_class AS trel ON trel.oid = i.indrelid
JOIN pg_namespace AS tnsp ON trel.relnamespace = tnsp.oid
JOIN pg_class AS irel ON irel.oid = i.indexrelid
CROSS JOIN LATERAL unnest (i.indkey) WITH ORDINALITY AS c (colnum, ordinality)
LEFT JOIN LATERAL unnest (i.indoption) WITH ORDINALITY AS o (option, ordinality)
ON c.ordinality = o.ordinality
JOIN pg_attribute AS a ON trel.oid = a.attrelid AND a.attnum = c.colnum
GROUP BY tnsp.nspname, trel.relname, irel.relname
If you are also interested in index size, you may use this query from the PostgreSQL Wiki.
SELECT
t.tablename,
indexname,
c.reltuples AS num_rows,
pg_size_pretty(pg_relation_size(quote_ident(t.tablename)::text)) AS table_size,
pg_size_pretty(pg_relation_size(quote_ident(indexrelname)::text)) AS index_size,
CASE WHEN indisunique THEN 'Y'
ELSE 'N'
END AS UNIQUE,
idx_scan AS number_of_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_tables t
LEFT OUTER JOIN pg_class c ON t.tablename=c.relname
LEFT OUTER JOIN
( SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns, idx_scan, idx_tup_read, idx_tup_fetch, indexrelname, indisunique FROM pg_index x
JOIN pg_class c ON c.oid = x.indrelid
JOIN pg_class ipg ON ipg.oid = x.indexrelid
JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid )
AS foo
ON t.tablename = foo.ctablename
WHERE t.schemaname='public'
ORDER BY 1,2;
For Non-Composite Indexes
select t.relname,i.relname ,
STRING_AGG(pga.attname||'', ','order by i.relname,pga.attnum) as columnName
from pg_class t inner join pg_index ix
on t.oid = ix.indrelid
inner join pg_class i
on i.oid = ix.indexrelid
inner join pg_attribute pga
on
pga.attrelid = i.oid
inner join pg_indexes pgidx
on pgidx.indexname=i.relname
where
t.relkind = 'r'
and pgidx.schemaname='asit_cm'
and t.relname ='accessory'
group by t.relname,i.relname having count(*)=1
For Composite Indexes
select t.relname,i.relname ,
STRING_AGG(pga.attname||'', ','order by i.relname,pga.attnum) as columnName
from pg_class t inner join pg_index ix
on t.oid = ix.indrelid
inner join pg_class i
on i.oid = ix.indexrelid
inner join pg_attribute pga
on
pga.attrelid = i.oid
inner join pg_indexes pgidx
on pgidx.indexname=i.relname
where
t.relkind = 'r'
and pgidx.schemaname='asit_cm'
and t.relname ='accessory'
group by t.relname,i.relname having count(*)=1