How to check disk-usage in specific schema and sum for all schema? pg9+ - postgresql

The documentation about disk usage offers no "fast and simple" recipe, so it is faster to ask here.
When I try SELECT pg_relation_filepath(oid), relpages FROM pg_class WHERE relname = 'big' it works fine... but "big" is not "public.big", it is "othername.big"; if a "public.big" also exists, the result will be ambiguous.
When I try SELECT pg_relation_filepath(oid), relpages FROM pg_class WHERE relname = 'othername.big' it does not work.
I need to compare the disk usage of individual tables and to check (or sum) the disk usage of every schema.

To get Schema size :
-- Disk usage per schema, largest first.
--   taille_table        : sum of pg_relation_size() over the schema's tables
--   taille_totale_table : sum of pg_total_relation_size() over the same tables
-- Each name is built with quote_ident() and cast to regclass, so mixed-case or
-- otherwise unusual identifiers resolve correctly and no existence guard is
-- needed for rows coming from pg_tables.
SELECT
    t.schemaname,
    pg_size_pretty(t.taille::bigint) AS taille_table,
    pg_size_pretty(t.taille_totale::bigint) AS taille_totale_table
FROM (
    SELECT
        schemaname,
        sum(pg_relation_size(
            (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
        )) AS taille,
        sum(pg_total_relation_size(
            (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
        )) AS taille_totale
    FROM pg_tables
    GROUP BY schemaname
) AS t
-- Sort on the numeric sum, not on the pretty-printed text.
ORDER BY t.taille_totale DESC;
And for Tables by Schema, you can do this :
-- Disk usage per table, largest first.
--   taille_table        : pg_relation_size() of the table
--   taille_totale_table : pg_total_relation_size() of the table
-- Names are quoted with quote_ident() and cast to regclass, so mixed-case or
-- otherwise unusual identifiers resolve correctly and no existence guard is
-- needed for rows coming from pg_tables.
SELECT
    s.schemaname,
    s.tablename,
    s.tablespace,
    pg_size_pretty(s.taille) AS taille_table,
    pg_size_pretty(s.taille_totale) AS taille_totale_table
FROM (
    SELECT
        schemaname,
        tablename,
        tablespace,
        pg_relation_size(
            (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
        ) AS taille,
        pg_total_relation_size(
            (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
        ) AS taille_totale
    FROM pg_tables
) AS s
-- Sort on the byte count, not on the pretty-printed text.
ORDER BY s.taille_totale DESC;
NOTE: to avoid "ERROR: relation 'x.y' does not exist", the relation name has to be validated before it is used. Either
use to_regclass(rel_name) in Postgres 9.4+ (see https://stackoverflow.com/a/24089729),
or use a generic function (see "SwissKnife libraries") that works on any pg version, as below:
-- relname_exists(rel_name [, schema_name]) -> boolean
--
-- Returns true when a relation with the given name exists in pg_class.
--   $1  relation name; may be schema-qualified as 'schema.relation'
--       (split on '.', so quoted names containing dots are not supported).
--   $2  optional schema name, used only when $1 is not schema-qualified;
--       when omitted, the schema defaults to 'public'.
--
-- Marked STABLE, not IMMUTABLE: the result depends on the current catalog
-- contents, and an IMMUTABLE function may be pre-evaluated by the planner and
-- return a stale answer after a table is created or dropped.
CREATE or replace FUNCTION relname_exists(text,text default NULL) RETURNS boolean AS $$
SELECT EXISTS (
SELECT 1
FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace,
regexp_split_to_array($1,'\.') t(x) -- does not work with quoted names
WHERE CASE
-- $1 was given as 'schema.relation'
WHEN COALESCE(x[2],'')>'' THEN n.nspname = x[1] AND c.relname = x[2]
-- bare relation name and no schema argument: look in 'public'
WHEN $2 IS NULL THEN n.nspname = 'public' AND c.relname = $1
-- bare relation name with an explicit schema argument
ELSE n.nspname = $2 AND c.relname = $1
END
)
$$ language SQL STABLE;

Using pg_class and simplifying layout
Same results as Hervé's queries (that used pg_tables)... Was adapted from https://wiki.postgresql.org/wiki/Disk_Usage
-- -- -- -- --
-- DISK-USAGE
-- One row per ordinary table (relkind = 'r') with its total size and the size
-- left after subtracting indexes and the TOAST relation.
CREATE VIEW pgvw_class_usage AS
SELECT
    s.nspname,
    s.relname,
    s.total_bytes,
    s.total_bytes - s.index_bytes - COALESCE(s.toast_bytes, 0) AS table_bytes,
    pg_size_pretty(s.total_bytes - s.index_bytes - COALESCE(s.toast_bytes, 0)) AS table_size
FROM (
    SELECT
        n.nspname,
        c.relname,
        pg_total_relation_size(c.oid) AS total_bytes,
        pg_indexes_size(c.oid) AS index_bytes,
        -- may be NULL (handled by the COALESCE above)
        pg_total_relation_size(c.reltoastrelid) AS toast_bytes
    FROM pg_class AS c
    LEFT JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE c.relkind = 'r'
) AS s
ORDER BY s.nspname, s.relname
; -- eg. SELECT * FROM pgvw_class_usage WHERE relname='foo' AND nspname='bar';
-- Per-schema roll-up of pgvw_class_usage: number of tables and summed sizes.
CREATE VIEW pgvw_nsclass_usage AS
SELECT
    g.nspname,
    g.n_tables,
    g.total_bytes,
    g.table_bytes,
    pg_size_pretty(g.table_bytes) AS table_size
FROM (
    SELECT
        u.nspname,
        count(*) AS n_tables,
        sum(u.total_bytes) AS total_bytes,
        sum(u.table_bytes) AS table_bytes
    FROM pgvw_class_usage AS u
    GROUP BY u.nspname
) AS g
; -- eg. SELECT * FROM pgvw_nsclass_usage WHERE nspname='bar';
EXAMPLE:
nspname | n_tables | total_bytes | table_bytes | table_size
--------------------+----------+-------------+-------------+------------
bench1 | 8 | 4718592 | 3825664 | 3736 kB
dataset | 4 | 8552448 | 6225920 | 6080 kB
information_schema | 7 | 352256 | 294912 | 288 kB
pg_catalog | 54 | 9003008 | 4734976 | 4624 kB
(4 rows)

Related

Get all sequences with current values

I have the following query that gets all sequences and their schemas:
-- All sequences with their schema, skipping the 'topology' and 'tiger' schemas.
SELECT
    seq.sequence_schema AS schema,
    seq.sequence_name AS sequence
FROM information_schema.sequences AS seq
WHERE seq.sequence_schema NOT IN ('topology', 'tiger')
ORDER BY seq.sequence_schema, seq.sequence_name
I would like to get the current value of each sequence name with something like select last_value from [sequence];. I have tried the following (and a couple variations), but it doesn't work because the syntax isn't correct:
-- NOTE: this is the asker's non-working attempt, kept as posted. It is not valid
-- PL/pgSQL: the outer EXECUTE is followed by a column list instead of a query
-- string, and a second EXECUTE is nested inside the JOIN.
DO $$
BEGIN
EXECUTE
sequence_schema as schema,
sequence_name as sequence,
last_value
FROM information_schema.sequences
LEFT JOIN (
EXECUTE 'SELECT last_value FROM ' || schema || '.' || sequence
) tmp
ORDER BY 1, 2;
END
$$;
I've found some solutions that create functions to execute text or piece together a query inside a function and return the result, but I would prefer to have a single query that I can run and modify however I like.
In Postgres 12, you can use pg_sequences:
-- Schema, name and last value of every sequence listed in pg_sequences.
SELECT
    seq.schemaname AS schema,
    seq.sequencename AS sequence,
    seq.last_value
FROM pg_sequences AS seq
You can rely on the function pg_sequence_last_value
-- Last value (0 when pg_sequence_last_value() yields NULL) of each sequence that
-- has a pg_depend link to a table column, skipping the 'topology' and 'tiger' schemas.
SELECT
    nsp.nspname AS schema,
    s.relname AS sequence_name,
    COALESCE(pg_sequence_last_value(s.oid), 0) AS seq_last_value
FROM pg_class AS s
INNER JOIN pg_depend AS d
    ON d.objid = s.oid
INNER JOIN pg_attribute AS a
    ON a.attrelid = d.refobjid
   AND a.attnum = d.refobjsubid
INNER JOIN pg_namespace AS nsp
    ON nsp.oid = s.relnamespace
WHERE s.relkind = 'S'
  AND d.classid = 'pg_class'::regclass
  AND d.refclassid = 'pg_class'::regclass
  AND nsp.nspname NOT IN ('topology', 'tiger')
ORDER BY nsp.nspname, s.relname DESC;
Here's a solution that doesn't rely on pg_sequences or pg_sequence_last_value:
-- get_sequences() -> TABLE (last_value bigint, sequence_schema text, sequence_name text)
--
-- Returns one row per sequence listed in information_schema.sequences, reading
-- last_value directly from each sequence relation with dynamic SQL.
CREATE OR REPLACE FUNCTION get_sequences()
RETURNS TABLE (
last_value bigint,
sequence_schema text,
sequence_name text
)
LANGUAGE plpgsql AS
$func$
DECLARE
s RECORD;
BEGIN
FOR s IN SELECT t.sequence_schema, t.sequence_name
FROM information_schema.sequences t
LOOP
-- %L emits the names as properly escaped string literals and %I as quoted
-- identifiers, so names containing quotes or upper-case letters are safe.
RETURN QUERY EXECUTE format(
'SELECT last_value, %1$L::text, %2$L::text FROM %1$I.%2$I',
s.sequence_schema,
s.sequence_name
);
END LOOP;
END;
$func$;
SELECT * FROM get_sequences();
That'll output a table like this:
last_value | sequence_schema | sequence_name
------------+-----------------+-------------------------------------------------------
1 | public | contact_infos_id_seq
1 | media | photos_id_seq
2006 | company | companies_id_seq
2505 | public | houses_id_seq
1 | public | purchase_numbers_id_seq
... etc
The other answers will only work if you are on a modern version of Postgres (I believe 10 or greater).

Table bloat on Postgres

I have a small (~200GB) data warehouse running on Postgres 9.5.15 on AWS RDS instance.
For robustness, I'm inserting new data into analytical schema (result of ELT) as follows:
insert new slice
remove the old slice using delete command
vacuum
I know Postgres soft deletes tuples when you execute delete or update commands. This is not a concern giving the table sizes. The problem is that the dead tuples are not removed with explicit vacuum on (3) or regular autovacuum. So, if the pipeline is executed many times, I end up with HUGE table bloat that affects table performance a lot, not mentioning extra storage wasted.
Moreover, when I started investigating I found out that even system tables have this issue:
schemaname | relname | n_live_tup | n_dead_tup | ratio%
pg_catalog | pg_attribute | 46081 | 8339587 | 18097
pg_catalog | pg_depend | 27375 | 2490507 | 9097
pg_catalog | pg_statistic | 20094 | 1208474 | 6013
That might make general performance of the instance worse in ways I can't even imagine. When I try to do VACUUM FULL VERBOSE pg_catalog.pg_attribute it gives me this:
"pg_attribute": found 0 removable, 8387117 nonremovable row versions in 152494 pages
I have read articles like "3 reasons of table bloat" but none of them applies (I'm not doing replication, I don't have hanging transactions, etc.). I can use something like pg_repack to get rid of the bloat on some schedule, but I'd like to understand why it happens. Also, I definitely don't want to repack system tables.
My only hypothesis is that vacuum requires all the dead tuples to fit in memory that is limited to maintenance_work_mem setting (127MB for our instance) and we need to increase that but I need a side opinion first.
I ended up writing my own functions to repack the data and running them on schedule:
-- repack an individual table
-- admin.repack_table(rel_name) -> text
--
-- Rewrites a table to drop dead tuples: copies its rows into a temp table,
-- truncates the original, copies the rows back and runs ANALYZE.
--   $1       table name, optionally schema-qualified (quoted identifiers allowed)
--   returns  $1 unchanged
--   raises   an error if $1 does not name an existing relation
CREATE OR REPLACE FUNCTION admin.repack_table(text)
RETURNS text
AS $$
DECLARE
    -- The regclass cast validates the name up front, so arbitrary text can
    -- never reach the dynamic SQL below.
    target regclass := $1::regclass;
    qualified text;
BEGIN
    -- Always use the schema-qualified, quoted name: an unqualified name could
    -- otherwise resolve to the temp table created below.
    SELECT format('%I.%I', n.nspname, c.relname)
    INTO STRICT qualified
    FROM pg_catalog.pg_class AS c
    JOIN pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace
    WHERE c.oid = target;

    -- Lock before copying: rows written by other sessions between the copy and
    -- the TRUNCATE would otherwise be lost.
    EXECUTE format('LOCK TABLE %s IN ACCESS EXCLUSIVE MODE', qualified);
    EXECUTE format('CREATE TEMP TABLE t1 (LIKE %s)', qualified);
    EXECUTE format('INSERT INTO pg_temp.t1 SELECT * FROM %s', qualified);
    EXECUTE format('TRUNCATE TABLE %s', qualified);
    EXECUTE format('INSERT INTO %s SELECT * FROM pg_temp.t1', qualified);
    EXECUTE 'DROP TABLE pg_temp.t1';
    EXECUTE format('ANALYZE %s', qualified);
    RETURN $1;
END;
$$ LANGUAGE plpgsql;
-- repack all tables in certain schema (with an optional threshold for N of dead tuples)
-- admin.repack_schema(schema_name [, min_dead_tuples]) -> TABLE (table_name text)
--
-- Calls admin.repack_table() on every table of the given schema whose
-- pg_stat_all_tables entry shows more than $2 dead tuples (default 5000) and
-- fewer than 1,000,000 live tuples, and returns what each call returned.
CREATE OR REPLACE FUNCTION admin.repack_schema(text,int default 5000)
RETURNS table (table_name text)
AS $$
BEGIN
    RETURN QUERY
    -- quote_ident() keeps mixed-case or otherwise unusual names intact when the
    -- qualified name is parsed again inside admin.repack_table().
    SELECT admin.repack_table(
               quote_ident(t.table_schema::text) || '.' || quote_ident(t.table_name::text)
           )
    FROM information_schema.tables AS t
    WHERE t.table_schema = $1
      AND t.table_name IN (
          SELECT s.relname
          FROM pg_stat_all_tables AS s
          WHERE s.schemaname = $1
            AND s.n_dead_tup > $2
            AND s.n_live_tup < 1000000 -- avoid repacking too large tables
      );
END;
$$ LANGUAGE plpgsql;
Bloat Score Query
The following SQL query will examine each table in the public schema and identify dead rows (tuples) that are wasting disk space.
-- Dead-tuple percentage (dead / live * 100) for each table in schema 'public'
-- that has at least one dead and one live tuple, worst first.
SELECT
    st.schemaname || '.' || st.relname AS tblnam,
    st.n_dead_tup,
    (CAST(st.n_dead_tup AS float) / CAST(st.n_live_tup AS float)) * 100 AS pfrag
FROM pg_stat_user_tables AS st
WHERE st.schemaname = 'public'
  AND st.n_dead_tup > 0
  AND st.n_live_tup > 0
ORDER BY pfrag DESC;
If this query returns a high percentage ( pfrag ) of dead tuples, the VACUUM command may be used to reclaim space.
7 Considered to be high
From wiki.postgres.org
-- Table and index bloat estimate built from pg_stats column statistics.
-- Output per (table, index) pair:
--   tbloat / ibloat            : actual pages divided by estimated pages (otta / iotta)
--   wastedbytes / wastedibytes : block size * (actual pages - estimated pages), floored at 0
-- The figures are estimates derived from avg_width and null_frac, not measurements.
SELECT
current_database(), schemaname, tablename, /*reltuples::bigint, relpages::bigint, otta,*/
ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::float/otta END)::numeric,1) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END AS wastedbytes,
iname, /*ituples::bigint, ipages::bigint, iotta,*/
ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::float/iotta END)::numeric,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes
FROM (
-- sml: one row per table/index pair with actual pages (relpages, ipages) and
-- estimated pages (otta, iotta); iname is '?' when the table has no index row.
SELECT
schemaname, tablename, cc.reltuples, cc.relpages, bs,
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM (
-- rs: per-table row width rounded up to a multiple of ma (datahdr) and the
-- weighted null-bitmap size (nullhdr2).
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
-- foo: aggregates pg_stats per table; nullhdr adds 1 + (number of columns with
-- null_frac <> 0) / 8 to the header size.
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, (
-- constants: block size from the server setting; hdr and ma are chosen from the
-- version() string (27 for 8.0-8.2 else 23; 8 when it contains 'mingw32' else 4).
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
JOIN pg_class cc ON cc.relname = rs.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
ORDER BY wastedbytes DESC
Human readable wasted disk space
-- Bloat estimate reported in human-readable units, one row per table and one
-- per index, using CTEs:
--   constants   : block size plus fixed header (23) and alignment (4) values
--   bloat_info  : per-table row-width estimate derived from pg_stats
--   table_bloat : actual pages (relpages) vs. estimated pages (otta) per table
--   index_bloat : actual pages (ipages) vs. estimated pages (iotta) per index
WITH constants AS (
SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 4 AS ma
), bloat_info AS (
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, constants
GROUP BY 1,2,3,4,5
) AS foo
), table_bloat AS (
SELECT
schemaname, tablename, cc.relpages, bs,
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
), index_bloat AS (
SELECT
schemaname, tablename, bs,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
-- inner joins: tables without any index produce no row in index_bloat
JOIN pg_index i ON indrelid = cc.oid
JOIN pg_class c2 ON c2.oid = i.indexrelid
)
-- Final report: bloat = actual pages / estimated pages; waste = block size *
-- surplus pages (0 when actual < estimated), pretty-printed by pg_size_pretty.
SELECT
type, schemaname, object_name, bloat, pg_size_pretty(raw_waste) as waste
FROM
(SELECT
'table' as type,
schemaname,
tablename as object_name,
ROUND(CASE WHEN otta=0 THEN 0.0 ELSE table_bloat.relpages/otta::numeric END,1) AS bloat,
CASE WHEN relpages < otta THEN '0' ELSE (bs*(table_bloat.relpages-otta)::bigint)::bigint END AS raw_waste
FROM
table_bloat
UNION
SELECT
'index' as type,
schemaname,
-- index rows are labelled 'table::index'
tablename || '::' || iname as object_name,
ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat,
CASE WHEN ipages < iotta THEN '0' ELSE (bs*(ipages-iotta))::bigint END AS raw_waste
FROM
index_bloat) bloat_summary
ORDER BY raw_waste DESC, bloat DESC

"Show Create Table" in Redshift [duplicate]

Team,
I am working on redshift ( 8.0.2 ). I would like to have DDL command in place for any object type ( table / view...) in redshift.
I have below one. but it is not giving the full text.
-- DDL statements recorded in svl_statementtext that mention 'table11', oldest first,
-- with the name of the user who ran them.
SELECT
    s.userid,
    u.usename,
    s.starttime,
    s.type,
    rtrim(s.text)
FROM svl_statementtext AS s
INNER JOIN pg_user AS u
    ON u.usesysid = s.userid
WHERE s.type = 'DDL'
  AND s.text LIKE '%table11%'
ORDER BY s.starttime ASC;
userid | usename | starttime | type | text
--------+----------------------------------------------------------------------------------------------------------------------------------+----------------------------+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
143 | user11 | 2014-04-16 23:42:06.227296 | DDL | CREATE TABLE table11 ( log_time date, user_name text, database_name text, process_id integer, connection_from text, session_id text, session_line_num bigint, command_tag text, session_start_time
143 | user11 | 2014-04-16 23:42:06.234987 | DDL | CREATE TABLE table11 ( log_time date, user_name text, database_name text, process_id integer, connection_from text, session_id text, session_line_num bigint, command_tag text, session_start_time
(2 rows)
In Oracle we have the DBMS_METADATA.GET_DDL package function, which returns the full text. I would like to have the same. I tried STL_DDLTEXT, but its text is heavily truncated.
-- First 40 characters of each stl_ddltext row of user 100 that mentions 'table11',
-- newest transaction first, segments in sequence order.
SELECT
    d.xid,
    d.starttime,
    d.sequence,
    substring(d.text, 1, 40) AS text
FROM stl_ddltext AS d
WHERE d.userid = 100
  AND d.text LIKE '%table11%'
ORDER BY d.xid DESC, d.sequence;
xid | starttime | sequence | text
--------+----------------------------+----------+------------------------------------------
135475 | 2014-04-16 23:42:06.234987 | 0 | CREATE TABLE table11 ( log_time dat
135475 | 2014-04-16 23:42:06.227296 | 0 | CREATE TABLE table11 ( log_time dat
(2 rows)
I have a few more questions about the first query's output. The column width of "usename" is too large; how can I trim it? If I query pg_user, it is trimmed internally. In Oracle we can use, e.g.,
" col <col_name> for a80 "
second doubt: i am getting 2 rows. actually i created only one table. Any reason for 2 rows in the output ?
for e.g. in physical postgre db, if we want to generate any ddl for one function,
we can use below.
in the below, function name is "add"
-- Full CREATE FUNCTION text of the function named "add".
SELECT pg_catalog.pg_get_functiondef(CAST('add' AS regproc));
like this, do we have any pkg in Redshift for table/views ?
Thanks
For DDL:
First create the admin view here: https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_generate_tbl_ddl.sql
Next write a query like this:
-- DDL lines of one table from the admin view, in statement order.
SELECT v.ddl
FROM admin.v_generate_tbl_ddl AS v
WHERE v.schemaname = 'some_schema'
  AND v.tablename = 'some_table'
ORDER BY v.seq ASC
I have not found a single function in Redshift that provides this functionality. You can get the full definition of views by using the pg_get_viewdef function:
-- 'create view <schema>.<name> as <definition>' for every view outside the
-- system schemas whose owner appears in pg_user.
SELECT
    'create view '
        || nc.nspname::information_schema.sql_identifier
        || '.'
        || c.relname::information_schema.sql_identifier
        || ' as '
        || pg_get_viewdef(c.oid)::information_schema.character_data AS view_definition
FROM pg_class AS c
INNER JOIN pg_namespace AS nc
    ON nc.oid = c.relnamespace
INNER JOIN pg_user AS u
    ON u.usesysid = c.relowner
WHERE c.relkind = 'v'::"char"
  AND nc.nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema');
For table definitions I've put together a query, but it still needs a little work to fill in some details as noted in the commented lines:
-- Builds a 'create table ...' statement per table from the catalogs by
-- concatenating three pieces:
--   cp.coldef  : comma-separated column definitions (type, length, default, nullability, identity)
--   d.distkey  : ') distkey(<column>)' -- this piece also closes the column list
--   sortkey    : ' sortkey(<col>,<col>,...)' from pg_attribute.attsortkeyord
-- The 'QQQ'<tableid>'ZZZ' markers embed the table id inside the aggregated column
-- string so it can be cut back out with charindex()/substr() and used as a join key.
-- NOTE(review): d comes from a LEFT JOIN, so d.distkey is NULL for a table without a
-- distribution key; the || concatenation then presumably yields NULL for the whole
-- statement -- confirm on the target engine.
select tm.schemaname||'.'||tm.tablename, 'create table '||tm.schemaname||'.'||tm.tablename
||' ('
||cp.coldef
-- primary key
-- diststyle
-- dist key
||d.distkey
--sort key
|| (select
' sortkey(' ||substr(array_to_string(
array( select ','||cast(column_name as varchar(100)) as str from
(select column_name from information_schema.columns col where col.table_schema= tm.schemaname and col.table_name=tm.tablename) c2
join
(-- gives sort cols
select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute pa where
pa.attnum > 0 AND NOT pa.attisdropped AND pa.attsortkeyord > 0
) st on tm.tableid=st.tableid and c2.column_name=st.colname order by sort_col_order
)
,'')
,2,10000) || ')'
)
||';'
from
-- t master table list
(
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
AND c.relname <> 'temp_staging_tables_1'
-- and c.relname in ('f_recipient_registration','ht_base_document','ht_folder','ht_logical_file','ht_transaction_addresses','ht_ysi_batch','ht_ysi_batch_messages','ht_ysi_files')
) tm
-- cp creates the col params for the create string
join
(select
-- tableid: the text between the first 'QQQ' and the first 'ZZZ' marker
substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)) as tableid
-- coldef: str with every marker removed and the leading comma dropped
,substr(replace(replace(str,'ZZZ',''),'QQQ'||substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)),''),2,10000) as coldef
from
( select array_to_string(array(
SELECT 'QQQ'||cast(t.tableid as varchar(10))||'ZZZ'|| ','||column_name||' '|| decode(udt_name,'bpchar','char',udt_name) || decode(character_maximum_length,null,'', '('||cast(character_maximum_length as varchar(9))||')' )
-- default
|| decode(substr(column_default,2,8),'identity','',null,'',' default '||column_default||' ')
-- nullable
|| decode(is_nullable,'YES',' NULL ','NO',' NOT NULL ')
-- identity
|| decode(substr(column_default,2,8),'identity',' identity('||substr(column_default,(charindex('''',column_default)+1), (length(column_default)-charindex('''',reverse(column_default))-charindex('''',column_default) ) ) ||') ', '') as str
from
-- ci all the col info
(
select cast(t.tableid as int), cast(table_schema as varchar(100)), cast(table_name as varchar(100)), cast(column_name as varchar(100)),
cast(ordinal_position as int), cast(column_default as varchar(100)), cast(is_nullable as varchar(20)) , cast(udt_name as varchar(50)) ,cast(character_maximum_length as int),
sort_col_order , decode(d.colname,null,0,1) dist_key
from (select * from information_schema.columns c where c.table_schema= t.schemaname and c.table_name=t.tablename) c
left join
(-- gives sort cols
select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attsortkeyord > 0
) s on t.tableid=s.tableid and c.column_name=s.colname
left join
-- gives dist col
(select attrelid as tableid, attname as colname from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attisdistkey = 't'
) d on t.tableid=d.tableid and c.column_name=d.colname
order by ordinal_position
) ci
-- for the working array funct
), '') as str
from
(-- need tableid
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
) t
-- for the agg functions that dont exist
-- ) group by table_schema, table_name
)) cp on tm.tableid=cp.tableid
-- add in primary key query here
-- dist key
left join
( select
-- close off the col defs after the primary key
')' ||
' distkey('|| cast(column_name as varchar(100)) ||')' as distkey, t.tableid
from information_schema.columns c
join
(-- need tableid
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
) t on c.table_schema= t.schemaname and c.table_name=t.tablename
join
-- gives dist col
(select attrelid as tableid, attname as colname from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attisdistkey = 't'
) d on t.tableid=d.tableid and c.column_name=d.colname
) d on tm.tableid=d.tableid

Postgresql size of tables (bytes) based on a column in the table [duplicate]

I'm coming to Postgres from Oracle and looking for a way to find the table and index size in terms of bytes/MB/GB/etc, or even better the size for all tables. In Oracle I had a nasty long query that looked at user_lobs and user_segments to give back an answer.
I assume in Postgres there's something I can use in the information_schema tables, but I'm not seeing where.
Try the Database Object Size Functions. An example:
-- Total on-disk size of one table; replace <schema> and <table> with real names.
SELECT pg_size_pretty(pg_total_relation_size(CAST('"<schema>"."<table>"' AS regclass)));
For all tables, something along the lines of:
-- Total on-disk size of every relation listed in information_schema.tables,
-- largest first.
-- format('%I.%I') quotes the identifiers correctly (including names that contain
-- a double quote, which the hand-built '"' || name || '"' form broke on), and the
-- size is computed once in the subquery instead of once for display and once for sorting.
SELECT
    s.table_full_name,
    pg_size_pretty(s.total_bytes) AS size
FROM (
    SELECT
        table_schema || '.' || table_name AS table_full_name,
        pg_total_relation_size(
            format('%I.%I', table_schema, table_name)::regclass
        ) AS total_bytes
    FROM information_schema.tables
) AS s
ORDER BY s.total_bytes DESC;
Edit: Here's the query submitted by #phord, for convenience:
-- Table, index and total size of every relation listed in
-- information_schema.tables, largest total first.
-- Changes from the original form:
--   * embedded double quotes in identifiers are doubled, so such names no longer
--     break the quoted "schema"."table" string;
--   * the ORDER BY is on the outer query (a sort inside a subquery is not
--     guaranteed to survive), and sorts on the byte count rather than on the
--     pretty-printed text that shares the total_size label.
SELECT
    sizes.table_name,
    pg_size_pretty(sizes.table_bytes) AS table_size,
    pg_size_pretty(sizes.indexes_bytes) AS indexes_size,
    pg_size_pretty(sizes.total_bytes) AS total_size
FROM (
    SELECT
        all_tables.table_name,
        pg_table_size(all_tables.table_name::regclass) AS table_bytes,
        pg_indexes_size(all_tables.table_name::regclass) AS indexes_bytes,
        pg_total_relation_size(all_tables.table_name::regclass) AS total_bytes
    FROM (
        SELECT ('"' || replace(table_schema, '"', '""') || '"."'
                    || replace(table_name, '"', '""') || '"') AS table_name
        FROM information_schema.tables
    ) AS all_tables
) AS sizes
ORDER BY sizes.total_bytes DESC;
I've modified it slightly to use pg_table_size() to include metadata and make the sizes add up.
Show database sizes:
\l+
e.g.
=> \l+
berbatik_prd_commerce | berbatik_prd | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 19 MB | pg_default |
berbatik_stg_commerce | berbatik_stg | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 8633 kB | pg_default |
bursasajadah_prd | bursasajadah_prd | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 1122 MB | pg_default |
Show table sizes:
\d+
e.g.
=> \d+
public | tuneeca_prd | table | tomcat | 8192 bytes |
public | tuneeca_stg | table | tomcat | 1464 kB |
Only works in psql.
(Summary of #zkutch's answer.)
If the database name is snort, the following command gives its size:
# Print only the size of database "snort". Asking the server directly avoids
# parsing the \l+ table, whose column layout is display output and which also
# yields header and separator lines when cut by column number.
# -A: unaligned output, -t: rows only (no header/footer), -c: run one command.
psql -Atc "SELECT pg_size_pretty(pg_database_size('snort'))"
Try this : (Index size/usage statistics)
-- Index size and usage statistics for every table in schema 'public'
-- (tables without indexes appear once with NULL index columns).
-- Changes from the original form:
--   * pg_class is matched on schema AND name, and indexes are attached by table
--     OID, so a same-named table in another schema no longer duplicates rows or
--     contributes foreign indexes;
--   * sizes are looked up by OID instead of by unqualified name, so the result no
--     longer depends on search_path.
SELECT
    t.tablename,
    idx.indexname,
    c.reltuples AS num_rows,
    pg_size_pretty(pg_relation_size(c.oid)) AS table_size,
    pg_size_pretty(pg_relation_size(idx.indexrelid)) AS index_size,
    CASE WHEN idx.indisunique THEN 'Y'
         ELSE 'N'
    END AS UNIQUE,
    idx.idx_scan AS number_of_scans,
    idx.idx_tup_read AS tuples_read,
    idx.idx_tup_fetch AS tuples_fetched
FROM pg_tables AS t
LEFT JOIN pg_namespace AS n
    ON n.nspname = t.schemaname
LEFT JOIN pg_class AS c
    ON c.relname = t.tablename
   AND c.relnamespace = n.oid
LEFT JOIN (
    -- one row per index: owning table OID, index OID/name, uniqueness and scan counters
    SELECT
        x.indrelid,
        x.indexrelid,
        ipg.relname AS indexname,
        x.indisunique,
        psai.idx_scan,
        psai.idx_tup_read,
        psai.idx_tup_fetch
    FROM pg_index AS x
    JOIN pg_class AS ipg ON ipg.oid = x.indexrelid
    JOIN pg_stat_all_indexes AS psai ON psai.indexrelid = x.indexrelid
) AS idx
    ON idx.indrelid = c.oid
WHERE t.schemaname = 'public'
ORDER BY t.tablename, idx.indexname;
PostgreSQL tables have three components: the table itself, any indexes on it, and potentially TOAST data. There are a couple of examples showing how to slice and dice the available information in various ways at http://wiki.postgresql.org/wiki/Disk_Usage
Just for info, I took the excellent answer from #aib and modified it a little for:
getting only tables from "public" schema
show also materialized views data and index size
On materialized view we can use index for refreshing materialized views concurrently, which allows using them while updating.
Well, my query will be the following:
-- Table, index and total size for base tables of schema 'public' plus all
-- materialized views, listed by name.
WITH all_tables AS (
    -- base tables of 'public' (unqualified names)
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
    UNION
    -- materialized views, named via regclass
    SELECT oid::regclass::text AS table_name
    FROM pg_class
    WHERE relkind = 'm'
    ORDER BY table_name
),
raw_sizes AS (
    SELECT
        table_name,
        pg_table_size(table_name) AS table_size,
        pg_indexes_size(table_name) AS indexes_size,
        pg_total_relation_size(table_name) AS total_size
    FROM all_tables
    ORDER BY table_name
)
SELECT
    raw_sizes.table_name,
    pg_size_pretty(raw_sizes.table_size) AS table_size,
    pg_size_pretty(raw_sizes.indexes_size) AS indexes_size,
    pg_size_pretty(raw_sizes.total_size) AS total_size
FROM raw_sizes
check this wiki. https://wiki.postgresql.org/wiki/Disk_Usage
-- Per-table size breakdown for ordinary tables (relkind = 'r'): raw byte counts
-- followed by their pretty-printed forms. table_bytes is what remains of the
-- total after subtracting indexes and the TOAST relation.
SELECT
    s.oid,
    s.table_schema,
    s.table_name,
    s.row_estimate,
    s.total_bytes,
    s.index_bytes,
    s.toast_bytes,
    s.total_bytes - s.index_bytes - COALESCE(s.toast_bytes, 0) AS table_bytes,
    pg_size_pretty(s.total_bytes) AS total,
    pg_size_pretty(s.index_bytes) AS INDEX,
    pg_size_pretty(s.toast_bytes) AS toast,
    pg_size_pretty(s.total_bytes - s.index_bytes - COALESCE(s.toast_bytes, 0)) AS TABLE
FROM (
    SELECT
        c.oid,
        n.nspname AS table_schema,
        c.relname AS table_name,
        c.reltuples AS row_estimate,
        pg_total_relation_size(c.oid) AS total_bytes,
        pg_indexes_size(c.oid) AS index_bytes,
        pg_total_relation_size(c.reltoastrelid) AS toast_bytes
    FROM pg_class AS c
    LEFT JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE c.relkind = 'r'
) AS s
The Query below will serve you
-- The 20 largest non-index relations outside pg_catalog, information_schema and
-- the pg_toast* schemas, by total relation size.
SELECT
    r.relation,
    pg_size_pretty(r.total_bytes) AS total_size
FROM (
    SELECT
        n.nspname || '.' || c.relname AS relation,
        pg_total_relation_size(c.oid) AS total_bytes
    FROM pg_class AS c
    LEFT JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
      AND n.nspname !~ '^pg_toast'
      AND c.relkind <> 'i'
) AS r
ORDER BY r.total_bytes DESC
LIMIT 20;
See this Link: https://wiki.postgresql.org/wiki/Disk_Usage
Try this script to find all table size:
-- Total on-disk size of every relation listed in information_schema.tables,
-- largest first.
-- format('%I.%I') quotes the identifiers correctly (including names that contain
-- a double quote, which the hand-built '"' || name || '"' form broke on), and the
-- size is computed once in the subquery instead of once for display and once for sorting.
SELECT
    s.TableName,
    pg_size_pretty(s.total_bytes) AS TableSize
FROM (
    SELECT
        table_schema || '.' || table_name AS TableName,
        pg_total_relation_size(
            format('%I.%I', table_schema, table_name)::regclass
        ) AS total_bytes
    FROM information_schema.tables
) AS s
ORDER BY s.total_bytes DESC
For other different script to find size in PostgreSQL, Please visit this url:
http://www.dbrnd.com/2015/05/how-to-find-size-of-database-and-table-in-postgresql/

How to get full length of DDL for a table or any object in redshift / postgresql

Team,
I am working on redshift ( 8.0.2 ). I would like to have DDL command in place for any object type ( table / view...) in redshift.
I have below one. but it is not giving the full text.
-- DDL statements recorded in svl_statementtext that mention 'table11', oldest first,
-- with the name of the user who ran them.
SELECT
    s.userid,
    u.usename,
    s.starttime,
    s.type,
    rtrim(s.text)
FROM svl_statementtext AS s
INNER JOIN pg_user AS u
    ON u.usesysid = s.userid
WHERE s.type = 'DDL'
  AND s.text LIKE '%table11%'
ORDER BY s.starttime ASC;
userid | usename | starttime | type | text
--------+----------------------------------------------------------------------------------------------------------------------------------+----------------------------+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
143 | user11 | 2014-04-16 23:42:06.227296 | DDL | CREATE TABLE table11 ( log_time date, user_name text, database_name text, process_id integer, connection_from text, session_id text, session_line_num bigint, command_tag text, session_start_time
143 | user11 | 2014-04-16 23:42:06.234987 | DDL | CREATE TABLE table11 ( log_time date, user_name text, database_name text, process_id integer, connection_from text, session_id text, session_line_num bigint, command_tag text, session_start_time
(2 rows)
In Oracle we have the DBMS_METADATA.GET_DDL package function, which returns the full text. I would like to have the same. I tried STL_DDLTEXT, but its text is heavily truncated.
-- First 40 characters of each stl_ddltext row of user 100 that mentions 'table11',
-- newest transaction first, segments in sequence order.
SELECT
    d.xid,
    d.starttime,
    d.sequence,
    substring(d.text, 1, 40) AS text
FROM stl_ddltext AS d
WHERE d.userid = 100
  AND d.text LIKE '%table11%'
ORDER BY d.xid DESC, d.sequence;
xid | starttime | sequence | text
--------+----------------------------+----------+------------------------------------------
135475 | 2014-04-16 23:42:06.234987 | 0 | CREATE TABLE table11 ( log_time dat
135475 | 2014-04-16 23:42:06.227296 | 0 | CREATE TABLE table11 ( log_time dat
(2 rows)
I have a few more questions about the first query's output. The column width of "usename" is too large; how can I trim it? If I query pg_user, it is trimmed internally. In Oracle we can use, e.g.,
" col <col_name> for a80 "
second doubt: i am getting 2 rows. actually i created only one table. Any reason for 2 rows in the output ?
for e.g. in physical postgre db, if we want to generate any ddl for one function,
we can use below.
in the below, function name is "add"
-- Full CREATE FUNCTION text of the function named "add".
SELECT pg_catalog.pg_get_functiondef(CAST('add' AS regproc));
like this, do we have any pkg in Redshift for table/views ?
Thanks
For DDL:
First create the admin view here: https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_generate_tbl_ddl.sql
Next write a query like this:
-- DDL lines of one table from the admin view, in statement order.
SELECT v.ddl
FROM admin.v_generate_tbl_ddl AS v
WHERE v.schemaname = 'some_schema'
  AND v.tablename = 'some_table'
ORDER BY v.seq ASC
I have not found a single function in Redshift that provides this functionality. You can get the full definition of views by using the pg_get_viewdef function:
-- 'create view <schema>.<name> as <definition>' for every view outside the
-- system schemas whose owner appears in pg_user.
SELECT
    'create view '
        || nc.nspname::information_schema.sql_identifier
        || '.'
        || c.relname::information_schema.sql_identifier
        || ' as '
        || pg_get_viewdef(c.oid)::information_schema.character_data AS view_definition
FROM pg_class AS c
INNER JOIN pg_namespace AS nc
    ON nc.oid = c.relnamespace
INNER JOIN pg_user AS u
    ON u.usesysid = c.relowner
WHERE c.relkind = 'v'::"char"
  AND nc.nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema');
For table definitions I've put together a query, but it still needs a little work to fill in some details as noted in the commented lines:
-- Builds a 'create table ...' statement per table from the catalogs by
-- concatenating three pieces:
--   cp.coldef  : comma-separated column definitions (type, length, default, nullability, identity)
--   d.distkey  : ') distkey(<column>)' -- this piece also closes the column list
--   sortkey    : ' sortkey(<col>,<col>,...)' from pg_attribute.attsortkeyord
-- The 'QQQ'<tableid>'ZZZ' markers embed the table id inside the aggregated column
-- string so it can be cut back out with charindex()/substr() and used as a join key.
-- NOTE(review): d comes from a LEFT JOIN, so d.distkey is NULL for a table without a
-- distribution key; the || concatenation then presumably yields NULL for the whole
-- statement -- confirm on the target engine.
select tm.schemaname||'.'||tm.tablename, 'create table '||tm.schemaname||'.'||tm.tablename
||' ('
||cp.coldef
-- primary key
-- diststyle
-- dist key
||d.distkey
--sort key
|| (select
' sortkey(' ||substr(array_to_string(
array( select ','||cast(column_name as varchar(100)) as str from
(select column_name from information_schema.columns col where col.table_schema= tm.schemaname and col.table_name=tm.tablename) c2
join
(-- gives sort cols
select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute pa where
pa.attnum > 0 AND NOT pa.attisdropped AND pa.attsortkeyord > 0
) st on tm.tableid=st.tableid and c2.column_name=st.colname order by sort_col_order
)
,'')
,2,10000) || ')'
)
||';'
from
-- t master table list
(
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
AND c.relname <> 'temp_staging_tables_1'
-- and c.relname in ('f_recipient_registration','ht_base_document','ht_folder','ht_logical_file','ht_transaction_addresses','ht_ysi_batch','ht_ysi_batch_messages','ht_ysi_files')
) tm
-- cp creates the col params for the create string
join
(select
-- tableid: the text between the first 'QQQ' and the first 'ZZZ' marker
substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)) as tableid
-- coldef: str with every marker removed and the leading comma dropped
,substr(replace(replace(str,'ZZZ',''),'QQQ'||substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)),''),2,10000) as coldef
from
( select array_to_string(array(
SELECT 'QQQ'||cast(t.tableid as varchar(10))||'ZZZ'|| ','||column_name||' '|| decode(udt_name,'bpchar','char',udt_name) || decode(character_maximum_length,null,'', '('||cast(character_maximum_length as varchar(9))||')' )
-- default
|| decode(substr(column_default,2,8),'identity','',null,'',' default '||column_default||' ')
-- nullable
|| decode(is_nullable,'YES',' NULL ','NO',' NOT NULL ')
-- identity
|| decode(substr(column_default,2,8),'identity',' identity('||substr(column_default,(charindex('''',column_default)+1), (length(column_default)-charindex('''',reverse(column_default))-charindex('''',column_default) ) ) ||') ', '') as str
from
-- ci all the col info
(
select cast(t.tableid as int), cast(table_schema as varchar(100)), cast(table_name as varchar(100)), cast(column_name as varchar(100)),
cast(ordinal_position as int), cast(column_default as varchar(100)), cast(is_nullable as varchar(20)) , cast(udt_name as varchar(50)) ,cast(character_maximum_length as int),
sort_col_order , decode(d.colname,null,0,1) dist_key
from (select * from information_schema.columns c where c.table_schema= t.schemaname and c.table_name=t.tablename) c
left join
(-- gives sort cols
select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attsortkeyord > 0
) s on t.tableid=s.tableid and c.column_name=s.colname
left join
-- gives dist col
(select attrelid as tableid, attname as colname from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attisdistkey = 't'
) d on t.tableid=d.tableid and c.column_name=d.colname
order by ordinal_position
) ci
-- for the working array funct
), '') as str
from
(-- need tableid
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
) t
-- for the agg functions that dont exist
-- ) group by table_schema, table_name
)) cp on tm.tableid=cp.tableid
-- add in primary key query here
-- dist key
left join
( select
-- close off the col defs after the primary key
')' ||
' distkey('|| cast(column_name as varchar(100)) ||')' as distkey, t.tableid
from information_schema.columns c
join
(-- need tableid
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
) t on c.table_schema= t.schemaname and c.table_name=t.tablename
join
-- gives dist col
(select attrelid as tableid, attname as colname from pg_attribute a where
a.attnum > 0 AND NOT a.attisdropped AND a.attisdistkey = 't'
) d on t.tableid=d.tableid and c.column_name=d.colname
) d on tm.tableid=d.tableid