PostgreSQL backend process high memory usage issue

We are evaluating PostgreSQL for a multitenant database.
Currently we are running some tests on a single-database, multiple-schema model
(basically, all tenants have the same set of database objects under their own schema within the same database).
The application will maintain a connection pool that is shared among all tenants/schemas.
e.g. if the database has 500 tenants/schemas and each tenant has 200 tables/views,
the total number of tables/views will be 500 * 200 = 100,000.
Since the connection pool is used by all tenants, eventually each connection will hit all the tables/views.
In our tests, we found that as a connection hits more views, the memory usage of its backend process increases quite fast, and most of it is private memory.
That memory is held until the connection is closed.
We have a test case in which one backend process uses more than 30GB of memory and eventually gets an out-of-memory error.
To help understand the issue, I wrote code to create a simplified test case:
- MTDB_destroy: used to clear tenant schemas
- MTDB_Initialize: used to create a multitenant DB
- MTDB_RunTests: simplified test case, basically select from all tenant views one by one.
The tests I've done were on PostgreSQL 9.0.3 on CentOS 5.4.
To make sure I had a clean environment, I re-created the database cluster and left most configuration settings at their defaults
(the only thing I HAD to change was to increase "max_locks_per_transaction", since MTDB_destroy needs to drop many objects).
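For reference, that change is a single line in postgresql.conf and needs a server restart; 128 below is just an example value (the 9.0 default is 64):
# postgresql.conf
max_locks_per_transaction = 128   # default is 64; MTDB_destroy's many DROP ... CASCADEs need more
-- verify from any session:
SHOW max_locks_per_transaction;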
This is what I do to reproduce the issue:
create a new database
create the three functions using the code attached
connect to the newly created db and run the initialize scripts
-- Initialize
select MTDB_Initialize('tenant', 100, 100, true);
-- not sure if vacuum analyze is useful here, I just run it
vacuum analyze;
-- check the tables/views created
select table_schema, table_type, count(*) from information_schema.tables where table_schema like 'tenant%' group by table_schema, table_type order by table_schema, table_type;
open another connection to the newly created db and run the test scripts
-- get backend process id for current connection
SELECT pg_backend_pid();
-- open a Linux console and run top -p <pid from above>, watching VIRT, RES and SHR
-- run tests
select MTDB_RunTests('tenant', 1);
Observations:
- when the connection for running tests was first created: VIRT = 182MB, RES = 6240K, SHR = 4648K
- after running the tests once (took 175 seconds): VIRT = 1661MB, RES = 1.5GB, SHR = 55MB
- after re-running the tests (took 167 seconds): VIRT = 1661MB, RES = 1.5GB, SHR = 55MB
- after re-running the tests again (took 165 seconds): VIRT = 1661MB, RES = 1.5GB, SHR = 55MB
As we scale up the number of tables, the memory usage in the tests goes up as well.
Can anyone help explain what's happening here?
Is there a way we can control the memory usage of a PostgreSQL backend process?
Thanks.
Samuel
-- MTDB_destroy
create or replace function MTDB_destroy (schemaNamePrefix varchar(100))
returns int as $$
declare
    curs1 cursor(prefix varchar) is
        select schema_name from information_schema.schemata
        where schema_name like prefix || '%';
    schemaName varchar(100);
    count integer;
begin
    count := 0;
    open curs1(schemaNamePrefix);
    loop
        fetch curs1 into schemaName;
        if not found then exit; end if;
        count := count + 1;
        execute 'drop schema ' || schemaName || ' cascade;';
    end loop;
    close curs1;
    return count;
end $$ language plpgsql;
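To tear the test schemas down again afterwards, call it with the same prefix used in the scripts above:
select MTDB_destroy('tenant');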
-- MTDB_Initialize
create or replace function MTDB_Initialize (schemaNamePrefix varchar(100), numberOfSchemas integer, numberOfTablesPerSchema integer, createViewForEachTable boolean)
returns integer as $$
declare
    currentSchemaId integer;
    currentTableId integer;
    currentSchemaName varchar(100);
    currentTableName varchar(100);
    currentViewName varchar(100);
    count integer;
begin
    -- clear out any schemas left over from a previous run
    perform MTDB_Destroy(schemaNamePrefix);
    count := 0;
    currentSchemaId := 1;
    loop
        currentSchemaName := schemaNamePrefix || ltrim(currentSchemaId::varchar(10));
        execute 'create schema ' || currentSchemaName;
        currentTableId := 1;
        loop
            currentTableName := currentSchemaName || '.' || 'table' || ltrim(currentTableId::varchar(10));
            execute 'create table ' || currentTableName || ' (f1 integer, f2 integer, f3 varchar(100), f4 varchar(100), f5 varchar(100), f6 varchar(100), f7 boolean, f8 boolean, f9 integer, f10 integer)';
            if (createViewForEachTable = true) then
                -- each view self-joins its table nine times
                currentViewName := currentSchemaName || '.' || 'view' || ltrim(currentTableId::varchar(10));
                execute 'create view ' || currentViewName || ' as ' ||
                    'select t1.* from ' || currentTableName || ' t1 ' ||
                    ' inner join ' || currentTableName || ' t2 on (t1.f1 = t2.f1) ' ||
                    ' inner join ' || currentTableName || ' t3 on (t2.f2 = t3.f2) ' ||
                    ' inner join ' || currentTableName || ' t4 on (t3.f3 = t4.f3) ' ||
                    ' inner join ' || currentTableName || ' t5 on (t4.f4 = t5.f4) ' ||
                    ' inner join ' || currentTableName || ' t6 on (t5.f5 = t6.f5) ' ||
                    ' inner join ' || currentTableName || ' t7 on (t6.f6 = t7.f6) ' ||
                    ' inner join ' || currentTableName || ' t8 on (t7.f7 = t8.f7) ' ||
                    ' inner join ' || currentTableName || ' t9 on (t8.f8 = t9.f8) ' ||
                    ' inner join ' || currentTableName || ' t10 on (t9.f9 = t10.f9) ';
            end if;
            currentTableId := currentTableId + 1;
            count := count + 1;
            if (currentTableId > numberOfTablesPerSchema) then exit; end if;
        end loop;
        currentSchemaId := currentSchemaId + 1;
        if (currentSchemaId > numberOfSchemas) then exit; end if;
    end loop;
    return count;
end $$ language plpgsql;
-- MTDB_RunTests
create or replace function MTDB_RunTests(schemaNamePrefix varchar(100), rounds integer)
returns integer as $$
declare
    curs1 cursor(prefix varchar) is
        select table_schema || '.' || table_name
        from information_schema.tables
        where table_schema like prefix || '%' and table_type = 'VIEW';
    currentViewName varchar(100);
    count integer;
begin
    count := 0;
    loop
        rounds := rounds - 1;
        if (rounds < 0) then exit; end if;
        open curs1(schemaNamePrefix);
        loop
            fetch curs1 into currentViewName;
            if not found then exit; end if;
            -- run the query and discard the result
            execute 'select * from ' || currentViewName;
            count := count + 1;
        end loop;
        close curs1;
    end loop;
    return count;
end $$ language plpgsql;

Are these connections idle in transaction or just idle? Sounds like unfinished transactions are holding onto memory, or maybe you've got a memory leak or something.

For people who see this thread when searching around (as I did): I found what appeared to be the same problem in a different context, with idle processes slowly consuming more and more memory until the OOM killer took them out (causing periodic DB crashes).
We traced the problem back to really long-running PHP scripts which kept one connection open for a long time. We were able to get the memory under control by periodically closing the connection and re-connecting.
From what I've read, Postgres does a lot of per-connection caching, so if you have one session hitting a lot of different tables/queries, this cache data can continue to grow and grow.
-Ken
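A rough way to gauge how big that per-connection cache can get is to count how many relations the pool will eventually touch, since each backend keeps its own cache entries for every relation it has used. A minimal check against the test setup above (the 'tenant%' prefix comes from MTDB_Initialize):
-- count relations (tables, views, indexes, toast tables, ...) under the tenant schemas
select count(*)
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
where n.nspname like 'tenant%';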

Related

Update Null columns to Zero dynamically in Redshift

Here is the code in SAS; it finds the numeric columns with missing values and replaces them with 0s:
DATA dummy_table;
SET dummy_table;
ARRAY DUMMY _NUMERIC_;
DO OVER DUMMY;
IF DUMMY=. THEN DUMMY=0;
END;
RUN;
I am trying to replicate this in Redshift; here is what I tried:
create or replace procedure sp_replace_null_to_zero(IN tbl_nm varchar) as $$
Begin
Execute 'declare ' ||
'tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');' ||
'init_loop int := 0; ' ||
'cn_nm varchar; '
Begin
While init_loop <= tot_cnt
Loop
Raise info 'init_loop = %', Init_loop;
Raise info 'tot_cnt = %', tot_cnt;
Execute 'Select column_name into cn_nm from information_schema.columns ' ||
'where table_name ='|| tbl_nm || ' and ordinal_position = init_loop ' ||
'and data_type not in (''character varying'',''date'',''text''); '
Raise info 'cn_nm = %', cn_nm;
if cn_nm is not null then
Execute 'Update ' || tbl_nm ||
'Set ' || cn_nm = 0 ||
'Where ' || cn_nm is null or cn_nm =' ';
end if;
init_loop = init_loop + 1;
end loop;
End;
End;
$$ language plpgsql;
Issues I am facing:
- When I pass the input parameter, I get a 0 count from
tot_cnt int := (select count(*) from information_schema.columns where table_name = ' || tbl_nm || ');'
- For testing purposes I tried hardcoding the table name inside the proc, and I get the error: amazon invalid operation: value for domain information_schema.cardinal_number violates check constraint "cardinal_number_domain_check"
Is this even possible in Redshift? How can I implement this logic, or is there another workaround?
Need expert advice here!!
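As an aside on the 0 count: tbl_nm is concatenated into the statement unquoted, so the predicate becomes table_name = mytable (an identifier) rather than table_name = 'mytable' (a string literal). A minimal sketch of the quoting fix, assuming the QUOTE_LITERAL function (which Redshift provides):
Execute 'select count(*) from information_schema.columns where table_name = ' || quote_literal(tbl_nm) into tot_cnt;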
You can simply run an UPDATE over the table(s) using the NVL(cn_nm,0) function
UPDATE tbl_raw
SET col2 = NVL(col2,0);
However, UPDATE is a fairly expensive operation. Consider just using a view over your table that wraps the columns in NVL(cn_nm, 0):
CREATE VIEW tbl_clean
AS
SELECT col1
, NVL(col2,0) col2
FROM tbl_raw;
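If you want to hit every numeric column without listing them by hand (as the SAS array does), you can generate the UPDATE statements from information_schema; a sketch reusing the question's column filter, with dummy_table as a placeholder name:
SELECT 'UPDATE ' || table_name || ' SET ' || column_name || ' = NVL(' || column_name || ', 0);'
FROM information_schema.columns
WHERE table_name = 'dummy_table'
AND data_type NOT IN ('character varying', 'date', 'text');
Run the emitted statements afterwards (or feed them back through EXECUTE in a procedure).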

PL/SQL Error Happening

You double-fetch your cursor:
Your problem
[...]
OPEN UPDATETRIGGER;
FOR UT IN UPDATETRIGGER -- This performs fetching into UT
LOOP
FETCH UPDATETRIGGER -- Here You Fetch again..
[...]
What your loop should look like
CREATE OR REPLACE TRIGGER Display_Update_Message
BEFORE UPDATE
ON JOBS
FOR EACH ROW
WHEN ( (old.IsFilled != new.IsFilled) AND (new.isFilled = 'yes'))
DECLARE
CURSOR UPDATETRIGGER
IS
SELECT J.JobID JobID,
J.JobName JobName,
J.StopDate StopDate,
JS.LastName LastName,
JS.FirstName FirstName,
JS.Email Email
FROM JOBS J
FULL OUTER JOIN JOBAPPLICATIONS JA ON J.JobID = JA.JobID
FULL OUTER JOIN JOBSEEKERS JS ON JA.JSID = JS.JSID
WHERE J.JobID = :new.JobID;
JobID NUMBER (3);
JobName CHAR (30);
LastName CHAR (15);
FirstName CHAR (15);
Email CHAR (30);
StopDate DATE;
BEGIN
DBMS_OUTPUT.PUT_LINE (
'Seekers affected by closing job ' || :new.JobID || ': ' || :new.JobName);
OPEN UPDATETRIGGER;
LOOP -- infinite loop
FETCH UPDATETRIGGER
INTO JobID,
JobName,
StopDate,
LastName,
FirstName,
Email;
EXIT WHEN UPDATETRIGGER%NOTFOUND; -- loop-breaker
:new.StopDate := SYSDATE;
DBMS_OUTPUT.PUT_LINE (
'--' || LastName || ', ' || FirstName || ' ' || Email);
END LOOP;
CLOSE UPDATETRIGGER;
END;
You do not need to include the JOB table in the query. You have the information you require in the :NEW namespace. Not including the JOB table in the query will also stop the ORA-04091 (mutating table) error.
Also you have two sets of cursor control statements. Choose one. I prefer the implicit cursor syntax because it is less typing (and marginally more efficient).
CREATE OR REPLACE TRIGGER Display_Update_Message
BEFORE UPDATE ON JOBS
FOR EACH ROW WHEN ((old.IsFilled != new.IsFilled) AND (new.isFilled = 'yes'))
BEGIN
DBMS_OUTPUT.PUT_LINE('Seekers affected by closing job '
|| :new.JobID || ': ' || :new.JobName);
FOR UT IN (SELECT JS.LastName
, JS.FirstName
, JS.Email Email
FROM JOBAPPLICATIONS JA
FULL OUTER JOIN JOBSEEKERS JS ON JA.JSID = JS.JSID
WHERE JA.JobID = :new.JobID )
LOOP
DBMS_OUTPUT.PUT_LINE('--' || UT.LastName || ', ' || UT.FirstName || ' ' || UT.Email);
END LOOP;
:new.StopDate := sysdate;
END;
/
Incidentally, I'm not sure why you have FULL OUTER JOIN in your cursor. I would have thought INNER JOIN was the correct solution. Surely you only want Job Seekers who have applied for the job you're closing? However, I have left that in because I don't know your business rules, and anyway I've changed enough of your code already :)

Postgresql bulk collect

I'm trying to migrate an Oracle app to PostgreSQL.
In one of the Oracle functions I have the following code:
V_Step := 3;
command := 'declare
type tab_data is table of ' || tab_name ||
'%ROWTYPE;
CURSOR raw_data is SELECT * FROM ' || tab_name ||
'_vw;
mydata tab_data;
V_COUNTER integer := 0;
BEGIN
open raw_data;
LOOP
V_COUNTER := V_COUNTER + 1;
FETCH raw_data BULK COLLECT INTO mydata LIMIT ' ||
to_char(current_setting('Gaps.BATCH_SIZE')::bigint) || ';
FORALL i IN 1..mydata.COUNT
INSERT INTO ' || tab_name ||
' VALUES mydata(i);
EXIT WHEN raw_data%NOTFOUND;
END LOOP;
RAISE NOTICE ''V_COUNTER = '%', V_COUNTER;
--commit;
close raw_data;
END;';
V_Step := 4;
RAISE NOTICE '%', command;
V_Step := 5;
EXECUTE command;
We are trying to load a huge amount of data from a remote Oracle table into a local PostgreSQL table (I have a link set up). In Oracle, using BULK COLLECT makes this faster than a plain insert into x select * from. Is there anything in PostgreSQL similar to Oracle's bulk operations?
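For what it's worth, plpgsql has no direct counterpart to BULK COLLECT/FORALL; PostgreSQL executes plain SQL set-at-a-time already, so the row-buffering loop usually isn't needed. A minimal sketch under that assumption, keeping the question's naming convention:
-- set-based copy: the server handles this in bulk internally,
-- no FETCH ... LIMIT / FORALL loop required
EXECUTE 'INSERT INTO ' || tab_name || ' SELECT * FROM ' || tab_name || '_vw';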

How can I measure the amount of space taken by blobs on a Firebird 2.1 database?

I have a production database, using Firebird 2.1, where I need to find out how much space is used by each table, including the blobs. The blob part is the tricky one, because it is not covered by the standard statistical report.
I do not have easy access to the server's desktop, so installing UDFs etc. is not a good solution.
How can I do this easily?
You can count the total size of all BLOB fields in a database with the following statement:
EXECUTE BLOCK RETURNS (BLOB_SIZE BIGINT)
AS
DECLARE VARIABLE RN CHAR(31) CHARACTER SET UNICODE_FSS;
DECLARE VARIABLE FN CHAR(31) CHARACTER SET UNICODE_FSS;
DECLARE VARIABLE S BIGINT;
BEGIN
BLOB_SIZE = 0;
FOR
SELECT r.rdb$relation_name, r.rdb$field_name
FROM rdb$relation_fields r JOIN rdb$fields f
ON r.rdb$field_source = f.rdb$field_name
WHERE f.rdb$field_type = 261
INTO :RN, :FN
DO BEGIN
EXECUTE STATEMENT
'SELECT SUM(OCTET_LENGTH(' || :FN || ')) FROM ' || :RN ||
' WHERE NOT ' || :FN || ' IS NULL'
INTO :S;
BLOB_SIZE = :BLOB_SIZE + COALESCE(:S, 0);
END
SUSPEND;
END
I modified Andrej's code example to show the size of each blob field, not only the sum of all blobs, and used SET TERM so you can copy & paste this snippet directly into tools like FlameRobin.
SET TERM #;
EXECUTE BLOCK
RETURNS (BLOB_SIZE BIGINT, TABLENAME CHAR(31), FIELDNAME CHAR(31) )
AS
DECLARE VARIABLE RN CHAR(31) CHARACTER SET UNICODE_FSS;
DECLARE VARIABLE FN CHAR(31) CHARACTER SET UNICODE_FSS;
DECLARE VARIABLE S BIGINT;
BEGIN
BLOB_SIZE = 0;
FOR
SELECT r.rdb$relation_name, r.rdb$field_name
FROM rdb$relation_fields r JOIN rdb$fields f
ON r.rdb$field_source = f.rdb$field_name
WHERE f.rdb$field_type = 261
INTO :RN, :FN
DO BEGIN
EXECUTE STATEMENT
'SELECT SUM(OCTET_LENGTH(' || :FN || ')) AS BLOB_SIZE, ''' || :RN || ''', ''' || :FN || '''
FROM ' || :RN ||
' WHERE NOT ' || :FN || ' IS NULL'
INTO :BLOB_SIZE, :TABLENAME, :FIELDNAME;
SUSPEND;
END
END
#
SET TERM ;#
This example doesn't work with ORDER BY, maybe a more elegant solution without EXECUTE BLOCK exists.

How do I delete the data from all my tables in ORACLE 10g

I have an ORACLE schema containing hundreds of tables. I would like to delete the data from all the tables (but don't want to DROP the tables).
Is there an easy way to do this or do I have to write an SQL script that retrieves all the table names and runs the TRUNCATE command on each ?
I would like to delete the data using commands in an SQL-Plus session.
If you have any referential integrity constraints (foreign keys), then TRUNCATE won't work; you cannot truncate a parent table while enabled foreign keys reference it, even if the child tables are empty.
The following PL/SQL (untested, but I've run similar code in the past) iterates over the tables, disabling all the foreign keys, truncating the tables, then re-enabling the foreign keys. If a table in another schema has an RI constraint against one of your tables, this script will fail.
set serveroutput on size unlimited
declare
l_sql varchar2(2000);
l_debug number := 1; -- will output results if non-zero
-- will execute sql if 0
l_drop_user varchar2(30) := ''; -- set the user whose tables you're truncating
begin
for i in (select table_name, constraint_name from dba_constraints
where owner = l_drop_user
and constraint_type = 'R'
and status = 'ENABLED')
loop
l_sql := 'alter table ' || l_drop_user || '.' || i.table_name ||
' disable constraint ' || i.constraint_name;
if l_debug = 0 then
execute immediate l_sql;
else
dbms_output.put_line(l_sql);
end if;
end loop;
for i in (select table_name from dba_tables
where owner = l_drop_user
minus
select view_name from dba_views
where owner = l_drop_user)
loop
l_sql := 'truncate table ' || l_drop_user || '.' || i.table_name ;
if l_debug = 0 then
execute immediate l_sql;
else
dbms_output.put_line(l_sql);
end if;
end loop;
for i in (select table_name, constraint_name from dba_constraints
where owner = l_drop_user
and constraint_type = 'R'
and status = 'DISABLED')
loop
l_sql := 'alter table ' || l_drop_user || '.' || i.table_name ||
' enable constraint ' || i.constraint_name;
if l_debug = 0 then
execute immediate l_sql;
else
dbms_output.put_line(l_sql);
end if;
end loop;
end;
/
Probably the easiest way is to export the schema without data, then drop and re-import it.
I was looking at this too.
Seems like you do need to go through all the table names.
Have you seen this? Seems to do the trick.
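If there are no foreign keys in the way, the generate-and-run version of that is short enough to type straight into an SQL-Plus session (the spool file name is just an example):
set pagesize 0
set feedback off
spool truncate_all.sql
select 'truncate table ' || table_name || ';' from user_tables;
spool off
@truncate_all.sql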
I had to do this recently and wrote a stored procedure, which you can run via exec sp_truncate;. Most of the code is based on this answer on disabling constraints:
CREATE OR REPLACE PROCEDURE sp_truncate AS
BEGIN
-- Disable all constraints
FOR c IN
(SELECT c.owner, c.table_name, c.constraint_name
FROM user_constraints c, user_tables t
WHERE c.table_name = t.table_name
AND c.status = 'ENABLED'
ORDER BY c.constraint_type DESC)
LOOP
DBMS_UTILITY.EXEC_DDL_STATEMENT('ALTER TABLE ' || c.owner || '.' || c.table_name || ' disable constraint ' || c.constraint_name);
DBMS_OUTPUT.PUT_LINE('Disabled constraints for table ' || c.table_name);
END LOOP;
-- Truncate data in all tables
FOR i IN (SELECT table_name FROM user_tables)
LOOP
EXECUTE IMMEDIATE 'TRUNCATE TABLE ' || i.table_name;
DBMS_OUTPUT.PUT_LINE('Truncated table ' || i.table_name);
END LOOP;
-- Enable all constraints
FOR c IN
(SELECT c.owner, c.table_name, c.constraint_name
FROM user_constraints c, user_tables t
WHERE c.table_name = t.table_name
AND c.status = 'DISABLED'
ORDER BY c.constraint_type)
LOOP
DBMS_UTILITY.EXEC_DDL_STATEMENT('ALTER TABLE ' || c.owner || '.' || c.table_name || ' enable constraint ' || c.constraint_name);
DBMS_OUTPUT.PUT_LINE('Enabled constraints for table ' || c.table_name);
END LOOP;
COMMIT;
END sp_truncate;
/
Putting the details from the OTN Discussion Forums thread "truncating multiple tables with single query" into one SQL script gives the following, which can be run in an SQL-Plus session:
SET SERVEROUTPUT ON
BEGIN
-- Disable constraints
DBMS_OUTPUT.PUT_LINE ('Disabling constraints');
FOR reg IN (SELECT uc.table_name, uc.constraint_name FROM user_constraints uc) LOOP
EXECUTE IMMEDIATE 'ALTER TABLE ' || reg.table_name || ' ' || 'DISABLE' ||
' CONSTRAINT ' || reg.constraint_name || ' CASCADE';
END LOOP;
-- Truncate tables
DBMS_OUTPUT.PUT_LINE ('Truncating tables');
FOR reg IN (SELECT table_name FROM user_tables) LOOP
EXECUTE IMMEDIATE 'TRUNCATE TABLE ' || reg.table_name;
END LOOP;
-- Enable constraints
DBMS_OUTPUT.PUT_LINE ('Enabling constraints');
FOR reg IN (SELECT uc.table_name, uc.constraint_name FROM user_constraints uc) LOOP
EXECUTE IMMEDIATE 'ALTER TABLE ' || reg.table_name || ' ' || 'ENABLE' ||
' CONSTRAINT ' || reg.constraint_name;
END LOOP;
END;
/