How does multi-column hash partitioning work in PostgreSQL?

The following setup defines custom hash operator classes and a table hash-partitioned on two columns:
create or replace function part_hashint4_noop(value int4, seed int8)
returns int8 as $$
select value + seed;
$$ language sql immutable;
create operator class part_test_int4_ops
for type int4
using hash as
operator 1 =,
function 2 part_hashint4_noop(int4, int8);
create or replace function part_hashtext_length(value text, seed int8)
RETURNS int8 AS $$
select length(coalesce(value, ''))::int8
$$ language sql immutable;
create operator class part_test_text_ops
for type text
using hash as
operator 1 =,
function 2 part_hashtext_length(text, int8);
begin;
create table hp(a int,b text, c int)
partition by hash(a part_test_int4_ops, b part_test_text_ops);
create table hp0 partition of hp for values with (modulus 4, remainder 0);
create table hp1 partition of hp for values with (modulus 4, remainder 1);
create table hp2 partition of hp for values with (modulus 4, remainder 2);
create table hp3 partition of hp for values with (modulus 4, remainder 3);
commit;
Then I am trying to understand the partition pruning in the following query plans.
explain (costs off) select * from hp where a = 1 and b = 'xxx';
QUERY PLAN
-------------------------------------------
Seq Scan on hp0 hp
Filter: ((a = 1) AND (b = 'xxx'::text))
(2 rows)
explain (costs off) select * from hp where a = 2 and b = 'xxx';
QUERY PLAN
-------------------------------------------
Seq Scan on hp3 hp
Filter: ((a = 2) AND (b = 'xxx'::text))
(2 rows)
The function part_hashtext_length returns the string length as the hash value; a NULL value hashes to 0.
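Roughly speaking, the partition for a row is chosen by calling each key column's extended hash support function with a fixed seed, combining the per-column hashes, and taking the remainder modulo the partition modulus. You can check which partition a given key combination belongs to with the internal function satisfies_hash_partition, which is what the generated partition constraints use; a minimal sketch:
-- does (a, b) = (1, 'xxx') satisfy hp0's constraint (modulus 4, remainder 0)?
select satisfies_hash_partition('hp'::regclass, 4, 0, 1::int4, 'xxx'::text);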


CTE with bidirectional entries

I want to walk a directed graph. I need all involved nodes.
I want to support both cases (the result should always be 1, 2, 3):
INSERT INTO foo VALUES (1,2),(2,3);
INSERT INTO foo VALUES (1,2),(3,2);
WITH RECURSIVE traverse(id, path, cycle) AS (
    SELECT a, ARRAY[a], false
    FROM foo WHERE a = 1
    UNION ALL
    SELECT GREATEST(a, b), traverse.path || GREATEST(a, b), GREATEST(a, b) = ANY(traverse.path)
    FROM traverse
    INNER JOIN foo ON LEAST(a, b) = traverse.id
    WHERE NOT cycle
)
SELECT * FROM traverse;
Table foo can have up to 50 million records. There is an index on each column (not a multicolumn index). It doesn't work very "fast"; without GREATEST and LEAST it is very fast. Any other solutions?
Update: An iterative solution is not that bad after analyzing the requirements again:
There are 54 million edges and 21 million nodes in the DB. The DB holds many distinct graphs; each connected graph has 3 to 100 nodes.
It worked well for the question "give me all related nodes": about 20 msec (graph depth = 13).
DROP TABLE IF EXISTS edges, nodes;
CREATE TABLE nodes
(
id INTEGER PRIMARY KEY
);
CREATE TABLE edges
(
"from" INTEGER NOT NULL REFERENCES nodes(id),
"to" INTEGER NOT NULL REFERENCES nodes(id)
);
INSERT INTO nodes SELECT generate_series(1,5);
INSERT INTO edges VALUES
-- circle
(1,2),(2,3),(3,1),
-- other direction
(4,3);
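With 54 million edges, the lookups inside the loop below need an index on each of the two columns (the question already mentions single-column indexes on both); a minimal sketch:
CREATE INDEX edges_from_idx ON edges("from");
CREATE INDEX edges_to_idx ON edges("to");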
CREATE OR REPLACE FUNCTION walk_graph(param_node_id INTEGER)
RETURNS TABLE (id INTEGER)
LANGUAGE plpgsql
AS $$
DECLARE
    var_node_ids INTEGER[] := ARRAY[param_node_id];
    var_iteration_node_ids INTEGER[] := ARRAY[param_node_id];
BEGIN
    WHILE array_length(var_iteration_node_ids, 1) > 0 LOOP
        var_iteration_node_ids := ARRAY(
            SELECT DISTINCT "to" FROM edges
            WHERE "from" = ANY(var_iteration_node_ids)
              AND NOT ("to" = ANY(var_node_ids))
            UNION
            SELECT DISTINCT "from" FROM edges
            WHERE "to" = ANY(var_iteration_node_ids)
              AND NOT ("from" = ANY(var_node_ids))
        );
        var_node_ids := var_node_ids || var_iteration_node_ids;
    END LOOP;
    RETURN QUERY SELECT unnest(var_node_ids);
END $$;
SELECT * FROM walk_graph(2);
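With the sample data above, this should return the four connected nodes 2, 1, 3 and 4 (the circle plus node 4; output order is not guaranteed).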

Postgresql data validation checks based on table values

I'm trying to create a database that tracks electrical cables. Each cable contains 1 or more cores that are connected to terminals at each end. The number of cores in each cable is defined in a table.
number_of_cores | cable_id
----------------+---------
              2 |        1
The core table is as follows:
cable_no | from_id | core_mark | to_id
---------+---------+-----------+------
    1001 |       1 | 1 Black   |     2
    1001 |       2 | 1 White   |     4
I want to create a check that will prevent another core for cable 1001 from being inserted.
Is this possible in PostgreSQL?
Ideally, if I tried to insert another core for cable 1001 with another unique core number, the error would be something like "all cores used on cable 1001".
Thanks,
I think what you need is something like a check constraint. (https://www.postgresql.org/docs/current/ddl-constraints.html)
Follow these steps:
1. Create the tables properly
create table cable (cable_id int primary key, number_of_cores int);
create table core (core_id int primary key, cable_id int references cable (cable_id), from_id int, core_mark varchar (50), to_id int);
2. Create the function that will verify the inserts
create or replace function test_max_core_number(in_cable_id int)
returns boolean
language plpgsql
as $function$
declare
res boolean := false;
begin
if exists (
select *
from cable
where cable_id = in_cable_id
and number_of_cores > (select count(*) from core where cable_id = in_cable_id )
)
then
res := true;
end if;
return res;
end;
$function$;
3. Add the constraint to your table
alter table core
add constraint cstr_check check (test_max_core_number(cable_id));
4. Now it is time for some testing :)
insert into cable (cable_id, number_of_cores) values (1, 2), (2, 3);
insert into core (core_id, cable_id, from_id, core_mark, to_id)
values
(1, 1, 1, '1 Black', 2)
,(2, 1, 2, '1 White', 4);
Everything should go fine so far.
5. And now the expected error!
insert into core (core_id, cable_id, from_id, core_mark, to_id)
values
(3, 1, 3, '1 Green', 2);
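If the constraint works as intended, this insert is rejected with PostgreSQL's standard check-violation message, along these lines:
ERROR:  new row for relation "core" violates check constraint "cstr_check"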
Hope this helps!
I think @Jaisus gave a good answer.
I would only add a cross-check on the cable table, to prevent setting bad values for number_of_cores:
create or replace function test_cable_number_of_cores(in_cable_id int,in_number_of_cores int)
returns boolean
language plpgsql
as $function$
declare
res boolean := false;
begin
res := (in_number_of_cores>0 and (select count(cable_id) from core where cable_id=in_cable_id) <= in_number_of_cores);
return res;
end;
$function$;
alter table cable add check(test_cable_number_of_cores(cable_id, number_of_cores));
-- ok
insert into cable(cable_id, number_of_cores) values (3, 2);
update cable set number_of_cores=3 where cable_id=3;
-- error
update cable set number_of_cores=1 where cable_id=1;
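The failing update should likewise raise a check-constraint violation (the auto-generated constraint name may differ):
ERROR:  new row for relation "cable" violates check constraint "cable_check"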

Select that splits rows with range into several rows with smaller ranges?

I have two tables that contain categorized tsrange values. The ranges in each table are non-overlapping per category, but the ranges in b might overlap those in a.
create table a ( id serial primary key, category int, period tsrange );
create table b ( id serial primary key, category int, period tsrange );
What I would like to do is combine these two tables in a CTE for another query. The combined values need to be the tsranges from table a minus any overlapping tsranges in table b with the same category.
The complication is that when an overlapping b.period is contained inside an a.period, the result of the subtraction is two rows. The Postgres range - operator does not support this, so I created a function that returns 1 or 2 rows:
create function subtract_tsrange( a tsrange , b tsrange )
returns table (period tsrange)
language 'plpgsql' as $$
begin
if a #> b and not isempty(b) and lower(a) <> lower(b) and upper(b) <> upper(a)
then
period := tsrange(lower(a), lower(b), '[)');
return next;
period := tsrange(upper(b), upper(a), '[)');
return next;
else
period := a - b;
return next;
end if;
return;
end
$$;
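For illustration (timestamps chosen arbitrarily), a contained range splits the outer range in two:
select * from subtract_tsrange('[2020-01-01, 2020-01-10)'::tsrange,
                               '[2020-01-03, 2020-01-05)'::tsrange);
-- ["2020-01-01 00:00:00","2020-01-03 00:00:00")
-- ["2020-01-05 00:00:00","2020-01-10 00:00:00")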
There can also be several b.periods overlapping one a.period, so one row from a might potentially be split into many rows with shorter periods.
Now I want to create a select that takes each row in a and returns:
The original a.period if there is no overlapping b.period with the same category
or
1 or several rows representing the original a.period minus all overlapping b.periods with the same category.
After reading lots of other posts I figure I should use a LATERAL join in combination with my function somehow, but I'm still scratching my head as to how. (We're talking Postgres 9.6, btw!)
Notes: your problem can easily be generalized to all range types, so I will use the anyrange pseudo-type in my answer, but you don't have to. In fact, because of this I had to create a generic constructor for range types, because PostgreSQL has not defined one (yet):
create or replace function to_range(t anyrange, l anyelement, u anyelement, s text default '[)', out to_range anyrange)
language plpgsql as $func$
begin
execute format('select %I($1, $2, $3)', pg_typeof(t)) into to_range using l, u, s;
end
$func$;
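For example, with numrange as the concrete type (the bound specification defaults to '[)'):
select to_range(numrange(0, 1), 3::numeric, 5::numeric);
-- [3,5)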
Of course, you can use the appropriate range constructor instead of to_range() calls.
Also, I will use the numrange type for testing purposes, as it can be created and checked more easily than the tsrange type, but my answer should work with that as well.
Answer:
I rewrote your function to handle any type of bounds (inclusive, exclusive and even unbounded ranges). Also, it will return an empty result set when a <# b.
create or replace function range_div(a anyrange, b anyrange)
returns setof anyrange
language sql as $func$
select * from unnest(case
when b is null or a <# b then '{}'
when a #> b then array[
to_range(a, case when lower_inf(a) then null else lower(a) end,
case when lower_inf(b) then null else lower(b) end,
case when lower_inc(a) then '[' else '(' end ||
case when lower_inc(b) then ')' else ']' end),
to_range(a, case when upper_inf(b) then null else upper(b) end,
case when upper_inf(a) then null else upper(a) end,
case when upper_inc(b) then '(' else '[' end ||
case when upper_inc(a) then ']' else ')' end)
]
else array[a - b]
end)
$func$;
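A quick sanity check with numeric ranges (values chosen arbitrarily):
select * from range_div(numrange(1, 10), numrange(3, 5));
-- [1,3)
-- [5,10)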
With this in mind, what you need is somewhat an inverse of aggregation. For example, with sum() one can start from an empty value (0) and repeatedly add something to it. Here, though, you have your initial value and need to repeatedly remove parts of it.
One solution to that is to use recursive CTEs:
with recursive r as (
    select *
    from a
    union
    select r.id, r.category, d
    from r
    left join b using (category)
    cross join range_div(r.period, b.period) d -- this is in fact an implicit lateral join
    where r.period && b.period
)
select r.*
from r
left join b on r.category = b.category and r.period && b.period
where not isempty(r.period) and b.period is null;
My sample data:
create table a (id serial primary key, category int, period numrange);
create table b (id serial primary key, category int, period numrange);
insert into a (category, period) values (1, '[1,4]'), (1, '[2,5]'), (1, '[3,6]'), (2, '(1,6)');
insert into b (category, period) values (1, '[2,3)'), (1, '[1,2]'), (2, '[3,3]');
The query above produces:
 id | category | period
----+----------+--------
  3 |        1 | [3,6]
  1 |        1 | [3,4]
  2 |        1 | [3,5]
  4 |        2 | (1,3)
  4 |        2 | (3,6)

Can PostgreSQL join to a stored procedure?

In SQL Server and Oracle, joining to (or selecting from) stored procedures that return a resultset is not supported.
Is it possible in PostgreSQL?
If so, a follow-up question: is it possible, perhaps via foreign data wrappers, to join to a stored procedure within an MS SQL Server database?
PostgreSQL can join to a stored procedure (in Postgres terms, a set-returning function). Tables and rows to get us started:
create schema test;
create table test.test_a (
id integer primary key
);
insert into test.test_a values
(1), (2);
create table test.test_b (
id integer references test.test_a,
n integer not null,
primary key (id, n)
);
insert into test.test_b values
(1, 1), (1, 2), (1, 3), (2, 1), (2, 2);
A simple function.
create or replace function test.return_test_b ()
returns table (
id integer,
n integer
) as
$$
select * from test.test_b
$$ language sql;
Now you can join directly.
select A.id, B.n from test.test_a A
inner join test.return_test_b() B on A.id = B.id;
 id | n
----+---
  1 | 1
  1 | 2
  1 | 3
  2 | 1
  2 | 2
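If the function takes parameters that come from the joined table, a LATERAL join works too. A sketch, using a hypothetical parameterized variant of the function above:
create or replace function test.return_test_b_for (p_id integer)
returns table (
id integer,
n integer
) as
$$
select * from test.test_b b where b.id = p_id
$$ language sql;
select A.id, B.n from test.test_a A
cross join lateral test.return_test_b_for(A.id) B;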

How to auto-increment an alpha-numeric value in PostgreSQL?

I am using PostgreSQL 9.3.5.
I have a table (StackOverflowTable) with columns (SoId, SoName, SoDob).
I want a sequence generator for the column SoId, which holds an alpha-numeric value.
I want to auto-increment an alpha-numeric value in PostgreSQL.
For example: SO10001, SO10002, SO10003, ..., SO99999.
Edit:
What if tomorrow I need to generate a sequence like SO1000E100, SO1000E101, ..., with good performance? What would be the best solution then?
Use sequences and default value for id:
postgres=# CREATE SEQUENCE xxx;
CREATE SEQUENCE
postgres=# SELECT setval('xxx', 10000);
setval
--------
10000
(1 row)
postgres=# CREATE TABLE foo(id text PRIMARY KEY
CHECK (id ~ '^SO[0-9]+$' )
DEFAULT 'SO' || nextval('xxx'),
b integer);
CREATE TABLE
postgres=# insert into foo(b) values(10);
INSERT 0 1
postgres=# insert into foo(b) values(20);
INSERT 0 1
postgres=# SELECT * FROM foo;
id | b
---------+----
SO10001 | 10
SO10002 | 20
(2 rows)
You can define the default value of your column as a concatenation of 'S' and a normal sequence, as below:
CREATE SEQUENCE sequence_for_alpha_numeric
INCREMENT 1
MINVALUE 1
MAXVALUE 9223372036854775807
START 1
CACHE 1;
CREATE TABLE table1
(
alpha_num_auto_increment_col character varying NOT NULL,
sample_data_col character varying,
CONSTRAINT table1_pkey PRIMARY KEY (alpha_num_auto_increment_col)
)
;
ALTER TABLE table1 ALTER COLUMN alpha_num_auto_increment_col SET DEFAULT TO_CHAR(nextval('sequence_for_alpha_numeric'::regclass),'"S"fm000000');
Test:
insert into table1 (sample_data_col) values ('test1');
insert into table1 (sample_data_col) values ('test2');
insert into table1 (sample_data_col) values ('test3');
select * from table1;
alpha_num_auto_increment_col | sample_data_col
------------------------------+-----------------
S000001 | test1
S000002 | test2
S000003 | test3
(3 rows)
How to use sequences
How to use the to_char function.
Create a sequence like below:
CREATE SEQUENCE seq_autoid
INCREMENT 1
MINVALUE 1
MAXVALUE 9223372036854775807
START 10000;
Create a function to generate the alpha-numeric ID:
create or replace function auto_id() returns varchar as $$
select 'SO' || nextval('seq_autoid')
$$ language sql;
And try this example table:
create table AAA(id text, namez text);
insert into AAA values (auto_id(), 'MyName');
insert into AAA values (auto_id(), 'MyName1');
insert into AAA values (auto_id(), 'MyName2');
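Since the sequence starts at 10000, selecting from the table should then show ids SO10000, SO10001 and SO10002:
select * from AAA;
   id    |  namez
---------+----------
 SO10000 | MyName
 SO10001 | MyName1
 SO10002 | MyName2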