Convert Postgres Aggregate Function to Inline Array - postgresql

I've been asked how to do a standard deviation on a variable list of values within a row. For example:
select
name, x, y, z, stddev (x, y, z)
from foo;
or
select
order_no, a, b, c, d, e, f, stddev (a, b, c, d, e, f)
from foo;
So essentially just like min => least and max => greatest, I'd like a similar way to turn the aggregate stddev into a "normal" function.
I have been able to create a custom function to calculate standard deviation based on the standard formula, but I can't help but prefer to use the built-in function, if possible. I tried this:
-- Question's attempt at a row-wise standard deviation over a variadic list of
-- numeric values. As written it fails at run time with
-- "set-valued function called in context that cannot accept a set".
CREATE OR REPLACE FUNCTION std_deviation(variadic inputs numeric[])
RETURNS numeric AS
$BODY$
DECLARE
result numeric;
BEGIN
-- unnest() is set-returning and is used here as the direct argument of the
-- stddev aggregate; this is the expression the error is raised for.
select stddev (unnest (inputs))
into result;
return result;
end
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
And it complains:
ERROR: set-valued function called in context that cannot accept a set
There is no shortage of traffic on this error message, but I can't quite figure out how to apply the fix to my simple function.
Or, is there a better way to do this from the beginning?

Set-returning functions (SRFs) -- such as unnest -- in the SELECT clause are a PostgreSQL-specific extension of the SQL standard, and they are usually not worth using there (because they do not behave the way they look). Also, SRFs cannot be used within aggregate functions.
Use these SRF functions in the FROM clause instead (and use sub-selects where needed):
SELECT name, x, y, z, (SELECT stddev(v) FROM unnest(ARRAY[x, y, z]) v)
FROM foo
If you really want to write a function for that, use the SQL language (it's clearer, and PostgreSQL can optimize the use of such functions):
-- Row-wise standard deviation: applies the built-in stddev aggregate to the
-- values passed as a variadic list (or as an array with the VARIADIC keyword).
-- Returns whatever stddev returns for those values.
CREATE OR REPLACE FUNCTION std_deviation(VARIADIC inputs numeric[])
    RETURNS numeric
    LANGUAGE sql
    IMMUTABLE
AS $BODY$
    -- Expand the array in FROM so the aggregate sees one row per element.
    SELECT stddev(elem.val)
    FROM unnest(inputs) AS elem(val)
$BODY$;

This seems to do the trick.
-- Row-wise standard deviation over a variadic list of numeric values, using
-- the built-in stddev aggregate.
--
-- Parameters:
--   inputs  the values to aggregate (variadic; an array may be passed with
--           the VARIADIC keyword).
-- Returns:
--   stddev of the supplied values (NULL when the aggregate yields NULL, e.g.
--   for a single value).
--
-- Declared IMMUTABLE: the result depends only on the arguments, so the
-- planner may fold constant calls and the function can be used in indexes.
CREATE OR REPLACE FUNCTION public.std_deviation(VARIADIC inputs numeric[])
RETURNS numeric AS
$BODY$
DECLARE
    result numeric;
BEGIN
    -- Expand the array in FROM (not in the select list) so the aggregate
    -- receives one row per element; no intermediate CTE is needed.
    SELECT stddev(v)
    INTO result
    FROM unnest(inputs) AS v;

    RETURN result;
END
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 100;

It turns out pgnumerics already has a function for this.
-- test=# select pgnumerics.stdev('{1345,1301,1368,1322,1310,1370,1318,1350,1303,1299}');
-- stdev
-- ------------------
-- 27.4639157198435
-- (1 row)
-- Sample standard deviation of a one-dimensional double precision array,
-- computed as sqrt((N*sum(x^2) - sum(x)^2) / (N*(N-1))).
--
-- Parameters:
--   X  array of values; any lower bound is accepted (e.g. '[0:2]={1,2,3}').
-- Returns:
--   the sample standard deviation, or NULL when X is NULL, empty, or has
--   fewer than two elements (the formula divides by N-1).
CREATE OR REPLACE FUNCTION pgnumerics.stdev (
X double precision []
) RETURNS double precision
AS $$
DECLARE
    lo integer := array_lower(X, 1);
    hi integer := array_upper(X, 1);
    N  integer;
    xx double precision := 0.0;  -- running sum of squares
    sx double precision := 0.0;  -- running sum
BEGIN
    -- array_lower/array_upper are NULL for a NULL or empty array.
    IF lo IS NULL OR hi IS NULL THEN
        RETURN NULL;
    END IF;

    N := hi - lo + 1;
    IF N < 2 THEN
        RETURN NULL;
    END IF;

    -- Iterate over the array's actual subscript range; a fixed 1..N range
    -- reads the wrong elements when the lower bound is not 1.
    FOR i IN lo..hi LOOP
        xx := xx + X[i] * X[i];
        sx := sx + X[i];
    END LOOP;

    -- Clamp at zero: floating-point rounding can leave a tiny negative
    -- radicand when all values are (nearly) equal, which sqrt() rejects.
    RETURN sqrt(greatest(N * xx - sx * sx, 0.0) / (N * (N - 1.0)));
END;
$$ LANGUAGE plpgsql;
http://pgnumerics.projects.pgfoundry.org/

Related

Postgresql howto dynamically change parameter value

Is it possible to dynamically set param of numeric() ? eg:
-- Question's sketch (pseudo-code, not runnable as written): the goal is to
-- cast a value to numeric(12, x) where the scale x is held in a variable.
CREATE OR REPLACE somefunction() RETURNS numeric AS
DECLARE
f numeric;
x integer;
BEGIN
x := 2;
-- The variable x is used as the scale of the numeric type here; per the
-- question, this does not work because the cast expects a constant.
SELECT INTO f CAST(something AS numeric(12, x));
RETURN f;
END;
So, I don't need to use CASE inside my plpgsql function if possible :) Tried everything, but it does not work, cast expects constant. Thanks:)
Using Dynamic query:
-- Casts "something" to numeric(12, 2). The scale lives in a local value and is
-- spliced into the statement text, because a type modifier cannot be supplied
-- as a bound parameter; the value itself is still passed via USING.
CREATE OR REPLACE FUNCTION public.somefunction(something numeric)
    RETURNS numeric
    LANGUAGE plpgsql
AS $function$
DECLARE
    target_scale integer := 2;
    cast_sql     text;
    converted    numeric;
BEGIN
    -- Only the (integer) scale becomes part of the SQL text.
    cast_sql := 'SELECT CAST($1 AS numeric(12, ' || target_scale || '))';

    EXECUTE cast_sql
       INTO converted
      USING something;

    RETURN converted;
END;
$function$;
select somefunction(126.787);
somefunction
--------------
126.79
Alternate where you pass in the scale:
-- Casts "something" to numeric(12, scale), with the scale chosen by the caller.
-- The scale is concatenated into the statement text (a type modifier cannot be
-- a bound parameter); the value to convert is bound through USING.
CREATE OR REPLACE FUNCTION public.somefunction(something numeric, scale integer)
    RETURNS numeric
    LANGUAGE plpgsql
AS $function$
DECLARE
    cast_sql  text;
    converted numeric;
BEGIN
    cast_sql := 'SELECT CAST($1 AS numeric(12, ' || scale || '))';

    EXECUTE cast_sql
       INTO converted
      USING something;

    RETURN converted;
END;
$function$;
select somefunction(126.787,2);
somefunction
--------------
126.79
(1 row)
select somefunction(126.787,1);
somefunction
--------------
126.8

Postgresql, define box variable in function

I am trying to define a box variable in a PostgreSQL function. Here is an arbitrary function that I've tried and it does not work:
-- Question's attempt (reported as not working): returns the average of
-- location[0] and location[1] over the rows of "points" selected by the WHERE
-- clause, which is meant to test against a square box built from the arguments.
CREATE OR REPLACE FUNCTION "public"."find_centroid"("argbase_x" int8, "argbase_y" int8, "arg_length" int)
RETURNS TABLE("cent_x" float8, "cent_y" float8) AS $BODY$
DECLARE
-- edge_x / edge_y are assigned below but never read; bounds is never used.
edge_x int8;
edge_y int8;
bounds text;
BEGIN
edge_x := "argbase_x" + 5;
edge_y := "argbase_y" + 5;
RETURN QUERY
SELECT sum(location[0])/count("location") as cent_x, sum(location[1])/count("location") as cent_y
from points
-- The box is written as a quoted constant, so the argument names inside the
-- quotes are literal text rather than references to the parameters.
-- NOTE(review): "<#" is presumably a mangled "<@" (contained-in) -- confirm.
WHERE "location" <# box'(("argbase_x","argbase_y"),("argbase_x"+"arg_length","argbase_y"+"arg_length"))';
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
The issue is with how to build a box from arguments box'(("argbase_x","argbase_y"),("argbase_x"+"arg_length","argbase_y"+"arg_length"))'
I've tried various methods so far, like string concatenation for example. But nothing is working and I can't seem to find any resources online that offers an example on how to declare or define a geometric variable in a function.
After some trial and error, this worked for me:
-- Centroid of the points that lie inside an axis-aligned square.
--
-- Parameters:
--   argbase_x, argbase_y  one corner of the square.
--   arg_length            side length; the opposite corner is
--                         (argbase_x + arg_length, argbase_y + arg_length).
-- Returns:
--   one row (cent_x, cent_y) holding the mean of location[0] and location[1]
--   over the rows of "points" whose "location" is contained in the square;
--   both columns are NULL when no row matches.
CREATE OR REPLACE FUNCTION "public"."find_centroid"("argbase_x" int8, "argbase_y" int8, "arg_length" int)
RETURNS TABLE("cent_x" float8, "cent_y" float8) AS $BODY$
DECLARE
    bounds box;
BEGIN
    -- Build the box from two corner points instead of assembling its text
    -- form; the arguments are then ordinary expressions, not quoted text.
    bounds := box(
        point("argbase_x", "argbase_y"),
        point("argbase_x" + "arg_length", "argbase_y" + "arg_length")
    );

    RETURN QUERY
    SELECT sum(location[0]) / count("location") AS cent_x,
           sum(location[1]) / count("location") AS cent_y
    FROM points
    -- "<@" is the geometric "contained in or on" operator (point <@ box).
    WHERE "location" <@ bounds;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
you don't have to define any special "box variable", bounding box is done simply as x BETWEEN min_x AND max_x AND y BETWEEN min_y AND max_y
if I understood the logic of the function correctly it would be
WHERE location[0] BETWEEN "argbase_x" AND "argbase_x"+"arg_length"
AND location[1] BETWEEN "argbase_y" AND "argbase_y"+"arg_length"
for more complex shapes there are no built-in operators in Postgres. You'll have to use WKT notation to construct your shapes and PostGIS functions to convert WKT to geometry type here and identify containment or intersection.

How to pass multiple rows to PostgreSQL function?

How can we pass an array of (an unlimited amount of) rows (ie, a constant table) as the parameter/argument of a PostgreSQL function?
Here's an idea:
CREATE TYPE foo AS (
x bigint,
y smallint,
z varchar(64)
);
-- Expands an array of foo rows into a result set, prefixing every row with the
-- first argument as column "a".
CREATE OR REPLACE FUNCTION bar(bigint, foo[])
    RETURNS TABLE(a bigint, x bigint, y smallint, z varchar(64))
    LANGUAGE sql
AS $$
    SELECT $1, elem.x, elem.y, elem.z
    FROM unnest($2) AS elem;
$$;
The below function call works, but is there a way to make it shorter?
SELECT * FROM bar(1, ARRAY[(1,2,'body1'),(2,1,'body2')]::foo[]);
For example, we can't remove the ::foo[] cast, but is there a way to rewrite things so that we can omit it?
Should we be using a variadic argument?
My Google searches kept leading me here, so I'm going to post an answer that may not match exactly the needs of the OP, but might be helpful to others who see the title How to pass multiple rows to PostgreSQL function?
The OP's original request was for a type:
CREATE TYPE foo AS (
x bigint,
y smallint,
z varchar(64)
);
If you are like me, you may want to pass in the results of a standard SELECT query to a function. So imagine I have a table (rather than a type) created as:
-- Table version of the foo composite type. Creating the table also creates a
-- composite type named foo, so foo[] can be used as a parameter type.
-- (A column list uses plain CREATE TABLE; CREATE TABLE ... AS expects a query.)
CREATE TABLE foo (
x bigint,
y smallint,
z varchar(64)
);
I want to pass to a function the results of:
SELECT * from foo WHERE x = 12345;
The results may be zero or many rows.
According to the postgres docs at https://www.postgresql.org/docs/9.5/static/rowtypes.html creating a table also leads to the creation of a composite type with the same name. Which is helpful, since this automatically handles the CREATE TYPE foo in the original question, which I can now pass in to a function as an array.
Now I can create a function that accepts an array of foo typed values (simplified to focus on what is passed in, and how the records are used, rather than what is returned):
-- Sketch of a function that takes an array of foo rows and processes each one.
-- The return type and the final RETURN are deliberately elided ("..."), so
-- this is a template rather than a runnable definition.
--   someint  a bigint argument (not used in the part shown here)
--   foos     the rows to process, passed as foo[]
CREATE OR REPLACE FUNCTION bar(someint bigint, foos foo[]) RETURNS ...
LANGUAGE plpgsql
AS $$
DECLARE
foo_record record;
begin
-- We are going to loop through each composite type value in the array
-- The elements of the composite value are referenced just like
-- the columns in the original table row
FOREACH foo_record IN ARRAY foos LOOP
-- do something, maybe like:
INSERT INTO new_foo (
x, y, z
)
VALUES (
foo_record.x,
foo_record.y,
foo_record.z
);
END LOOP;
RETURN...
END;
$$;
This function bar(bigint, foo[]) can then be called quite simply with:
SELECT bar(4126521, ARRAY(SELECT * from foo WHERE x = 12345));
which passes in all the rows of a query on the foo table as a foo typed array. The function as we have seen then performs some action against each of those rows.
Although the example is contrived, and perhaps not exactly what the OP was asking, it fits the title of the question and might save others from having to search more to find what they need.
EDIT naming the function arguments makes things easier
PostgreSQL doesn't have table-valued variables (yet), so nothing's going to be pretty. Passing arrays is inefficient but will work for reasonable-sized inputs.
For bigger inputs, what often works is to pass a refcursor. It's clumsy, but can be practical for larger data sets, sometimes combined with temp tables.
e.g.
-- Streams the rows of an already-open cursor as a result set, prefixing each
-- row with "i" as column "a".
--
-- Parameters:
--   i  value copied into column "a" of every output row.
--   c  name of an open cursor whose rows are assignable to the foo row type.
-- Returns:
--   one output row per row fetched from the cursor.
--
-- The loop exits as soon as FETCH finds no row, before RETURN NEXT, so no
-- trailing all-NULL row such as (1,,,) is emitted once the cursor is
-- exhausted (checking FOUND only after RETURN NEXT produces that extra row).
CREATE OR REPLACE FUNCTION bar(i bigint, c refcursor) RETURNS TABLE(a bigint, x bigint, y smallint, z varchar(64)) AS
$$
DECLARE
    cursrow foo;
BEGIN
    LOOP
        FETCH NEXT FROM c INTO cursrow;
        EXIT WHEN NOT FOUND;

        -- Copy the fetched row into the output columns field by field.
        a := i;
        x := cursrow.x;
        y := cursrow.y;
        z := cursrow.z;
        RETURN NEXT;
    END LOOP;
    RETURN;
END;
$$ LANGUAGE plpgsql;
usage:
demo=> BEGIN;
BEGIN
demo=> DECLARE "curs1" CURSOR FOR VALUES (1,2,'body1'), (2,1,'body2');
DECLARE CURSOR
demo=> SELECT bar(1, 'curs1');
bar
---------------
(1,1,2,body1)
(1,2,1,body2)
(1,,,)
(3 rows)
demo=> COMMIT;
COMMIT
Not beautiful. But then, plpgsql never is. It's a pity it doesn't have row-valued lvalues, as being able to write something like (x, y, z) := cursrow or ROW(x, y, z) := cursrow would make it a bit less ugly.
RETURN NEXT works, but only if you return record not named out parameters or TABLE.
And sadly, you can't use SQL (not plpgsql) FETCH ALL in a subexpression so you cannot write
RETURN QUERY NEXT i, cursrow.* FROM (FETCH ALL FROM c) AS cursrow;
It seems that one of the problems is the use of the smallint type, which cannot be implicitly converted from an int constant. Consider the following:
-- drop function if exists bar(bigint, variadic foo[]);
-- drop type if exists foo;
CREATE TYPE foo AS (
x bigint,
y int, -- change type to integer
z varchar(64)
);
-- Variadic variant: each foo row is passed as its own argument and the rows
-- are returned as a result set, prefixed with the first argument as column "a".
CREATE OR REPLACE FUNCTION bar(bigint, VARIADIC foo[])
    RETURNS TABLE(
        a bigint,
        x bigint,
        y int, -- int rather than smallint, matching the foo type
        z varchar(64)
    )
    LANGUAGE sql
AS $$
    SELECT $1, elem.x, elem.y, elem.z
    FROM unnest($2) AS elem;
$$;
-- Voila! It is even simpler than using the ARRAY constructor
SELECT * FROM bar(1, (1,2,'body1'), (2,1,'body2'), (3,4,'taddy bear'));
dbfiddle
About variadic parameters

Understanding difference between int literal vs int parameter in PL/pgSQL function

I have a function to left pad bit strings in PostgreSQL 9.5:
-- Left-pads a bit string with zeros to a fixed width of 32 bits: the value is
-- cast to bit(32) and then shifted right by the number of bits it was short.
CREATE OR REPLACE FUNCTION lpad_bits(val bit varying)
    RETURNS bit varying
    LANGUAGE plpgsql
    IMMUTABLE
AS $BODY$
DECLARE
    pad_count integer := 32 - length(val);
BEGIN
    RETURN CAST(val AS bit(32)) >> pad_count;
END;
$BODY$;
which works fine:
# select lpad_bits(b'1001100111000');
lpad_bits
----------------------------------
00000000000000000001001100111000
(1 row)
My problem is when I try to add a parameter to change the amount of padding:
-- Question's attempt to make the padding width configurable. The function is
-- created, but calling it fails with
--   ERROR: invalid input syntax for integer: "sz"
CREATE OR REPLACE FUNCTION lpad_bits(val bit varying, sz integer default 1024)
RETURNS bit varying as
$BODY$
-- sz is used as the length modifier of the bit type in the cast below; that
-- cast is the position the error message points at.
BEGIN return val::bit(sz) >> (sz-length(val));
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE;
The function is now broken:
# select lpad_bits(b'1001100111000', 32);
ERROR: invalid input syntax for integer: "sz"
LINE 1: SELECT val::bit(sz) >> (sz-length(val))
^
QUERY: SELECT val::bit(sz) >> (sz-length(val))
CONTEXT: PL/pgSQL function lpad_bits(bit varying,integer) line 2 at RETURN
I have stared at the bitstring documentation and the PL/pgSQL function documentation, and I am simply not seeing what is fundamentally different between these two implementations.
Why?
PL/pgSQL executes SQL queries like prepared statements. The manual about parameter substitution:
Prepared statements can take parameters: values that are substituted
into the statement when it is executed.
Note the term values. Only actual values can be parameterized, but not key words, identifiers or type names. 32 in bit(32) looks like a value, but the modifier of a data type is only a "value" internally and can't be parameterized. SQL demands to know data types at planning stage, it cannot wait for the execution stage.
You could achieve your goal with dynamic SQL and EXECUTE. As proof of concept:
-- Left-pads a bit string with zeros to sz bits using dynamic SQL.
--   val     the bit string to pad
--   sz      target width (default 32)
--   outval  (OUT) the padded bit string
-- sz appears twice on purpose: once as text spliced into the statement (the
-- bit(n) length cannot be a bound parameter) and once inside an ordinary value
-- that is bound as $2.
CREATE OR REPLACE FUNCTION lpad_bits(val varbit, sz int = 32, OUT outval varbit)
    LANGUAGE plpgsql
    IMMUTABLE
AS $func$
DECLARE
    shift_sql text := format('SELECT $1::bit(%s) >> $2', sz);  -- sz as literal
    shift_by  int  := sz - length(val);                        -- sz as value
BEGIN
    EXECUTE shift_sql
       INTO outval
      USING val, shift_by;
END
$func$;
Call:
SELECT lpad_bits(b'1001100111000', 32);
Note the distinction between sz being used as literal to build the statement and its second occurrence where it's used as value, that can be passed as parameter.
Faster alternatives
A superior solution for this particular task is to just use lpad() like @Abelisto suggested:
-- Left-pads a bit string with zeros to sz bits (default 32) by round-tripping
-- through text: lpad() does the padding, then the result is cast back.
CREATE OR REPLACE FUNCTION lpad_bits2(val varbit, sz int = 32)
    RETURNS varbit
    LANGUAGE sql
    IMMUTABLE
AS $func$
    SELECT CAST(lpad(CAST(val AS text), sz, '0') AS varbit);
$func$;
(Simpler as plain SQL function, which also allows function inlining in the context of outer queries.)
Several times faster than the above function. A minor flaw: we have to cast to text and back to varbit. Unfortunately, lpad() is not currently implemented for varbit. The manual:
The following SQL-standard functions work on bit strings as well as
character strings: length, bit_length, octet_length, position, substring, overlay.
overlay() is available, we can have a cheaper function:
-- Left-pads a bit string by overlaying it onto the right-hand end of "base".
--
-- Parameters:
--   val   the bit string to pad.
--   base  the background bit string (default: 32 zero bits); its length is
--         the width of the result. val must not be longer than base.
-- Returns:
--   base with its last bit_length(val) bits replaced by val.
CREATE OR REPLACE FUNCTION lpad_bits3(val varbit, base varbit = '00000000000000000000000000000000')
RETURNS varbit AS
$func$
    -- overlay() positions are 1-based, so the right-aligned start position is
    -- (length of base) - (length of val) + 1; without the +1 the value lands
    -- one bit too far left.
    SELECT overlay(base PLACING val FROM bit_length(base) - bit_length(val) + 1)
$func$ LANGUAGE sql IMMUTABLE;
Faster if you can work with varbit values to begin with. (The advantage is (partly) voided, if you have to cast text to varbit anyway.)
Call:
SELECT lpad_bits3(b'1001100111000', '00000000000000000000000000000000');
SELECT lpad_bits3(b'1001100111000', repeat('0', 32)::varbit);
We might overload the function with a variant taking an integer to generate base itself:
-- Left-pads a bit string with zeros to sz bits by overlaying it onto the
-- right-hand end of a string of sz zero bits.
--
-- Parameters:
--   val  the bit string to pad; must not be longer than sz bits.
--   sz   width of the result in bits (default 32).
-- Returns:
--   a bit string of sz bits whose last bit_length(val) bits are val.
CREATE OR REPLACE FUNCTION lpad_bits3(val varbit, sz int = 32)
RETURNS varbit AS
$func$
    -- overlay() positions are 1-based, so the right-aligned start position is
    -- sz - (length of val) + 1; without the +1 the value lands one bit too
    -- far left.
    SELECT overlay(repeat('0', sz)::varbit PLACING val FROM sz - bit_length(val) + 1)
$func$ LANGUAGE sql IMMUTABLE;
Call:
-- Pads to 32 bits via the integer-width variant (closing parenthesis restored).
SELECT lpad_bits3(b'1001100111000', 32);
Related:
Postgresql Convert bit varying to integer
Convert hex in text representation to decimal number
The parser does not allow a variable at that place. The alternative is to use a constant and trim it:
select right((val::bit(128) >> (128 -length(val)))::text, sz)::bit(sz)
from (values (b'1001100111000', 32)) s(val,sz)
;
right
----------------------------------
00000000000000000001001100111000
Or the lpad function as suggested in the comments.

How to access plpgsql composite type array components

Let's say I've created a composite type in Postgresql:
-- Composite type with two integer fields, x and y (attributes must be
-- separated by a comma).
CREATE TYPE custom_type AS
(x integer,
y integer);
I need to use it in a function as an array:
...
DECLARE
customVar custom_type[];
BEGIN
....
My question is: how do I access custom_type's specific components?
For example, I want to (re)assign 'x' for the third element in custom_type array...
postgres=> create type pt as (x int, y int);
CREATE TYPE
postgres=> create or replace function fx()
returns void as $$
declare a pt[] = ARRAY[(10,20),(30,40)]; declare xx pt;
begin
for i in array_lower(a, 1) .. array_upper(a,1)
loop
xx = a[i]; xx.x := xx.x + 1; a[i] := xx; raise notice '%', a[i].x;
end loop;
end;
$$ language plpgsql;
CREATE FUNCTION
postgres=> select fx();
NOTICE: 11
NOTICE: 31
fx
────
(1 row)
A significant limitation of the target of an assignment statement is that it can refer to only one level of nesting (an array element or a record field, but not a field of an array element). This limit can be bypassed with auxiliary variables, as shown above. That is not very friendly, and the internal implementation is rather simple, but it is fast and sufficient for typical stored-procedure usage, even though it is weak in comparison with general-purpose programming languages.
Given:
SELECT ARRAY[(1,2),(3,4)]::custom_type[];
Use an array subscript and then refer to the field by name.
regress=> SELECT (ARRAY[(1,2),(3,4)]::custom_type[])[1].x;
x
---
1
(1 row)