bytea in postgres storing and retrieving bytes - postgresql

I am trying to understand how to work with binary data in postgresql (v 8.3).
Let's say I have a following table
Table "public.message"
Column | Type | Modifiers
---------+---------+-----------
id | integer |
message | bytea |
I would like to store a packet in the message field in this format:
version (1 byte), identifier (1 byte), epoch (4 bytes)
I would like to pack this data into the message field. Lets say I have version=1, identifier=8 and epoch=123456. How would I pack this data into the message field? How would I convert my integer values to hex.. or octal?
I also need to get the message back and parse it. I was looking at the get_byte function, unless there is another way to parse the data out..
Thanks!

Here is some sample code showing how to do it with server-side Perl. Annoyingly, pack/unpack are considered untrusted operations by PG so this has to be created with plperlu by a superuser and then access granted with GRANT EXECUTE to non superusers.
On the other hand, this choice of language makes it easy to deal with more complex packed structures, which is a significant advantage over code that would be based on the SQL get_bytes()/set_bytes() functions. See Perl's pack() features.
1) first step: define a SQL composite type representing an non-packed record.
create type comp as (a smallint, b smallint, c int);
2) make a function to pack the record value into bytea:
create function pack_comp(comp) returns bytea
as $body$
my $arg=shift;
my $retval = pack("CCL", $arg->{a},$arg->{b},$arg->{c});
# encode bytea according to PG doc. For PG>=9.0, use encode_bytea() instead
$retval =~ s!(\\|[^ -~])!sprintf("\\%03o",ord($1))!ge; # from PG doc
return $retval;
$body$ language plperlu;
3) make a function to unpack bytea into the composite type:
create or replace function unpack_comp(bytea) returns comp
as $body$
my $arg=shift;
# decode bytea according to PG doc. For PG>=9.0, use decode_bytea() instead
$arg =~ s!\\(?:\\|(\d{3}))!$1 ? chr(oct($1)) : "\\"!ge;
my ($v,$i,$e)= unpack("CCL", $arg);
return {"a"=>$v, "b"=>$i, "c"=>$e};
$body$ language plperlu;
4) usage:
# select encode(pack_comp((254,14,1000000)::comp), 'hex');
encode
--------------
fe0e40420f00
# select unpack_comp(decode('fe0e40420f00','hex'));
unpack_comp
------------------
(254,14,1000000)
# select * from unpack_comp(decode('fe0e40420f00','hex'));
a | b | c
-----+----+---------
254 | 14 | 1000000

So I was able to figure out how to do it in plpg
Here's the code to pack
CREATE FUNCTION pack_numeric_bytes(i_values NUMERIC[], i_byte_sizes NUMERIC[], i_big_endian BOOLEAN)
RETURNS BYTEA
DECLARE
v_bytes BYTEA := NULL;
v_start INTEGER := 1;
v_byte BYTEA;
v_byte_size INTEGER;
v_value NUMERIC;
v_binary_value TEXT;
v_num NUMERIC;
i INTEGER;
x INTEGER;
v_sql TEXT;
BEGIN
IF array_upper(i_values, 1) != array_upper(i_byte_sizes, 1) THEN
RETURN v_bytes;
END IF;
FOR x IN array_lower(i_values, 1) .. array_upper(i_values, 1) LOOP
/* Given value and size at x position */
v_byte_size := i_byte_sizes[x]::INTEGER;
v_value := i_values[x];
/* Convert number to binary form */
v_sql := $$SELECT $$|| v_value ||$$::bit($$|| v_byte_size*8 ||$$);$$;
EXECUTE v_sql INTO v_binary_value;
IF i_big_endian IS TRUE THEN
/* Convert each byte at a time */
FOR i IN 1 .. v_byte_size LOOP
/* Extract byte from our binary value.
Big endian starts at 1 and then increments of 8 */
v_byte := substring(v_binary_value, v_start, 8);
/* Convert binary 8 bits to an integer */
v_sql := $$SELECT B$$||quote_literal(v_byte)||$$::int8$$;
EXECUTE v_sql INTO v_num;
/* Build bytea of bytes */
v_bytes := COALESCE(v_bytes, '') || set_byte(E' '::BYTEA, 0, v_num::INTEGER);
v_start := v_start + 8;
END LOOP;
ELSE
/* Small endian is extracted starting from last byte */
v_start := (v_byte_size * 8) + 1;
/* Convert each byte at a time */
FOR i IN 1 .. v_byte_size LOOP
v_start := v_start - 8;
v_byte := substring(v_binary_value, v_start, 8);
/* Convert binary 8 bits to an integer */
v_sql := $$SELECT B$$||quote_literal(v_byte)||$$::int8$$;
EXECUTE v_sql INTO v_num;
/* Build bytea of bytes */
v_bytes := COALESCE(v_bytes, '') || set_byte(E' '::BYTEA, 0, v_num::INTEGER);
END LOOP;
END IF; /* END endian check */
v_start := 1;
END LOOP;
RETURN v_bytes;
END;
And here's the code to unpack:
CREATE OR REPLACE FUNCTION public.unpack_numeric_bytes(i_bytes bytea, i_byte_sizes INTEGER[], i_big_endian BOOLEAN)
RETURNS NUMERIC[]
SECURITY DEFINER AS
DECLARE
v_bytes BYTEA;
v_start INTEGER := 1;
v_byte_index INTEGER := 0;
v_bit_shift INTEGER := 0;
v_length INTEGER;
v_size INTEGER;
v_sum_byte_sizes INTEGER;
v_vals NUMERIC[] := '{}';
v_val BIGINT := 0;
i INTEGER;
x INTEGER;
v_sql TEXT;
BEGIN
v_sql := $$SELECT $$|| array_to_string(i_byte_sizes, '+')||$$;$$;
EXECUTE v_sql INTO v_sum_byte_sizes;
IF length(i_bytes) != v_sum_byte_sizes::INTEGER THEN
RETURN v_vals;
END IF;
/* Loop through values of bytea (split by their sizes) */
FOR x IN array_lower(i_byte_sizes, 1) .. array_upper(i_byte_sizes, 1) LOOP
v_size := i_byte_sizes[x];
v_bytes := substring(i_bytes, v_start, v_size);
v_length := length(v_bytes);
IF i_big_endian IS TRUE THEN
v_byte_index := v_length - 1;
FOR i IN 1..v_length LOOP
v_val := v_val + (get_byte(v_bytes, v_byte_index) << v_bit_shift);
v_bit_shift := v_bit_shift + 8;
v_byte_index := v_byte_index - 1;
END LOOP;
ELSE
FOR i IN 1..v_length LOOP
v_val := v_val + (get_byte(v_bytes, v_byte_index) << v_bit_shift);
v_bit_shift := v_bit_shift + 8;
v_byte_index := v_byte_index + 1;
END LOOP;
END IF;
v_vals := array_append(v_vals, v_val::NUMERIC);
/* Calculate next value start index */
v_start := v_start + v_size;
v_byte_index := 0;
v_bit_shift := 0;
v_val := 0;
END LOOP;
RETURN v_vals;
END;
I hope this will help someone.

Related

Conversion of NCLOB to Unicode/HEX/BASE64

My Oracle 12c Database characterset is WE8ISO8859P1
I have a table with column as NCLOB , and want to extract the data 3 different encoding
BASE64
HEX
UNICODE
I tried writing some function , the output is correct , but why new line is present in the encoded string.
Please find the details of the function as below
WITH
FUNCTION EncodeBASE64(InClearChar IN CLOB) RETURN CLOB IS
dest_lob BLOB;
lang_context INTEGER := DBMS_LOB.DEFAULT_LANG_CTX;
dest_offset INTEGER := 1;
src_offset INTEGER := 1;
read_offset INTEGER := 1;
warning INTEGER;
ClobLen INTEGER := DBMS_LOB.GETLENGTH(InClearChar);
amount INTEGER := 1440; -- must be a whole multiple of 3
-- size of a whole multiple of 48 is beneficial to get NEW_LINE after each 64 characters
buffer RAW(1440);
res CLOB := EMPTY_CLOB();
BEGIN
IF InClearChar IS NULL OR NVL(ClobLen, 0) = 0 THEN
RETURN NULL;
ELSIF ClobLen <= 24000 THEN
RETURN UTL_RAW.CAST_TO_VARCHAR2(UTL_ENCODE.BASE64_ENCODE(UTL_RAW.CAST_TO_RAW(InClearChar)));
END IF;
-- UTL_ENCODE.BASE64_ENCODE is limited to 32k/(3/4), process in chunks if bigger
DBMS_LOB.CREATETEMPORARY(dest_lob, TRUE);
DBMS_LOB.CONVERTTOBLOB(dest_lob, InClearChar, DBMS_LOB.LOBMAXSIZE, dest_offset, src_offset,
DBMS_LOB.DEFAULT_CSID, lang_context, warning);
LOOP
EXIT WHEN read_offset >= dest_offset;
DBMS_LOB.READ(dest_lob, amount, read_offset, buffer);
res := res || UTL_RAW.CAST_TO_VARCHAR2(UTL_ENCODE.BASE64_ENCODE(buffer));
read_offset := read_offset + amount;
END LOOP;
DBMS_LOB.FREETEMPORARY(dest_lob);
RETURN res;
END EncodeBASE64;
FUNCTION castToHex(InClearChar IN NCLOB) RETURN CLOB IS
dest_lob BLOB;
lang_context INTEGER := DBMS_LOB.DEFAULT_LANG_CTX;
dest_offset INTEGER := 1;
src_offset INTEGER := 1;
read_offset INTEGER := 1;
warning INTEGER;
ClobLen INTEGER := DBMS_LOB.GETLENGTH(InClearChar);
amount INTEGER := 1440; -- must be a whole multiple of 3
-- size of a whole multiple of 48 is beneficial to get NEW_LINE after each 64 characters
buffer RAW(1440);
res CLOB := EMPTY_CLOB();
BEGIN
IF InClearChar IS NULL OR NVL(ClobLen, 0) = 0 THEN
RETURN NULL;
ELSIF ClobLen <= 24000 THEN
RETURN RAWTOHEX(UTL_RAW.CAST_TO_RAW(InClearChar));
END IF;
-- UTL_ENCODE.BASE64_ENCODE is limited to 32k/(3/4), process in chunks if bigger
DBMS_LOB.CREATETEMPORARY(dest_lob, TRUE);
DBMS_LOB.CONVERTTOBLOB(dest_lob, InClearChar, DBMS_LOB.LOBMAXSIZE, dest_offset, src_offset,
DBMS_LOB.DEFAULT_CSID, lang_context, warning);
LOOP
EXIT WHEN read_offset >= dest_offset;
DBMS_LOB.READ(dest_lob, amount, read_offset, buffer);
res := res || RAWTOHEX(buffer);
read_offset := read_offset + amount;
END LOOP;
DBMS_LOB.FREETEMPORARY(dest_lob);
RETURN res;
END castToHex;
SELECT EncodeBASE64('uživaÄ Ê Í Õ Ø A B C D Etele, kteří temp in the town A B C D Etele, kteří') as encodedBASE64
from dual;
OUTPUT
encodedBASE64 -> dcW+aXZhw4Qgw4ogw40gw5Ugw5ggQSBCIEMgRCBFdGVsZSwga3RlxZnDrSB0ZW1w
IGluIHRoZSB0b3duICBBIEIgQyBEIEV0ZWxlLCBrdGXFmcOt
Question 1 - When I use this encodedBASE64 online to convert to String, it is returning the text correctly , but why new line is present in the encoded string , even If I remove the new line output is not changed
Question 2 - How to convert this string to unicode format ,please help

PBKDF2 function in PostgreSQL

How can the PBKDF2 function be done in PostgreSQL? There does not appear to be a native implementation.
Moved Answer out of Question to adhere to Stack Overflow guidelines. See original revision of post.
Original post (Revision link)
Not able to find it natively, and based on PHP code found on the 'net, I came up with this PBKDF2 function for PostgreSQL. Enjoy.
create or replace function PBKDF2
(salt bytea, pw text, count integer, desired_length integer, algorithm text)
returns bytea
immutable
language plpgsql
as $$
declare
hash_length integer;
block_count integer;
output bytea;
the_last bytea;
xorsum bytea;
i_as_int32 bytea;
i integer;
j integer;
k integer;
begin
algorithm := lower(algorithm);
case algorithm
when 'md5' then
hash_length := 16;
when 'sha1' then
hash_length = 20;
when 'sha256' then
hash_length = 32;
when 'sha512' then
hash_length = 64;
else
raise exception 'Unknown algorithm "%"', algorithm;
end case;
block_count := ceil(desired_length::real / hash_length::real);
for i in 1 .. block_count loop
i_as_int32 := E'\\000\\000\\000'::bytea || chr(i)::bytea;
i_as_int32 := substring(i_as_int32, length(i_as_int32) - 3);
the_last := salt::bytea || i_as_int32;
xorsum := HMAC(the_last, pw::bytea, algorithm);
the_last := xorsum;
for j in 2 .. count loop
the_last := HMAC(the_last, pw::bytea, algorithm);
--
-- xor the two
--
for k in 1 .. length(xorsum) loop
xorsum := set_byte(xorsum, k - 1, get_byte(xorsum, k - 1) # get_byte(the_last, k - 1));
end loop;
end loop;
if output is null then
output := xorsum;
else
output := output || xorsum;
end if;
end loop;
return substring(output from 1 for desired_length);
end $$;
I've tested against other implementations without deviation, but be sure to test it yourself.

Syntax error at or near ","

I have problems with this function and couldn't figure out how to fix it.
Create Function Quy(sdate timestamp)
returns integer as $$
declare
numbmonth integer;
quy integer;
Begin
numbmonth := Date_part('month',sdate);
If numbmonth < 4 then
quy := 1;
else if numbmonth < 7 then
quy := 2;
else if numbmonth < 10 then
quy := 3;
else quy := 4;
return quy;
END;
$$
LANGUAGE plpgsql;
This happens when I try to run the code:
ERROR: syntax error at or near ";"
LINE 16: END;
I really don't understand what is wrong with this.
Multiple syntax errors. The function would work like this:
CREATE OR REPLACE FUNCTION quy(sdate timestamp)
RETURNS integer AS
$func$
DECLARE
numbmonth integer := date_part('month', sdate);
quy integer;
BEGIN
IF numbmonth < 4 THEN
quy := 1;
ELSIF numbmonth < 7 THEN
quy := 2;
ELSIF numbmonth < 10 THEN
quy := 3;
ELSE
quy := 4;
END IF;
RETURN quy;
END
$func$ LANGUAGE plpgsql;
Consult the manual for the basic syntax of IF.
But that's much ado about nothing. To get the quarter of the year use the field specifier QUARTER with date_part() or EXTRACT() in a simple expression:
EXTRACT(QUARTER FROM $timestamp)
EXTRACT is the standard SQL equivalent of date_part().
Either returns double precision, so cast to integer if you need that (::int).
If you still need a function:
CREATE OR REPLACE FUNCTION quy(sdate timestamp)
RETURNS int LANGUAGE sql IMMUTABLE AS
'SELECT EXTRACT(QUARTER FROM $1)::int';
$1 is the reference to the 1st function parameter. Equivalent to sdate in the example. $-notation works in any version of Postgres, while named parameter references in SQL functions were only introduced with Postgres 9.2. See:
PLPGSQL Function to Calculate Bearing
dbfiddle here

replacing values of specific index in postgresql 9.3

CREATE OR REPLACE FUNCTION array_replace(INT[]) RETURNS float[] AS $$
DECLARE
arrFloats ALIAS FOR $1;
J int=0;
x int[]=ARRAY[2,4];
-- xx float[]=ARRAY[2.22,4.33];
b float=2.22;
c float=3.33;
retVal float[];
BEGIN
FOR I IN array_lower(arrFloats, 1)..array_upper(arrFloats, 1) LOOP
FOR K IN array_lower(x, 1)..array_upper(x, 1) LOOP
IF (arrFloats[I]= x[K])THEN
retVal[j] :=b;
j:=j+1;
retVal[j] :=c;
j:=j+1;
ELSE
retVal[j] := arrFloats[I];
j:=j+1;
END IF;
END LOOP;
END LOOP;
RETURN retVal;
END;
$$ LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT;
When I run this query
SELECT array_replace(array[1,20,2,5]);
it give me output like this
"[0:8]={1,1,20,20,2.22,3.33,2,5,5}"
Now I do not know why it is coming this duplicate values. I mean it is straight away a nested loop ...
I need a output like this one
"[0:8]={1,20,2.22,3.33,5}"
You have a double loop with the x array having two elements. On every iteration you push elements onto the result array, hence you get twice as many values.
If I understand you logic correctly, you want to scan the input array for values of another array in that same order. If the same, then replace these values with another array, leaving other values intact. There are no built-in functions to help you here, so you have to do this from scratch:
CREATE FUNCTION array_replace(arrFloats float[]) RETURNS float[] AS $$
DECLARE
searchArr float[] := ARRAY[1.,20.];
replaceArr float[] := ARRAY[1.11,1.,111.,20.2,20.222];
retVal float[];
i int;
ndx int;
len int;
upp int;
low int
BEGIN
low := array_lower(searchArr, 1)
upp := array_upper(searchArr, 1);
len := upp - low + 1;
i := array_lower(arrFloats, 1);
WHILE i <= array_upper(arrFloats, 1) LOOP -- Use WHILE LOOP so can update i
ndx := i; -- index into arrFloats for inner loop
FOR j IN low .. upp LOOP
IF arrFloats[ndx] != searchArr[j] THEN
-- No match so put current element of arrFloats in the result and update i
retVal := retVal || arrFloats[i];
i := i + 1;
EXIT; -- No need to look further, break out of inner loop
END IF;
ndx := ndx + 1;
IF j = upp THEN
-- We have a match so append the replaceArr to retVal and
-- increase i by length of search_array
retVal := retVal || replaceArr;
i := i + len;
END IF;
END LOOP;
END LOOP;
RETURN retVal;
END;
$$ LANGUAGE plpgsql STABLE STRICT;
This function would become much more flexible if you made searchArr and replaceArr into parameters as well.
Test
patrick#puny:~$ psql -d test
psql (9.5.0, server 9.4.5)
Type "help" for help.
test=# select array_replace(array[1,20,2,5]);
array_replace
------------------------------
{1.11,1,111,20.2,20.222,2,5}
(1 row)
test=# select array_replace(array[1,20,2,5,1,20.1,1,20]);
array_replace
------------------------------------------------------------
{1.11,1,111,20.2,20.222,2,5,1,20.1,1.11,1,111,20.2,20.222}
(1 row)
As you can see it works for multiple occurrences of the search array.

PostgreSQL: Is there a function that will convert a base-10 int into a base-36 string?

Is there a function in PostgreSQL that can convert a base 10 number like 30 into a base 36 representation like u?
There are base-64 functions (such as encode) but nothing for base-36. But you could write one of your own or use this one:
CREATE OR REPLACE FUNCTION base36_encode(IN digits bigint, IN min_width int = 0) RETURNS varchar AS $$
DECLARE
chars char[];
ret varchar;
val bigint;
BEGIN
chars := ARRAY['0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'];
val := digits;
ret := '';
IF val < 0 THEN
val := val * -1;
END IF;
WHILE val != 0 LOOP
ret := chars[(val % 36)+1] || ret;
val := val / 36;
END LOOP;
IF min_width > 0 AND char_length(ret) < min_width THEN
ret := lpad(ret, min_width, '0');
END IF;
RETURN ret;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
I think you should ask yourself if the database is the right place for dealing with this sort of data formatting though, presentational issues like this might be better handled closer to final viewing level of your stack.
here's a version that can take numbers of any size, it uses the data type "numeric" which is the postgresql implementation of bignum.
CREATE OR REPLACE FUNCTION base36_encode(IN digits numeric, IN min_width int = 0) RETURNS text AS $$
DECLARE
chars char[] := ARRAY['0','1','2','3','4','5','6','7','8','9','A','B'
,'C','D','E','F','G','H','I','J','K','L','M','N'
,'O','P','Q','R','S','T','U','V','W','X','Y','Z' ] ;
ret text:='';
val numeric:= digits;
BEGIN
IF digits < 0 THEN
val := -val;
END IF;
WHILE val > 0 OR min_width > 0 LOOP
ret := chars[(mod(val,36))+1] || ret;
val := div(val,36);
min_width := min_width-1;
END LOOP;
IF digits < 0 THEN
ret := '-'||ret;
END IF;
RETURN ret;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
Modified Implementation
Modified from other implementation to increase the readability. Any kind of update or modification or suggestion appreciated to increase the readability.
CREATE OR REPLACE FUNCTION fn_base36_encode(IN base10 bigint)
RETURNS varchar
LANGUAGE plpgsql
AS $BODY$
DECLARE
base36 varchar := '';
intval bigint := abs(base10);
char0z char[] := regexp_split_to_array('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ', '');
BEGIN
WHILE intval != 0 LOOP
base36 := char0z[(intval % 36)+1] || base36;
intval := intval / 36;
END LOOP;
IF base10 = 0 THEN base36 := '0'; END IF;
RETURN base36;
END;
$BODY$;