sas hash join vs data step merge - hash

I have to merge three tables; each table has between 100K and 200K records. This merge takes almost five minutes. I need help converting the code below into a hash join. Many thanks in advance.
/* Match-merge the three pre-sorted inputs on CONTROL, keep only the       */
/* controls that occur in sorted_swf, and derive the ELIG_HU flag.         */
data &dsource..main_input;
    merge &dsource..sorted_swf    (in=here1)
          &dsource..sorted_input2 (in=here2)
          &dsource..sorted_input9 (in=here3);
    by control;
    keep var1 var2 var3 var4;

    /* Rows with no contribution from sorted_swf are never written. */
    if not here1 then delete;

    %recode_div

    /* ELIG_HU: '1', '0' or '9' (catch-all).  A STATUS '4' row whose NOINT */
    /* matches neither list below leaves ELIG_HU unassigned.               */
    if typec = 45 then elig_hu = '1';
    else if status = '1' then elig_hu = '1';
    else if status in ('2','3') and type in ('1','2','4','6','10','11') then elig_hu = '1';
    else if status = '4' then do;
        if noint in (1,2,3,5,6) then elig_hu = '1';
        else if noint = 4 or (10 <= noint <= 43) then elig_hu = '0';
    end;
    else if status in ('2','3') and type in ('5','7','8','9') then elig_hu = '0';
    else elig_hu = '9';

    output;
run;
/* Hash-lookup version: stream finput.input2 and look up REGION and        */
/* HUDADMIN by CONTROL in two hash objects built by %create_hash.          */
data want;
    set finput.input2;

    /* Build both hash objects once, on the first iteration only. */
    if _n_ = 1 then do;
        %create_hash(in2,control,region,"ftest5.swf");
        %create_hash(in9,control,hudadmin,"ftest5.input9");
    end;

    /*<initialize lookup variables>*/

    /* Each lookup keeps its own return code.  Sharing one RC variable     */
    /* would let the second FIND overwrite the result of the first, so a   */
    /* failed in2 lookup would go unnoticed.                               */
    rc_in2 = in2.find();
    rc_in9 = in9.find();

    /* FIND returns a non-zero code when the key is not in the hash. */
    if rc_in2 or rc_in9 then do;
        /* <handle case where lookup fails>*/
    end;

    drop rc_in2 rc_in9;
run;

Grab the %create_hash() macro from here
The general use is
/* General lookup pattern: every row read from HAVE is enriched with values
   fetched from a hash object. Items written as <...> are placeholders that
   must be replaced before the step can run. */
data want;
set have;
format <new variables to look up>;
/* The hash object is set up only once, on the first iteration of the step. */
if _n_ = 1 then do;
%create_hash(obj,keyvar1 keyvar2 ..., lookupvar1 lookupvar2 ..., "lookup data set");
end;
<initialize lookup variables>
/* rc holds the return code of the lookup; presumably 0 means the key was
   found (the usual SAS hash FIND convention) - confirm against %create_hash. */
rc = obj.find();
if rc then do;
<handle case where lookup fails>
end;
/* rc is a work variable only, so it is left out of WANT. */
drop rc;
run;

Related

postgres functions and transactions with BEGIN and blocks

I have some questions about how Postgres functions and transactions work. I am a beginner with transactions and functions. Please help me understand the following code.
Currently my function looks like this:
-- what_do_i_do(arg_i jsonb) RETURNS json
--
-- Reads the keys 'i', 'a_i' and (optionally) 'm' from arg_i, looks up the
-- matching row in p and, when the flags allow it, updates p and e.
--
-- Transaction behaviour: a function never commits on its own; it runs inside
-- the transaction of the statement that called it.  Because the body has an
-- EXCEPTION clause, everything between BEGIN and EXCEPTION runs as a
-- subtransaction: if an error is raised, the changes made in that block are
-- rolled back before the handler runs.  The handler below logs a warning and
-- re-raises, so the caller still sees the original error.
--
-- Raises: foreign_key_violation when no row of p matches 'i'.
CREATE OR REPLACE FUNCTION what_do_i_do(
    arg_i jsonb
)
RETURNS json
LANGUAGE plpgsql
AS
$function$
DECLARE
    rcd       RECORD;
    msg       text;      -- NOTE(review): never assigned, so the RAISE below reports <NULL>
    error     json;
    b         boolean;
    i         integer;
    response  json;      -- NOTE(review): never assigned, so the function returns NULL
    -- The variables below were used but not declared in the original body.
    a_i       text;
    cnt       bigint;
    stack_msg text;
    stack     text;
    module    text := 'what_do_i_do';
BEGIN
    --block 1
    i = arg_i->>'i';

    --block 2
    a_i = arg_i->>'a_i';
    IF arg_i ? 'm' THEN
        b = NOT (arg_i->>'m')::boolean;
    ELSE
        b = true;
    END IF;

    --block 3
    SELECT p.aa abc INTO rcd
    FROM p
    WHERE p.pi = i
    LIMIT 1;

    --block 4
    IF NOT FOUND THEN
        -- RAISE defaults to level EXCEPTION and aborts the block, so no
        -- RETURN is needed (or reachable) after it.
        RAISE '%', msg USING ERRCODE = 'foreign_key_violation';
    END IF;

    --block 5
    -- NOTE(review): the SELECT above only fills rcd.abc; rcd.pa will fail at
    -- run time unless the query is changed to select that column - confirm
    -- which column is intended.
    IF b = true AND rcd.pa = false THEN
        --5.1
        UPDATE p SET aa = true WHERE pi = i;

        --5.2
        UPDATE e
        SET aa = true, ab = false
        WHERE aai IN (SELECT aai FROM aa WHERE pi = i AND zzz = false)
          AND dda = false;

        --5.3
        GET DIAGNOSTICS cnt = ROW_COUNT;
    END IF;  -- this terminator was missing in the original

    --6
    RETURN response;

--7
EXCEPTION
    WHEN OTHERS THEN
        GET STACKED DIAGNOSTICS stack_msg = MESSAGE_TEXT, stack = PG_EXCEPTION_CONTEXT;
        RAISE WARNING '%: --- Error Stack ---:
message: %
context: %
', module, stack_msg, stack;
        -- Re-throw the error that is being handled.
        RAISE;
END;
$function$;
The statements can be UPDATE, EXCEPTION or plain SELECT queries based on some_id. As I understand from the PostgreSQL documentation, all statements in this function are executed as a single transaction and committed at the END.

loop through multiple rows and output related dataset SAS 9.4

I have a dataset, which is long data. Each subject has 52 week information.
The output I want is:
data new1 new2 new3 ... new52;
set old;
if week = 1 then output new1;
else if week = 2 then output new2;
else....;
run;
Since there are 52 weeks,I want to write a macro. Don't know how to...
Please see the code below, which uses a macro to create the 52 datasets.
/* Sample input: one observation per week, WEEK = 1, 2, ..., 52. */
data old;
    do week = 1 to 52 by 1;
        output;
    end;
run;
/* %new(i): write dataset NEW<i> holding the rows of OLD whose WEEK equals i. */
%macro new(i);
    data new&i;
        set old;
        /* Subsetting IF: only matching rows reach the implicit output. */
        if week = &i;
    run;
%mend new;
/* %loop(n=52): split OLD into NEW1 ... NEW<n>, one dataset per WEEK value. */
/*                                                                          */
/* n - number of weeks / output datasets (default 52, as in the original). */
/*                                                                          */
/* All outputs are produced by ONE data step, so OLD is read once rather   */
/* than once per week.  Rows whose WEEK is not an integer in 1..n are      */
/* written nowhere.                                                        */
%macro loop(n=52);
    /* Keep the loop index local so it cannot clobber a caller's &j. */
    %local j;

    data
        %do j = 1 %to &n;
            new&j
        %end;
    ;
        set old;
        select (week);
            %do j = 1 %to &n;
                when (&j) output new&j;
            %end;
            otherwise;
        end;
    run;
%mend loop;
%loop;

replacing values of specific index in postgresql 9.3

-- array_replace(int[]) RETURNS float[]
--
-- Copies the input array, replacing every element that equals one of the
-- values in x (2 or 4) by the pair (b, c) = (2.22, 3.33).  All other elements
-- are copied unchanged.
--
-- The original looped over x for every input element and wrote to the result
-- on each inner iteration, so every element was emitted once per entry of x.
-- Here each input element is visited once and tested with = ANY (x).
-- The result is built by appending, so it uses the default lower bound 1;
-- an empty input yields an empty array.
-- Returns NULL when the argument is NULL (RETURNS NULL ON NULL INPUT).
CREATE OR REPLACE FUNCTION array_replace(INT[]) RETURNS float[] AS $$
DECLARE
    arrFloats ALIAS FOR $1;
    x      int[]   = ARRAY[2,4];   -- values to be replaced
    b      float   = 2.22;         -- first replacement value
    c      float   = 3.33;         -- second replacement value
    retVal float[] = '{}';
    elem   int;
BEGIN
    FOREACH elem IN ARRAY arrFloats LOOP
        IF elem = ANY (x) THEN
            retVal := retVal || b || c;
        ELSE
            -- Also taken for NULL elements, which are copied as NULL.
            retVal := retVal || elem::float;
        END IF;
    END LOOP;
    RETURN retVal;
END;
$$ LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT;
When I run this query
SELECT array_replace(array[1,20,2,5]);
it give me output like this
"[0:8]={1,1,20,20,2.22,3.33,2,5,5}"
I do not know why these duplicate values appear; it is just a straightforward nested loop ...
I need a output like this one
"[0:8]={1,20,2.22,3.33,5}"
You have a double loop with the x array having two elements. On every iteration you push elements onto the result array, hence you get twice as many values.
If I understand your logic correctly, you want to scan the input array for the values of another array, in that same order. Where they match, you replace those values with a third array, leaving the other values intact. There are no built-in functions to help you here, so you have to do this from scratch:
-- array_replace(arrFloats float[]) RETURNS float[]
--
-- Scans arrFloats for consecutive runs equal to searchArr ({1,20}) and
-- replaces each run by replaceArr; every other element is copied unchanged.
--
-- Elements are compared with IS DISTINCT FROM.  With plain != a NULL operand
-- (a NULL element, or a subscript past the end of the array) makes the test
-- NULL, which the original treated as "matches", so a partial match at the
-- tail of the input (e.g. {5,1}) was wrongly replaced.
-- An empty input yields an empty array.  Returns NULL for NULL input (STRICT).
CREATE FUNCTION array_replace(arrFloats float[]) RETURNS float[] AS $$
DECLARE
  searchArr  float[] := ARRAY[1.,20.];
  replaceArr float[] := ARRAY[1.11,1.,111.,20.2,20.222];
  retVal     float[] := '{}';
  i   int;   -- position in arrFloats where the current match attempt starts
  ndx int;   -- position in arrFloats compared inside the inner loop
  len int;   -- number of elements in searchArr
  upp int;
  low int;
BEGIN
  low := array_lower(searchArr, 1);
  upp := array_upper(searchArr, 1);
  len := upp - low + 1;
  i := array_lower(arrFloats, 1);
  WHILE i <= array_upper(arrFloats, 1) LOOP -- WHILE so that i can be advanced by more than 1
    ndx := i;
    FOR j IN low .. upp LOOP
      IF arrFloats[ndx] IS DISTINCT FROM searchArr[j] THEN
        -- No match: copy the element at i and restart the search one position on.
        retVal := retVal || arrFloats[i];
        i := i + 1;
        EXIT;
      END IF;
      ndx := ndx + 1;
      IF j = upp THEN
        -- Every element of searchArr matched: emit the replacement and skip
        -- past the matched run.
        retVal := retVal || replaceArr;
        i := i + len;
      END IF;
    END LOOP;
  END LOOP;
  RETURN retVal;
END;
$$ LANGUAGE plpgsql STABLE STRICT;
This function would become much more flexible if you made searchArr and replaceArr into parameters as well.
Test
patrick#puny:~$ psql -d test
psql (9.5.0, server 9.4.5)
Type "help" for help.
test=# select array_replace(array[1,20,2,5]);
array_replace
------------------------------
{1.11,1,111,20.2,20.222,2,5}
(1 row)
test=# select array_replace(array[1,20,2,5,1,20.1,1,20]);
array_replace
------------------------------------------------------------
{1.11,1,111,20.2,20.222,2,5,1,20.1,1.11,1,111,20.2,20.222}
(1 row)
As you can see it works for multiple occurrences of the search array.

Is there any hash function in PostgreSQL?

I am using Sphinx to index my database.
The problem is I have to filter the result by a character varying field.
So I have to find a way to convert character varying to sql_attr_uint.
I know that CRC32 in mysql can do the trick. Is there a CRC32 or any replacement in PostgreSQL?
This is the CRC32 function that Thinking Sphinx (a gem) defines:
-- crc32(word text) RETURNS bigint
--
-- CRC-32 (reflected polynomial 0xEDB88320 = 3988292384) of the bytes of word
-- in the database encoding, returned as a non-negative bigint.
-- Returns 0 for NULL and for the empty string.
--
-- The bytes are obtained with convert_to().  The original went through
-- decode(..., 'escape') after escaping only doubled backslashes, so any input
-- containing a single backslash raised an "invalid input syntax for type
-- bytea" error.
CREATE OR REPLACE FUNCTION crc32(word text)
RETURNS bigint AS $$
DECLARE
    tmp        bigint := 4294967295;   -- running CRC register, starts at 0xFFFFFFFF
    word_array bytea;
BEGIN
    IF COALESCE(word, '') = '' THEN
        RETURN 0;
    END IF;

    word_array := convert_to(word, getdatabaseencoding());

    FOR i IN 0 .. octet_length(word_array) - 1 LOOP
        tmp := tmp # get_byte(word_array, i);
        -- Shift out one bit at a time, folding in the polynomial whenever the
        -- bit shifted out is 1.
        FOR j IN 1 .. 8 LOOP
            tmp := (tmp >> 1) # (3988292384 * (tmp & 1));
        END LOOP;
    END LOOP;

    -- Final XOR with 0xFFFFFFFF.
    RETURN tmp # 4294967295;
END
$$ IMMUTABLE LANGUAGE plpgsql;
Maybe you can use decode(substring(md5('foo') for 8), 'hex'). This would get you bytea of first 4 bytes of md5 hash of this string.
You can convert it to integer using something like:
-- bytea_to_integer(bytea) RETURNS integer
--
-- Combines the first four bytes of the argument into one integer, with byte 0
-- as the least significant byte.  Because the result is a signed 32-bit
-- integer, it is negative when byte 3 is 128 or more.
-- Returns NULL for NULL input (strict).
create function bytea_to_integer(bytea)
returns integer strict
language sql as $$
select
      get_byte($1, 0)
    + (get_byte($1, 1) << 8)
    + (get_byte($1, 2) << 16)
    + (get_byte($1, 3) << 24);
$$;

operations with time firebird

Please help me resolve the following issue.
There are two TIME values: 12:20 and 23:55. How can I add them so that the result is 36:15?
Or, if that is not possible with TIME, how should I store these values in the database so that they can be added?
Firebird's data type TIME allows range between 00:00:00 and 24:00:00 only.
If you want to store arbitrary amount of time (say, in seconds) use INTEGER or NUMERIC datatype. Then convert it into time string format if needed.
You can use TIMESTAMP in dialect 3
With this you can add 2 times.
I have code here that converts an integer number of seconds into a time string:
/* P_CONVERT_TIME: formats a number of seconds as an 'HH:MM:SS' string.     */
/*   V_TIME_INT - duration in seconds.                                       */
/*   V_TIME_STR - hours, minutes and seconds, each padded to at least two    */
/*                digits; the hour part may exceed 24.                       */
/* A part that is not greater than zero is written as '00'.                  */
CREATE PROCEDURE P_CONVERT_TIME (
    V_TIME_INT INTEGER
)
RETURNS (
    V_TIME_STR VARCHAR(20)
)
AS
DECLARE VARIABLE v_hrs integer;
DECLARE VARIABLE v_mins integer;
DECLARE VARIABLE v_secs integer;
BEGIN
    /* Split the total into hours, minutes and remaining seconds. */
    v_secs = cast(V_TIME_INT as integer);

    v_hrs = coalesce(div(v_secs, 3600), 0);
    if (v_hrs > 0) then
        v_secs = v_secs - (3600 * v_hrs);

    v_mins = coalesce(div(v_secs, 60), 0);
    if (v_mins > 0) then
        v_secs = v_secs - (60 * v_mins);

    /* Hours */
    if (v_hrs >= 10) then
        V_TIME_STR = v_hrs || ':';
    else if (v_hrs > 0) then
        V_TIME_STR = '0' || v_hrs || ':';
    else
        V_TIME_STR = '00:';

    /* Minutes */
    if (v_mins >= 10) then
        V_TIME_STR = V_TIME_STR || v_mins || ':';
    else if (v_mins > 0) then
        V_TIME_STR = V_TIME_STR || '0' || v_mins || ':';
    else
        V_TIME_STR = V_TIME_STR || '00:';

    /* Seconds */
    if (v_secs >= 10) then
        V_TIME_STR = V_TIME_STR || v_secs;
    else if (v_secs > 0) then
        V_TIME_STR = V_TIME_STR || '0' || v_secs;
    else
        V_TIME_STR = V_TIME_STR || '00';

    suspend;
END
;