Can you please help me convert the SQL Server function below to DB2 format?
CREATE FUNCTION [dbo].[GetKeyStructureXml]
(
@pf_wkstn_oid_sh smallint
)
RETURNS varchar(max)
AS
BEGIN
DECLARE @RtKeys varchar(max)
set @RtKeys = (SELECT rt.rbase_field_name,
pf_wkstn_oid_sh,
pf_wkstn_oid_lng,
'N' status_indc,
rt.field_data_type,
rt.field_size,
'Rate Key ' DisplayType,
'' Author,
0 DateCreated,
rb.field_level_indc,
rb.field_scope_indc
FROM rt_tmplt_key rt inner join rbase_field_dict rb
on rb.rbase_field_name=rt.rbase_field_name
where pf_wkstn_oid_sh = @pf_wkstn_oid_sh
order by rt_key_sqnc_num asc
FOR XML AUTO, BINARY BASE64, root('TableKeys'))
RETURN @RtKeys;
END;
Please provide some guidance on the above conversion. It is quite confusing to figure out how to reproduce XML AUTO and BINARY BASE64 in DB2.
Here is an example of a scalar function constructing XML output from relational data.
I cast the XML result to text with XMLSERIALIZE just as an example, since some third-party tools can't work with the DB2 XML data type.
create or replace function test_xml(p_CustomerID int)
returns XML
return
with Customer (CustomerID, CustomerType) as (values
(1, 'S')
, (2, 'A')
)
, SalesOrderHeader (CustomerID, SalesOrderID, Status) as (values
(1, 11, '5')
, (1, 12, '5')
, (1, 13, '5')
, (1, 14, '5')
, (2, 21, '6')
, (2, 22, '6')
, (2, 23, '6')
, (2, 24, '6')
)
SELECT
XMLELEMENT(NAME "Cust", XMLATTRIBUTES(Cust.CustomerID as "CustomerID", Cust.CustomerType as "CustomerType"), OrderHeader.ord)
as col
FROM Customer Cust,
(
select CustomerID
, XMLAGG(XMLELEMENT(NAME "OrderHeader", XMLATTRIBUTES(CustomerID AS "CustomerID", SalesOrderID AS "SalesOrderID", Status as "Status"))) ord
from SalesOrderHeader
group by CustomerID
) OrderHeader
WHERE Cust.CustomerID = OrderHeader.CustomerID
and Cust.CustomerID=p_CustomerID;
values xmlserialize(test_xml(1) as clob(1k));
CREATE FUNCTION GetKeyStructureXml(v_pf_wkstn_oid_sh smallint)
RETURNS xml
LANGUAGE SQL
BEGIN ATOMIC
DECLARE v_RtKeys xml;
SET v_RtKeys=(SELECT XMLELEMENT(
NAME "TableKeys",
XMLAGG(XMLELEMENT( NAME "rt", XMLAttributes( rt.rbase_field_name AS "rbase_field_name",
rt.wkstn_oid_sh AS "wkstn_oid_sh",
rt.wkstn_oid_lng AS "wkstn_oid_lng",
rt_key_sqnc_num AS "rt_key_sqnc_num",
rt.rt_key_rtrvl_cd AS "rt_key_rtrvl_cd",
pf_wkstn_oid_sh AS "pf_wkstn_oid_sh",
pf_wkstn_oid_lng AS "pf_wkstn_oid_lng",
'N' AS "status_indc",
rt.field_data_type AS "field_data_type",
rt.field_size AS "field_size",
'Rate Key ' as "DisplayType",
'' as "Author",
0 as "DateCreated"),
XMLELEMENT( NAME "rb", XMLAttributes( rb.field_level_indc AS "field_level_indc",
rb.field_scope_indc AS "field_scope_indc"))) order by rt_key_sqnc_num asc) OPTION NULL ON NULL)
FROM rt_tmplt_key rt inner join rbase_field_dict rb
on rb.rbase_field_name=rt.rbase_field_name
where pf_wkstn_oid_sh = v_pf_wkstn_oid_sh );
RETURN v_RtKeys;
END
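As with the test_xml example above, you can serialize the result to character data for tools that cannot handle the XML type. A quick smoke test might look like this (the literal 1 is just a placeholder workstation OID and has to be cast to SMALLINT so DB2 can resolve the function):
VALUES XMLSERIALIZE(GetKeyStructureXml(CAST(1 AS SMALLINT)) AS CLOB(1M));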
I'm trying to use tSQLt AssertResultSetsHaveSameMetaData to check the metadata from a query that returns a large number of columns.
When it fails, the message that details the 'expected/but was' information is truncated, so I can't compare the two pieces of information to see what is wrong.
Is there a way to output the message so that it doesn't truncate (for example, to a file)?
It depends on what you are actually testing. I agree that on wide result sets the output from AssertResultSetsHaveSameMetaData can be a little unwieldy. For stored procedures you would write your tests like this:
create procedure [ProcedureTests].[test SelectProcedure result set contract]
as
begin
create table #expected
(
name varchar(500) not null
, column_ordinal int not null identity(1,1)
, system_type_name varchar(500) not null
, Nullability varchar(16) not null
)
; with expectedCte (name, system_type_name, Nullability)
as
(
select 'ItemId' , 'int' , 'not null'
union all select 'ActorId' , 'int' , 'not null'
union all select 'LanId' , 'nvarchar(200)' , 'not null'
union all select 'ConsumerId' , 'int' , 'not null'
union all select 'ConsumerMoniker' , 'nvarchar(200)' , 'not null'
union all select 'ProfileTypeId' , 'int' , 'not null'
union all select 'ProfileTypeName' , 'varchar(50)' , 'not null'
union all select 'ProfileId' , 'int' , 'not null'
)
insert #expected
(
name
, system_type_name
, Nullability
)
select name, system_type_name, Nullability from expectedCte;
--! Act
select
name
, column_ordinal
, system_type_name
, case is_nullable when 1 then 'null' else 'not null' end as [Nullability]
into
#actual
from
sys.dm_exec_describe_first_result_set_for_object(object_id('mySchema.SelectProcedure'), 0);
--! Assert
exec tSQLt.AssertEqualsTable #expected, #actual;
end;
go
Whilst for views you could use this (slightly different) approach:
alter procedure [ViewTests].[test ViewName resultset contract]
as
begin
create table [ViewTests].[expected]
(
TransactionId int not null
, SourceId int not null
, SourceKey nvarchar(50) not null
, TransactionTypeId int not null
, TransactionStatusId int not null
, LastModified datetime not null
);
--! Your comparison may be as simple as this (but see the alternative approach below)
exec tSQLt.AssertEqualsTableSchema '[ViewTests].[expected]', 'mySchema.ViewName';
--!
--! Seems that is_nullable column on dm_exec_describe_first_result_set (used by tSQLt.AssertEqualsTableSchema)
--! can be a bit flakey where views are concerned so you may need to ignore nullability when testing
--! this view (so comment out that column in both SELECTs)
--!
select
c.name as [ColumnName]
, c.column_id as [ColumnPosition]
, case
when st.name in ('char', 'varchar', 'varbinary')
then st.name + '(' + case when c.max_length = -1 then 'max' else coalesce(cast(c.max_length as varchar(8)), '???') end + ')'
when st.name in ('nchar', 'nvarchar')
then st.name + '(' + case when c.max_length = -1 then 'max' else coalesce(cast(c.max_length / 2 as varchar(8)), '???') end + ')'
when st.name in ('decimal', 'numeric')
then st.name + '(' + coalesce(cast(c.precision as varchar(8)), '???') + ',' + coalesce(cast(c.scale as varchar(8)), '???') + ')'
when st.name in ('time', 'datetime2', 'datetimeoffset')
then st.name + '(' + coalesce(cast(c.precision as varchar(8)), '???') + ')'
else st.name
end as [DataType]
, c.[precision] as [NumericPrecision]
, c.scale as [NumericScale]
, c.collation_name as [CollationName]
, cast(case c.is_nullable when 1 then 'null' else 'not null' end as varchar(16)) as [Nullability]
into
#expected
from
sys.columns as c
inner join sys.types as st
on st.system_type_id = c.system_type_id
and st.user_type_id = c.user_type_id
where
c.[object_id] = object_id('[ViewTests].[expected]')
select
name as [ColumnName]
, column_ordinal as [ColumnPosition]
, system_type_name as [DataType]
, [precision] as [NumericPrecision]
, scale as [NumericScale]
, collation_name as [CollationName]
, cast(case is_nullable when 1 then 'null' else 'not null' end as varchar(16)) as [Nullability]
into
#actual
from
sys.dm_exec_describe_first_result_set('select * from mySchema.ViewName', null, null)
exec tSQLt.AssertEqualsTable '#expected', '#actual' ;
end
go
In the event of any failures, the reason will be much clearer as the output is more like AssertEqualsTable.
You could try EXEC [tSQLt].[XmlResultFormatter]; before running your test. This is intended for use in "build server" scenarios, but could probably be pressed into service to show you more of the output in SSMS.
I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a similar script that just grabs the numeric value after some text; it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty well until I found that some of the data fields I assumed would be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE @textval NVARCHAR(30)
SET @textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(@textval, PATINDEX('%[0-9.-]%', @textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(@textval, PATINDEX('%[0-9.-]%', @textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (in SQL Server it is available from SQL Server 2017 onwards, and also in Azure SQL).
First, it replaces any non-numeric characters with a # character.
Then, it removes all # characters.
You may need to add further characters that you know may be present to the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
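For example, applied to one of the sample strings (a sketch, relying on the usual case-insensitive default collation since TRANSLATE is collation-aware; REPLICATE just builds a # string of the same length as the second argument):
SELECT REPLACE(TRANSLATE('ABCDE# 123',
       'abcdefghijklmnopqrstuvwxyz+()- ,#+',
       REPLICATE('#', 34)), '#', '')  -- returns '123'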
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
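A minimal illustration against the sample values from the question (note this only strips leading non-digit characters, which is enough here because the digits always come last):
SELECT STUFF(Col, 1, PATINDEX('%[0-9]%', Col) - 1, '') AS Cleaned
FROM (VALUES ('AB ABCDE # 123'), ('ABCDE# 123'), ('AB: ABC# 123')) AS t(Col)
-- each row returns '123'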
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
@Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare @KeepValues as varchar(50)
Set @KeepValues = '%[^0-9]%'
While PatIndex(@KeepValues, @Temp) > 0
Set @Temp = Stuff(@Temp, PatIndex(@KeepValues, @Temp), 1, '')
Return @Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
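Since the goal in the question is an in-place update, a hypothetical update using this function against the OP's own table and column names might look like:
UPDATE TableName
SET ColumntoUpdate = CAST(dbo.StripNonNumerics(Columnofdirtydata) AS INT)
WHERE ColumntoUpdate IS NULL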
In case there can be characters between the digits (e.g. thousands separators), you may try the following:
declare @table table (DirtyCol varchar(100))
insert into @table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by @@spid) from sys.all_columns),
data as (
select DirtyCol, Col
from @table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol              IntCol
--------------------- -------
AB ABCDE # 123        123
ABCDE# 123            123
AB: ABC# 123          123
AB # 1 000 000        1000000
AB # 1`234`567        1234567
AB # (9)(876)(543)    9876543
For the update, add ColToUpdate to the select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (@Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE @Trimmed NVARCHAR(512)
SELECT @Trimmed = @Input
WHILE PATINDEX('%[^0-9]%', @Trimmed) > 0
SELECT @Trimmed = REPLACE(@Trimmed, SUBSTRING(@Trimmed, PATINDEX('%[^0-9]%', @Trimmed), 1), '')
RETURN @Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, but I found the following, which I thought worked brilliantly, in case anyone is still looking:
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given I'm 35 years old; I was born in 1982. The average family has 2.4 children. it would return 35198224. It's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but it isn't appropriate if you're looking to pull out specific numbers (as opposed to just the digit characters) from the text. Also, it only handles digits, so it won't return negative signs (-) or periods (.).
declare @table table (id bigint not null identity (1,1), data nvarchar(max))
insert @table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from @table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits), removing each of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in two steps, rather than just removing all non-numeric characters in the first place, is that there are only 10 digits, whilst there are a huge number of possible characters; replacing that small list is relatively fast, and it gives us the set of non-numeric characters which actually exist in the string, so we only need to replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this also handles commas, spaces, parentheses and hyphens
--Handles parentheses, commas, spaces, hyphens..
declare @table table (c varchar(256))
insert into @table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from @table
Create function fn_GetNumbersOnly(@pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare @r varchar(max) ='', @len int ,@c char(1), @x int = 0
Select @len = len(@pn)
while @x <= @len
begin
Select @c = SUBSTRING(@pn,@x,1)
if ISNUMERIC(@c) = 1 and @c <> '-'
Select @r = @r + @c
Select @x = @x +1
end
return @r
End
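A hypothetical call against one of the sample values; note that ISNUMERIC also returns 1 for characters such as '.', ',' and '$', so those would be kept as well:
SELECT dbo.fn_GetNumbersOnly('AB: ABC# 123')  -- returns '123'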
In your case it seems like the number will always come after the # symbol, so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best (a sketch of that appears after the example below). But here is an interesting method of getting rid of ANY non-digit. It utilizes a tally table and a table of digits to limit which characters are accepted, then the XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is that it could be expanded to include ANY allowed characters and strip out anything that is not allowed.
DECLARE @ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO @ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE @Digits AS TABLE (D CHAR(1))
INSERT INTO @Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
@Digits d10
CROSS APPLY @Digits d100
--add more cross applies to cover longer fields this handles 100
)
SELECT *
FROM
@ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN @Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
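For comparison, here is a sketch of the CHARINDEX()/LTRIM()/RTRIM() approach mentioned at the start of this answer. It reuses the @ExampleData table variable above and assumes, per the OP's description, that the digits always follow the # symbol:
SELECT e.Col, LTRIM(RTRIM(SUBSTRING(e.Col, CHARINDEX('#', e.Col) + 1, 8000))) AS NumberPart
FROM @ExampleData e  -- returns '123' for each of the three sample rows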
Declare @MainTable table(id int identity(1,1),TextField varchar(100))
INSERT INTO @MainTable (TextField)
VALUES
('6B32E')
declare @i int=1
Declare @originalWord varchar(100)=''
While @i<=(Select count(*) from @MainTable)
BEGIN
Select @originalWord=TextField from @MainTable where id=@i
Declare @r varchar(max) ='', @len int ,@c char(1), @x int = 0
Select @len = len(@originalWord)
declare @pn varchar(100)=@originalWord
while @x <= @len
begin
Select @c = SUBSTRING(@pn,@x,1)
if(@c!='')
BEGIN
if ISNUMERIC(@c) = 0 and @c <> '-'
BEGIN
Select @r = cast(@r as varchar) + cast(replace((SELECT ASCII(@c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select @r = @r + @c
END
END
Select @x = @x +1
END
Select @r
Set @i=@i+1
END
I have created a function for this
Create FUNCTION RemoveCharacters (@text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare @index as int
declare @newtexval as varchar(30)
set @index = (select PATINDEX('%[A-Z.-/?]%', @text))
if (@index = 0)
begin
return @text
end
else
begin
set @newtexval = (select STUFF ( @text , @index , 1 , '' ))
return dbo.RemoveCharacters(@newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE @t TABLE (tVal VARCHAR(100))
INSERT INTO @t VALUES('123')
INSERT INTO @t VALUES('123S')
INSERT INTO @t VALUES('A123,123')
INSERT INTO @t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM @t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN @t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create a SQL CLR scalar function in order to be able to use regular expressions like replace patterns.
Here you can find an example of how to create such a function.
Having such a function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More importantly, you will be able to solve more complex issues, as regular expressions bring a whole new world of options directly into your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString            (No column name)
--------------------- ----------------
ssss123ssg99d362sdg   12399362
hey 62q&*^(n43        6243
NULL                  NULL
                      
hi                    
123                   123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern of "not quite getting the syntax correct or something". Given the stated requirement that "all data fields do have the # prior to the number", here is an answer that addresses the challenges with substring/patindex:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for patindex/substring, is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon to (in ETL, for example) need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1")
SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use the source length.
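A one-line illustration of that last point, with a length (8000) deliberately larger than the source string:
SELECT SUBSTRING('AB ABCDE # 123', PATINDEX('%# %', 'AB ABCDE # 123') + LEN('#-'), 8000)  -- returns '123'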
DECLARE @STR VARCHAR(400)
DECLARE @specialchars VARCHAR(50) = '%[~,@,#,$,%,&,*,(,),!^?:]%'
SET @STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( @specialchars, @STR ) > 0
---Remove special characters using Replace function
SET @STR = Replace(Replace(REPLACE( @STR, SUBSTRING( @STR, PATINDEX( @specialchars, @STR ), 1 ),''),'-',''), ' ','')
SELECT @STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table
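For instance, against one of the sample values (a sketch assuming an engine with REGEXP_REPLACE and POSIX character classes, such as Oracle or MySQL 8+; PostgreSQL needs an extra 'g' flag to replace every match, and most SQL Server versions do not have this function at all):
SELECT REGEXP_REPLACE('AB: ABC# 123', '[^[:digit:]]', '') AS new_col  -- returns '123'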
I have a table roughly as follows:
CREATE TABLE t_table (
f_userid BIGINT NOT NULL
,f_groupaid BIGINT
,f_groupbid BIGINT
,f_groupcid BIGINT
,f_itemid BIGINT
,f_value TEXT
);
The groups are orthogonal, so no hierarchy can be implied beyond the fact that every entry in the table will have a user ID. There is no uniqueness in any of the columns.
So for example a simple setup might be:
INSERT INTO t_table VALUES (1, NULL, NULL, NULL, NULL, 'Value for anything by user 1');
INSERT INTO t_table VALUES (1, 5, 2, NULL, NULL, 'Value for anything by user 1 in groupA 5 groupB 2');
INSERT INTO t_table VALUES (1, 4, NULL, 1, NULL, 'Value for anything by user 1 in groupA 4 and groupC 1');
INSERT INTO t_table VALUES (2, NULL, NULL, NULL, NULL, 'Value for anything by user 2');
INSERT INTO t_table VALUES (2, 1, NULL, NULL, NULL, 'Value for anything by user 2 in groupA 1');
INSERT INTO t_table VALUES (2, 1, 3, 4, 5, 'Value for item 5 by user 2 in groupA 1 and groupB 3 and groupC 4');
For any given set of user/groupA/groupB/groupC/item I want to be able to obtain the most specific item in the table that applies. If any of the given set are NULL then it can only match relevant columns in the table which contain NULL. For example:
// Exact match
SELECT MostSpecific(1, NULL, NULL, NULL, NULL) => "Value for anything by user 1"
// Match the second entry because groupC and item were not specified in the table and the other items matched
SELECT MostSpecific(1, 5, 2, 3, NULL) => "Value for anything by user 1 in groupA 5 groupB 2"
// Does not match the second entry because groupA is NULL in the query and set in the table
SELECT MostSpecific(1, NULL, 2, 3, 4) => "Value for anything by user 1"
The obvious approach here is for the stored procedure to work through the parameters, find out which are NULL and which are not, and then call the appropriate SELECT statement. But this seems very inefficient. Is there a better way of doing this?
This should do it: just filter out any non-matching rows using a WHERE clause, then rank the remaining rows by how well they match. If any column doesn't match, the whole bop expression will result in NULL, so we filter that out in an outer query, where we also order by the match score and limit the result to the single best match.
CREATE FUNCTION MostSpecific(BIGINT, BIGINT, BIGINT, BIGINT, BIGINT)
RETURNS TABLE(f_userid BIGINT, f_groupaid BIGINT, f_groupbid BIGINT, f_groupcid BIGINT, f_itemid BIGINT, f_value TEXT) AS
'WITH cte AS (
SELECT *,
CASE WHEN f_groupaid IS NULL THEN 0 WHEN f_groupaid = $2 THEN 1 END +
CASE WHEN f_groupbid IS NULL THEN 0 WHEN f_groupbid = $3 THEN 1 END +
CASE WHEN f_groupcid IS NULL THEN 0 WHEN f_groupcid = $4 THEN 1 END +
CASE WHEN f_itemid IS NULL THEN 0 WHEN f_itemid = $5 THEN 1 END bop
FROM t_table
WHERE f_userid = $1
AND (f_groupaid IS NULL OR f_groupaid = $2)
AND (f_groupbid IS NULL OR f_groupbid = $3)
AND (f_groupcid IS NULL OR f_groupcid = $4)
AND (f_itemid IS NULL OR f_itemid = $5)
)
SELECT f_userid, f_groupaid, f_groupbid, f_groupcid, f_itemid, f_value FROM cte
WHERE bop IS NOT NULL
ORDER BY bop DESC
LIMIT 1'
LANGUAGE SQL
//
An SQLfiddle to test with.
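Using the sample data from the question, a call like the following should return the second-row value ('Value for anything by user 1 in groupA 5 groupB 2'), since it matches on the most columns:
SELECT f_value FROM MostSpecific(1, 5, 2, 3, NULL);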
Try something like:
select *
from t_table t
where f_userid = $p_userid
and (t.f_groupaid is not distinct from $p_groupaid or t.f_groupaid is null) --null in f_groupaid matches both null and not null values
and (t.f_groupbid is not distinct from $p_groupbid or t.f_groupbid is null)
and (t.f_groupcid is not distinct from $p_groupcid or t.f_groupcid is null)
order by (t.f_groupaid is not distinct from $p_groupaid)::int -- order by count of matches
+(t.f_groupbid is not distinct from $p_groupbid)::int
+(t.f_groupcid is not distinct from $p_groupcid)::int desc
limit 1;
It will give you the best match on groups.
A is not distinct from B will return true if A and B are equal or both null.
::int means cast (as int). Casting boolean true to int gives 1 (you cannot add boolean values directly).
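A quick illustration of both points in PostgreSQL:
select (1 is not distinct from 1)::int as equal_values        -- 1
     , (null is not distinct from null)::int as both_null     -- 1
     , (1 is not distinct from null)::int as only_one_null;   -- 0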
SQL Fiddle
create or replace function mostSpecific(
p_userid bigint,
p_groupaid bigint,
p_groupbid bigint,
p_groupcid bigint,
p_itemid bigint
) returns t_table as $body$
select *
from t_table
order by
(p_userid is not distinct from f_userid or f_userid is null)::integer
+
(p_groupaid is not distinct from f_groupaid or f_groupaid is null)::integer
+
(p_groupbid is not distinct from f_groupbid or f_groupbid is null)::integer
+
(p_groupcid is not distinct from f_groupcid or f_groupcid is null)::integer
+
(p_itemid is not distinct from f_itemid or f_itemid is null)::integer
desc
limit 1
;
$body$ language sql;
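A hypothetical call, reusing the parameter values from the examples above; because this version returns a single t_table row, it can be queried directly in FROM:
select * from mostSpecific(1, 5, 2, 3, NULL);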