T-SQL Format integer to 2-digit string - tsql

I can't find a simple way to do this in T-SQL.
I have for example a column (SortExport_CSV) that returns an integer '2' thru 90.
If the stored number is a single digit, I need it to convert to a 2 digit string that begins with a 0.
I have tried to use CAST but I get stuck on how to display the style in the preferred format (0#)
Of course it is easy to do this on the front end (SSRS, MSAccess, Excel, etc) but in this instance I have no front end and must supply the raw dataset with the already formatted 2 digit string.

select right ('00'+ltrim(str( <number> )),2 )

You can use T-SQL's built in format function:
declare #number int = 1
select format (#number, '0#')

SELECT RIGHT('0' + CAST(sortexport_csv AS VARCHAR), 2)
FROM your_table

You're all doing too much work:
right(str(100+#x),2)
-- for a function, same idea:
--
create function zeroPad( #yourNum int, #wid int)
as
begin
return right( 1000000+#yourNum), #wid)
end

Convert the value to a string, add a zero in front of it (so that it's two or tree characters), and get the last to characters:
right('0'+convert(varchar(2),Sort_Export_CSV),2)

DECLARE #Number int = 1;
SELECT RIGHT('0'+ CONVERT(VARCHAR, #Number), 2)
--OR
SELECT RIGHT(CONVERT(VARCHAR, 100 + #Number), 2)
GO

Here is tiny function that left pad value with a given padding char
You can specify how many characters to be padded to left..
Create function fsPadLeft(#var varchar(200),#padChar char(1)='0',#len int)
returns varchar(300)
as
Begin
return replicate(#PadChar,#len-Len(#var))+#var
end
To call :
declare #value int; set #value =2
select dbo.fsPadLeft(#value,'0',2)

You could try this
SELECT RIGHT( '0' + convert( varchar(2) , '0' ), 2 ) -- OUTPUTS : 00
SELECT RIGHT( '0' + convert( varchar(2) , '8' ), 2 ) -- OUTPUTS : 08
SELECT RIGHT( '0' + convert( varchar(2) , '9' ), 2 ) -- OUTPUTS : 09
SELECT RIGHT( '0' + convert( varchar(2) , '10' ), 2 ) -- OUTPUTS : 10
SELECT RIGHT( '0' + convert( varchar(2) , '11' ), 2 ) -- OUTPUTS : 11
this should help

here you go
select RIGHT(REPLICATE('0', 2) + CAST(2 AS VARCHAR(2)), 2)
should return 02

try
right('0' + convert(varchar(2), #number),2)

Another example:
select
case when teamId < 10 then '0' + cast(teamId as char(1))
else cast(teamId as char(2)) end
as 'pretty id',
* from team

Try this
--Generate number from 2 to 90
;with numcte as(
select 2 as rn
union all
select rn+1 from numcte where rn<90)
--Program that formats the number based on length
select case when LEN(rn) = 1 then '00'+CAST(rn as varchar(10)) else CAST(rn as varchar(10)) end number
from numcte
Partial Output:
number
002
003
004
005
006
007
008
009
10
11
12
13
14
15
16
17
18
19
20

SELECT
replace(str(month(DATEADD(month, -1, '2012-02-29')), 2),' ' , '0')

You can create a function like this:
CREATE FUNCTION [dbo].[f_convert_int_to_2_digits](#input int)
RETURNS varchar(10)
AS
BEGIN
--return value
DECLARE #return varchar(10)
if #input < 10
begin
set #return = '0' + convert(varchar(1), #valor)
end
else
begin
set #return = convert(varchar(10), #input)
end
-- return result
RETURN #return
END
and then use it everywhere:
select [dbo].[f_convert_int_to_2_digits](<some int>)

Example for converting one digit number to two digit by adding 0 :
DECLARE #int INT = 9
SELECT CASE WHEN #int < 10
THEN FORMAT(CAST(#int AS INT),'0#')
ELSE
FORMAT(CAST(#int AS INT),'0')
END

DECLARE #Number int = 1;
SELECT RIGHT('0'+ CONVERT(VARCHAR, #Number), 2)
--OR
SELECT RIGHT(CONVERT(VARCHAR, 100 + #Number), 2)
GO

The simplest way is:
declare
#len int = 10,
#number int = 122222,
#prefix char = '0'
select replicate(#prefix, #len - len(#number)) + CAST(#number AS VARCHAR)
result:
0000122222

Related

SQL Setting Variable for Current Year Month by Combining two VarChar fields using case statement

I am trying to declare a variable for the current year and month based on a field called YEARMO.
It's a 6 character varchar field with the first 4 characters representing the year and the next 2 characters representing the month (ex. February 2018 as 201802).
I need the month variable to have a 0 in front of it if is a month that only has one digit (any month before October). Current code returns '20182' and would like it to return '201802'.
declare #current_month varchar(6)
set #current_month = CAST( year(getdate()) as varchar(4))
+ case when len(CAST( month(getdate()) as varchar(2))) < 1 then '0' +
CAST( month(getdate()) as varchar(2))
else CAST( month(getdate()) as varchar(2))
end
select #current_month;
You can use Convert function if you are looking for current month
select convert(char(6), getdate(), 112)
Your code is over complicated. Use right instead:
declare #current_month char(6)
set #current_month = CAST( year(getdate()) as varchar(4)) + right('00' + cast(month(getdate()) as varchar(2)), 2)
select #current_month;
Result:
201802
If you really want to use case instead, you should change the 1 to 2:
set #current_month = CAST( year(getdate()) as varchar(4)) +
case when len(CAST(month(getdate()) as varchar(2))) < 2 then
'0' + CAST( month(getdate()) as varchar(2)) else
CAST( month(getdate()) as varchar(2)) end
declare #current_month varchar(6)
set #current_month = CAST(year(getdate())*100 + month(getdate()) as varchar(6))

Select query to remove non-numeric characters

I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a script that is similar that just grabs the numeric value after text, it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty good until I found that some of the data fields I thought would just be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE #textval NVARCHAR(30)
SET #textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (on sql server it's available on sql server 2017+ and also sql azure).
First, it replaces any non numeric characters with a # character.
Then, it removes all # characters.
You may need to add additional characters that you know may be present in the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
#Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^0-9]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
In case if there are some characters possible between digits (e.g. thousands separators), you may try following:
declare #table table (DirtyCol varchar(100))
insert into #table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by ##spid) from sys.all_columns),
data as (
select DirtyCol, Col
from #table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol IntCol
--------------------- -------
AB ABCDE # 123 123
ABCDE# 123 123
AB: ABC# 123 123
AB # 1 000 000 1000000
AB # 1`234`567 1234567
AB # (9)(876)(543) 9876543
For update, add ColToUpdate to select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (#Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE #Trimmed NVARCHAR(512)
SELECT #Trimmed = #Input
WHILE PATINDEX('%[^0-9]%', #Trimmed) > 0
SELECT #Trimmed = REPLACE(#Trimmed, SUBSTRING(#Trimmed, PATINDEX('%[^0-9]%', #Trimmed), 1), '')
RETURN #Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, I found the following which I though worked brilliantialy.. if anyone is still looking
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given I'm 35 years old; I was born in 1982. The average family has 2.4 children. this would return 35198224. i.e. it's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but isn't appropriate if you're looking to pull out specific numbers (i.e. as opposed to digits / just the numeric characters) from the text. Also it only handles digits; so won't return negative signs (-) or periods .).
declare #table table (id bigint not null identity (1,1), data nvarchar(max))
insert #table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from #table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from a the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits) removing all of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in 2 steps, rather than just removing all non-numeric characters in the first place is there are only 10 digits, whilst there are a huge number of possible characters; so replacing that small list is relatively fast; then gives us a list of those non-numeric characters which actually exist in the string, so we can then replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this handles commas and spaces and parentheses
--Handles parentheses, commas, spaces, hyphens..
declare #table table (c varchar(256))
insert into #table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from #table
Create function fn_GetNumbersOnly(#pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#pn)
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if ISNUMERIC(#c) = 1 and #c <> '-'
Select #r = #r + #c
Select #x = #x +1
end
return #r
End
In your case It seems like the # will always be after teh # symbol so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best. But here is an interesting method of getting rid of ANY non digit. It utilizes a tally table and table of digits to limit which characters are accepted then XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is it could be expanded to included ANY Allowed characters and strip out anything that is not allowed.
DECLARE #ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO #ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE #Digits AS TABLE (D CHAR(1))
INSERT INTO #Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
#Digits d10
CROSS APPLY #Digits d100
--add more cross applies to cover longer fields this handles 100
)
SELECT *
FROM
#ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN #Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
Declare #MainTable table(id int identity(1,1),TextField varchar(100))
INSERT INTO #MainTable (TextField)
VALUES
('6B32E')
declare #i int=1
Declare #originalWord varchar(100)=''
WHile #i<=(Select count(*) from #MainTable)
BEGIN
Select #originalWord=TextField from #MainTable where id=#i
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#originalWord)
declare #pn varchar(100)=#originalWord
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if(#c!='')
BEGIN
if ISNUMERIC(#c) = 0 and #c <> '-'
BEGIN
Select #r = cast(#r as varchar) + cast(replace((SELECT ASCII(#c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select #r = #r + #c
END
END
Select #x = #x +1
END
Select #r
Set #i=#i+1
END
I have created a function for this
Create FUNCTION RemoveCharacters (#text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare #index as int
declare #newtexval as varchar(30)
set #index = (select PATINDEX('%[A-Z.-/?]%', #text))
if (#index =0)
begin
return #text
end
else
begin
set #newtexval = (select STUFF ( #text , #index , 1 , '' ))
return dbo.RemoveCharacters(#newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE #t TABLE (tVal VARCHAR(100))
INSERT INTO #t VALUES('123')
INSERT INTO #t VALUES('123S')
INSERT INTO #t VALUES('A123,123')
INSERT INTO #t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM #t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN #t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create SQL CLR scalar function in order to be able to use regular expressions like replace patterns.
Here you can find example of how to create such function.
Having such function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More important, you will be able to solve more complex issues as the regular expressions will bring a whole new world of options directly in your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString
(No column name)
ssss123ssg99d362sdg
12399362
hey62q&*^(n43
6243
NULL
NULL
hi
123
123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern "not quite getting the syntax correct or something". To address that the requirements note that "all data fields do have the # prior to the number" and to provide an answer that addresses the challenges with substring/patindex, consider the following:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for patindex/substring, is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon to (in ETL, for example) need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1"
SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use "The source length"
DECLARE #STR VARCHAR(400)
DECLARE #specialchars VARCHAR(50) = '%[~,#,#,$,%,&,*,(,),!^?:]%'
SET #STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( #specialchars, #STR ) > 0
---Remove special characters using Replace function
SET #STR = Replace(Replace(REPLACE( #STR, SUBSTRING( #STR, PATINDEX( #specialchars, #STR ), 1 ),''),'-',''), ' ','')
SELECT #STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table

T-SQL Get percentage of character match of 2 strings

Let's say I have a set of 2 words:
Alexander and Alecsander OR Alexander and Alegzander
Alexander and Aleaxnder, or any other combination. In general we are talking about human error in typing of a word or a set of words.
What I want to achieve is to get the percentage of matching of the characters of the 2 strings.
Here is what I have so far:
DECLARE #table1 TABLE
(
nr INT
, ch CHAR
)
DECLARE #table2 TABLE
(
nr INT
, ch CHAR
)
INSERT INTO #table1
SELECT nr,ch FROM [dbo].[SplitStringIntoCharacters] ('WORD w') --> return a table of characters(spaces included)
INSERT INTO #table2
SELECT nr,ch FROM [dbo].[SplitStringIntoCharacters] ('WORD 5')
DECLARE #resultsTable TABLE
(
ch1 CHAR
, ch2 CHAR
)
INSERT INTO #resultsTable
SELECT DISTINCt t1.ch ch1, t2.ch ch2 FROM #table1 t1
FULL JOIN #table2 t2 ON t1.ch = t2.ch --> returns both matches and missmatches
SELECT * FROM #resultsTable
DECLARE #nrOfMathches INT, #nrOfMismatches INT, #nrOfRowsInResultsTable INT
SELECT #nrOfMathches = COUNT(1) FROM #resultsTable WHERE ch1 IS NOT NULL AND ch2 IS NOT NULL
SELECT #nrOfMismatches = COUNT(1) FROM #resultsTable WHERE ch1 IS NULL OR ch2 IS NULL
SELECT #nrOfRowsInResultsTable = COUNT(1) FROM #resultsTable
SELECT #nrOfMathches * 100 / #nrOfRowsInResultsTable
The SELECT * FROM #resultsTable will return the following:
ch1 ch2
NULL 5
[blank] [blank]
D D
O O
R R
W W
Ok, here is my solution so far:
SELECT [dbo].[GetPercentageOfTwoStringMatching]('valentin123456' ,'valnetin123456')
returns 86%
CREATE FUNCTION [dbo].[GetPercentageOfTwoStringMatching]
(
#string1 NVARCHAR(100)
,#string2 NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
DECLARE #levenShteinNumber INT
DECLARE #string1Length INT = LEN(#string1)
, #string2Length INT = LEN(#string2)
DECLARE #maxLengthNumber INT = CASE WHEN #string1Length > #string2Length THEN #string1Length ELSE #string2Length END
SELECT #levenShteinNumber = [dbo].[LEVENSHTEIN] ( #string1 ,#string2)
DECLARE #percentageOfBadCharacters INT = #levenShteinNumber * 100 / #maxLengthNumber
DECLARE #percentageOfGoodCharacters INT = 100 - #percentageOfBadCharacters
-- Return the result of the function
RETURN #percentageOfGoodCharacters
END
-- =============================================
-- Create date: 2011.12.14
-- Description: http://blog.sendreallybigfiles.com/2009/06/improved-t-sql-levenshtein-distance.html
-- =============================================
CREATE FUNCTION [dbo].[LEVENSHTEIN](#left VARCHAR(100),
#right VARCHAR(100))
returns INT
AS
BEGIN
DECLARE #difference INT,
#lenRight INT,
#lenLeft INT,
#leftIndex INT,
#rightIndex INT,
#left_char CHAR(1),
#right_char CHAR(1),
#compareLength INT
SET #lenLeft = LEN(#left)
SET #lenRight = LEN(#right)
SET #difference = 0
IF #lenLeft = 0
BEGIN
SET #difference = #lenRight
GOTO done
END
IF #lenRight = 0
BEGIN
SET #difference = #lenLeft
GOTO done
END
GOTO comparison
COMPARISON:
IF ( #lenLeft >= #lenRight )
SET #compareLength = #lenLeft
ELSE
SET #compareLength = #lenRight
SET #rightIndex = 1
SET #leftIndex = 1
WHILE #leftIndex <= #compareLength
BEGIN
SET #left_char = substring(#left, #leftIndex, 1)
SET #right_char = substring(#right, #rightIndex, 1)
IF #left_char <> #right_char
BEGIN -- Would an insertion make them re-align?
IF( #left_char = substring(#right, #rightIndex + 1, 1) )
SET #rightIndex = #rightIndex + 1
-- Would an deletion make them re-align?
ELSE IF( substring(#left, #leftIndex + 1, 1) = #right_char )
SET #leftIndex = #leftIndex + 1
SET #difference = #difference + 1
END
SET #leftIndex = #leftIndex + 1
SET #rightIndex = #rightIndex + 1
END
GOTO done
DONE:
RETURN #difference
END
Ultimately, you appear to be looking to solve for the likelihood that two strings are a "fuzzy" match to one another.
SQL provides efficient, optimized built-in functions that will do that for you, and likely with better performance than what you have written. The two functions you are looking for are SOUNDEX and DIFFERENCE.
While neither of them solves exactly what you asked for - i.e. they do not return a percentage match - I believe they solve what you are ultimately trying to achieve.
SOUNDEX returns a 4-character code which is the first letter of the word plus a 3-number code that represents the sound pattern of the word. Consider the following:
SELECT SOUNDEX('Alexander')
SELECT SOUNDEX('Alegzander')
SELECT SOUNDEX('Owleksanndurr')
SELECT SOUNDEX('Ulikkksonnnderrr')
SELECT SOUNDEX('Jones')
/* Results:
A425
A425
O425
U425
J520
*/
What you will notice is that the three-digit number 425 is the same for all of the ones that roughly sound alike. So you could easily match them up and say "You typed 'Owleksanndurr', did you perhaps mean 'Alexander'?"
In addition, there's the DIFFERENCE function, which compares the SOUNDEX discrepancy between two strings and gives it a score.
SELECT DIFFERENCE( 'Alexander','Alexsander')
SELECT DIFFERENCE( 'Alexander','Owleksanndurr')
SELECT DIFFERENCE( 'Alexander', 'Jones')
SELECT DIFFERENCE( 'Alexander','ekdfgaskfalsdfkljasdfl;jl;asdj;a')
/* Results:
4
3
1
1
*/
As you can see, the lower the score (between 0 and 4), the more likely the strings are a match.
The advantage of SOUNDEX over DIFFERENCE is that if you really need to do frequent fuzzy matching, you can store and index the SOUNDEX data in a separate (indexable) column, whereas DIFFERENCE can only calculate the SOUNDEX at the time of comparison.

Check if a varchar is a number (T-SQL)

Is there an easy way to figure out if a varchar is a number?
Examples:
abc123 --> no number
123 --> yes, its a number
ISNUMERIC will not do - it tells you that the string can be converted to any of the numeric types, which is almost always a pointless piece of information to know. For example, all of the following are numeric, according to ISNUMERIC:
£, $, 0d0
If you want to check for digits and only digits, a negative LIKE expression is what you want:
not Value like '%[^0-9]%'
ISNUMERIC will do
Check the NOTES section too in the article.
You can check like this:
declare #vchar varchar(50)
set #vchar ='34343';
select case when #vchar not like '%[^0-9]%' then 'Number' else 'Not a Number' end
Using SQL Server 2012+, you can use the TRY_* functions if you have specific needs. For example,
-- will fail for decimal values, but allow negative values
TRY_CAST(#value AS INT) IS NOT NULL
-- will fail for non-positive integers; can be used with other examples below as well, or reversed if only negative desired
TRY_CAST(#value AS INT) > 0
-- will fail if a $ is used, but allow decimals to the specified precision
TRY_CAST(#value AS DECIMAL(10,2)) IS NOT NULL
-- will allow valid currency
TRY_CAST(#value AS MONEY) IS NOT NULL
-- will allow scientific notation to be used like 1.7E+3
TRY_CAST(#value AS FLOAT) IS NOT NULL
I ran into the need to allow decimal values, so I used not Value like '%[^0-9.]%'
Wade73's answer for decimals doesn't quite work. I've modified it to allow only a single decimal point.
declare #MyTable table(MyVar nvarchar(10));
insert into #MyTable (MyVar)
values
(N'1234')
, (N'000005')
, (N'1,000')
, (N'293.8457')
, (N'x')
, (N'+')
, (N'293.8457.')
, (N'......');
-- This shows that Wade73's answer allows some non-numeric values to slip through.
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' then 1 else 0 end as IsNumber
from
#MyTable
) t order by IsNumber;
-- Notice the addition of "and MyVar not like N'%.%.%'".
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' and MyVar not like N'%.%.%' then 1 else 0 end as IsNumber
from
#MyTable
) t
order by IsNumber;
Damien_The_Unbeliever noted that his was only good for digits
Wade73 added a bit to handle decimal points
neizan made an additional tweak as did notwhereuareat.
Unfortunately, none appear to handle negative values and they appear to have issues with a comma in the value...
Here's my tweak to pick up negative values and those with commas
declare #MyTable table(MyVar nvarchar(10));
insert into #MyTable (MyVar)
values
(N'1234')
, (N'000005')
, (N'1,000')
, (N'293.8457')
, (N'x')
, (N'+')
, (N'293.8457.')
, (N'......')
, (N'.')
, (N'-375.4')
, (N'-00003')
, (N'-2,000')
, (N'3-3')
, (N'3000-')
;
-- This shows that Neizan's answer allows "." to slip through.
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' then 1 else 0 end as IsNumber
from
#MyTable
) t order by IsNumber;
-- Notice the addition of "and MyVar not like '.'".
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' and MyVar not like N'%.%.%' and MyVar not like '.' then 1 else 0 end as IsNumber
from
#MyTable
) t
order by IsNumber;
--Trying to tweak for negative values and the comma
--Modified when comparison
select * from (
select
MyVar
, case
when MyVar not like N'%[^0-9.,-]%' and MyVar not like '.' and isnumeric(MyVar) = 1 then 1
else 0
end as IsNumber
from
#MyTable
) t
order by IsNumber;
DECLARE #A nvarchar(100) = '12'
IF(ISNUMERIC(#A) = 1)
BEGIN
PRINT 'YES NUMERIC'
END
Neizan's code lets values of just a "." through. At the risk of getting too pedantic, I added one more AND clause.
declare #MyTable table(MyVar nvarchar(10));
insert into #MyTable (MyVar)
values
(N'1234')
, (N'000005')
, (N'1,000')
, (N'293.8457')
, (N'x')
, (N'+')
, (N'293.8457.')
, (N'......')
, (N'.')
;
-- This shows that Neizan's answer allows "." to slip through.
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' then 1 else 0 end as IsNumber
from
#MyTable
) t order by IsNumber;
-- Notice the addition of "and MyVar not like '.'".
select * from (
select
MyVar
, case when MyVar not like N'%[^0-9.]%' and MyVar not like N'%.%.%' and MyVar not like '.' then 1 else 0 end as IsNumber
from
#MyTable
) t
order by IsNumber;
Do not forget to exclude carriage returns from your data!
As in:
SELECT
Myotherval
, CASE WHEN TRIM(REPLACE([MyVal], char(13) + char(10), '')) not like '%[^0-9]%' and RTRIM(REPLACE([MyVal], char(13) + char(10), '')) not like '.' and isnumeric(REPLACE([MyVal], char(13) + char(10), '')) = 1 THEN 'my number: ' + [MyVal]
ELSE ISNULL(Cast([MyVal] AS VARCHAR(8000)), '')
END AS 'MyVal'
FROM MyTable
In case you want to add a constraint on a field:
Positive integer with fixed length
ALTER TABLE dbo.BankBranchType
ADD CONSTRAINT CK_TransitNumberMustBe5Digits
CHECK (TransitNumber NOT like '%[^0-9]%'
AND LEN(TransitNumber) = 5)
To check the Number, Currency, and Amount, use the below SQL fragment.
#value NOT LIKE '%[^0-9.,]%'
For a quick win, refer to the below example:
Function example:
CREATE FUNCTION [dbo].[fnCheckValueIsNumber](
#value NVARCHAR(255)=NULL
)RETURNS INT AS BEGIN
DECLARE #ReturnValue INT=0
IF EXISTS (SELECT * WHERE #value NOT LIKE '%[^0-9.,]%') SELECT #ReturnValue=1
RETURN #ReturnValue;
Execution result
SELECT [dbo].[fnCheckValueIsNumber]('12345')
RESULT = 1
SELECT [dbo].[fnCheckValueIsNumber]('10020.25')
RESULT = 1
SELECT [dbo].[fnCheckValueIsNumber]('10,020.25')
RESULT = 1
SELECT [dbo].[fnCheckValueIsNumber]('12,345ABCD')
RESULT = 0

Implementing and applying a string split in T-SQL

I have this statement in T-SQL.
SELECT Bay From TABLE where uid in (
select B_Numbers from Info_Step WHERE uid = 'number'
)
I am selecting "multiple" BAYs from TABLE where their uid is equal to a string of numbers like this:
B_Numbers = 1:45:34:98
Therefore, I should be selecting 4 different BAYs from TABLE. I basically need to split the string 1:45:34:98 up into 4 different numbers.
I'm thinking that Split() would work, but it doesn't and I get a syntax error.
Any thoughts from the T-SQL gods would be awesome!
Here is an implementation of a split function that returns the list of numbers as a table:
http://rbgupta.blogspot.com/2007/03/split-function-tsql.html
Looks like this would set you on your way...
Here is a method that uses an auxiliary numbers table to parse the input string. The logic can easily be added to a function that returns a table. That table can then be joined to lookup the correct rows.
Step 1: Create the Numbers table
SET NOCOUNT ON
GO
IF EXISTS
(
SELECT 1
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_NAME = 'Numbers'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_TYPE = 'BASE TABLE'
)
BEGIN
DROP TABLE dbo.Numbers
END
GO
CREATE TABLE dbo.Numbers
(
Number smallint IDENTITY(1, 1) PRIMARY KEY
)
GO
WHILE 1 = 1
BEGIN
INSERT INTO dbo.Numbers DEFAULT VALUES
IF SCOPE_IDENTITY() = 32767
BEGIN
BREAK
END
END
GO
Step 2: Parse the Input String
CREATE FUNCTION dbo.ParseString(#input_string varchar(8000), #delim varchar(8000) = " ")
RETURNS TABLE
AS RETURN
(
SELECT Number
FROM dbo.Numbers
WHERE CHARINDEX
(
#delim + CONVERT(VARCHAR(12),Number) + #delim,
#delim + #input_string + #delim
) > 0
)
GO
**EXAMPLE**
SELECT * FROM dbo.ParseString('1:45:34:98',':')
Step 3: Use the results however you want/need
Number
------
1
34
45
98
End-To-End Example
Create function that returns the appropriate BNumber (of course change it to use the commented out SQL)
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION dbo.GetBNumber (#uid int)
RETURNS VARCHAR(8000)
AS
BEGIN
RETURN '1:45:34:98'
--select B_Numbers from Info_Step WHERE uid = #uid
END
GO
Use the use functions to return the desired results
-- Using Test Data
SELECT N.Number FROM Numbers N
JOIN dbo.ParseString(dbo.GetBNumber(12345),':') Q ON Q.Number = N.Number
-- Using Your Data (Untested but should work.)
SELECT N.Bay
FROM TABLE N
JOIN dbo.ParseString(dbo.GetBNumber(ENTER YOU NUMBER HERE),':') Q ON Q.Number = N.uid
Results
Number
------
1
34
45
98
You should keep your arrays as rows but if I understand your question I think this will work.
SELECT
Bay
From
TABLE
join Info_Step
on B_Numbers like '%'+ uid +'%'
where
Info_Step.uid = 'number'
This query will do a full table scan because of the like operator.
What you can do is loop through the B_Numbers entries and do your own split on : Insert those entries into a temp table and then perform your query.
DECLARE #i int
DECLARE #start int
DECLARE #B_Numbers nvarchar(20)
DECLARE #temp table (
number nvarchar(10)
)
-- SELECT B_Numbers FROM Info_Step WHERE uid = 'number'
SELECT #B_Numbers = '1:45:34:98'
SET #i = 0
SET #start = 0
-- Parse out characters delimited by ":";
-- Would make a nice user defined function.
WHILE #i < len(#B_Numbers)
BEGIN
IF substring(#B_Numbers, #i, 1) = ':'
BEGIN
INSERT INTO #temp
VALUES (substring(#B_Numbers, #start, #i - #start))
SET #start = #i + 1
END
SET #i = #i + 1
END
-- Insert last item
INSERT INTO #temp
VALUES (substring(#B_Numbers, #start, #i - #start + 1))
-- Do query with parsed values
SELECT Bay FROM TABLE WHERE uid in (SELECT * FROM #temp)
You can even try this
declare #str varchar(50)
set #str = '1:45:34:98'
;with numcte as(
select 1 as rn union all select rn+1 from numcte where rn<LEN(#str)),
getchars as(select
ROW_NUMBER() over(order by rn) slno,
rn,chars from numcte
cross apply(select SUBSTRING(#str,rn,1) chars)X where chars = ':')
select top 1
Bay1 = SUBSTRING(#str,0,(select rn from getchars where slno = 1))
,Bay2 = SUBSTRING(#str,
(select rn from getchars where slno = 1) + 1,
(((select rn from getchars where slno = 2)-
(select rn from getchars where slno = 1)
)-1))
,Bay3 = SUBSTRING(#str,
(select rn from getchars where slno = 2) + 1,
(((select rn from getchars where slno = 3)-
(select rn from getchars where slno = 2)
)-1))
,Bay4 = SUBSTRING(#str,
(select rn from getchars where slno = 3)+1,
LEN(#str))
from getchars
Output:
Bay1 Bay2 Bay3 Bay4
1 45 34 98