T-SQL: Pivot but for semicolon-separated values instead of columns - tsql

I've got semicolon-separated values in a column Values in my table:
Values
1;2;3;4;5
I would like to transform it in a procedure to have there values as rows:
Values
1
2
3
4
5
How could I do it in T-SQL?

Solution 1(using xml):
declare #str varchar(20)
declare #xml as xml
set #str= '1;2;3;4;5'
SET #xml = cast(('<x>'+replace(#str,';' ,'</x><x>')+'</x>') as xml)
SELECT col.value('.', 'varchar(10)') as value FROM #xml.nodes('x') as tbl(col)
Solution 2(using recursive cte)
declare #str as varchar(100)
declare #delimiter as char(1)
set #delimiter = ';'
set #str = '1;2;3;4;5' -- original data
set #str = #delimiter + #str + #delimiter
;with num_cte as
(
select 1 as rn
union all
select rn +1 as rn
from num_cte
where rn <= len(#str)
)
, get_delimiter_pos_cte as
(
select
ROW_NUMBER() OVER (ORDER BY rn) as rowid,
rn as delimiterpos
from num_cte
cross apply( select substring(#str,rn,1) AS chars) splittedchars
where chars = #delimiter
)
select substring(#str,a.delimiterpos+1 ,c2.delimiterpos - a.delimiterpos - 1) as Countries
from get_delimiter_pos_cte a
inner join get_delimiter_pos_cte c2 on c2.rowid = a.rowid+1
option(maxrecursion 0)

The thing that struck me as possibly leaving room for an additional answer, or additional improvement was that most of the answers/links given were how to split values like this for a single scalar value as opposed to how to apply that kind of splitting logic for a column of values in a table.
I include both a numbers table solution and an XML solution. The XML solution was inspired by the earlier post priyanka.sarkar. I think that a numbers table solution, using an actual numbers table instead of the CTE as in the below solution is probably the fastest, but the XML approach deserves to be developed upon because it's really nice looking.
So, here goes my attempt.
CREATE PROCEDURE PARSE_DELIMITED_VALUES
AS
WITH FIRST_NUMBERS (N) AS (
SELECT 1 UNION ALL SELECT 1
), SECOND_NUMBERS (N) AS (
SELECT E1.N
FROM FIRST_NUMBERS E1
CROSS JOIN FIRST_NUMBERS E2
), THIRD_NUMBERS (N) AS (
SELECT E1.N
FROM SECOND_NUMBERS E1
CROSS JOIN SECOND_NUMBERS E2
), FOURTH_NUMBERS (N) AS (
SELECT E1.N
FROM THIRD_NUMBERS E1
CROSS JOIN THIRD_NUMBERS E2
), FIFTH_NUMBERS (N) AS (
SELECT E1.N
FROM FOURTH_NUMBERS E1
CROSS JOIN FOURTH_NUMBERS E2
), NUMBERS (N) AS (
SELECT N
FROM NUMBERS
WHERE N <= 8000 /*adjust these as needed to come up with a max number equal to the max character length allowed in the Values column*/
/*or better yet, if you can, just remove this first...numbers... header stuff so long as you create a temp or permanent table that contains the same numbers to work with*/
)
SELECT SUBSTRING(
MYTABLE.Values,
CASE
WHEN NUMBERS.NUMBER = 1 THEN 1
ELSE NUMBERS.NUMBER + 1
END,
CASE CHARINDEX(';', MYTABLE.Values, NUMBERS.NUMBER + 1)
WHEN 0 THEN LEN('^' + MYTABLE.Values + '^') - 2 + 1
ELSE CHARINDEX(';', MYTABLE.Values, NUMBERS.NUMBER + 1)
END
- CASE
WHEN NUMBERS.NUMBER = 1 THEN 1
ELSE NUMBERS.NUMBER + 1
END
) AS PARSED_VALUE
FROM MYTABLE
INNER JOIN NUMBERS
ON NUMBERS.NUMBER <= LEN('^' + MYTABLE.Values + '^') - 2
AND (
NUMBERS.NUMBER = 1
OR SUBSTRING(MYTABLE.Values, NUMBERS.NUMBER, 1) = ';'
)
GO
-- if your values column can contain NULL values I would change the join at the end as follows:
--from INNER JOIN NUMBERS
--to LEFT OUTER JOIN NUMBERS
The above would probably be most performant if the WITH NUMBERS ... CTEs were replaced by a temporary or permanent table containing the same numeric values.
On the other hand the CTE does the job and keeps it more in one place.
CREATE PROCEDURE PARSE_DELIMITED_VALUES
AS
SELECT E.x.value('.', 'VARCHAR(MAX)') AS PARSED_VALUE
FROM (
SELECT CAST('<x>' + REPLACE(Values, ';', '</x><x>') + '</x>' AS XML) my_x
FROM MYTABLE
) TT
CROSS APPLY my_x.nodes('/x') AS E(x)
GO
-- if your values column can contain NULL values I would change the join at the end as follows:
from `CROSS APPLY`
to `OUTER APPLY`

It's not the most elegant approach, but this might be worth a try. It creates a Sql Command as a string, and at the end executes it.
DECLARE #Values VARCHAR(8000)
-- Flatten all values lists into one string
SET #Values = REPLACE(REPLACE((SELECT [Value] FROM [dbo.MyTable] FOR XML PATH('')), '<Value>', ''), '</Value>', ';')
SET #Values = SUBSTRING(#Values, 0, LEN(#Values))
DECLARE #SeparatorIndex INT
SET #SeparatorIndex = (SELECT TOP 1 PATINDEX('%[;]%', #Values))
DECLARE #InsertClause VARCHAR(50)
SET #InsertClause = 'INSERT INTO [dbo.MyTable] VALUES ('
DECLARE #SQL VARCHAR(500)
SET #SQL = #InsertClause + SUBSTRING(#Values, 0, #SeparatorIndex) + '); '
SET #Values = RIGHT(#Values, LEN(#Values) - (#SeparatorIndex - 1))
SET #SQL = REPLACE(#SQL + (SELECT (REPLACE(#Values, ';', '); ' + #InsertClause))) + ')', '; )', '')
EXEC (#SQL)
The command ends up (in Sql Server 2005) as:
INSERT INTO [dbo.MyTable] VALUES (1); INSERT INTO [dbo.MyTable] VALUES (2); INSERT INTO [dbo.MyTable] VALUES (3); INSERT INTO [dbo.MyTable] VALUES (4); INSERT INTO [dbo.MyTable] VALUES (5) ...'

Do you actually mean, "rows," as in, "tuples," (so you can insert the data into another table, one element per row) or do you mean you want the data displayed vertically?
I'd think a string Replace (look up T-SQL's String Functions) would do the trick, no? Depending on the output target, you'd replace ; with CRLF or . You could even use Replace to create dynamic SQL Insert statements that could be executed by the SP to do row inserts (if that was your intent).
For presentation purposes, this is bad practice.
If it is purely for presentation and you are permitted, I'd output everything as XML then XSLT it any way you want. Honestly, I don't remember the last time I operated directly on a recordset. I always output to XML.

Related

Removing all the Alphabets from a string using a single SQL Query [duplicate]

I'm currently doing a data conversion project and need to strip all alphabetical characters from a string. Unfortunately I can't create or use a function as we don't own the source machine making the methods I've found from searching for previous posts unusable.
What would be the best way to do this in a select statement? Speed isn't too much of an issue as this will only be running over 30,000 records or so and is a once off statement.
You can do this in a single statement. You're not really creating a statement with 200+ REPLACEs are you?!
update tbl
set S = U.clean
from tbl
cross apply
(
select Substring(tbl.S,v.number,1)
-- this table will cater for strings up to length 2047
from master..spt_values v
where v.type='P' and v.number between 1 and len(tbl.S)
and Substring(tbl.S,v.number,1) like '[0-9]'
order by v.number
for xml path ('')
) U(clean)
Working SQL Fiddle showing this query with sample data
Replicated below for posterity:
create table tbl (ID int identity, S varchar(500))
insert tbl select 'asdlfj;390312hr9fasd9uhf012 3or h239ur ' + char(13) + 'asdfasf'
insert tbl select '123'
insert tbl select ''
insert tbl select null
insert tbl select '123 a 124'
Results
ID S
1 390312990123239
2 123
3 (null)
4 (null)
5 123124
CTE comes for HELP here.
;WITH CTE AS
(
SELECT
[ProductNumber] AS OrigProductNumber
,CAST([ProductNumber] AS VARCHAR(100)) AS [ProductNumber]
FROM [AdventureWorks].[Production].[Product]
UNION ALL
SELECT OrigProductNumber
,CAST(STUFF([ProductNumber], PATINDEX('%[^0-9]%', [ProductNumber]), 1, '') AS VARCHAR(100) ) AS [ProductNumber]
FROM CTE WHERE PATINDEX('%[^0-9]%', [ProductNumber]) > 0
)
SELECT * FROM CTE
WHERE PATINDEX('%[^0-9]%', [ProductNumber]) = 0
OPTION (MAXRECURSION 0)
output:
OrigProductNumber ProductNumber
WB-H098 098
VE-C304-S 304
VE-C304-M 304
VE-C304-L 304
TT-T092 092
RichardTheKiwi's script in a function for use in selects without cross apply,
also added dot because in my case I use it for double and money values within a varchar field
CREATE FUNCTION dbo.ReplaceNonNumericChars (#string VARCHAR(5000))
RETURNS VARCHAR(1000)
AS
BEGIN
SET #string = REPLACE(#string, ',', '.')
SET #string = (SELECT SUBSTRING(#string, v.number, 1)
FROM master..spt_values v
WHERE v.type = 'P'
AND v.number BETWEEN 1 AND LEN(#string)
AND (SUBSTRING(#string, v.number, 1) LIKE '[0-9]'
OR SUBSTRING(#string, v.number, 1) LIKE '[.]')
ORDER BY v.number
FOR
XML PATH('')
)
RETURN #string
END
GO
Thanks RichardTheKiwi +1
Well if you really can't use a function, I suppose you could do something like this:
SELECT REPLACE(REPLACE(REPLACE(LOWER(col),'a',''),'b',''),'c','')
FROM dbo.table...
Obviously it would be a lot uglier than that, since I only handled the first three letters, but it should give the idea.

How to pivot a table to a view on matching-length delimited cells

Disclaimer: I'm dealing with a rather old legacy system so any comments telling me about poor design are redundant, although I do genuinely appreciate any such sentiment. There is a new version that solves most legacy problems but we still have to maintain the old system, so basically, we have to manage for now.
I have a table that looks like this (yes, that is a single column, I know):
And I need a view (for reporting purposes) that will dynamically process the data in said table and return this:
The values are \n-delimited (shudder) and you can assume there will always be the same number of values in each cell (9 in the example, although other databases could have 4 or 12 or any number), although I suppose having NULL-insertion in the event of missing values couldn't hurt. They will also always be in a matching order (as in the example, 'AUD', 'Australian Dollar', and '$' are all the first values in their respective cells, and so on).
I've found various approaches to splitting a single cell out into a view, but nothing that covers merging data in such a way as I require. Sitting at home with a cold has not helped my research capabilities. Help me StackOverflow, you're my only hope!
Bonus points for tidy, relatively readable SQL examples, although I'm anticipating messiness as a natural by-product of the hackish nature of my required solution.
Something like this. I didn't take the time to build out the tables, but it should be fairly obvious where you can replace my variables with your rows. You will also want to do a replace char(10) where I have used commas. You could package it up in a table valued function and then call as a view.
declare #xml1 xml
declare #xml2 xml
declare #xml3 xml
declare #c1 nvarchar(250)
declare #c2 nvarchar(250)
declare #c3 nvarchar(250)
set #c1 = N'AUD,CAD,EUR,GBP,JPY,NZD,USD,KES,CHF';
set #c2 = N'Australian Dollar,Canadian Dollar,Euro,Pound Sterling,Yen,New Zealand Dollar,United States Dollar,Kenyan Shilling, Swiss Franc';
set #c3 = N'$,$,C,L,Y,$,$,K,F';
-- you'd use replace(#c1, char(10), '</r><r>') etc etc for /n delimited code
set #xml1 = N'<root><r>' + replace(#c1,',','</r><r>') + '</r></root>';
set #xml2 = N'<root><r>' + replace(#c2,',','</r><r>') + '</r></root>';
set #xml3 = N'<root><r>' + replace(#c3,',','</r><r>') + '</r></root>';
select code.code, name.name, symbol.symbol
from
(select ROW_NUMBER() over (order by ##rowcount) as ck,
c.value('.','varchar(max)') as [code]
from #xml1.nodes('//root/r') as a(c)) as code
inner join
(select ROW_NUMBER() over (order by ##rowcount) as nk,
n.value('.','varchar(max)') as [name]
from #xml2.nodes('//root/r') as a(n)) as name on code.ck = name.nk
inner join
(select ROW_NUMBER() over (order by ##rowcount) as sk,
s.value('.','varchar(max)') as [symbol]
from #xml3.nodes('//root/r') as a(s)) as symbol on symbol.sk = name.nk
You can run this as a single script in SSMS for verification that it works. No schema necessary.
Using Jeff Moden's Tally Ho! CSV splitter:
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH
E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
and inline CTE data like this
with
data as (select Num,Currencies from (values
(1,'AUD'+char(10)+'CAD'+char(10)+'USD'+char(10)+'KES')
,(2,'Australian DOllar'+char(10)+'Canadian Dollar'+char(10)+'US Dollar'+char(10)+'Kenyan Shilling')
,(3,'$'+char(10)+'$'+char(10)+'$'+char(10)+'k')
)data(Num,Currencies)
),
The solution is as simple as this:
map as (select * from (values
(1,'Code')
,(2,'Name')
,(3,'Symbol')
)map(Num,Col )
)
select
ItemNumber
,max(Code) as Code
,max(Name) as Name
,max(Symbol) as Symbol
from (
select
map.Num
,map.Col
,c.Item
,c.ItemNumber
from data
join map
on map.Num = data.Num
cross apply dbo.DelimitedSplit8K(data.Currencies,char(10)) c
) t
pivot (max(Item) for Col in (Code,Name,Symbol)) pvt
group by ItemNumber
to give us:
ItemNumber Code Name Symbol
-------------- ---- -------------------- ---------------
1 AUD Australian DOllar $
2 CAD Canadian Dollar $
3 USD US Dollar $
4 KES Kenyan Shilling k
Hope this Helps. Run all together or replace the table variable with a temptable.
Sample Data:
IF OBJECT_ID(N'tempdb..#table') > 0
BEGIN
DROP TABLE #table
END
DECLARE #table TABLE(ATTRIBUTELVAUE VARCHAR(MAX))
INSERT INTO #table
SELECT
'AFN
ALL
DZD
USD
EUR
AOA
XCD
XCD
ARS'
INSERT INTO #table
SELECT
'Afghanistan
Albania
Algeria
American Samoa
Andorra
Angola
Anguilla
Antigua and Barbuda
Argentina'
INSERT INTO #table
SELECT
'AF
AL
DZ
AS
AD
AO
AI
AG
AR'
Query:
IF OBJECT_ID(N'tempdb..#TEMP') > 0
BEGIN
DROP TABLE #TEMP
END
DECLARE #StartLoop INT
DECLARE #EndLoop INT
DECLARE #Code TABLE (ID INT IDENTITY(1, 1),
Code VARCHAR(250))
DECLARE #Name TABLE (ID INT IDENTITY(1, 1),
Name VARCHAR(250))
DECLARE #Symbol TABLE (ID INT IDENTITY(1, 1),
Symbol VARCHAR(250))
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID,
*
INTO #Temp
FROM #table
SELECT #StartLoop = MIN(ID),
#EndLoop = MAX(ID)
FROM #Temp
WHILE #StartLoop <= #EndLoop
BEGIN
DECLARE #WorkingString VARCHAR(MAX)
SELECT #WorkingString = ATTRIBUTELVAUE + CHAR(10) + ' '
FROM #Temp
WHERE ID = #StartLoop
--print #WorkingString
WHILE CHARINDEX(CHAR(10), #WorkingString) > 0
BEGIN
DECLARE #SearchCharacter INT
DECLARE #WorkingStringLength INT
DECLARE #TempStringLength INT
DECLARE #TempString VARCHAR(MAX)
SET #WorkingStringLength = LEN(#WorkingString)
SET #SearchCharacter = CHARINDEX(CHAR(10), #WorkingString)
SET #TempString = SUBSTRING(#WorkingString, 1, #SearchCharacter - 1)
SET #TempStringLength = LEN(#TempString)
SET #WorkingString = SUBSTRING(#WorkingString, #SearchCharacter + 1, #WorkingStringLength)
SET #TempString = REPLACE(#TempString, CHAR(13), '')
IF #StartLoop = 1
BEGIN
INSERT INTO #Code
SELECT #TempString
END
IF #StartLoop = 2
BEGIN
INSERT INTO #Name
SELECT #TempString
END
IF #StartLoop = 3
BEGIN
INSERT INTO #Symbol
SELECT #TempString
END
END
SET #StartLoop = #StartLoop + 1
END
SELECT Code,
Name,
Symbol
FROM #Code AS c
JOIN #Name AS n
ON c.ID = n.ID
JOIN #Symbol AS s
ON s.ID = n.ID
Cleanup:
IF OBJECT_ID(N'tempdb..#TEMP') > 0
BEGIN
DROP TABLE #TEMP
END
IF OBJECT_ID(N'tempdb..#table') > 0
BEGIN
DROP TABLE #table
END
Because I needed a view, this ended up being my solution:
CREATE FUNCTION [dbo].[CurrencyTableGenerator]()
RETURNS
#CurrencyTable TABLE(
Code NVARCHAR(250)
,Name NVARCHAR(250)
,Symbol NVARCHAR(250)
)
AS
BEGIN
DECLARE #xml1 XML
DECLARE #xml2 XML
DECLARE #xml3 XML
DECLARE #C1 NVARCHAR(250)
DECLARE #C2 NVARCHAR(250)
DECLARE #c3 NVARCHAR(250)
SET #c1 = (SELECT ...)
SET #c2 = (SELECT ...)
SET #c3 = (SELECT ...)
SET #xml1 = N'<root><r>' + REPLACE(#c1, CHAR(10), '</r><r>') + '</r></root>';
SET #xml2 = N'<root><r>' + REPLACE(#c2, CHAR(10), '</r><r>') + '</r></root>';
SET #xml3 = N'<root><r>' + REPLACE(#c3, CHAR(10), '</r><r>') + '</r></root>';
INSERT INTO #CurrencyTable
SELECT Code.Code, Name.Name, Symbol.Symbol
FROM
(SELECT ROW_NUMBER() OVER (ORDER BY ##ROWCOUNT) AS ck,
c.value('.', 'VARCHAR(250)') AS [Code]
FROM #xml1.nodes('//root/r') AS a(c)) AS Code
INNER JOIN
(SELECT ROW_NUMBER() OVER (ORDER BY ##ROWCOUNT) AS nk,
n.value('.', 'VARCHAR(250)') AS [Name]
FROM #xml2.nodes('//root/r') AS a(n)) AS Name ON Code.ck = Name.nk
INNER JOIN
(SELECT ROW_NUMBER() OVER (ORDER BY ##ROWCOUNT) AS sk,
s.value('.', 'VARCHAR(250)') AS [Symbol]
FROM #xml3.nodes('//root/r') AS a(s)) AS Symbol ON Symbol.sk = Name.nk
RETURN
END
GO
CREATE VIEW [dbo].[CurrencyView]
AS
SELECT * FROM [dbo].[CurrencyTableGenerator]()
GO
Thanks to RThomas for the function.

Select query to remove non-numeric characters

I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a script that is similar that just grabs the numeric value after text, it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty good until I found that some of the data fields I thought would just be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE #textval NVARCHAR(30)
SET #textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (on sql server it's available on sql server 2017+ and also sql azure).
First, it replaces any non numeric characters with a # character.
Then, it removes all # characters.
You may need to add additional characters that you know may be present in the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
#Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^0-9]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
In case if there are some characters possible between digits (e.g. thousands separators), you may try following:
declare #table table (DirtyCol varchar(100))
insert into #table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by ##spid) from sys.all_columns),
data as (
select DirtyCol, Col
from #table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol IntCol
--------------------- -------
AB ABCDE # 123 123
ABCDE# 123 123
AB: ABC# 123 123
AB # 1 000 000 1000000
AB # 1`234`567 1234567
AB # (9)(876)(543) 9876543
For update, add ColToUpdate to select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (#Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE #Trimmed NVARCHAR(512)
SELECT #Trimmed = #Input
WHILE PATINDEX('%[^0-9]%', #Trimmed) > 0
SELECT #Trimmed = REPLACE(#Trimmed, SUBSTRING(#Trimmed, PATINDEX('%[^0-9]%', #Trimmed), 1), '')
RETURN #Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, I found the following which I though worked brilliantialy.. if anyone is still looking
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given I'm 35 years old; I was born in 1982. The average family has 2.4 children. this would return 35198224. i.e. it's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but isn't appropriate if you're looking to pull out specific numbers (i.e. as opposed to digits / just the numeric characters) from the text. Also it only handles digits; so won't return negative signs (-) or periods .).
declare #table table (id bigint not null identity (1,1), data nvarchar(max))
insert #table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from #table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from a the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits) removing all of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in 2 steps, rather than just removing all non-numeric characters in the first place is there are only 10 digits, whilst there are a huge number of possible characters; so replacing that small list is relatively fast; then gives us a list of those non-numeric characters which actually exist in the string, so we can then replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this handles commas and spaces and parentheses
--Handles parentheses, commas, spaces, hyphens..
declare #table table (c varchar(256))
insert into #table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from #table
Create function fn_GetNumbersOnly(#pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#pn)
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if ISNUMERIC(#c) = 1 and #c <> '-'
Select #r = #r + #c
Select #x = #x +1
end
return #r
End
In your case It seems like the # will always be after teh # symbol so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best. But here is an interesting method of getting rid of ANY non digit. It utilizes a tally table and table of digits to limit which characters are accepted then XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is it could be expanded to included ANY Allowed characters and strip out anything that is not allowed.
DECLARE #ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO #ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE #Digits AS TABLE (D CHAR(1))
INSERT INTO #Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
#Digits d10
CROSS APPLY #Digits d100
--add more cross applies to cover longer fields this handles 100
)
SELECT *
FROM
#ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN #Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
Declare #MainTable table(id int identity(1,1),TextField varchar(100))
INSERT INTO #MainTable (TextField)
VALUES
('6B32E')
declare #i int=1
Declare #originalWord varchar(100)=''
WHile #i<=(Select count(*) from #MainTable)
BEGIN
Select #originalWord=TextField from #MainTable where id=#i
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#originalWord)
declare #pn varchar(100)=#originalWord
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if(#c!='')
BEGIN
if ISNUMERIC(#c) = 0 and #c <> '-'
BEGIN
Select #r = cast(#r as varchar) + cast(replace((SELECT ASCII(#c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select #r = #r + #c
END
END
Select #x = #x +1
END
Select #r
Set #i=#i+1
END
I have created a function for this
Create FUNCTION RemoveCharacters (#text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare #index as int
declare #newtexval as varchar(30)
set #index = (select PATINDEX('%[A-Z.-/?]%', #text))
if (#index =0)
begin
return #text
end
else
begin
set #newtexval = (select STUFF ( #text , #index , 1 , '' ))
return dbo.RemoveCharacters(#newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE #t TABLE (tVal VARCHAR(100))
INSERT INTO #t VALUES('123')
INSERT INTO #t VALUES('123S')
INSERT INTO #t VALUES('A123,123')
INSERT INTO #t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM #t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN #t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create SQL CLR scalar function in order to be able to use regular expressions like replace patterns.
Here you can find example of how to create such function.
Having such function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More important, you will be able to solve more complex issues as the regular expressions will bring a whole new world of options directly in your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString
(No column name)
ssss123ssg99d362sdg
12399362
hey62q&*^(n43
6243
NULL
NULL
hi
123
123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern "not quite getting the syntax correct or something". To address that the requirements note that "all data fields do have the # prior to the number" and to provide an answer that addresses the challenges with substring/patindex, consider the following:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for patindex/substring, is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon to (in ETL, for example) need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1"
SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use "The source length"
DECLARE #STR VARCHAR(400)
DECLARE #specialchars VARCHAR(50) = '%[~,#,#,$,%,&,*,(,),!^?:]%'
SET #STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( #specialchars, #STR ) > 0
---Remove special characters using Replace function
SET #STR = Replace(Replace(REPLACE( #STR, SUBSTRING( #STR, PATINDEX( #specialchars, #STR ), 1 ),''),'-',''), ' ','')
SELECT #STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table

Set operation on TSQL (SQL 2005/2008)

When a set is given say {1,2,3,4,5,6}
The task is to separe pair of subsets
{1,2},
{1,3},
{1,4},
{1,5},
{1,6},
{2,3},
{2,4},
{2,5},
{2,6},
{3,4},
{3,5},
{3,6},
{4,5},
{5,6}
So when i have a table
Table Element
1
2
3
4
5
6
What is the way to list out all possible pair of comma separated subset ?
(Duplicates can be ignored (i.e) {1,2} is identical to {2,1})
SELECT T1.elem, T2.elem
FROM MyTable T1
INNER JOIN MyTable T2
ON T2.elem > T1.elem
...gets you most of the way there - if you want these shown as sets then...
SELECT '{' + CAST(T1.elem AS VARCHAR(12)) + ', ' + CAST(T2.elem AS VARCHAR(12)) + '}'
FROM MyTable T1
INNER JOIN MyTable T2
ON T2.elem > T1.elem
...is what you're after.
Here is a solution to the problem using a CTE. It isn’t particularly elegant, but it gets the job done.
DECLARE #set TABLE (Element INT);
INSERT INTO #set(Element) VALUES (1);
INSERT INTO #set(Element) VALUES (2);
INSERT INTO #set(Element) VALUES (3);
INSERT INTO #set(Element) VALUES (4);
INSERT INTO #set(Element) VALUES (5);
INSERT INTO #set(Element) VALUES (6);
;WITH array (Element1, Element2, Row)
AS
(
SELECT t.Element
, t2.Element
, ROW_NUMBER() OVER(ORDER BY t.Element)
FROM #set AS t
CROSS JOIN #set AS t2
WHERE t.Element <> t2.Element
)
SELECT a.Element1
, a.Element2
, '{' + CONVERT(VARCHAR(5),a.Element1) + ',' + CONVERT(VARCHAR(5),a.Element2) + '}' AS 'Subset'
FROM array AS a
WHERE NOT EXISTS (SELECT *
FROM array AS sa
WHERE sa.Element1 = a.Element2
AND sa.Element2 = a.Element1
AND sa.Row < a.Row
);

Implementing and applying a string split in T-SQL

I have this statement in T-SQL.
SELECT Bay From TABLE where uid in (
select B_Numbers from Info_Step WHERE uid = 'number'
)
I am selecting "multiple" BAYs from TABLE where their uid is equal to a string of numbers like this:
B_Numbers = 1:45:34:98
Therefore, I should be selecting 4 different BAYs from TABLE. I basically need to split the string 1:45:34:98 up into 4 different numbers.
I'm thinking that Split() would work, but it doesn't and I get a syntax error.
Any thoughts from the T-SQL gods would be awesome!
Here is an implementation of a split function that returns the list of numbers as a table:
http://rbgupta.blogspot.com/2007/03/split-function-tsql.html
Looks like this would set you on your way...
Here is a method that uses an auxiliary numbers table to parse the input string. The logic can easily be added to a function that returns a table. That table can then be joined to lookup the correct rows.
Step 1: Create the Numbers table
SET NOCOUNT ON
GO
IF EXISTS
(
SELECT 1
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_NAME = 'Numbers'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_TYPE = 'BASE TABLE'
)
BEGIN
DROP TABLE dbo.Numbers
END
GO
CREATE TABLE dbo.Numbers
(
Number smallint IDENTITY(1, 1) PRIMARY KEY
)
GO
WHILE 1 = 1
BEGIN
INSERT INTO dbo.Numbers DEFAULT VALUES
IF SCOPE_IDENTITY() = 32767
BEGIN
BREAK
END
END
GO
Step 2: Parse the Input String
CREATE FUNCTION dbo.ParseString(#input_string varchar(8000), #delim varchar(8000) = " ")
RETURNS TABLE
AS RETURN
(
SELECT Number
FROM dbo.Numbers
WHERE CHARINDEX
(
#delim + CONVERT(VARCHAR(12),Number) + #delim,
#delim + #input_string + #delim
) > 0
)
GO
**EXAMPLE**
SELECT * FROM dbo.ParseString('1:45:34:98',':')
Step 3: Use the results however you want/need
Number
------
1
34
45
98
End-To-End Example
Create function that returns the appropriate BNumber (of course change it to use the commented out SQL)
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION dbo.GetBNumber (#uid int)
RETURNS VARCHAR(8000)
AS
BEGIN
RETURN '1:45:34:98'
--select B_Numbers from Info_Step WHERE uid = #uid
END
GO
Use the use functions to return the desired results
-- Using Test Data
SELECT N.Number FROM Numbers N
JOIN dbo.ParseString(dbo.GetBNumber(12345),':') Q ON Q.Number = N.Number
-- Using Your Data (Untested but should work.)
SELECT N.Bay
FROM TABLE N
JOIN dbo.ParseString(dbo.GetBNumber(ENTER YOU NUMBER HERE),':') Q ON Q.Number = N.uid
Results
Number
------
1
34
45
98
You should keep your arrays as rows but if I understand your question I think this will work.
SELECT
Bay
From
TABLE
join Info_Step
on B_Numbers like '%'+ uid +'%'
where
Info_Step.uid = 'number'
This query will do a full table scan because of the like operator.
What you can do is loop through the B_Numbers entries and do your own split on : Insert those entries into a temp table and then perform your query.
DECLARE #i int
DECLARE #start int
DECLARE #B_Numbers nvarchar(20)
DECLARE #temp table (
number nvarchar(10)
)
-- SELECT B_Numbers FROM Info_Step WHERE uid = 'number'
SELECT #B_Numbers = '1:45:34:98'
SET #i = 0
SET #start = 0
-- Parse out characters delimited by ":";
-- Would make a nice user defined function.
WHILE #i < len(#B_Numbers)
BEGIN
IF substring(#B_Numbers, #i, 1) = ':'
BEGIN
INSERT INTO #temp
VALUES (substring(#B_Numbers, #start, #i - #start))
SET #start = #i + 1
END
SET #i = #i + 1
END
-- Insert last item
INSERT INTO #temp
VALUES (substring(#B_Numbers, #start, #i - #start + 1))
-- Do query with parsed values
SELECT Bay FROM TABLE WHERE uid in (SELECT * FROM #temp)
You can even try this
declare #str varchar(50)
set #str = '1:45:34:98'
;with numcte as(
select 1 as rn union all select rn+1 from numcte where rn<LEN(#str)),
getchars as(select
ROW_NUMBER() over(order by rn) slno,
rn,chars from numcte
cross apply(select SUBSTRING(#str,rn,1) chars)X where chars = ':')
select top 1
Bay1 = SUBSTRING(#str,0,(select rn from getchars where slno = 1))
,Bay2 = SUBSTRING(#str,
(select rn from getchars where slno = 1) + 1,
(((select rn from getchars where slno = 2)-
(select rn from getchars where slno = 1)
)-1))
,Bay3 = SUBSTRING(#str,
(select rn from getchars where slno = 2) + 1,
(((select rn from getchars where slno = 3)-
(select rn from getchars where slno = 2)
)-1))
,Bay4 = SUBSTRING(#str,
(select rn from getchars where slno = 3)+1,
LEN(#str))
from getchars
Output:
Bay1 Bay2 Bay3 Bay4
1 45 34 98