T-SQL split string by - and space - tsql

I'm having a difficult time with T-SQL and I was wondering if somebody could point me in the right direction.
I have the following variable called #input
-- Sample input in the form "<id> - <first name> [<middle name>] <last name>".
-- NOTE(review): T-SQL local variables are normally prefixed with '@'; the '#' used here
-- looks like an extraction artifact - confirm against the original post.
DECLARE #input nvarchar(100);
SET #input= '27364 - John Smith';
-- Alternative sample that includes a middle name:
-- SET #input= '27364 - John Andrew Smith';
I need to split this string into 3 parts (ID, FirstName and LastName), or 4 if the string contains a MiddleName. For security reasons I cannot use functions.
My approach was to use SUBSTRING and CHARINDEX.
-- ID: everything before the dash. When there is no '-', the whole string is taken;
-- otherwise the length is (position of '-') - 2, which also drops the space before the dash.
SET #Id = SUBSTRING(#input, 1, CASE CHARINDEX('-', #input)
WHEN 0
THEN LEN(#input)
ELSE
CHARINDEX('-', #input) - 2
END);
-- First name: starts right after the FIRST space and takes up to 1000 characters.
-- For '27364 - John Smith' the first space precedes the dash, so this yields '- John Smith'.
SET #FirstName = SUBSTRING(#input, CASE CHARINDEX(' ', #input)
WHEN 0
THEN LEN(#input) + 1
ELSE
CHARINDEX(' ', #input) + 1
END, 1000);
-- Last name: the CASE tests for a space, but the ELSE branch searches for the character '0'.
-- CHARINDEX returns 0 when '0' is absent, so the substring then starts at position 1.
SET #LastName = SUBSTRING(#input, CASE CHARINDEX(' ', #input)
WHEN 0
THEN LEN(#input) + 1
ELSE
CHARINDEX('0', #input) + 1
END, 1000);
-- NOTE(review): #PartyCode is never assigned in this snippet; the ID was stored in #Id.
Select #PartyCode,#FirstName,#LastName
I am stuck because I don't know how to proceed and also the code has to be smart enough to add a fourth split if Middlename exists.
Any thoughts?
Thanks in advance

Hopefully this is part of a normalization project. This data is breaking 1NF and one really should avoid that...
Try it like this
The advantages
typesafe values
ad-hoc SQL
set based
If you want you might use a CASE WHEN to check if the last part is NULL and place Part2 into Part3 in this case...
-- Sample data: one row per input string, one without and one with a middle name.
DECLARE #input table(teststring nvarchar(100));
INSERT INTO #input VALUES
(N'27364 - John Smith'),(N'27364 - John Andrew Smith');
-- Turns each string into an XML fragment (<x>27364</x><x>John</x>...) so that the parts
-- can be read by position with typed lookups. A missing part simply comes back as NULL.
WITH Splitted AS
(
    SELECT CAST(N'<x>'
                + REPLACE(
                      REPLACE(
                          REPLACE(
                              REPLACE(
                                  REPLACE(teststring, N' - ', N' ')  -- drop the " - " separator
                                  , N'&', N'&amp;')                  -- escape '&' first so later entities are not re-escaped
                              , N'<', N'&lt;')
                          , N'>', N'&gt;')
                      , N' ', N'</x><x>')                            -- every space starts a new element
                + N'</x>' AS XML) AS testXML
    FROM #input
)
SELECT testXML.value('/x[1]','int') AS Number
      ,testXML.value('/x[2]','nvarchar(max)') AS Part1
      ,testXML.value('/x[3]','nvarchar(max)') AS Part2
      ,testXML.value('/x[4]','nvarchar(max)') AS Part3
FROM Splitted
The result
Number Part1 Part2 Part3
27364 John Smith NULL
27364 John Andrew Smith

SQL Server 2016 has a new built-in function called STRING_SPLIT() (it requires database compatibility level 130 or higher).
Assuming you are allowed to create T-SQL functions, but not CLR functions:
-- dbo.WORD_SPLIT: inline table-valued function that breaks @String into words.
-- Separators: space, tab (CHAR(9)), carriage return (CHAR(13)) and line feed (CHAR(10)).
-- Returns one row per non-empty word:
--   [value]  - the word, trimmed
--   ordinal  - running number of the word, starting at 1
-- NOTE: STRING_SPLIT does not document the order of its output rows, so ordinal relies on
-- the rows coming back in input order; verify on your version before depending on it.
CREATE FUNCTION dbo.WORD_SPLIT
(
    @String AS nvarchar(4000)
)
RETURNS TABLE
AS
RETURN
(
    WITH Spaces AS
    (
        -- Step 1: split on spaces.
        SELECT Spaced.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 1)) AS ordinal
        FROM STRING_SPLIT(@String, ' ') AS Spaced
    )
    , Tabs AS
    (
        -- Step 2: split each piece on tabs, keeping the order established so far.
        SELECT Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY s.ordinal, (SELECT 1)) AS ordinal
        FROM Spaces AS s
        CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
    )
    , NewLines1 AS
    (
        -- Step 3: split on carriage returns.
        SELECT NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY t.ordinal, (SELECT 1)) AS ordinal
        FROM Tabs AS t
        CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
    )
    , NewLines2 AS
    (
        -- Step 4: split on line feeds.
        SELECT NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
        FROM NewLines1 AS nl1
        CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
    )
    -- Drop the empty pieces produced by consecutive separators and renumber what is left.
    SELECT LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines2 AS nl2
    WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
GO
Usage:
-- Not normalized: one output row per word of every input string
SELECT i.*, split.[value], split.[ordinal]
FROM #input AS i
CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split;
-- Normalized: one output row per input string, its words pivoted into columns
WITH Splitted AS
(
    -- teststring is carried along so the pivot groups the words per input row
    SELECT i.teststring, split.[value], split.[ordinal]
    FROM #input AS i
    CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split
)
SELECT p.teststring, p.[part1], p.[part2], p.[part3], p.[part4]
FROM (SELECT teststring, [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p;
Or, assuming that for security reasons you are not allowed to make schema changes:
-- Same word split as a single ad-hoc statement (no function needed).
-- Each step splits on one separator; row_id keeps the words of different input rows apart.
-- NOTE: STRING_SPLIT does not document the order of its output rows, so the ordinals rely
-- on the rows coming back in input order.
WITH Splitting AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS row_id, teststring AS [value]
    FROM #input
)
, Spaces AS
(
    -- Step 1: split on spaces.
    SELECT sp.row_id, Spaced.[value], ROW_NUMBER() OVER (PARTITION BY sp.row_id ORDER BY (SELECT 1)) AS ordinal
    FROM Splitting AS sp
    CROSS APPLY STRING_SPLIT(sp.[value], ' ') AS Spaced
)
, Tabs AS
(
    -- Step 2: split on tabs.
    SELECT s.row_id, Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY s.row_id ORDER BY s.ordinal, (SELECT 1)) AS ordinal
    FROM Spaces AS s
    CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
)
, NewLines1 AS
(
    -- Step 3: split on carriage returns.
    SELECT t.row_id, NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY t.row_id ORDER BY t.ordinal, (SELECT 1)) AS ordinal
    FROM Tabs AS t
    CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
)
, NewLines2 AS
(
    -- Step 4: split on line feeds.
    SELECT nl1.row_id, NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY nl1.row_id ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines1 AS nl1
    CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
)
, Splitted AS
(
    -- Drop empty pieces and renumber the remaining words per input row.
    SELECT nl2.row_id, LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY nl2.row_id ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines2 AS nl2
    WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
SELECT p.row_id, p.[part1], p.[part2], p.[part3], p.[part4]
FROM (SELECT row_id, [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p;
Hopefully helpful!

Related

TSQL - in a string, replace a character with a fixed one every 2 characters

I can't replace every 2 characters of a string with a '.'
-- Each column replaces exactly one character of the literal with '.';
-- the four results are independent of each other.
SELECT
    STUFF('abcdefghi', 3, 1, '.') AS c3,
    STUFF('abcdefghi', 5, 1, '.') AS c5,
    STUFF('abcdefghi', 7, 1, '.') AS c7,
    STUFF('abcdefghi', 9, 1, '.') AS c9
if I use STUFF I should subsequently overlap the strings c3, c5, c7 and c9. but I can't find a method
can you help me?
initial string:
abcdefghi
the result I would like is
ab.de.gh.
the string can be up to 50 characters
Create a numbers / tally / digits table, if you don't have one already, then you can use this to target each character position:
-- Rebuilds the string character by character, emitting '.' at every third position.
-- For 'abcdefghi' the result is 'ab.de.gh.'.
with digits as ( /* This would be a real table, here it's just to test; it must cover the longest string */
    select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
), t as (
    select 'abcdefghi' as s
)
select String_Agg( case when d.n%3 = 0 then '.' else Substring(t.s, d.n, 1) end, '')
           within group (order by d.n) as result   -- without this the character order is not guaranteed
from t
cross apply digits d
where d.n <= Len(t.s)                               -- <= so the last character is included
Using for xml with existing table
-- Same idea for versions without STRING_AGG: build one character per row, then
-- concatenate the characters of each id with FOR XML PATH('').
-- Expects an existing table t with the columns id and s.
with digits as (
    select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
),
r as (
    -- n is kept so the characters can be put back together in their original order
    select t.id, d.n, case when d.n%3=0 then '.' else Substring(t.s, d.n, 1) end ch
    from t
    cross apply digits d
    where d.n <= Len(t.s)          -- <= so the last character is included
)
select result=(select '' + r2.ch
               from r r2
               where r2.id=r.id
               order by r2.n
               for xml path(''), type   -- TYPE + value() returns characters such as & or < unescaped
              ).value('.', 'varchar(max)')
from r
group by r.id
You can try it like this:
The easiest might be a quirky update, like here:
-- "Quirky update": the assignment is evaluated once per row of A, so every third
-- character is overwritten in turn. Each STUFF swaps one character for one character,
-- so the string length never changes and the order in which rows are applied is irrelevant.
-- NOTE(review): multi-row variable assignment is not a documented aggregation technique.
DECLARE @string VARCHAR(100)='abcdefghijklmnopqrstuvwxyz';
SELECT @string = STUFF(@string,3*A.pos,1,'.')
FROM (SELECT TOP(LEN(@string)/3) ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
      FROM master..spt_values) A(pos);   -- spt_values only serves as a row source for 1..LEN/3
SELECT @string;
Better/Cleaner/Prettier was a recursive CTE:
We use a declared table to have some tabular sample data
-- Sample data: strings of various lengths, including the empty string.
DECLARE @tbl TABLE(ID INT IDENTITY, SomeString VARCHAR(200));
INSERT INTO @tbl (SomeString) VALUES('')
,('a')
,('ab')
,('abc')
,('abcd')
,('abcde')
,('abcdefghijklmnopqrstuvwxyz');
--the query
WITH recCTE AS
(
    -- Anchor: one row per string, starting with its first two characters.
    SELECT ID
          ,SomeString
          ,(LEN(SomeString)+1)/3 AS CountDots                      -- integer division; bounds the recursion below
          ,1 AS OccuranceOfDot
          ,SUBSTRING(SomeString,4,LEN(SomeString)) AS RestString   -- skips the two kept characters and the replaced third
          ,CAST(LEFT(SomeString,2) AS VARCHAR(MAX)) AS Growing
    FROM @tbl
    UNION ALL
    -- Recursion: append '.' plus the next two characters, then drop three characters from the rest.
    SELECT t.ID
          ,r.SomeString
          ,r.CountDots
          ,r.OccuranceOfDot+2
          ,SUBSTRING(RestString,4,LEN(RestString))
          ,CONCAT(Growing,'.',LEFT(r.RestString,2))
    FROM @tbl t
    INNER JOIN recCTE r ON t.ID=r.ID
    WHERE r.OccuranceOfDot/2<r.CountDots-1
)
-- Per ID the row with the highest OccuranceOfDot gets row number 1;
-- TOP 1 WITH TIES returns all of those rows, i.e. the final string of every ID.
SELECT TOP 1 WITH TIES ID,Growing
FROM recCTE
ORDER BY ROW_NUMBER() OVER(PARTITION BY ID ORDER BY OccuranceOfDot DESC);
--the result
1
2 a
3 ab
4 ab
5 ab
6 ab.de
7 ab.de.gh.jk.mn.pq.st.vw.yz
The idea in short
We use a recursive CTE to walk along the string
we add the needed portion together with a dot
We stop when the remaining length is too short to continue
A little magic is the ORDER BY ROW_NUMBER() OVER() together with TOP 1 WITH TIES. This allows all first rows (the first per ID) to appear.

SQL Server - Select with Group By together Raw_Number

I'm using SQL Server 2000 (80). So, it's not possible to use the LAG function.
I have a data set with four columns:
Purchase_Date
Facility_no
Seller_id
Sale_id
I need to identify missing Sale_ids. Every Sale_id is 100% sequential, so there should not be any gaps in the order.
This code works for a specific date and store if specified, but I need it to work on the entire data set, looping through every Facility_no and every Seller_id for every Purchase_Date.
-- Finds the Sale_Id values missing for ONE facility / date / seller combination.
-- Highest Sale_Id recorded for that combination:
declare @MAXCOUNT int
set @MAXCOUNT =
(
    select MAX(Sale_Id)
    from #table
    where
        Facility_no in (124) and
        Purchase_date = '2/7/2020'
        and Seller_id = 1
)
-- Generate the numbers 1..@MAXCOUNT recursively ...
;WITH TRX_COUNT AS
(
    SELECT 1 AS Number
    union all
    select Number + 1 from TRX_COUNT
    where Number < @MAXCOUNT
)
-- ... and keep those without a matching sale. NOT EXISTS is used instead of NOT IN
-- because a single NULL Sale_Id would make NOT IN return no rows at all.
select c.Number
from TRX_COUNT c
where not exists
(
    select *
    from #table s
    where
        s.Sale_Id = c.Number
        and s.Facility_no in (124)
        and s.Purchase_Date = '2/7/2020'
        and s.Seller_id = 1
)
order by c.Number
OPTION (maxrecursion 0)   -- lifts the default limit of 100 recursion levels
My Dataset
This column:
-- 'OK' when the row is sale 0 or directly follows the previous sale of the same group.
-- LAG needs an ORDER BY inside OVER to define what "previous" means.
case when
Sale_Id=0 or 1=Sale_Id-LAG(Sale_Id) over (partition by Facility_no, Purchase_Date, Seller_id order by Sale_Id)
then 'OK' else 'Previous Missing' end
will tell you which Seller_Ids have some sale missing. If you want to go a step further and have exactly your desired output, then filter out and distinct the 'Previous Missing' ones, and join with a tally table on not exists.
Edit: OP mentions in comments they can't use LAG(). My suggestion, then, would be:
Make a temp table that has the max(sale_id) grouped by facility/seller_id
Then you can get your missing results by this pseudocode query:
-- Pseudocode: for every group, list each tally number up to the group's highest sale id
-- that has no matching row in the source table.
Select ...
from temptable t
inner join tally N on N.num <= t.maxsale
where not exists( select ... from sourcetable s where s.facility=t.facility and s.seller=t.seller and s.sale=N.num)
> because the only way to "construct" nonexisting combinations is to construct them all and just remove the existing ones.
This one worked out
-- Lists, per facility / date / seller, the expected sale ids that do not occur.
;WITH cte_Rn AS (
    -- Number the sales of each group in Sale_id order.
    SELECT
        Facility_no,
        Purchase_Date,
        Seller_id,
        Sale_id,
        ROW_NUMBER() OVER (PARTITION BY Facility_no, Purchase_Date, Seller_id ORDER BY Sale_id) AS [Rn_Num]
    FROM MyTable WITH (NOLOCK)
)
, cte_Rn_0 AS (
    -- Rn_Num - 1 turns the row number into a 0-based expected id
    -- (presumably because sale ids start at 0 - confirm against the data).
    SELECT
        Facility_no,
        Purchase_Date,
        Seller_id,
        Sale_id,
        [Rn_Num] - 1 AS [Skipped Sale for 0]
    FROM cte_Rn
)
SELECT
    a.Facility_no,
    a.Purchase_Date,
    a.Seller_id,
    a.Sale_id,
    a.[Skipped Sale for 0]
FROM cte_Rn_0 a
-- Keep a row only when its expected id is not used by any sale of the same group.
WHERE NOT EXISTS
(
    select * from cte_Rn_0 b
    where b.Sale_id = a.[Skipped Sale for 0]
    and a.Facility_no = b.Facility_no
    and a.Purchase_Date = b.Purchase_Date
    and a.Seller_id = b.Seller_id
)
--ORDER BY Purchase_Date ASC

T-SQL - Insert new row into table from a select of a group of four rows

I am logging the filters that users select on my web application and need to create a new row which shows what group of filters they have selected.
Due to way in which the filters are logged in the table they come in groups of four which can be seen in screenshot below:
Based on these groups of four I need to Insert a new row that contains a comma separated string into the Message field of the value(s) after the : symbol.
seems like there is no identifier that would group the four rows together (can't see full timestamp in your sample). here is an example with a grouping column that's computed based on mod 4 and recursive cte
-- Sample data: twelve log rows, i.e. three groups of four.
DECLARE @tab TABLE (id int identity(1,1), msg varchar(100))
INSERT INTO @tab (msg)
VALUES
('selected: a')
,('selected: b')
,('selected: c')
,('selected: d')
,('selected: e')
,('selected: f')
,('selected: g')
,('selected: h')
,('selected: i')
,('selected: j')
,('selected: k')
,('selected: l')
;with src --add grouping column
as
(
    SELECT
    id
    ,msg = left(msg, charindex(':', msg, 1)-1)                          -- text before the colon
    ,val = LTRIM(SUBSTRING(msg, CHARINDEX(':', msg, 1)+1, len(msg) ))   -- text after the colon
    -- Numbering within each id % 4 bucket gives four consecutive ids the same nid
    -- (assumes the ids are gapless - confirm for real data).
    ,nid = ROW_NUMBER() over (partition by id % 4 order by id)
    FROM @tab
)
,source --add column for recursion
as
(
    SELECT
    *,
    rn = ROW_NUMBER() OVER (PARTITION BY nid ORDER BY id)   -- position of the row inside its group
    FROM src
)
,rcdef -- define recursive cte
AS (
    -- Anchor: the first row of every group.
    SELECT
    nid
    ,msg
    ,val = convert(varchar(max), val)
    ,rn = 1
    FROM source
    WHERE rn = 1
    UNION ALL
    -- Recursion: append the value of the next row of the same group.
    SELECT
    rcdef.nid
    , rcdef.msg
    ,val = convert(varchar(max), rcdef.val +',' + source.val) --concatenate vals
    ,rn = rcdef.rn + 1
    FROM rcdef
    INNER JOIN source on source.nid = rcdef.nid and source.rn = rcdef.rn + 1
)
-- Every step extends the previous string, so max(val) picks the complete list of a group.
SELECT
nid,
val = msg + ': ' + max(val)
FROM rcdef
GROUP BY nid, msg

Get characters before underscore and separated by comma from a string in SQL Server 2008

I tried this query
-- Sample data: SelectedIds holds comma separated "<id>_<n>" pairs.
DECLARE #AdvancedSearchSelectedDropdownName TABLE (
SelectedIds VARCHAR(2048),
AdvanceSearchOptionTypeId INT
)
INSERT INTO #AdvancedSearchSelectedDropdownName
VALUES ('4_0,5_1,6_2,7_3', 23),
('62_3', 21), ('2_4', 23)
-- Accumulates one piece of text per matching row into the variable.
DECLARE #selectedIds VARCHAR(MAX) = '';
SELECT #selectedIds +=
-- NULL row: ISNULL(SelectedIds + ',', '') evaluates to '', so the variable's current
-- content is appended to itself.
CASE WHEN SelectedIds IS NULL
THEN #selectedIds + ISNULL(SelectedIds + ',', '')
-- Non-NULL row: only the characters before the FIRST underscore are taken, so a row
-- such as '4_0,5_1,6_2,7_3' contributes just '4'.
WHEN SelectedIds IS NOT NULL
THEN SUBSTRING(SelectedIds, 0, CHARINDEX('_', SelectedIds, 0)) + ','
END
FROM #AdvancedSearchSelectedDropdownName WHERE advanceSearchOptionTypeId = 23
SELECT #selectedIds
Current output: 4,2
Required output: 4,5,6,7,2
We may have n number of comma separated values in the SelectedIds column.
You might go this route:
-- Rewrites "a_b,c_d" as <x><y>a</y><y>b</y></x><x><y>c</y><y>d</y></x> and casts it to XML:
-- every comma closes one <x> pair, every underscore separates the two <y> values.
WITH AsXml AS
(
    SELECT
        src.*,
        CAST(
            '<x><y>'
            + REPLACE(
                  REPLACE(src.SelectedIds, '_', '</y><y>'),
                  ',', '</y></x><x><y>')
            + '</y></x>'
        AS XML) AS SplittedToXml
    FROM #AdvancedSearchSelectedDropdownName AS src
)
SELECT AsXml.*
FROM AsXml;
This will return your data in this form:
<x>
<y>4</y>
<y>0</y>
</x>
<x>
<y>5</y>
<y>1</y>
</x>
<x>
<y>6</y>
<y>2</y>
</x>
<x>
<y>7</y>
<y>3</y>
</x>
Now we can grab all the x and just the first y:
-- Builds the same XML as before, then returns one row per <x> pair
-- with the first <y> of the pair read as an int.
WITH AsXml AS
(
    SELECT
        src.*,
        CAST(
            '<x><y>'
            + REPLACE(
                  REPLACE(src.SelectedIds, '_', '</y><y>'),
                  ',', '</y></x><x><y>')
            + '</y></x>'
        AS XML) AS SplittedToXml
    FROM #AdvancedSearchSelectedDropdownName AS src
)
SELECT
    AsXml.AdvanceSearchOptionTypeId AS TypeId,
    pair.node.value('y[1]/text()[1]', 'int') AS IdValue
FROM AsXml
CROSS APPLY AsXml.SplittedToXml.nodes('/x') AS pair(node);
The result:
TypeId IdValue
23 4
23 5
23 6
23 7
21 62
23 2
Hint: Do not store comma delimited values!
It is a very bad idea to store your data in this format. You can use a generic format like my XML to store this or a structure of related side tables. But such construction tend to turn out as a real pain in the neck...
After a little re-think. Perhaps something a little more straightforward.
Now, if you have a limited number of _N
Example
-- Concatenates the SelectedIds of each option type, then strips the known suffixes _0 .. _4.
;WITH numbered AS (
    SELECT
        src.*,
        RN = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))   -- arbitrary but fixed order for the concatenation
    FROM #AdvancedSearchSelectedDropdownName AS src
)
SELECT
    grp.AdvanceSearchOptionTypeId,
    IDs = REPLACE(
          REPLACE(
          REPLACE(
          REPLACE(
          REPLACE(
              STUFF(
                  (SELECT ',' + member.SelectedIds
                   FROM numbered AS member
                   WHERE member.AdvanceSearchOptionTypeId = grp.AdvanceSearchOptionTypeId
                   ORDER BY member.RN
                   FOR XML PATH('')),
                  1, 1, '')             -- remove the leading comma
          , '_0', '')
          , '_1', '')
          , '_2', '')
          , '_3', '')
          , '_4', '')
FROM numbered AS grp
GROUP BY grp.AdvanceSearchOptionTypeId
Returns
AdvanceSearchOptionTypeId IDs
21 62
23 4,5,6,7,2
If interested in a helper function.
Tired of extracting strings (left, right, charindex, patindex, ...), I modified a split/parse function to accept TWO non-like delimiters. In this case a , and _.
Example
-- Extracts every id (the text between ',' and '_') with the helper TVF,
-- then joins the ids back together per option type.
;WITH extracted AS (
    SELECT
        src.AdvanceSearchOptionTypeId,
        parts.*,
        RN = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM #AdvancedSearchSelectedDropdownName AS src
    -- the leading ',' makes the first id start after a delimiter like all the others
    CROSS APPLY [dbo].[tvf-Str-Extract](',' + src.SelectedIds, ',', '_') AS parts
)
SELECT
    grp.AdvanceSearchOptionTypeId,
    IDs = STUFF(
              (SELECT ',' + member.RetVal
               FROM extracted AS member
               WHERE member.AdvanceSearchOptionTypeId = grp.AdvanceSearchOptionTypeId
               ORDER BY member.RN, member.RetVal
               FOR XML PATH('')),
              1, 1, '')                 -- remove the leading comma
FROM extracted AS grp
GROUP BY grp.AdvanceSearchOptionTypeId
Returns
AdvanceSearchOptionTypeId IDs
21 62
23 4,5,6,7,2
The TVF if Interested
-- [dbo].[tvf-Str-Extract]: returns every piece of @String that starts right after
-- @Delimiter1 (or at position 1) and contains @Delimiter2, cut off before @Delimiter2.
-- Result columns:
--   RetSeq - running number of the returned pieces, ordered by position
--   RetPos - start position of the piece inside @String
--   RetVal - the text before @Delimiter2
CREATE FUNCTION [dbo].[tvf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
Return (
    -- cte1: ten rows; cte2: the numbers 1..DataLength(@String), built from up to 10^6 rows
    with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
         cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 N1,cte1 N2,cte1 N3,cte1 N4,cte1 N5,cte1 N6) A ),
         -- cte3: start positions - position 1 plus the position after every @Delimiter1
         cte3(N) As (Select 1 Union All Select t.N+DataLength(@Delimiter1) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1),
         -- cte4: length up to the next @Delimiter1, or 8000 when there is none
         cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter1,@String,s.N),0)-S.N,8000) From cte3 S)
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
    From (
        Select *,RetVal = Substring(@String, N, L)
        From cte4
    ) A
    -- keep only pieces with at least one character before @Delimiter2
    Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[tvf-Str-Extract] (@String,'[[',']]')
*/
Disclaimer: As per first normal form, you should not store multiple values in a single cell. I would suggest that you avoid storing data this way.
Still, the approach would be: create a UDF which splits a comma-separated list into a table-valued result. I have not tested the code below, but it gives an idea of how to approach this problem.
Refer to CSV to table approaches
-- Splits every SelectedIds list with a CSV-to-table UDF, keeps the part before '_'
-- of each element and joins the results into one comma separated string.
-- NOTE(review): dbo.udfForCSVToList is not shown here; it is assumed to return a column named value.
Declare @selectedIds varchar(max) = '';
SET @selectedIds = STUFF(
    (SELECT ',' + SUBSTRING(c.value, 0, CHARINDEX('_', c.value, 0))
     FROM #AdvancedSearchSelectedDropdownName AS tv
     CROSS APPLY dbo.udfForCSVToList(tv.SelectedIds) AS c
     WHERE tv.AdvanceSearchOptionTypeId = 23
     FOR XML PATH('')), 1, 1, '');   -- remove only the single leading comma
SELECT @selectedIds

Parse Numeric Ranges in PostgreSQL

I would like to produce a string containing some parsed numeric ranges.
I have a table with some data
b_id,s_id
1,50
1,51
1,53
1,61
1,62
1,63
2,91
2,95
2,96
2,97
Using only SQL in PostgreSQL, how could I produce this output:
b_id,s_seqs
1,"50-51,53,61-63"
2,"91,95-97"
How on earth do I do that?
-- Collapses runs of consecutive s_id values per b_id into "from-to" ranges,
-- e.g. 50,51,53,61,62,63 becomes 50-51,53,61-63.
select b_id, string_agg(seq, ',' order by seq_no) as s_seqs
from (
    -- One row per run: a single value stays as is, a longer run becomes "min-max".
    -- min/max do not depend on the order in which the rows are aggregated.
    select
        b_id, seq_no,
        case
            when min(s_id) = max(s_id) then min(s_id)::text
            else min(s_id)::text || '-' || max(s_id)::text
        end as seq
    from (
        -- The running sum of the marks gives every run its own number.
        select
            b_id, s_id,
            sum(mark) over w as seq_no
        from (
            -- mark = 1 when the row does not directly follow its predecessor. The lag
            -- default is s_id itself, so the first row of a b_id always starts a run.
            select
                b_id, s_id,
                (s_id - 1 <> lag(s_id, 1, s_id) over w)::int as mark
            from my_table
            window w as (partition by b_id order by s_id)
        ) s
        window w as (partition by b_id order by s_id)
    ) s
    group by b_id, seq_no
) s
group by b_id
order by b_id;
Here you can find a step-by-step analysis from the innermost query towards the outside.