mssql (tsql) procedure replace order - tsql

...
<td class="m92_t_col5" id="preis_0">xx</td>
...
i want to change to
...
<td id="preis_0" class="m92_t_col5">xxx</td>
...
So id="" must be first and then class="". Is this possible to do with tsql? Text in id or class is generic...

I need to go find some soap but given your requirements, this is an example of how to achieve the desired replacement.
-- Moves id="..." in front of class="..." by swapping the two attribute texts.
-- This will probably not perform terribly well: it does a lot of string
-- manipulation inside T-SQL, and the work is deliberately spread over several
-- small CTEs ("tumbling data") rather than consolidated into fewer queries.
--
-- Known limitations:
--   * attribute values must be double-quoted and the closing quote must exist
--   * 'id=' / 'class=' are found with a plain substring search, so an
--     attribute such as data-id= would also match
--   * the placeholder '~~|~' must not occur in the source text
;
WITH SAMPLE_DATA AS
(
    -- gin up some demo data
    -- with random spacing and ids to make valid test cases
    SELECT '<td class="m92_t_col5" id="preis_0">xx</td>' AS html
    UNION ALL SELECT '<td id="preis_2" class="m29_t_col5">no fix req</td>'
    UNION ALL SELECT '<td id="preis_49" class="m29_t_col5">no fix req</td>'
    UNION ALL SELECT '<td class="m93_t_col50" id="preis_3">xy</td>'
    UNION ALL SELECT '<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>'
    UNION ALL SELECT '<td id="preis_8" class="m29_t_col5">no fix req</td>'
)
, ORDINALS AS
(
    -- Find the starting position of the keywords (0 = keyword not present)
    SELECT
        SD.html
    ,   CHARINDEX('class=', SD.html) AS class_ordinal
    ,   CHARINDEX('id=', SD.html) AS id_ordinal
        -- A sequence is needed to put the rows back together in a stable order.
        -- ORDER BY (SELECT 1) gives an arbitrary numbering; use a real key
        -- column here when the source table has one.
    ,   ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS original_sequence
    FROM
        SAMPLE_DATA SD
)
, NEEDS_MODIFIED AS
(
    -- Rows that need the swap: class is present AND id comes after it.
    -- The end-quote columns point one past the closing quote of each value
    -- ('class="' is 7 characters long, 'id="' is 4).
    SELECT
        O.*
    ,   CHARINDEX('"', O.html, O.class_ordinal + 7) + 1 AS class_ordinal_end_quote
    ,   CHARINDEX('"', O.html, O.id_ordinal + 4) + 1 AS id_ordinal_end_quote
    FROM
        ORDINALS O
    WHERE
        O.class_ordinal > 0
        AND O.id_ordinal > O.class_ordinal
)
, FIND_PARTS AS
(
    -- strip out the complete attribute texts, e.g. class="m92_t_col5"
    SELECT
        NM.*
    ,   SUBSTRING(NM.html, NM.class_ordinal, NM.class_ordinal_end_quote - NM.class_ordinal) AS class
    ,   SUBSTRING(NM.html, NM.id_ordinal, NM.id_ordinal_end_quote - NM.id_ordinal) AS id
    FROM
        NEEDS_MODIFIED NM
)
, DONE AS
(
    SELECT
        -- This is the heart of the matter: a triple replace
        -- Replace 1 swaps the class text for a placeholder that should not exist in the source
        -- Replace 2 replaces the id text with the class text
        -- Replace 3 replaces the placeholder with the id text
        REPLACE(REPLACE(REPLACE(FP.html, FP.class, '~~|~'), FP.id, FP.class), '~~|~', FP.id) AS html
    ,   FP.original_sequence
    FROM
        FIND_PARTS FP
    UNION ALL
    -- Everything else passes through untouched. This is the exact complement
    -- of the NEEDS_MODIFIED filter, so rows with no class, no id, or a NULL
    -- html value are neither dropped nor mangled.
    SELECT
        O.html
    ,   O.original_sequence
    FROM
        ORDINALS O
    WHERE
        O.class_ordinal IS NULL
        OR O.class_ordinal = 0
        OR O.id_ordinal <= O.class_ordinal
)
SELECT
    D.html
FROM
    DONE D
ORDER BY
    D.original_sequence
Input
<td class="m92_t_col5" id="preis_0">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td class="m93_t_col50" id="preis_3">xy</td>
<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
Output
<td id="preis_0" class="m92_t_col5">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td id="preis_3" class="m93_t_col50">xy</td>
<td id="preis_5" style="fuzzy" class="m95_t_col5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
After doing some thinking, you might have been trying to ask for that as a scalar function. This will probably have even worse performance but it solves the problem.
-- Scalar function that moves id="..." in front of class="..." by swapping the
-- two attribute texts.
--   @input  : an HTML fragment with double-quoted attribute values
--   returns : @input with the class and id attribute texts exchanged when
--             class appears before id; otherwise @input unchanged
--             (also when class or id is missing, a closing quote is missing,
--             or @input is NULL)
-- The placeholder '~~|~' must not occur in @input.
CREATE FUNCTION dbo.ClassIdSwap
(
    @input varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE
        @class_ordinal int
    ,   @class_text varchar(max)
    ,   @class_close_quote int
    ,   @id_ordinal int
    ,   @id_text varchar(max)
    ,   @id_close_quote int

    -- Starting position of each keyword (0 = not present)
    SELECT
        @class_ordinal = CHARINDEX('class=', @input)
    ,   @id_ordinal = CHARINDEX('id=', @input)

    -- bail out early: nothing to swap unless class exists and id follows it
    IF (@input IS NULL OR @class_ordinal = 0 OR @id_ordinal <= @class_ordinal)
    BEGIN
        RETURN @input
    END

    -- Position of the closing quote of each value
    -- ('class="' is 7 characters long, 'id="' is 4)
    SELECT
        @class_close_quote = CHARINDEX('"', @input, @class_ordinal + 7)
    ,   @id_close_quote = CHARINDEX('"', @input, @id_ordinal + 4)

    -- An unterminated value would give SUBSTRING a negative length
    IF (@class_close_quote = 0 OR @id_close_quote = 0)
    BEGIN
        RETURN @input
    END

    -- Full attribute texts including the closing quote
    SELECT
        @class_text = SUBSTRING(@input, @class_ordinal, @class_close_quote + 1 - @class_ordinal)
    ,   @id_text = SUBSTRING(@input, @id_ordinal, @id_close_quote + 1 - @id_ordinal)

    -- class -> placeholder, id -> class, placeholder -> id
    RETURN (REPLACE(REPLACE(REPLACE(@input, @class_text, '~~|~'), @id_text, @class_text), '~~|~', @id_text))
END
Usage
-- Demo: run dbo.ClassIdSwap over a handful of sample rows and show the
-- original next to the rewritten markup.
;
WITH SAMPLE_DATA (html) AS
(
    -- a mix of rows that need the swap and rows that are already in order
    SELECT sample_rows.html
    FROM
    (
        VALUES
            ('<td class="m92_t_col5" id="preis_0">xx</td>')
        ,   ('<td id="preis_2" class="m29_t_col5">no fix req</td>')
        ,   ('<td id="preis_49" class="m29_t_col5">no fix req</td>')
        ,   ('<td class="m93_t_col50" id="preis_3">xy</td>')
        ,   ('<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>')
        ,   ('<td id="preis_8" class="m29_t_col5">no fix req</td>')
    ) AS sample_rows (html)
)
SELECT
    src.html
,   dbo.ClassIdSwap(src.html) AS modified
FROM
    SAMPLE_DATA AS src

Related

Split question and answer text by multiple bookends

I have a field containing multiple questions and answers. I need to extract the answers into a column each.
Text Example:
Sorry I had to add as a picture as the text kept disappearing.
I need to extract the text between the first instance of the yellow and green highlight (not including the highlighted sections) as the first line in the select clause, followed by the second instance between the yellow and green highlight as the second line in the select clause etc etc.
There are 5 questions (between the pink and blue highlight) and 5 answers (between the yellow and green highlight).
I tried the code below using the text in the yellow and green highlight as bookends but I got the same error message as below.
Then I tried the following code using the question as the first bookend:
-- Asker's attempt: cut the answer out of i.description, using the first question
-- heading as the opening bookend and '<br /><br />' as the closing bookend.
-- NOTE(review): the CHARINDEX for '<br /><br />' is called without a start position,
-- so it finds the first occurrence anywhere in description, not the first one after
-- the heading. Whenever that occurrence lies far enough before the heading, the
-- length expression turns negative, which is consistent with the Msg 537 error
-- quoted below. The substring also starts AT the heading rather than after it.
SELECT distinct subjectidname
, title
, i.description
-- start position = where the heading begins
, SUBSTRING(i.description, CHARINDEX('<b>Please indicate your company''s export status:</b><br />', i.description),
-- length = (first '<br /><br />' in the whole text) - (heading start) + bookend length
CHARINDEX('<br /><br />',i.description) -
CHARINDEX('<b>Please indicate your company''s export status:</b><br />', i.description) + Len('<br /><br />'))
from FilteredIncident i
Both efforts resulted in an error message:
Msg 537, Level 16, State 3, Line 2 Invalid length parameter passed to
the LEFT or SUBSTRING function.
And it also does not account for the 2nd, 3rd, 4th & 5th instances.
What is the best way to extract the 5 answers from the description box containing a single line of text?
Start with a string splitter that can split on a string and returns an index for each row:
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Splits @pString on every occurrence of the (possibly multi-character)
--      @pDelimiter and returns one row per element:
--        ItemNumber - 1-based position of the element in the string
--        Item       - the element text ('' for empty elements)
--===== Define I/O parameters
        (@pString VARCHAR(8000), @pDelimiter VARCHAR(16))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000)
  WITH E1(N) AS (
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                ),                                      --10E+1 or 10 rows
       E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),   --10E+2 or 100 rows
       E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b),   --10E+4 or 10,000 rows max
 cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                     -- for both a performance gain and prevention of accidental "overruns"
                 SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                ),
cteStart(N1) AS (--==== This returns the starting position of each "element": 1 for the first element,
                     -- then the position just past every delimiter found.
                     -- DATALENGTH (not LEN) measures the delimiter because LEN ignores trailing
                     -- blanks, which would make a delimiter such as ' ' or ', ' too short.
                 SELECT 1 UNION ALL
                 SELECT t.N + DATALENGTH(@pDelimiter)
                   FROM cteTally t
                  WHERE SUBSTRING(@pString, t.N, DATALENGTH(@pDelimiter)) = @pDelimiter
                ),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring).
                     -- The ISNULL/NULLIF combo supplies 8000 as the length of the final
                     -- element, where no further delimiter is found.
                 SELECT s.N1,
                        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1 ,8000)
                   FROM cteStart s
                )
--===== Do the actual split.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l;
(Credit to Jeff Moden for years of successful string splitting.)
Then pick the right substrings to split on:
-- Demo input: question/answer pairs, each pair terminated by '<br /><br />',
-- question and answer separated by a single '<br />'.
declare @QandA as NVarChar(1000) = '<b>Q1:</b><br />A1<br /><br /><b>Q2:</b><br />A2<br /><br /><b>Q3:</b><br />A3<br /><br /><b>Q4:</b><br />A4<br /><br />';

-- A single split gets Q/A pairs:
select ItemNumber, Item
  from dbo.DelimitedSplit8K( @QandA, '<br /><br />' )
  order by ItemNumber;

-- A second split gets Q's and A's:
-- within each pair the odd item is the question and the even item the answer.
with QAPairs as (
  select ItemNumber as QuestionNumber, Item as QA
    from dbo.DelimitedSplit8K( @QandA, '<br /><br />' ) )
  select QuestionNumber, QA, ItemNumber, Item,
         case when ItemNumber % 2 = 1 then 'Q' else 'A' end as [Q/A]
    from QAPairs cross apply
      dbo.DelimitedSplit8K( QA, '<br />' );
dbfiddle.
That ought to be a good start. There is a bit of cleanup to do, e.g. there is a spurious empty Q/A pair since the string ends with a '<br /><br />' which, as a delimiter, must mean there is a Q/A pair on each side.
This example retrieves the data from a table and breaks down each row into its component questions and answers:
-- Sample data: one row per description, each holding several Q/A pairs.
declare @QandAs as Table ( QandAId Int Identity, QandA NVarChar(1000) );
insert into @QandAs ( QandA ) values
  ( '<b>Q1a:</b><br />A1a<br /><br /><b>Q2a:</b><br />A2a<br /><br /><b>Q3a:</b><br />A3a<br /><br /><b>Q4a:</b><br />A4a<br /><br />' ),
  ( '<b>Q1b:</b><br />A1b<br /><br /><b>Q2b:</b><br />A2b<br /><br /><b>Q3b:</b><br />A3b<br /><br /><b>Q4b:</b><br />A4b<br /><br />' );
select QandAId, QandA from @QandAs;

-- A single split gets Q/A pairs:
-- RN numbers the pairs of each row from the end, so RN = 1 is the last element.
with QAPairs as (
  select QandAId, ItemNumber, Item, Row_Number() over ( partition by QandAId order by ItemNumber desc ) as RN
    from @QandAs cross apply
      dbo.DelimitedSplit8K( QandA, '<br /><br />' ) )
  select QandAId, ItemNumber, Item, RN
    from QAPairs
    where RN > 1 -- Eliminate the extraneous empty Q/A pair at the end of the string.
    order by QandAId, ItemNumber;

-- A second split gets Q's and A's:
-- within each pair the odd item is the question and the even item the answer.
with QAPairs as (
  select QandAId, ItemNumber as QuestionNumber, Item as QA, Row_Number() over ( partition by QandAId order by ItemNumber desc ) as RN
    from @QandAs cross apply
      dbo.DelimitedSplit8K( QandA, '<br /><br />' ) )
  select QandAId, QuestionNumber, QA, ItemNumber, Item,
         case when ItemNumber % 2 = 1 then 'Q' else 'A' end as [Q/A]
    from QAPairs cross apply
      dbo.DelimitedSplit8K( QA, '<br />' )
    where RN > 1 -- Eliminate the extraneous empty Q/A pair at the end of the string.
    order by QandAId, QuestionNumber, ItemNumber;
dbfiddle.

How to convert time into days

I have got this data in a table. Using SQL server. I need to create a report in which I need to add this duration and convert into days, datatype of duration column is timestamp. When I use Sum on this i get this error Operand data type varchar is invalid for sum operator.
What is the best way of converting this to number of days?
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>
Not clear from your description if you have a TABLE or HTML code! You speak about table but present HTML code.
If you have a table then we need to get a table, which means queries to create the table and insert the sample data. If you have HTML code, which you need to parse then you DO NOT HAVE A TABLE but a text.
Assuming that you have HTML code as presented in the question, then what you have is well-formed XML, which means that you can use the OPENXML function to parse the data out of the HTML, as presented here:
-- Load the HTML table into an XML variable (this only works while the HTML is well-formed XML)
DECLARE @HTML as XML
SELECT @HTML = '
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>
'
SELECT @HTML
------------------ First we will learn how to parse the HTML data to get table structure
-- Note! This is based on the exact format of the HTML code!
-- Any change in the format will make this solution fail
DECLARE @handle INT
DECLARE @PrepareXmlStatus INT
-- Build the in-memory document; @handle identifies it until it is removed again
EXEC @PrepareXmlStatus= sp_xml_preparedocument @handle OUTPUT, @HTML
-- One result row per <tr> inside a <tbody>: first <td> is the ticket id, second the duration
SELECT id, MyTime
FROM OPENXML(@handle, '/table/tbody/tr', 1)
WITH (
    id NVARCHAR(MAX) '(td)[1]',
    MyTime NVARCHAR(MAX) '(td)[2]'
)
-- Always release the document again
EXEC sp_xml_removedocument @handle
Using this approach, we can now use the data in the result SET from the above query and calculate the SUM of the times, as presented here (this should be your solution):
-- Load the HTML table into an XML variable (this only works while the HTML is well-formed XML)
DECLARE @HTML as XML
SELECT @HTML = '
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>
'
DECLARE @handle INT
DECLARE @PrepareXmlStatus INT
-- Prepare the document exactly once: every call creates a new document, and a
-- second call would overwrite @handle and leave the first document unreleased.
EXEC @PrepareXmlStatus= sp_xml_preparedocument @handle OUTPUT, @HTML
---------------- Now we can use the above way of parsing the HTML in order to calculate the SUM of the times
;With MyCTE as (
    -- One row per <tr> inside a <tbody>: ticket id and duration text 'h:mm:ss'
    SELECT id, MyTime
    FROM OPENXML(@handle, '/table/tbody/tr', 1)
    WITH (
        id NVARCHAR(MAX) '(td)[1]',
        MyTime NVARCHAR(MAX) '(td)[2]'
    )
),
-- Convert seconds, minutes, hours to INT and sum each part separately.
-- Turning ':' into '.' lets PARSENAME pick the parts, counted from the right.
MyCTE1 as(
    SELECT
        MySec = SUM(CONVERT(INT,PARSENAME(REPLACE(MyTime,':','.'),1))),
        MyMin = SUM(CONVERT(INT,PARSENAME(REPLACE(MyTime,':','.'),2))),
        MyHour = SUM(CONVERT(INT,PARSENAME(REPLACE(MyTime,':','.'),3)))
    from MyCTE
),
-- Carry whole minutes out of the seconds and whole hours out of the minutes
MyCTE2 as(
    SELECT
        MySec = (MySec%60),
        MyMin = (MyMin + (MySec/60)) % 60,
        MyHour = MyHour + ((MyMin + (MySec/60)) / 60)
    from MyCTE1
)
-- Format the total as h:mm:ss
SELECT
    CONVERT(VARCHAR(10), MyHour) + ':' +
    RIGHT('00' + CONVERT(VARCHAR(2), MyMin),2) + ':' +
    RIGHT('00' + CONVERT(VARCHAR(2), MySec),2)
FROM MyCTE2
-- Always release the document again
EXEC sp_xml_removedocument @handle
Using existing table
If you already have a table (not HTML code) and all you need is the number of days, hours, minutes and seconds, then you only need the last part of the above query. There is no HTML to parse, so we can jump straight to the CTE named MyCTE1.
For example, using the op DDL+DML which was provided in the comment:
-- Sample table: Duration is text in the form 'h:mm:ss', where hours may exceed 24
CREATE TABLE [dbo].[OpenTickets] ( [TicketNumber] [varchar](50) NULL ,[Duration] [varchar](20) NULL )
GO
-- Explicit column list, and string literals for the varchar ticket number
INSERT INTO [dbo].[OpenTickets] ( [TicketNumber], [Duration] ) VALUES
( '521163' ,'44:08:11' ) ,( '746008' ,'45:38:35' ) ,( '120025' ,'48:50:31' ) ,( '780125' ,'75:15:59' ) ,( '903960' ,'76:23:49' ) ,
( '937883' ,'178:20:27' ) ,( '524404' ,'80:16:08' ) ,( '374972' ,'81:17:28' )
GO
SELECT [TicketNumber], [Duration] FROM [dbo].[OpenTickets]
GO
-- Sum each time part separately.
-- Turning ':' into '.' lets PARSENAME pick the parts, counted from the right.
;With MyCTE1 as(
    SELECT
        MySec = SUM(CONVERT(INT,PARSENAME(REPLACE([Duration],':','.'),1))),
        MyMin = SUM(CONVERT(INT,PARSENAME(REPLACE([Duration],':','.'),2))),
        MyHour = SUM(CONVERT(INT,PARSENAME(REPLACE([Duration],':','.'),3)))
    from [dbo].[OpenTickets]
),
-- Carry whole minutes out of the seconds and whole hours out of the minutes
MyCTE2 as(
    SELECT
        MySec = (MySec%60),
        MyMin = (MyMin + (MySec/60)) % 60,
        MyHour = MyHour + ((MyMin + (MySec/60)) / 60)
    from MyCTE1
)
SELECT
    -- whole days contained in the total
    MyDays = MyHour/24,
    -- total as 'Days: d and h:mm:ss'
    Total = 'Days: ' + CONVERT(VARCHAR(10), MyHour/24) + ' and ' +
        CONVERT(VARCHAR(10), MyHour % 24) + ':' +
        RIGHT('00' + CONVERT(VARCHAR(2), MyMin),2) + ':' +
        RIGHT('00' + CONVERT(VARCHAR(2), MySec),2),
    -- total as 'h:mm:ss' with hours not reduced to days
    TotalTime = CONVERT(VARCHAR(10), MyHour) + ':' +
        RIGHT('00' + CONVERT(VARCHAR(2), MyMin),2) + ':' +
        RIGHT('00' + CONVERT(VARCHAR(2), MySec),2)
FROM MyCTE2
GO

TSQL - Parse address function, unable to return result

Function px_explode will be provided with two parameters:
separator
string
Final result will look like this:
SELECT * FROM dbo.px_explode('xxy', 'alfaxxybetaxxygama')
and will return
But...
Query won't finish execution, so I assume that I ran into an infinite loop here, now assuming this, my question might be.
How can I avoid the infinite loop I ran into and what am I missing?
Code:
-- Asker's multi-statement table-valued function: intended to split #string on
-- #separator and return one trimmed, non-empty value per row.
-- NOTE(review): this is the code the question is about. The WHILE loop below
-- never assigns a new value to #string, so once its condition is true it stays
-- true; that is the infinite loop being asked about.
CREATE FUNCTION dbo.px_explode
(#separator VARCHAR(10), #string VARCHAR(2000))
RETURNS #expl_tbl TABLE
(val VARCHAR(100))
AS
BEGIN
-- No separator in the string: the whole (trimmed) string is the only value
IF (CHARINDEX(#separator, #string) = 0) and (LTRIM(RTRIM(#string)) <> '')
INSERT INTO #expl_tbl VALUES(LTRIM(RTRIM(#string)))
ELSE
BEGIN
WHILE CHARINDEX(#separator, #string) > 0
BEGIN
-- Insert the text before the first separator when it is not blank ...
IF (LTRIM(RTRIM(LEFT(#string, CHARINDEX(#separator, #string) - 1)))
<> '')
INSERT INTO #expl_tbl VALUES(LTRIM(RTRIM(LEFT(#string,
CHARINDEX(#separator, #string) - 1))))
-- ... but nothing here removes that text and the separator from #string,
-- so the next iteration sees exactly the same string.
END
-- Whatever remains after the loop is the last value
IF LTRIM(RTRIM(#string)) <> ''
INSERT INTO #expl_tbl VALUES(LTRIM(RTRIM(#string)))
END
RETURN
END
Loops are bad and so are multi-statement table-valued functions (i.e. the kind where you define the returned table yourself). If performance is important then you want a tally table and an inline table-valued function (iTVF).
For a high-performing way to resolve this I would first grab a copy of Ngrams8k. The solution you're looking for will look like this:
-- Split @string on @delimiter using dbo.NGrams8K (must be installed separately).
DECLARE @string    varchar(8000) = 'alfaxxybetaxxygama',
        @delimiter varchar(20)   = 'xxy';

SELECT
  itemNumber = row_number() over (ORDER BY d.p),
  -- 1-based start of the item; the artificial first row yields 0, which is mapped to 1
  itemIndex  = isnull(nullif(d.p+l.d, 0),1),
  item = SUBSTRING
  (
    @string,
    d.p+l.d, -- delimiter position + delimiter length
    -- length up to the next delimiter, or 8000 for the last item
    isnull(nullif(charindex(@delimiter, @string, d.p+l.d),0) - (d.p+l.d), 8000)
  )
FROM (values (len(@string), len(@delimiter))) l(s,d) -- s = string length, d = delimiter length
CROSS APPLY
(
  -- artificial "delimiter" in front of the string so the first item gets a row too
  SELECT -(l.d) union all
  -- position of every real delimiter
  SELECT ng.position
  FROM dbo.NGrams8K(@string, l.d) as ng
  WHERE token = @delimiter
) as d(p); -- delimiter.position
Which returns
itemNumber itemIndex item
-------------------- -------------------- ---------
1 1 alfa
2 8 beta
3 15 gama
Against a table it would look like this:
-- Same split applied to every row of a table (dbo.NGrams8K must be installed separately).
DECLARE @table table (string varchar(8000));
INSERT @table (string) VALUES ('abcxxyXYZxxy123'), ('alfaxxybetaxxygama');
DECLARE @delimiter varchar(100) = 'xxy';

SELECT t.string, split.itemNumber, split.itemIndex, split.item
FROM @table t
CROSS APPLY
(
  SELECT
    itemNumber = row_number() over (ORDER BY d.p),
    -- 1-based start of the item; the artificial first row yields 0, which is mapped to 1
    itemIndex  = isnull(nullif(d.p+l.d, 0),1),
    item = SUBSTRING
    (
      t.string,
      d.p+l.d, -- delimiter position + delimiter length
      -- length up to the next delimiter, or 8000 for the last item
      isnull(nullif(charindex(@delimiter, t.string, d.p+l.d),0) - (d.p+l.d), 8000)
    )
  FROM (values (len(t.string), len(@delimiter))) l(s,d) -- s = string length, d = delimiter length
  CROSS APPLY
  (
    -- artificial "delimiter" in front of the string so the first item gets a row too
    SELECT -(l.d) union all
    -- position of every real delimiter
    SELECT ng.position
    FROM dbo.NGrams8K(t.string, l.d) as ng
    WHERE token = @delimiter
  ) as d(p) -- delimiter.position
) split;
Results:
string itemNumber itemIndex item
------------------------- -------------------- -------------------- ------------------
abcxxyXYZxxy123 1 1 abc
abcxxyXYZxxy123 2 7 XYZ
abcxxyXYZxxy123 3 13 123
alfaxxybetaxxygama 1 1 alfa
alfaxxybetaxxygama 2 8 beta
alfaxxybetaxxygama 3 15 gama
My favourite is the XML splitter. This needs no function and is fully inlineable. If you can introduce a function to your database, the suggested links in Gareth's comment give you some very good ideas.
This is simple and quite straight forward:
-- Split a string by turning every delimiter into an </x><x> tag boundary and
-- reading the resulting XML fragment back as rows.
DECLARE @YourString VARCHAR(100)='alfaxxybetaxxygama';
DECLARE @Delimiter VARCHAR(10)='xxy';
SELECT nd.value('text()[1]','nvarchar(max)') AS item
-- The inner SELECT ... FOR XML PATH('') escapes characters such as & and <
-- so that the final CAST to XML cannot fail on them.
FROM (SELECT CAST('<x>' + REPLACE((SELECT @YourString AS [*] FOR XML PATH('')),@Delimiter,'</x><x>') + '</x>' AS XML)) AS A(Casted)
CROSS APPLY A.Casted.nodes('/x') AS B(nd);
This will first transform your string to an XML like this
<x>alfa</x>
<x>beta</x>
<x>gama</x>
... simply by replacing the delimiters xxy with XML tags. The rest is easy reading from XML .nodes()

T-SQL split string by - and space

I'm having difficult time with T-SQL and I was wondering if somebody could me point me to the right track.
I have the following variable called #input
-- Asker's sample input, in the form '<id> - <first name> <last name>'
DECLARE #input nvarchar(100);
SET #input= '27364 - John Smith';
-- Alternative input with a middle name:
-- SET #input= '27364 - John Andrew Smith';
I need to split this string in 3 parts (ID,Firstname and LastName) or 4 if the string contains a MiddleName. For security reason I cannot use functions.
My aproach was use Substring and Charindex.
-- Asker's attempt with SUBSTRING/CHARINDEX (incomplete; this is the code the
-- question asks for help with).
-- NOTE(review): #Id, #FirstName, #LastName and #PartyCode are not declared in
-- this snippet, and the final SELECT reads #PartyCode although the value was
-- assigned to #Id.

-- Id: everything up to two characters before the '-' (whole string when there is no '-')
SET #Id = SUBSTRING(#input, 1, CASE CHARINDEX('-', #input)
WHEN 0
THEN LEN(#input)
ELSE
CHARINDEX('-', #input) - 2
END);
-- FirstName: everything after the FIRST space, i.e. it still starts with '- '
SET #FirstName = SUBSTRING(#input, CASE CHARINDEX(' ', #input)
WHEN 0
THEN LEN(#input) + 1
ELSE
CHARINDEX(' ', #input) + 1
END, 1000);
-- LastName: the CASE tests for a space, but the ELSE branch searches for the
-- character '0' -- NOTE(review): looks like a typo for ' '
SET #LastName = SUBSTRING(#input, CASE CHARINDEX(' ', #input)
WHEN 0
THEN LEN(#input) + 1
ELSE
CHARINDEX('0', #input) + 1
END, 1000);
Select #PartyCode,#FirstName,#LastName
I am stuck because I don't know how to proceed and also the code has to be smart enough to add a fourth split if Middlename exists.
Any thoughts?
Thanks in advance
Hopefully this is part of a normalization project. This data is breaking 1NF and one really should avoid that...
Try it like this
The advantages
typesafe values
ad-hoc SQL
set based
If you want you might use a CASE WHEN to check if the last part is NULL and place Part2 into Part3 in this case...
-- Test data: '<number> - <first> [<middle>] <last>'
DECLARE @input table(teststring nvarchar(100));
INSERT INTO @input (teststring) VALUES
(N'27364 - John Smith'),(N'27364 - John Andrew Smith');

WITH Splitted AS
(
    -- 1. ' - ' becomes a plain blank, so the blank is the only separator left
    -- 2. & < > are replaced by their XML entities, so the CAST cannot fail on them
    -- 3. every blank becomes a tag boundary: <x>27364</x><x>John</x>...
    SELECT CAST(N'<x>' + REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(teststring,N' - ',N' '),N'&',N'&amp;'),N'<',N'&lt;'),N'>',N'&gt;'),N' ',N'</x><x>') + N'</x>' AS XML) testXML
    FROM @input
)
-- Read the parts by position; a missing part comes back as NULL
SELECT testXML.value('/x[1]','int') AS Number
,testXML.value('/x[2]','nvarchar(max)') AS Part1
,testXML.value('/x[3]','nvarchar(max)') AS Part2
,testXML.value('/x[4]','nvarchar(max)') AS Part3
FROM Splitted
The result
Number Part1 Part2 Part3
27364 John Smith NULL
27364 John Andrew Smith
SQL Server 2016 has a new built-in function called STRING_SPLIT()
Assuming that creating user-defined functions is allowed, but CLR functions are not:
-- Splits @String into words on blank, tab, carriage return and line feed.
-- Returns one row per non-empty, trimmed word:
--   value   - the word
--   ordinal - running number of the word
-- NOTE: STRING_SPLIT (without its optional ordinal output) does not guarantee
-- the order of the rows it returns, so ORDER BY (SELECT 1) numbers the pieces
-- in whatever order they arrive. Do not rely on ordinal reflecting the word
-- position unless that has been verified on the target server version.
CREATE FUNCTION dbo.WORD_SPLIT
(
    @String AS nvarchar(4000)
)
RETURNS TABLE
AS
RETURN
(
    WITH Spaces AS
    (
        -- split on blanks
        SELECT Spaced.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 1)) AS ordinal
        FROM STRING_SPLIT(@String, ' ') AS Spaced
    )
    , Tabs AS
    (
        -- split every piece on tabs
        SELECT Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY s.ordinal, (SELECT 1)) AS ordinal
        FROM Spaces AS s
        CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
    )
    , NewLines1 AS
    (
        -- split every piece on carriage returns
        SELECT NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY t.ordinal, (SELECT 1)) AS ordinal
        FROM Tabs AS t
        CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
    )
    , NewLines2 AS
    (
        -- split every piece on line feeds
        SELECT NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
        FROM NewLines1 AS nl1
        CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
    )
    -- drop empty pieces (from consecutive separators) and renumber the rest
    SELECT LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines2 AS nl2
    WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
GO
Usage:
-- Not normalized: one output row per word, next to its source row
SELECT i.teststring, split.[value], split.[ordinal]
FROM @input AS i
CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split

-- Normalized: one output row per source string, words pivoted into part1..part4
;WITH Splitted AS
(
    -- teststring is carried along as the grouping key for the PIVOT; without
    -- it the words of all source rows would collapse into a single row
    SELECT i.teststring, split.[value], split.[ordinal]
    FROM @input AS i
    CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split
)
SELECT p.teststring, p.[part1], p.[part2], p.[part3], p.[part4]
FROM (SELECT teststring, [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p
Or assuming that, per-security, you are not allowed to make schema changes:
-- Inline variant that needs no function: split each teststring of @input on
-- blank, tab, CR and LF and pivot the words into part1..part4.
-- NOTE: STRING_SPLIT (without its optional ordinal output) does not guarantee
-- the order of the rows it returns; verify the word order on the target
-- server version before relying on it.
;WITH Splitting AS
(
    -- source_id identifies the input row; every later step numbers its pieces
    -- per source_id so words of different rows are never mixed
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS source_id, teststring AS [value]
    FROM @input
)
, Spaces AS
(
    -- split on blanks
    SELECT sp.source_id, Spaced.[value], ROW_NUMBER() OVER (PARTITION BY sp.source_id ORDER BY (SELECT 1)) AS ordinal
    FROM Splitting AS sp
    CROSS APPLY STRING_SPLIT(sp.[value], ' ') AS Spaced
)
, Tabs AS
(
    -- split every piece on tabs
    SELECT s.source_id, Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY s.source_id ORDER BY s.ordinal, (SELECT 1)) AS ordinal
    FROM Spaces AS s
    CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
)
, NewLines1 AS
(
    -- split every piece on carriage returns
    SELECT t.source_id, NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY t.source_id ORDER BY t.ordinal, (SELECT 1)) AS ordinal
    FROM Tabs AS t
    CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
)
, NewLines2 AS
(
    -- split every piece on line feeds
    SELECT nl1.source_id, NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY nl1.source_id ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines1 AS nl1
    CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
)
, Splitted AS
(
    -- drop empty pieces (from consecutive separators) and renumber the rest
    SELECT nl2.source_id, LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY nl2.source_id ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines2 AS nl2
    WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
SELECT p.source_id, p.[part1], p.[part2], p.[part3], p.[part4]
FROM (SELECT source_id, [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p
ORDER BY p.source_id
Hopefully helpful!

Table Valued Function [XML Reader] Very Slow - Alternatives?

I have the following query that really kills performance, and I want to know what alternatives there are to an XML reader subquery. The purpose of this query is to export data with some HTML code.
An example of the table data is as follows.
p_s_id | p_c_id | notes
-----------------------
1 | 1 | this note is really long.
2 | 1 | This is fun.
3 | null | long note here
4 | 2 | this is not fun
5 | 2 | this is not fun
6 | 3 | long note here
I want to take all distinct notes that have the same p_c_id and join them together as shown below.
Any additional information can be provided so feel free to comment.
-- Asker's query: one row per p_c_id with all of its distinct, non-empty notes
-- concatenated into a single HTML bullet list.
SELECT DISTINCT
    outer_rows.p_c_id
,   '<br/><br/>' +
    (
        -- FOR XML PATH('') glues the bullet lines of the matching rows together;
        -- .value() turns the XML back into plain text
        SELECT DISTINCT '• ' + CAST(note AS nvarchar(max)) + ' <br/> '
        FROM dbo.spec_notes_join AS inner_rows
        WHERE outer_rows.p_c_id = inner_rows.p_c_id
          AND ISNULL(note, '') <> ''
        FOR XML PATH(''), TYPE
    ).value('.[1]', 'nvarchar(max)') AS notes_spec
FROM dbo.spec_notes_join AS outer_rows
so the export would look as follows:
p_c_id | notes
--------------
1 | <br/><br/> • this note is really long. <br/> • This is fun. <br/>
2 | <br/><br/> • This is not fun. <br/>
3 | <br/><br/> • long note here. <br/>
I think you will get slightly better performance you skip the distinct in the outer query and do a group by p_c_id instead.
-- Same concatenation, but the outer query collapses to one row per p_c_id
-- with GROUP BY instead of DISTINCT.
SELECT
    outer_rows.p_c_id
,   '<br/><br/>' +
    (
        -- bullet lines of all distinct, non-empty notes of this p_c_id
        SELECT DISTINCT '• ' + CAST(note AS nvarchar(max)) + ' <br/> '
        FROM dbo.spec_notes_join AS inner_rows
        WHERE outer_rows.p_c_id = inner_rows.p_c_id
          AND ISNULL(note, '') <> ''
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)') AS notes_spec
FROM dbo.spec_notes_join AS outer_rows
GROUP BY outer_rows.p_c_id
You could also try concatenating with a CLR User-Defined Aggregate Function.
Other alternatives can be found here Concatenating Row Values in Transact-SQL.
While this alternative skips the XML, I don’t know if it improves performance. If you could test it and post the results as a comment, I’d appreciate it. (It worked on my quick mock-up; you may need to do some minor debugging on your own structures.)
Start with this function:
-- Concatenates all distinct, non-empty notes of one p_c_id into an HTML bullet
-- list without using FOR XML.
--   @p_c_id : the p_c_id whose notes are wanted
--   returns : '<br/><br/>• note1 <br/> • note2 <br/> ', i.e. the same layout the
--             FOR XML version produces, or NULL when the id has no non-empty notes
CREATE FUNCTION dbo.Testing
(
    @p_c_id int
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE @ReturnString varchar(max)

    -- First note: @ReturnString is still NULL, so ISNULL supplies the list header.
    -- Later notes: close the previous line and open the next bullet.
    SELECT @ReturnString = isnull(@ReturnString + ' <br/> • ', '<br/><br/>• ') + xx.note
     from (select distinct note
            from spec_notes_join
            where p_c_id = @p_c_id
             and isnull(note, '') <> '') xx

    -- close the last line
    SET @ReturnString = @ReturnString + ' <br/> '

    RETURN @ReturnString
END
GO
and then embed it in your query:
-- Call the function once per distinct p_c_id
;WITH distinct_ids AS
(
    SELECT DISTINCT p_c_id
    FROM dbo.spec_notes_join
)
SELECT ids.p_c_id, dbo.Testing(ids.p_c_id)
FROM distinct_ids AS ids
This may perform poorly because of the function called required for each row. A possibly quicker variant would be to write the function as a table-valued function, and reference it by a CROSS APPLY in the join clause.