How to convert time into days - tsql

I have got this data in a table. Using SQL server. I need to create a report in which I need to add this duration and convert into days, datatype of duration column is timestamp. When I use Sum on this i get this error Operand data type varchar is invalid for sum operator.
What is the best way of converting this to number of days?
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>

Not clear from your description if you have a TABLE or HTML code! You speak about table but present HTML code.
If you have a table then we need to get a table, which means queries to create the table and insert the sample data. If you have HTML code, which you need to parse then you DO NOT HAVE A TABLE but a text.
Assuming that you have HTML code as presented in the question, then you have well-formed XML, which means that you can use the function OPENXML to parse the data from the HTML code, as presented here:
-- Demo: shred an HTML table (treated as well-formed XML) into rows with OPENXML.
-- T-SQL local variables take the @ sigil; a leading # would name a temporary table.
DECLARE @HTML AS XML
SELECT @HTML = '
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>
'
SELECT @HTML
------------------ First we will learn how to parse the HTML data to get table structure
-- Note! This is based on the exact format of the HTML code!
-- Any change in the format will break this solution
DECLARE @handle INT
DECLARE @PrepareXmlStatus INT
-- Parse the document once; @handle identifies the parsed document for OPENXML.
EXEC @PrepareXmlStatus = sp_xml_preparedocument @handle OUTPUT, @HTML
-- One row per <tr> found under a <tbody>; the header row lives under <thead> and is not matched.
SELECT id, MyTime
FROM OPENXML(@handle, '/table/tbody/tr', 1)
WITH (
    id     NVARCHAR(MAX) '(td)[1]',  -- first cell: ticket number
    MyTime NVARCHAR(MAX) '(td)[2]'   -- second cell: duration as h:mm:ss text
)
-- Release the parsed document that sp_xml_preparedocument created.
EXEC sp_xml_removedocument @handle
Using this approach, we can now use the data in the result SET from the above query and calculate the SUM of the times, as presented here (this should be your solution):
-- Sums the h:mm:ss durations found in an HTML table and prints the total as hours:mm:ss.
-- T-SQL local variables take the @ sigil; a leading # would name a temporary table.
DECLARE @HTML AS XML
SELECT @HTML = '
<table>
<thead>
<tr>
<th>TicketNumber</th>
<th>duration</th>
</tr>
</thead>
<tbody>
<tr>
<td>521163</td>
<td>44:08:11</td>
</tr>
</tbody>
<tbody>
<tr>
<td>906868</td>
<td>404:55:27</td>
</tr>
</tbody>
<tbody>
<tr>
<td>140013</td>
<td>412:08:03</td>
</tr>
</tbody>
</table>
'
DECLARE @handle INT
DECLARE @PrepareXmlStatus INT
---------------- Now we can use the above way of parsing the HTML in order to calculate the SUM of the times
-- Prepare the document exactly once: a second call would overwrite @handle and the
-- first parsed document could then never be passed to sp_xml_removedocument.
EXEC @PrepareXmlStatus = sp_xml_preparedocument @handle OUTPUT, @HTML
;WITH MyCTE AS (
    -- One row per <tr> under <tbody>: ticket id and duration text
    SELECT id, MyTime
    FROM OPENXML(@handle, '/table/tbody/tr', 1)
    WITH (
        id     NVARCHAR(MAX) '(td)[1]',
        MyTime NVARCHAR(MAX) '(td)[2]'
    )
),
-- Convert Seconds, minutes, hours to INT and sum
-- (PARSENAME reads dot-separated parts counted from the right, hence ':' -> '.')
MyCTE1 AS (
    SELECT
        MySec  = SUM(CONVERT(INT, PARSENAME(REPLACE(MyTime, ':', '.'), 1))),
        MyMin  = SUM(CONVERT(INT, PARSENAME(REPLACE(MyTime, ':', '.'), 2))),
        MyHour = SUM(CONVERT(INT, PARSENAME(REPLACE(MyTime, ':', '.'), 3)))
    FROM MyCTE
),
-- Move from seconds to minutes and from minutes to hours if the value is 60 or above
MyCTE2 AS (
    SELECT
        MySec  = (MySec % 60),
        MyMin  = (MyMin + (MySec / 60)) % 60,
        MyHour = MyHour + ((MyMin + (MySec / 60)) / 60)
    FROM MyCTE1
)
-- For the sample data above this yields 861:11:41
SELECT
    TotalTime = CONVERT(VARCHAR(10), MyHour) + ':' +
                RIGHT('00' + CONVERT(VARCHAR(2), MyMin), 2) + ':' +
                RIGHT('00' + CONVERT(VARCHAR(2), MySec), 2)
FROM MyCTE2
-- Release the parsed document that sp_xml_preparedocument created.
EXEC sp_xml_removedocument @handle
Using existing table
If you already have a table (not HTML code) and all you need is to find the number of days, hours, minutes, and seconds, then you simply need to use the end part of the above query. We do not need to parse the HTML code, so we can jump straight to the CTE named MyCTE1.
For example, using the op DDL+DML which was provided in the comment:
-- Sample table: Duration holds elapsed time as 'h:mm:ss' text (hours may exceed 24).
CREATE TABLE [dbo].[OpenTickets] ( [TicketNumber] [varchar](50) NULL ,[Duration] [varchar](20) NULL )
GO
-- Explicit column list, and ticket numbers quoted to match the varchar column
-- (no reliance on column order or implicit int -> varchar conversion).
INSERT INTO [dbo].[OpenTickets] ([TicketNumber], [Duration]) VALUES
( '521163' ,'44:08:11' ) ,( '746008' ,'45:38:35' ) ,( '120025' ,'48:50:31' ) ,( '780125' ,'75:15:59' ) ,( '903960' ,'76:23:49' ) ,
( '937883' ,'178:20:27' ) ,( '524404' ,'80:16:08' ) ,( '374972' ,'81:17:28' )
GO
SELECT [TicketNumber], [Duration] FROM [dbo].[OpenTickets]
GO
-- Sum each time component separately
-- (PARSENAME reads dot-separated parts counted from the right, hence ':' -> '.')
;WITH MyCTE1 AS (
    SELECT
        MySec  = SUM(CONVERT(INT, PARSENAME(REPLACE([Duration], ':', '.'), 1))),
        MyMin  = SUM(CONVERT(INT, PARSENAME(REPLACE([Duration], ':', '.'), 2))),
        MyHour = SUM(CONVERT(INT, PARSENAME(REPLACE([Duration], ':', '.'), 3)))
    FROM [dbo].[OpenTickets]
),
-- Move from seconds to minutes and from minutes to hours if the value is 60 or above
MyCTE2 AS (
    SELECT
        MySec  = (MySec % 60),
        MyMin  = (MyMin + (MySec / 60)) % 60,
        MyHour = MyHour + ((MyMin + (MySec / 60)) / 60)
    FROM MyCTE1
)
SELECT
    -- whole days contained in the total
    MyDays = MyHour / 24,
    -- days plus the remaining h:mm:ss
    Total = 'Days: ' + CONVERT(VARCHAR(10), MyHour / 24) + ' and ' +
            CONVERT(VARCHAR(10), MyHour % 24) + ':' +
            RIGHT('00' + CONVERT(VARCHAR(2), MyMin), 2) + ':' +
            RIGHT('00' + CONVERT(VARCHAR(2), MySec), 2),
    -- total expressed as hours:mm:ss
    TotalTime = CONVERT(VARCHAR(10), MyHour) + ':' +
                RIGHT('00' + CONVERT(VARCHAR(2), MyMin), 2) + ':' +
                RIGHT('00' + CONVERT(VARCHAR(2), MySec), 2)
FROM MyCTE2
GO

Related

Split question and answer text by multiple bookends

I have a field containing multiple questions and answers. I need to extract the answers into a column each.
Text Example:
Sorry I had to add as a picture as the text kept disappearing.
I need to extract the text between the first instance of the yellow and green highlight (not including the highlighted sections) as the first line in the select clause, followed by the second instance between the yellow and green highlight as the second line in the select clause etc etc.
There are 5 questions (between the pink and blue highlight) and 5 answers (between the yellow and green highlight).
I tried the code below using the text in the yellow and green highlight as bookends but I got the same error message as below.
Then I tried the following code using the question as the first bookend:
-- Attempt to cut one answer out of i.description, using the question text as the opening
-- bookend and '<br /><br />' as the closing bookend.
SELECT distinct subjectidname
, title
, i.description
-- start  = position of the question text
-- length = position of '<br /><br />' - start + Len('<br /><br />')
-- NOTE(review): neither CHARINDEX call passes a start position, so '<br /><br />' is searched
-- from the beginning of the string; if its first occurrence lies before the question text
-- (or it is absent), the length presumably goes negative, which would explain Msg 537 —
-- confirm against the data.
, SUBSTRING(i.description, CHARINDEX('<b>Please indicate your company''s export status:</b><br />', i.description),
CHARINDEX('<br /><br />',i.description) -
CHARINDEX('<b>Please indicate your company''s export status:</b><br />', i.description) + Len('<br /><br />'))
from FilteredIncident i
Both efforts resulted in an error message:
Msg 537, Level 16, State 3, Line 2 Invalid length parameter passed to
the LEFT or SUBSTRING function.
And it also does not account for the 2nd, 3rd, 4th & 5th instances.
What is the best way to extract the 5 answers from the description box containing a single line of text?
Start with a string splitter that can split on a string and returns an index for each row:
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
--      @pString    : the text to split.
--      @pDelimiter : the separator; may be longer than one character.
--      Returns one row per element: ItemNumber (1-based position) and Item (the element text).
        (@pString VARCHAR(8000), @pDelimiter VARCHAR(16))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000)
  WITH E1(N) AS (
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                ),                          --10E+1 or 10 rows
       E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
       E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
 cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                     -- for both a performance gain and prevention of accidental "overruns"
                 SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                ),
cteStart(N1) AS (--==== This returns the starting position of each "element": 1, plus the position
                     -- just past every delimiter occurrence.
                     -- DATALENGTH is used instead of LEN because LEN ignores trailing spaces, which
                     -- would mis-measure delimiters such as ', ' or ' '.
                 SELECT 1 UNION ALL
                 SELECT t.N + DATALENGTH(@pDelimiter)
                   FROM cteTally t
                  WHERE SUBSTRING(@pString, t.N, DATALENGTH(@pDelimiter)) = @pDelimiter
                ),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
                 SELECT s.N1,
                        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1 ,8000)
                   FROM cteStart s
                )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l;
(Credit to Jeff Moden for years of successful string splitting.)
Then pick the right substrings to split on:
-- Sample text: question/answer pairs, each pair terminated by '<br /><br />' and the
-- question separated from its answer by a single '<br />'.
declare @QandA as NVarChar(1000) = '<b>Q1:</b><br />A1<br /><br /><b>Q2:</b><br />A2<br /><br /><b>Q3:</b><br />A3<br /><br /><b>Q4:</b><br />A4<br /><br />';
-- A single split gets Q/A pairs:
select ItemNumber, Item
  from dbo.DelimitedSplit8K( @QandA, '<br /><br />' )
  order by ItemNumber;
-- A second split gets Q's and A's (odd items are questions, even items are answers):
with QAPairs as (
  select ItemNumber as QuestionNumber, Item as QA
    from dbo.DelimitedSplit8K( @QandA, '<br /><br />' ) )
  select QuestionNumber, QA, ItemNumber, Item,
         case when ItemNumber % 2 = 1 then 'Q' else 'A' end as [Q/A]
    from QAPairs cross apply
      dbo.DelimitedSplit8K( QA, '<br />' )
    order by QuestionNumber, ItemNumber;
dbfiddle.
That ought to be a good start. There is a bit of cleanup to do, e.g. there is a spurious empty Q/A pair since the string ends with a '<br /><br />' which, as a delimiter, must mean there is a Q/A pair on each side.
This example retrieves the data from a table and breaks down each row into its component questions and answers:
-- Sample data.
-- Sample data (a table variable, hence the @ sigil).
declare @QandAs as Table ( QandAId Int Identity, QandA NVarChar(1000) );
insert into @QandAs ( QandA ) values
  ( '<b>Q1a:</b><br />A1a<br /><br /><b>Q2a:</b><br />A2a<br /><br /><b>Q3a:</b><br />A3a<br /><br /><b>Q4a:</b><br />A4a<br /><br />' ),
  ( '<b>Q1b:</b><br />A1b<br /><br /><b>Q2b:</b><br />A2b<br /><br /><b>Q3b:</b><br />A3b<br /><br /><b>Q4b:</b><br />A4b<br /><br />' );
select QandAId, QandA from @QandAs;
-- A single split gets Q/A pairs:
-- RN numbers the pairs of each row from the end, so RN = 1 is the last item of the split.
with QAPairs as (
  select QandAId, ItemNumber, Item, Row_Number() over ( partition by QandAId order by ItemNumber desc ) as RN
    from @QandAs cross apply
      dbo.DelimitedSplit8K( QandA, '<br /><br />' ) )
  select QandAId, ItemNumber, Item, RN
    from QAPairs
    where RN > 1 -- Eliminate the extraneous empty Q/A pair at the end of the string.
    order by QandAId, ItemNumber;
-- A second split gets Q's and A's (odd items are questions, even items are answers):
with QAPairs as (
  select QandAId, ItemNumber as QuestionNumber, Item as QA, Row_Number() over ( partition by QandAId order by ItemNumber desc ) as RN
    from @QandAs cross apply
      dbo.DelimitedSplit8K( QandA, '<br /><br />' ) )
  select QandAId, QuestionNumber, QA, ItemNumber, Item,
         case when ItemNumber % 2 = 1 then 'Q' else 'A' end as [Q/A]
    from QAPairs cross apply
      dbo.DelimitedSplit8K( QA, '<br />' )
    where RN > 1 -- Eliminate the extraneous empty Q/A pair at the end of the string.
    order by QandAId, QuestionNumber, ItemNumber;
dbfiddle.

How to retrieve multiple columns count with different where clause?

I need to construct the following table.
<style type="text/css">
.tg {border-collapse:collapse;border-spacing:0;}
.tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;}
.tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;}
.tg .tg-baqh{text-align:center;vertical-align:top}
.tg .tg-lap0{font-size:100%;text-align:center;vertical-align:top}
.tg .tg-yw4l{vertical-align:top}
</style>
<table class="tg" style="undefined;table-layout: fixed; width: 593px">
<colgroup>
<col style="width: 67px">
<col style="width: 72px">
<col style="width: 116px">
<col style="width: 116px">
<col style="width: 116px">
<col style="width: 106px">
</colgroup>
<tr>
<th class="tg-lap0">State</th>
<th class="tg-baqh">City</th>
<th class="tg-baqh">Three Days Ago</th>
<th class="tg-baqh">Two Days Ago</th>
<th class="tg-baqh">One Day Ago</th>
<th class="tg-baqh">Total</th>
</tr>
<tr>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
</tr>
<tr>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
</tr>
<tr>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
</tr>
<tr>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
<td class="tg-yw4l"></td>
</tr>
</table>
Where state and city are simple selects, but three_days_ago, two_days_ago, one_day_ago are count of DB entries for the same city name.
All this is in same table in DB.
So I imagine my query would be something like this:
$union = pg_query($db,
"
(
SELECT estado, municipio, COUNT(*)
FROM focos_bdq
WHERE bioma LIKE 'Amazônia'
AND satelite LIKE 'AQUA_M-T'
AND data_hora_gmt::date='$three_days_ago'
GROUP BY municipio, estado
ORDER BY COUNT(*) DESC
)
UNION ALL
(
SELECT estado, municipio, COUNT(*)
FROM focos_bdq
WHERE bioma LIKE 'Amazônia'
AND data_hora_gmt::date='$two_days_ago'
GROUP BY municipio, estado
ORDER BY COUNT(*) DESC
)
UNION ALL
(
SELECT estado, municipio, COUNT(*)
FROM focos_bdq
WHERE bioma LIKE 'Amazônia'
AND data_hora_gmt::date='$one_day_ago'
GROUP BY municipio, estado
ORDER BY COUNT(*) DESC
)
"
But this query is resulting in one very long table, when I need to put dates in same row.
hard to tell without actual data, something like this should do the trick:
-- One CTE per day, each counting rows per (estado, municipio); the final SELECT puts the
-- three counts side by side and adds a total.
-- CTE names must not start with a digit, so they are d3 / d2 / d1 (as the final SELECT expects).
-- NOTE: the '$..._ago' placeholders are substituted by the calling PHP code; prefer binding
-- them as query parameters (e.g. pg_query_params) over string interpolation.
WITH
d3 AS (
    SELECT estado, municipio, COUNT(*) AS focos
    FROM focos_bdq
    WHERE bioma LIKE 'Amazônia'
      AND satelite LIKE 'AQUA_M-T'
      AND data_hora_gmt::date = '$three_days_ago'
    GROUP BY municipio, estado
)
, d2 AS (
    SELECT estado, municipio, COUNT(*) AS focos
    FROM focos_bdq
    WHERE bioma LIKE 'Amazônia'
      AND data_hora_gmt::date = '$two_days_ago'
    GROUP BY municipio, estado
)
, d1 AS (
    SELECT estado, municipio, COUNT(*) AS focos
    FROM focos_bdq
    WHERE bioma LIKE 'Amazônia'
      AND data_hora_gmt::date = '$one_day_ago'
    GROUP BY municipio, estado
)
SELECT
    d3.estado,
    d3.municipio,
    d3.focos                                               AS three_days_ago,
    -- a city with no rows on a given day has no match in that CTE: show 0, not NULL
    COALESCE(d2.focos, 0)                                  AS two_days_ago,
    COALESCE(d1.focos, 0)                                  AS one_day_ago,
    d3.focos + COALESCE(d2.focos, 0) + COALESCE(d1.focos, 0) AS total
FROM d3
-- Both joins key on d3: joining d1 through d2 would lose d1 whenever d2 has no match.
-- Cities without any row three days ago are not listed, because d3 drives the joins.
LEFT OUTER JOIN d2 ON d2.estado = d3.estado AND d2.municipio = d3.municipio
LEFT OUTER JOIN d1 ON d1.estado = d3.estado AND d1.municipio = d3.municipio
ORDER BY total DESC

Selecting users,groups and site from alfresco database

I would like to select users, the groups they belong to, and their sites. I want to do it with a single PostgreSQL query.
I've found these queries, but i want them combined with join if possible.
-- Exploratory look-ups; string literals use plain single quotes (typographic quotes are not
-- valid string delimiters) and each statement is terminated.
select * from alf_permission;
select * from ALF_AUTHORITY;
select * from ALF_CHILD_ASSOC where CHILD_NODE_NAME like 'group%';
select * from ALF_CHILD_ASSOC where QNAME_LOCALNAME like 'GROUP%';
-- Returns one row per node_id with its selected property values joined by commas.
select
node_id,
string_agg(string_value, ',')
from (
-- Inner query: property rows of alf_node_properties, restricted by the node filter below.
select
node_id,
qname_id,
-- qname_type is computed here but not used by the outer query
(select local_name from alf_qname where id = qname_id) as qname_type,
string_value
from alf_node_properties
where node_id in (
-- nodes whose type qname has local_name 'person'
select id from alf_node
where type_qname_id = (
select id from alf_qname where local_name = 'person'
)
-- NOTE(review): this condition sits inside the alf_node subquery; qname_id presumably resolves
-- to the outer alf_node_properties row (correlated) unless alf_node has its own qname_id
-- column — verify against the schema.
and qname_id in (
select id
from alf_qname
where local_name in (
'username',
'firstName',
'lastName',
'email'
)
)
)
) alf_users
group by node_id;
I would advise to create webscript possibly in Java that will do this fast and efficient!
I found a solution!
I put the script in Company Home > Data Dictionary > Web Scripts > org > alfresco > test
As you can see i created the "test" folder in "alfresco" folder, and in it i put these three files.
hello.get.html.ftl file
<#-- Renders one table row per entry of the "test" list; each entry is read by its
     'username', 'groups' and 'sites' keys. -->
<table border>
<tr>
<th>Username</th>
<th>Groups</th>
<th>Sites</th>
</tr>
<#list test as child>
<tr>
<td>${child['username']}</td>
<td>${child['groups']}</td>
<td>${child['sites']}</td>
</tr>
</#list>
</table>
hello.get.desc.xml
<!-- Web script descriptor: short name, description, URL and authentication setting. -->
<webscript>
<shortname>Hello</shortname>
<description>Polite greeting</description>
<url>/test/hello</url>
<authentication>user</authentication>
</webscript>
hello.get.js
// Builds model.test: one entry per person node, holding the user name plus the
// comma-separated display names of the person's groups and the short names of the
// person's sites.

// Joins the 'authorityDisplayName' of every container group of the given person node.
function joinGroupNames(person) {
    var containers = people.getContainerGroups(person);
    var names = [];
    for (var k = 0; k < containers.length; k++) {
        names.push(containers[k].properties['authorityDisplayName']);
    }
    return names.join(', ');
}

// Joins the 'shortName' of every site listed for the given user name.
function joinSiteNames(userName) {
    var userSites = siteService.listUserSites(userName);
    var names = [];
    for (var k = 0; k < userSites.length; k++) {
        names.push(userSites[k]['shortName']);
    }
    return names.join(', ');
}

var persons = search.luceneSearch("TYPE:\"{http://www.alfresco.org/model/content/1.0}person\"");
var rows = [];
for (var idx = 0; idx < persons.length; idx++) {
    var person = persons[idx];
    var userName = person.properties["cm:userName"];
    rows.push({
        'username' : userName,
        'groups' : joinGroupNames(person),
        'sites' : joinSiteNames(userName)
    });
}
model.test = rows;
you can access the result in your_domain_name/alfresco/service/test/hello
Try this query to get persons and groups
-- Lists child associations whose parent node is of type 'authorityContainer' and whose
-- child node is of type 'person', together with the parent's 'authorityDisplayName' value.
select
    ca.id,
    ca.parent_node_id,
    ca.child_node_id,
    ca.qname_localname,
    np.string_value
from alf_child_assoc as ca
inner join alf_node as child on child.id = ca.child_node_id
inner join alf_node as parent on parent.id = ca.parent_node_id
inner join alf_qname as q1 on q1.id = parent.type_qname_id
inner join alf_qname as q2 on q2.id = child.type_qname_id
inner join alf_node_properties as np on np.node_id = parent.id
where q1.local_name = 'authorityContainer'
  and q2.local_name = 'person'
  and np.qname_id = (select id from alf_qname where local_name = 'authorityDisplayName')
order by ca.qname_localname;

Table Valued Function [XML Reader] Very Slow - Alternatives?

I have the following query that really kills performance and want to know what alternatives there are to an XML reader subquery. The purpose of this query is to export data with some HTML code.
An example of the table data is as follows.
p_s_id | p_c_id | notes
-----------------------
1 | 1 | this note is really long.
2 | 1 | This is fun.
3 | null | long note here
4 | 2 | this is not fun
5 | 2 | this is not fun
6 | 3 | long note here
I want to take all distinct notes that have the same p_c_id and join them together as shown below.
Any additional information can be provided so feel free to comment.
-- For each p_c_id, concatenates its distinct non-empty notes into one string of
-- '• <note> <br/> ' fragments, prefixed with '<br/><br/>'.
select distinct
p_c_id
-- correlated subquery: FOR XML PATH('') glues the fragments together, and .value() turns
-- the XML result back into nvarchar(max)
,'<br/><br/>'+(select distinct '• ' +cast(note as nvarchar(max)) + ' <br/> '
from dbo.spec_notes_join m2
where m.p_c_id = m2.p_c_id
and isnull(note,'') <> ''
for xml path(''), type).value('.[1]', 'nvarchar(max)') as notes_spec
from dbo.spec_notes_join m
so the export would look as follows:
p_c_id | notes
--------------
1 | <br/><br/> • this note is really long. <br/> • This is fun <br/>
2 | <br/><br/> • This is not fun. <br/>
3 | <br/><br/> • long note here. <br/>
I think you will get slightly better performance if you skip the distinct in the outer query and do a group by p_c_id instead.
-- One row per p_c_id (via GROUP BY); notes_spec is '<br/><br/>' followed by the distinct,
-- non-empty notes of that p_c_id, each rendered as '• <note> <br/> '.
SELECT
    outer_notes.p_c_id,
    '<br/><br/>' + (
        -- FOR XML PATH('') concatenates the fragments; .value() converts back to nvarchar(max)
        SELECT DISTINCT '• ' + CAST(inner_notes.note AS nvarchar(max)) + ' <br/> '
        FROM dbo.spec_notes_join AS inner_notes
        WHERE outer_notes.p_c_id = inner_notes.p_c_id
          AND ISNULL(inner_notes.note, '') <> ''
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)') AS notes_spec
FROM dbo.spec_notes_join AS outer_notes
GROUP BY outer_notes.p_c_id
You could also try concatenating with a CLR User-Defined Aggregate Function.
Other alternatives can be found here Concatenating Row Values in Transact-SQL.
While this alternative skips the XML, I don’t know if it improves performance—if you could test and post results as a comment, I’d appreciate it. (It worked on my quick mock-up; you may need to do some minor debugging on your own structures.)
Start with this function:
-- Returns, for one p_c_id, its distinct non-empty notes concatenated as
--   '<br/><br/>• note1 <br/> • note2 <br/> '
-- (the same layout the FOR XML PATH query produces), or NULL when there are no notes.
--   @p_c_id : the p_c_id whose notes are concatenated.
CREATE FUNCTION dbo.Testing
(
    @p_c_id int
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE @ReturnString varchar(max)

    -- First note: prefix with '<br/><br/>• '; every later note: close the previous one
    -- with ' <br/> ' and open a new bullet. The concatenated column is note (the only
    -- column the derived table exposes).
    -- NOTE(review): note is cast because the original query casts it too, which suggests it
    -- may be a text/ntext column; varchar matches this function's return type — confirm.
    SELECT @ReturnString = isnull(@ReturnString + ' <br/> • ', '<br/><br/>• ') + xx.note
     from (select distinct cast(note as varchar(max)) as note
            from dbo.spec_notes_join
            where p_c_id = @p_c_id
             and isnull(note, '') <> '') xx

    -- Close the last note; stays NULL when no note was found.
    SET @ReturnString = @ReturnString + ' <br/> '
    RETURN @ReturnString
END
GO
and then embed it in your query:
-- Calls dbo.Testing once for every distinct p_c_id found in dbo.spec_notes_join.
SELECT
    ids.p_c_id,
    dbo.Testing(ids.p_c_id)
FROM (
    SELECT p_c_id
    FROM dbo.spec_notes_join
    GROUP BY p_c_id
) AS ids
This may perform poorly because of the function call required for each row. A possibly quicker variant would be to write the function as a table-valued function, and reference it by a CROSS APPLY in the join clause.

mssql (tsql) procedure replace order

...
<td class="m92_t_col5" id="preis_0">xx</td>
...
i want to change to
...
<td id="preis_0" class="m92_t_col5">xxx</td>
...
So id="" must be first and then class="". Is this possible to do with tsql? Text in id or class is generic...
I need to go find some soap but given your requirements, this is an example of how to achieve the desired replacement.
-- This will probably not perform terribly well for a number of
-- reasons, not the least of which we are doing lots of string manipulation
-- within tsql.
-- Much of this query nonsense could be consolidated into fewer queries
-- but given the dearth of information, I chose to build out the solution
-- in a tumbling data anti-pattern
-- Rewrites each <td ...> so that the id="..." attribute comes before class="..." by
-- swapping the two attribute texts; rows that already comply, or that lack a class=
-- attribute, are passed through unchanged.
;
WITH SAMPLE_DATA AS
(
    -- gin up some demo data
    -- with random spacing and ids to make valid test cases
    select '<td class="m92_t_col5" id="preis_0">xx</td>' AS html
    union all select '<td id="preis_2" class="m29_t_col5">no fix req</td>'
    union all select '<td id="preis_49" class="m29_t_col5">no fix req</td>'
    union all select '<td class="m93_t_col50" id="preis_3">xy</td>'
    union all select '<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>'
    union all select '<td id="preis_8" class="m29_t_col5">no fix req</td>'
)
, ORDINALS AS
(
    -- Find the starting position of the keywords (0 = keyword not present)
    SELECT SD.*
    ,   CHARINDEX('class=', SD.html, 0) AS class_ordinal
    ,   CHARINDEX('id=', SD.html, 0) AS id_ordinal
    -- You will really need something in here to keep stuff straight
    -- otherwise when we bring it all back together, it'll be wrong
    ,   ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS original_sequence
    FROM SAMPLE_DATA SD
)
, NEEDS_MODIFIED AS
(
    -- identify the rows that need to be modified
    -- and use the ordinals in the previous query to find the position just past each closing quote
    -- (+7 skips 'class="', +4 skips 'id="')
    SELECT
        O.*
    ,   CHARINDEX('"', O.html, O.class_ordinal+7) + 1 AS class_ordinal_end_quote
    ,   CHARINDEX('"', O.html, O.id_ordinal+4) + 1 AS id_ordinal_end_quote
    FROM
        ORDINALS O
    WHERE
        -- class= must actually be present; a row with id= but no class= has
        -- class_ordinal = 0 and would otherwise be mangled by the swap
        O.class_ordinal > 0
        AND O.id_ordinal > O.class_ordinal
)
, FIND_PARTS AS
(
    -- strip out the parts: the full attribute text, e.g. class="m92_t_col5" and id="preis_0"
    SELECT
        NM.*
    ,   SUBSTRING(NM.html, NM.class_ordinal, NM.class_ordinal_end_quote - NM.class_ordinal) AS class_text
    ,   SUBSTRING(NM.html, NM.id_ordinal, NM.id_ordinal_end_quote - NM.id_ordinal) AS id_text
    FROM
        NEEDS_MODIFIED NM
)
, DONE AS
(
    SELECT
        -- This is the heart of the matter
        -- having identified the correct text values for class and id
        -- we will now perform a triple replace
        -- Replace 1 is swapping the class text with something that should not exist in source
        -- Replace 2 replaces the id text with our class text
        -- Replace 3 replaces our placeholder value with the id text
        REPLACE(REPLACE(REPLACE(FP.html, FP.class_text, '~~|~'), FP.id_text, FP.class_text), '~~|~', FP.id_text) AS html
    ,   FP.original_sequence
    FROM
        FIND_PARTS FP
    UNION ALL
    -- Everything not selected by NEEDS_MODIFIED passes through untouched, including rows
    -- with neither attribute (both ordinals 0), so no input row is lost.
    SELECT
        O.html
    ,   O.original_sequence
    FROM
        ORDINALS O
    WHERE
        O.html IS NULL
        OR O.class_ordinal = 0
        OR O.id_ordinal < O.class_ordinal
)
SELECT
    D.html
FROM
    DONE D
ORDER BY
    D.original_sequence
Input
<td class="m92_t_col5" id="preis_0">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td class="m93_t_col50" id="preis_3">xy</td>
<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
Output
<td id="preis_0" class="m92_t_col5">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td id="preis_3" class="m93_t_col50">xy</td>
<td id="preis_5" style="fuzzy" class="m95_t_col5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
After doing some thinking, you might have been trying to ask for that as a scalar function. This will probably have even worse performance but it solves the problem.
-- Same logic as above, now in function form
-- Returns @input with the id="..." attribute moved in front of class="..." by swapping the
-- two attribute texts. The input is returned unchanged when it is NULL, when it has no
-- class= attribute, or when id= is absent or already precedes class=.
--   @input : the HTML fragment to rewrite.
CREATE FUNCTION dbo.ClassIdSwap
(
    @input varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE
        @class_ordinal int
    ,   @class_text varchar(max)
    ,   @class_ordinal_end_quote int
    ,   @id_ordinal int
    ,   @id_text varchar(max)
    ,   @id_ordinal_end_quote int

    -- starting position of each keyword (0 = not present)
    SELECT
        @class_ordinal = CHARINDEX('class=', @input, 0)
    ,   @id_ordinal = CHARINDEX('id=', @input, 0)

    -- bail out early: nothing to swap. Checking @class_ordinal = 0 matters because an
    -- input with id= but no class= would otherwise be mangled by the replace below.
    IF (@input IS NULL OR @class_ordinal = 0 OR @id_ordinal < @class_ordinal)
    BEGIN
        RETURN @input
    END

    -- position just past each attribute's closing quote (+7 skips 'class="', +4 skips 'id="')
    SELECT
        @class_ordinal_end_quote = CHARINDEX('"', @input, @class_ordinal+7) + 1
    ,   @id_ordinal_end_quote = CHARINDEX('"', @input, @id_ordinal+4) + 1

    -- full attribute texts, e.g. class="m92_t_col5" and id="preis_0"
    SELECT
        @class_text = SUBSTRING(@input, @class_ordinal, @class_ordinal_end_quote - @class_ordinal)
    ,   @id_text = SUBSTRING(@input, @id_ordinal, @id_ordinal_end_quote - @id_ordinal)

    -- triple replace: class text -> placeholder, id text -> class text, placeholder -> id text
    RETURN (REPLACE(REPLACE(REPLACE(@input, @class_text, '~~|~'), @id_text, @class_text), '~~|~', @id_text))
END
Usage
-- Runs dbo.ClassIdSwap over a handful of demo fragments and shows each original
-- next to its rewritten form.
SELECT
    D.html
,   dbo.ClassIdSwap(D.html) AS modified
FROM
    (
        -- demo data: a mix of rows that need the swap and rows that do not
        VALUES
            ('<td class="m92_t_col5" id="preis_0">xx</td>')
        ,   ('<td id="preis_2" class="m29_t_col5">no fix req</td>')
        ,   ('<td id="preis_49" class="m29_t_col5">no fix req</td>')
        ,   ('<td class="m93_t_col50" id="preis_3">xy</td>')
        ,   ('<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>')
        ,   ('<td id="preis_8" class="m29_t_col5">no fix req</td>')
    ) AS D (html)