SQL Server 2008 - split multi-value column into rows with unique values

SQL Server 2008 - split multi-value column into rows with unique values - tsql

In a SQL Server 2008 database, I have a column with multiple values separated by semi-colons. Some values contain colons. Sample data:
key:value;key2:value;blah;foo;bar;A sample value:whee;others
key:value;blah;bar;others
A sample value:whee
I want to get all the unique values from each row in separate rows:
key:value
key2:value
blah
foo
bar
A sample value:whee
others
I've looked at various split functions, but they all seem to deal with hard-coded strings, not strings coming from a column in a table. How can I do this?
Edit: Thomas' answer got it! Here was my final query:
With SampleInputs As
(
select distinct myColumn from [myDatabase].[dbo].myTable where myColumn != ''
)
, XmlCte As
(
Select Cast( '<z>' + Replace( myColumn, ';', '</z><z>' ) + '</z>' As xml ) As XmlValue
From SampleInputs As I
)
Select Distinct Y.z.value('.','nvarchar(max)') As Value
From XmlCte
Cross Apply XmlValue.nodes('//z') Y(z)
I'm guessing the XmlValue.nodes and Y.z.value stuff is magic. O_o

With a split function you use cross apply:
select distinct SS.part
from YourTable
cross apply dbo.SplitString(YourColumn, ';') as SS
Here the SplitString takes two arguments, the string column and the separator and has a column called part where the values are returned.

With SampleInputs As
(
Select 'key:value;key2:value;blah;foo;bar;A sample value:whee;others' As [Data]
Union All Select 'key:value;blah;bar;others'
Union All Select 'A sample value:whee'
)
, XmlCte As
(
Select Cast( '<z>' + Replace( I.[Data], ';', '</z><z>' ) + '</z>' As xml ) As XmlValue
From SampleInputs As I
)
Select Distinct Y.z.value('.','nvarchar(max)') As Value
From XmlCte
Cross Apply XmlValue.nodes('//z') Y(z)
Update
Here's a version of the above that handles entities:
With SampleInputs As
(
Select 'key:value;key2:value;blah;foo;bar;A sample value:whee;others' As [Data]
Union All Select 'key:value;blah;bar;others'
Union All Select 'A sample value:whee'
Union All Select 'A sample value:<Oops>'
)
, XmlGoo As
(
Select Cast(
Replace(
Replace( Cast( Z.XmlValue As nvarchar(max) ), '{{', '<z>' )
, '}}', '</z>')
As Xml ) As Xmlvalue
From (
Select Cast(
(
Select '{{' + Replace( [Data], ';', '}}{{' ) + '}}'
From SampleInputs
For Xml Path(''), type
) As Xml ) As XmlValue
) As Z
)
Select Distinct Z.data.value('.', 'nvarchar(max)')
From XmlGoo
Cross Apply XmlValue.nodes('/z') Z(data)

Related

How to move the data to the next line based on spaces in sqlserver 2008 R2

Input : Keep the column value into next line if word to word space is 3 space and length of the word is >9 .
declare #Table table(CL1 varchar(50))
INSERT INTO #Table
SELECT 'Ohh my GOD'
UNION ALL
SELECT 'hindunewspaer is no1 paper'
select * from #Table
o/p :
CL1
ohh
my god
hindunewpaer
is no1 paper

Used a Split/Parse function. Can be inline if needed.
EDIT - Switch to a Parser which is not limited to 8K because the final
string could easily be larger than 8K
Example
;with cte0 as (
Select Seq=Row_Number() over (Order by (Select null)),RetSeq,RetVal
From #Table A
Cross Apply (
Select RetSeq
,RetVal=case when len(RetVal)>9 then '~~~' else '' end+RetVal+case when len(RetVal)>9 then '~~~' else '' end
From [dbo].[udf-Str-Parse](Replace(CL1,' ','~~~ '),' ')
) B ),
cte1 as ( Select S=Stuff((Select ' '+RetVal From cte0 Order by Seq For XML Path ('')),1,1,'') )
Select CL1 = RetVal
From cte1 A
Cross Apply [dbo].[udf-Str-Parse](A.S,'~~~') B
Order By RetSeq
Returns
CL1
Ohh
my GOD
hindunewspaer
is no1 paper
The Split/Parse Function if Needed
CREATE FUNCTION [dbo].[udf-Str-Parse] (#String varchar(max),#Delimiter varchar(10))
Returns Table
As
Return (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>' + replace((Select replace(#String,#Delimiter,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')

Sort data before concatenating using STUFF FOR XML

I have the following query that I am using for an SSRS Report:
SELECT ROW_NUMBER() OVER ( ORDER BY Judge.EventJudgeID ) AS JudgeRow ,
Judge.EventID ,
Judge.Judge_PersonID ,
STUFF(( SELECT DISTINCT
',' + CAST(Fights.FightNumber AS VARCHAR(MAX)) AS [text()]
FROM dbo.tblFights Fights ,
dbo.tblFightJudge FRJudge
WHERE Fights.FightID = FRJudge.fightid
AND ( Judge.Judge_PersonID = FRJudge.judge1id
OR Judge.Judge_PersonID = FRJudge.judge2id
OR Judge.Judge_PersonID = FRJudge.judge3id
)
FOR
XML PATH('')
), 1, 1, '') AS BoutsJudged ,
Persons.LastName + ' ' + Persons.FirstName AS JudgeName ,
Events.EventName ,
Events.EventDate
FROM dbo.tblEventJudge Judge
INNER JOIN dbo.tblPersons Persons ON PersonID = Judge_PersonID
INNER JOIN dbo.tblEvents Events ON Events.EventID = Judge.EventID
WHERE Judge.EventID = 1278;
The problem is that the STUFF command returns the following string:
1,10,11,12,13,14,15,16,17,18,19,2,3,4,5,6,7,8,9
How can I make it sort the numbers before concatenating it into a string?

Try this
SELECT ROW_NUMBER() OVER ( ORDER BY Judge.EventJudgeID ) AS JudgeRow ,
Judge.EventID ,
Judge.Judge_PersonID ,
STUFF(Select ',' + CAST(Fights.FightNumber AS VARCHAR(MAX)) AS [text()] From ( SELECT DISTINCT Fights.FightNumber
FROM dbo.tblFights Fights ,
dbo.tblFightJudge FRJudge
WHERE Fights.FightID = FRJudge.fightid
AND ( Judge.Judge_PersonID = FRJudge.judge1id
OR Judge.Judge_PersonID = FRJudge.judge2id
OR Judge.Judge_PersonID = FRJudge.judge3id
)
) X
ORDER BY Fights.FightNumber
FOR
XML PATH('')
), 1, 1, '') AS BoutsJudged ,
Persons.LastName + ' ' + Persons.FirstName AS JudgeName ,
Events.EventName ,
Events.EventDate
FROM dbo.tblEventJudge Judge
INNER JOIN dbo.tblPersons Persons ON PersonID = Judge_PersonID
INNER JOIN dbo.tblEvents Events ON Events.EventID = Judge.EventID
WHERE Judge.EventID = 1278;
You can check below sqls,
Before :
Select *,
STUFF((Select Distinct ','+Cast(high as varchar(MAX))
from master..spt_values where type = 'p' and number < 20
for xml Path('')),1,1,'')
from master..spt_values where type = 'p' and number < 20
After :
Select *,
STUFF((Select ','+Cast(high as varchar(MAX)) from (Select distinct high
from master..spt_values where type = 'p' and number < 20) x Order by high for xml Path('')),1,1,'')
from master..spt_values where type = 'p' and number < 20

I apologize for this solution being pedantic, but I have a hard time parsing code and need to see things in steps. Also, Microsoft adds a feature to do this in the 2012 release, but this code should work in most releases. First, use a database open to users in most SQLServers...
USE MASTER; SELECT TOP 3 TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION FROM INFORMATION_SCHEMA.COLUMNS;
/*TABLE_NAME COLUMN_NAME ORDINAL_POSITION
spt_fallback_db xserver_name 1
spt_fallback_db xdttm_ins 2
spt_fallback_db xdttm_last_ins_upd 3
*/
Now, breaking down this approach (to sorting a list within a column)...
(1) Adding FOR XML PATH('') to a 1 column query pivots it to one row, but adds XML tags for the column header...
SELECT TOP 3 COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS FOR XML PATH('');
/*<COLUMN_NAME>xserver_name</COLUMN_NAME><COLUMN_NAME>xdttm_ins</COLUMN_NAME><COLUMN_NAME>xdttm_last_ins_upd</COLUMN_NAME>*/
(2) Concatination nullifies the column header, eliminating the tags. Any string will work, I want comma space...
SELECT TOP 3 ', ' + COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS FOR XML PATH('');
/*, xserver_name, xdttm_ins, xdttm_last_ins_upd*/
(3) Other columns will need their own SELECT, so FOR XML must be a subquery, and ORDER BY is a legal prefix in a FOR XML subquery ;)...
SELECT TOP 2 TABLE_NAME
, (SELECT ', ' + COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMNS.TABLE_NAME = TABLES.TABLE_NAME
ORDER BY ORDINAL_POSITION FOR XML PATH('')
) LIST_OF_COLUMNS
FROM INFORMATION_SCHEMA.TABLES
ORDER BY TABLE_NAME;
/*TABLE_NAME LIST_OF_COLUMNS
spt_fallback_db , xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_dbid, name, dbid, status, version
spt_fallback_dev , xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_low, xfallback_drive, low, high, status, name, phyname
*/
(4) Finally, SUBSTRING is more familiar to me than STUFF for removing a known prefix...
SELECT TOP 2 TABLE_NAME
, SUBSTRING((SELECT ', ' + COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMNS.TABLE_NAME = TABLES.TABLE_NAME
ORDER BY ORDINAL_POSITION FOR XML PATH('')
)
, 2+1--Add 1 to start substring after the first 2 characters
, 99999) LIST_OF_COLUMNS
FROM INFORMATION_SCHEMA.TABLES
ORDER BY TABLE_NAME;
/*TABLE_NAME LIST_OF_COLUMNS
spt_fallback_db xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_dbid, name, dbid, status, version
spt_fallback_dev xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_low, xfallback_drive, low, high, status, name, phyname
*/
Pedantically yours - Jim Gettman

Counting words in column

i must get count number the tag
<name></name>
in column.
<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>
In this example data, queries should return 3.

Using XPath you can easily implement the logic.
Example XPath for your scenario : count(/users/name)
Result : 3
Test Here

Dynamic sql solution:
DECLARE #Table TABLE (Names NVARCHAR(1100))
INSERT INTO #Table VALUES
('<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>'),
('<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>'),
('<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>')
DECLARE #Sql NVARCHAR(MAX)
SET #Sql = ''
SELECT
#Sql = #Sql +
REPLACE(
REPLACE(
REPLACE(
REPLACE(Names,'</name>',''' as Names UNION ALL ')
,'<name>','SELECT ''')
,'</users>','')
,'<users>','')+CHAR(10)
FROM #Table
SET #Sql = LEFT(#Sql,LEN(#Sql)-11)
SET #Sql = 'SELECT COUNT(Names) AS Names FROM (' + #Sql + ') as AllNames'
EXEC(#Sql)

if you work with xml data then try this variant
DECLARE #XMLdata XML = N'<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>'
SELECT COUNT(*)
FROM #XMLdata.nodes('/users/name') col ( name )
This variant can be usefull when data storaged like a string (varchar)
--create temp table for testing
IF OBJECT_ID('Tempdb..#Tags') IS NOT NULL
DROP TABLE #Tags
CREATE TABLE #Tags
(
SampleText VARCHAR(1000)
)
INSERT INTO #Tags
( SampleText )
VALUES ( '<users><name>Tomek</name><name>Pawel</name><name>Krzysiek</name></users>' ),
( '<users><name>Somik</name><name>Pawel</name><name>Krzysiek</name></users>' ),
( '<users><name>Krolik</name><name>Pawel</name><name>Krzysiek</name></users>' ),
( '<users><name>Domik</name><name>Pawel</name><name>Krzysiek</name></users>' ),
( '<users><name>Zontik</name><name>Pawel</name><name>Krzysiek</name></users>' );
--------------------------------------------------------------------------------
-- recursive cte for split string
WITH cte
AS ( SELECT n = 1
UNION ALL
SELECT n + 1
FROM cte
WHERE n <= 1000
)
--------------------------------------------------------------------------------
-- final query
SELECT COUNT(*) AS Cnt
FROM cte
JOIN #Tags AS T ON n <= LEN(T.SampleText)
WHERE SUBSTRING(T.SampleText, n, 7) = '</name>'
OPTION ( MAXRECURSION 1000 )

Percentage of Values for Top 3 from a Character Field

I have an unusual situation. Please consider the following code:
IF OBJECT_ID('tempdb..#CharacterTest') IS NOT NULL
DROP TABLE #CharacterTest
CREATE TABLE #CharacterTest
(
[ID] int IDENTITY(1, 1) NOT NULL,
[CharField] varchar(50) NULL
)
INSERT INTO #CharacterTest (CharField)
VALUES ('A')
, ('A')
, ('A')
, ('A')
, ('B')
, ('B')
, ('B')
, ('C')
, ('C')
, ('D')
, ('D')
, ('F')
, ('G')
, ('H')
, ('I')
, ('J')
, ('K')
, ('L')
, ('M')
, ('N')
, (' ')
, (' ')
, (' ')
, (NULL)
, ('');
I would like a query which gives me a character string like this:
A (16%), B (12%), C(8%)
Please notice the following:
I don't want to have empty strings, strings with all blanks, or nulls listed in the top 3, but I do want the percentage of values calculated using the entire record count for the table.
Ties can be ignored, so if there were 22 values in the list with 8% frequency, it's alright to simply return whichever one is first.
Percentages can be rounded to whole numbers.
I'd like to find the easiest way to write this query while still retaining T-SQL compatibility back to SQL Server 2005. What is the best way to do this? Window Functions?

I'd go for.
WITH T1
AS (SELECT [CharField],
100.0 * COUNT(*) OVER (PARTITION BY [CharField]) /
COUNT(*) OVER () AS Pct
FROM #CharacterTest),
T2
AS (SELECT DISTINCT TOP 3 *
FROM T1
WHERE [CharField] <> '' --Excludes all blank or NULL as well
ORDER BY Pct DESC)
SELECT STUFF((SELECT ',' + [CharField] + ' (' + CAST(CAST(ROUND(Pct,1) AS INT) AS VARCHAR(3)) + ')'
FROM T2
ORDER BY Pct DESC
FOR XML PATH('')), 1, 1, '') AS Result

My first attempt would probably be this. Not saying that it's the best way to handle it, but that it would work.
DECLARE #TotalCount INT
SELECT #TotalCount = COUNT(*) FROM #CharacterTest AS ct
SELECT TOP(3) CharField, COUNT(*) * 1.0 / #TotalCount AS OverallPercentage
FROM #CharacterTest AS ct
WHERE CharField IS NOT NULL AND REPLACE(CharField, ' ', '') <> ''
GROUP BY CharField
ORDER BY COUNT(*) desc
DROP TABLE #CharacterTest

This should get the character string you need:
declare #output varchar(200);
with cte as (
select CharField
, (count(*) * 100) / (select count(*) from #CharacterTest) as CharPct
, row_number() over (order by count(*) desc, CharField) as RowNum
from #CharacterTest
where replace(CharField, ' ', '') not like ''
group by CharField
)
select #output = coalesce(#output + ', ', '') + CharField + ' (' + cast(CharPct as varchar(11)) + '%)'
from cte
where RowNum <= 3
order by RowNum;
select #output;
-- Returns:
-- A (16%), B (12%), C (8%)
I would draw attention to storing a single character in a varchar(50) column, however.

T-sql Common expression query as subquery

I have the following query:
WITH Orders(Id)
AS (
SELECT DISTINCT anfrageid FROM MPHotlineAnfrageAnhang
)
SELECT Id,
(
SELECT CONVERT(VARCHAR(255),anfragetext) + ' | '
FROM MPHotlineAnfrageAnhang
WHERE anfrageid = Id
ORDER BY anfrageid, erstelltam
FOR XML PATH('')
) AS Descriptions
FROM Orders
Its concatenates varchar values of diferents rows grouped by an id. But now i want to include it as a subquery and it gives some errors i cant solve.
Simplified example of use:
select descriptions from
(
WITH Orders(Id)
AS (
SELECT DISTINCT anfrageid FROM MPHotlineAnfrageAnhang
)
SELECT Id,
(
SELECT CONVERT(VARCHAR(255),anfragetext) + ' | '
FROM MPHotlineAnfrageAnhang
WHERE anfrageid = Id
ORDER BY anfrageid, erstelltam
FOR XML PATH('')
) AS Descriptions
FROM Orders
) as tx where id=100012
Errors (Aproximate translation from spanish):
-Incorrect sintaxis near 'WITH'.
-Incorrect sintaxis near 'WITH'. If the instruction is a common table expression or a xmlnamespaces clause, the previous instruction must end with semicolon.
-Incorrect sintaxis near ')'.
What im doing wrong?

Chain your queries as CTEs, like this:
WITH Orders(Id) AS (
SELECT DISTINCT anfrageid
FROM MPHotlineAnfrageAnhang
),
OrderDescs AS (
SELECT Id, (
SELECT CONVERT(VARCHAR(255),anfragetext) + ' | '
FROM MPHotlineAnfrageAnhang
WHERE anfrageid = Id
ORDER BY anfrageid, erstelltam
FOR XML PATH('')
) AS Description
FROM Orders
)
SELECT Description
FROM OrderDescs
WHERE Id = 100012
You can have as many CTEs as you like, each referencing the previous, before the actual query.

Also, you need to have a semi-colon before a WITH statement.
;with Orders(id)
Or terminate the previous statement with the semi-colon instead.

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

SQL Server 2008 - split multi-value column into rows with unique values - tsql

With a split function you use cross apply: select distinct SS.part from YourTable cross apply dbo.SplitString(YourColumn, ';') as SS Here the SplitString takes two arguments, the string column and the separator and has a column called part where the values are returned.

Related

How to move the data to the next line based on spaces in sqlserver 2008 R2

Sort data before concatenating using STUFF FOR XML

Counting words in column

Percentage of Values for Top 3 from a Character Field

T-sql Common expression query as subquery

Categories

Resources