Parse Prefix First Middle Last Suffix from full name - tsql

I need to parse a full name in the format, prefix first middle last suffix, but not all parts may be included. I have the prefix first middle and last working, but Jr gets stuffed in with the last name. How do I get the suffix to come out in a suffix column? Example includes data.
-- Splits a free-form full name into PREFIX, FIRST_NAME, MIDDLE_NAME and LAST_NAME.
-- Each nesting level peels one space-delimited token off the front:
--   SOURCE_DATA -> CLEAN_DATA (trim + collapse spaces) -> PREFIX -> FIRST_NAME -> final SELECT.
-- Known limitation (the subject of the question): there is no suffix handling, so a
-- trailing suffix stays in LAST_NAME, e.g. 'Howdy U Pokes Jr.' yields LAST_NAME 'Pokes Jr.'.
-- NOTE(review): prefix matching compares 'Ms ' against 'MS ', so it presumably relies on a
-- case-insensitive collation -- confirm for the target database.
SELECT
    FIRST_NAME.INPUT_DATA
    ,FIRST_NAME.PREFIX
    ,FIRST_NAME.FIRST_NAME
    ,CASE WHEN 0 = CHARINDEX(' ',FIRST_NAME.REMAINING)
        THEN NULL --no more spaces found, consider remaining to be last name
        ELSE SUBSTRING(
            FIRST_NAME.REMAINING
            ,1
            ,CHARINDEX(' ',FIRST_NAME.REMAINING)-1
        )
     END AS MIDDLE_NAME
    --everything after the first space; when there is no space CHARINDEX is 0,
    --so the substring starts at 1 and the whole remainder becomes the last name
    ,SUBSTRING(
        FIRST_NAME.REMAINING
        ,1 + CHARINDEX(' ',FIRST_NAME.REMAINING)
        ,LEN(FIRST_NAME.REMAINING)
    ) AS LAST_NAME
FROM
(
    SELECT
        PREFIX.PREFIX
        ,CASE WHEN 0 = CHARINDEX(' ',PREFIX.REMAINING)
            THEN PREFIX.REMAINING --no space found, return the entire string
            ELSE SUBSTRING(
                PREFIX.REMAINING
                ,1
                ,CHARINDEX(' ',PREFIX.REMAINING)-1
            )
         END AS FIRST_NAME
        ,CASE WHEN 0 = CHARINDEX(' ',PREFIX.REMAINING)
            THEN NULL --no spaces found, consider to be first name
            ELSE SUBSTRING(
                PREFIX.REMAINING
                ,CHARINDEX(' ',PREFIX.REMAINING)+1
                ,LEN(PREFIX.REMAINING)
            )
         END AS REMAINING
        ,PREFIX.INPUT_DATA
    FROM
    (
        SELECT --CLEAN_DATA
            --if first three characters match list,
            --parse as a "PREFIX". else return NULL for PREFIX.
            CASE WHEN SUBSTRING(CLEAN_DATA.FULL_NAME,1,3) IN ('MR ','MS ','DR ','MRS')
                THEN LTRIM(RTRIM(SUBSTRING(CLEAN_DATA.FULL_NAME,1,3)))
                ELSE NULL
             END AS PREFIX
            ,CASE WHEN SUBSTRING(CLEAN_DATA.FULL_NAME,1,3) IN ('MR ','MS ','DR ','MRS')
                THEN LTRIM(RTRIM(SUBSTRING(CLEAN_DATA.FULL_NAME,4,LEN(CLEAN_DATA.FULL_NAME))))
                ELSE LTRIM(RTRIM(CLEAN_DATA.FULL_NAME))
             END AS REMAINING
            ,CLEAN_DATA.INPUT_DATA
        FROM
        (
            SELECT
                --trim leading & trailing spaces to prepare for processing
                --collapse runs of spaces: each pass turns a double space into a single
                --space, so two passes normalise runs of up to four spaces
                REPLACE(REPLACE(LTRIM(RTRIM(FULL_NAME)),'  ',' '),'  ',' ') AS FULL_NAME
                ,FULL_NAME AS INPUT_DATA
            FROM
            (
                --test with test data, or table
                --table
                --SELECT CONTACT AS FULL_NAME
                --FROM CONTACT
                --test data
                --/*
                SELECT 'Andy D Where' AS FULL_NAME
                UNION SELECT 'Cathy T Landers' AS FULL_NAME
                UNION SELECT 'Ms Annie Wint There' AS FULL_NAME
                UNION SELECT 'Frank Fields' AS FULL_NAME
                UNION SELECT 'Howdy U Pokes Jr.' AS FULL_NAME
                --*/
            ) SOURCE_DATA
        ) CLEAN_DATA
    ) PREFIX
) FIRST_NAME
--credits to JStyons of course

Hope this helps. I have only added generational suffixes (Sr, Jr); if more are needed, add them to the CASE expressions. I am also assuming that your database is case insensitive.
Assumption (Business Rules):
First Name has no spaces
Middle Name has no spaces
Last name has no spaces
Prefixes are only of the form 'MR ','MS ','DR ','MRS' with no period "."
Suffixes are only of the form 'Sr', 'Jr', 'Sr.', 'Jr.'
The Database is case insensitive
-- Stage 1 of the name parser: trim each test name, strip a leading prefix and a
-- trailing Jr/Sr suffix, collapse runs of spaces, and materialise the result in
-- #cte_SpaceFix for the second stage.
IF OBJECT_ID('tempdb..#cte_SpaceFix') IS NOT NULL
    DROP TABLE #cte_SpaceFix
;WITH cte_OriginalData (FullName) AS
(
    -- sample input; UNION also removes duplicate names
    SELECT 'Andy D Where'
    UNION SELECT 'Cathy T Landers'
    UNION SELECT 'Ms Annie Wint There'
    UNION SELECT 'Ms Annie Wint There Jr'
    UNION SELECT 'Mrs Annie There Jr'
    UNION SELECT 'Frank Fields'
    UNION SELECT 'Howdy U Pokes Jr.'
    UNION SELECT 'Howdy U Pokes Sr.'
    UNION SELECT 'Cathy T Landers Jr'
    UNION SELECT 'Landers Jr'
)
, cte_FullNameRemoveTail AS
(
    -- strip leading/trailing blanks before any positional matching
    SELECT LTRIM(RTRIM(FullName)) AS FullName
    FROM cte_OriginalData
)
, cte_Parse_Prefix AS
(
    -- a prefix is recognised only from the first three characters
    SELECT
        CASE
            WHEN SUBSTRING(FullName, 1, 3) IN ('MR ','MS ','DR ','MRS')
                THEN LTRIM(RTRIM(SUBSTRING(FullName, 1, 3)))
        END AS Prefix,
        CASE
            WHEN SUBSTRING(FullName, 1, 3) IN ('MR ','MS ','DR ','MRS')
                THEN LTRIM(RTRIM(SUBSTRING(FullName, 4, 8000)))
            ELSE LTRIM(RTRIM(FullName))
        END AS FullFirst_Prefix,
        FullName
    FROM cte_FullNameRemoveTail
)
, cte_Parse_Suffix AS
(
    -- cut a trailing ' JR' / ' SR' (3 chars) or ' JR.' / ' SR.' (4 chars);
    -- the 3- and 4-character tests are mutually exclusive because they end in
    -- different characters
    SELECT
        Prefix,
        CASE
            WHEN RIGHT(FullFirst_Prefix,3) IN (' JR', ' SR')
                THEN LTRIM(RTRIM(SUBSTRING(FullFirst_Prefix,1,LEN(FullFirst_Prefix)-3)))
            WHEN RIGHT(FullFirst_Prefix,4) IN (' JR.', ' SR.')
                THEN LTRIM(RTRIM(SUBSTRING(FullFirst_Prefix,1,LEN(FullFirst_Prefix)-4)))
            ELSE LTRIM(RTRIM(FullFirst_Prefix))
        END AS FullFirst_Prefix_Suffix,
        CASE
            WHEN RIGHT(FullFirst_Prefix,3) = ' JR' OR RIGHT(FullFirst_Prefix,4) = ' JR.'
                THEN 'Jr'
            WHEN RIGHT(FullFirst_Prefix,3) = ' SR' OR RIGHT(FullFirst_Prefix,4) = ' SR.'
                THEN 'Sr'
        END AS Suffix,
        FullName
    FROM cte_Parse_Prefix
)
, cte_SpaceFix AS
(
    -- only names with more than two spaces are rewritten; longest runs first
    SELECT
        Prefix,
        CASE
            WHEN LEN(FullFirst_Prefix_Suffix) - LEN(REPLACE(FullFirst_Prefix_Suffix, ' ', '')) > 2
                THEN REPLACE(
                         REPLACE(
                             REPLACE(
                                 REPLACE(FullFirst_Prefix_Suffix, SPACE(5), SPACE(1)),
                                 SPACE(4), SPACE(1)),
                             SPACE(3), SPACE(1)),
                         SPACE(2), SPACE(1))
            ELSE FullFirst_Prefix_Suffix
        END AS FullFirst_Prefix_Suffix,
        Suffix,
        FullName
    FROM cte_Parse_Suffix
)
SELECT Prefix, FullFirst_Prefix_Suffix, Suffix, FullName
INTO #cte_SpaceFix
FROM cte_SpaceFix
-- Stage 2 of the name parser: reads #cte_SpaceFix (prefix and suffix already
-- removed) and splits what is left into FirstName / MiddleName / LastName.
--   one token    -> FirstName only
--   two tokens   -> FirstName, LastName
--   three tokens -> FirstName, MiddleName, LastName (any extra tokens stay in LastName)
-- The remainder is taken positionally with SUBSTRING rather than by REPLACE-ing
-- the first name out of the string: REPLACE removes every occurrence, so a name
-- such as 'Ann Mary Ann Smith' would lose its second 'Ann '.
-- LEFT(..., CHARINDEX - 1) keeps the delimiting space out of FirstName/MiddleName.
;WITH cte_Parse_FirstName(Prefix, FirstName, Suffix, FullFirst_Prefix_Suffix_FirstName, FullName) AS
(
    SELECT Prefix,
           CASE
               WHEN CHARINDEX(' ', FullFirst_Prefix_Suffix) > 0
                   THEN LEFT(FullFirst_Prefix_Suffix, CHARINDEX(' ', FullFirst_Prefix_Suffix) - 1)
               ELSE FullFirst_Prefix_Suffix -- single token (or NULL): all of it is the first name
           END AS FirstName,
           Suffix,
           CASE
               WHEN CHARINDEX(' ', FullFirst_Prefix_Suffix) > 0
                   THEN LTRIM(RTRIM(SUBSTRING(FullFirst_Prefix_Suffix, CHARINDEX(' ', FullFirst_Prefix_Suffix) + 1, 8000)))
               ELSE NULL -- nothing left after the first name
           END AS FullFirst_Prefix_Suffix_FirstName,
           FullName
    FROM #cte_SpaceFix
)
,cte_Parse_LastName(Prefix, FirstName, LastName, Suffix, MiddleName, FullName) AS
(
    SELECT Prefix,
           FirstName,
           CASE
               WHEN CHARINDEX(' ', FullFirst_Prefix_Suffix_FirstName) > 0
                   THEN LTRIM(SUBSTRING(FullFirst_Prefix_Suffix_FirstName, CHARINDEX(' ', FullFirst_Prefix_Suffix_FirstName) + 1, 8000))
               ELSE FullFirst_Prefix_Suffix_FirstName -- single token (or NULL) is the last name
           END AS LastName,
           Suffix,
           CASE
               WHEN CHARINDEX(' ', FullFirst_Prefix_Suffix_FirstName) > 0
                   THEN LEFT(FullFirst_Prefix_Suffix_FirstName, CHARINDEX(' ', FullFirst_Prefix_Suffix_FirstName) - 1)
               ELSE NULL -- no middle name unless at least two tokens remain
           END AS MiddleName,
           FullName
    FROM cte_Parse_FirstName
)
SELECT Prefix, FirstName, MiddleName, LastName, Suffix--, FullName
FROM cte_Parse_LastName
-- clean up the staging table created by the first stage
IF OBJECT_ID('tempdb..#cte_SpaceFix') IS NOT NULL
    DROP TABLE #cte_SpaceFix

Related

Compare two tables and find the missing column using left join

I want to compare the two tables employees and employees_a and find the columns that are missing from employees_a.
-- Question's original attempt (Oracle): list the columns of EMPLOYEES that have
-- no counterpart in EMPLOYEES_A by anti-joining USER_TAB_COLUMNS to itself.
-- NOTE(review): the trailing comma after a.Column_name is a syntax error as
-- written; it is presumably a transcription slip.
select a.Column_name,
From User_tab_columns a
LEFT JOIN User_tab_columns b
-- NOTE(review): this appends '_A' to b's table name, so a row of EMPLOYEES_A on
-- the b side can never equal 'EMPLOYEES' on the a side.
ON upper(a.table_name) = upper(b.table_name)||'_A'
AND a.column_name = b.column_name
Where upper(a.Table_name) = 'EMPLOYEES'
-- This filter needs a matched b row, while the next one needs an unmatched
-- (all-NULL) b row; both cannot hold at once, so no rows are returned.
AND upper(b.table_name) = 'EMPLOYEES_A'
AND b.column_name is NULL
;
But this doesn't seem to work: no rows are returned.
My employees table has the below columns
emp_name
emp_id
base_location
department
current_location
salary
manager
employees_a table has below columns
emp_name
emp_id
base_location
department
current_location
I want to find the rest two columns and add them into employees_a table.
I have more than 50 tables like this to compare them and find the missing column and add those columns into their respective "_a" table.
Missing columns? Why not using the MINUS set operator, seems to be way simpler, e.g.
-- Columns that exist in EMP_1 but not in EMP_2.
-- USER_TAB_COLUMNS is the dictionary view that exposes COLUMN_NAME;
-- USER_TABLES has one row per table and no such column.
select column_name from user_tab_columns where table_name = 'EMP_1'
minus
select column_name from user_tab_columns where table_name = 'EMP_2'
Firstly, check whether the user_tab_columns view contains the columns of your tables (in my case user_tab_columns is empty and I have to use all_tab_columns):
-- Sanity check: list the columns the data dictionary reports for EMPLOYEES.
SELECT utc.column_name
FROM user_tab_columns utc
WHERE UPPER(utc.table_name) = 'EMPLOYEES'
Secondly, remove line AND upper(b.table_name) = 'EMPLOYEES_A', because upper(b.table_name) is null in case a column is not found. You have b.table_name in JOIN part of the SELECT already.
-- Columns of EMPLOYEES (alias a) that have no same-named column in EMPLOYEES_A (alias b).
select a.Column_name
From User_tab_columns a
LEFT JOIN User_tab_columns b
-- the '_A' suffix belongs on the base-table side: b must be a's "_A" twin
ON upper(b.table_name) = upper(a.table_name)||'_A'
AND a.column_name = b.column_name
Where upper(a.Table_name) = 'EMPLOYEES'
-- anti-join: keep only the a rows for which no b row matched
AND b.column_name is NULL
You do not need any joins and can use:
-- Generates one ALTER TABLE ... ADD statement per column that exists in
-- EMPLOYEES but not in EMPLOYEES_A. Grouping by column name and counting rows
-- per table replaces the self-join; because the HAVING clause keeps only groups
-- with no EMPLOYEES_A row, MAX(...) reads the values of the EMPLOYEES row.
select 'ALTER TABLE EMPLOYEES_A ADD "'
    || Column_name || '" '
    || CASE MAX(data_type)
         WHEN 'NUMBER'
         THEN CASE
                -- unconstrained NUMBER: no precision or scale recorded
                WHEN MAX(data_precision) IS NULL AND MAX(data_scale) IS NULL
                THEN 'NUMBER'
                -- scale without precision, e.g. a column declared INTEGER
                WHEN MAX(data_precision) IS NULL
                THEN 'NUMBER(*,' || MAX(data_scale) || ')'
                ELSE 'NUMBER(' || MAX(data_precision) || ',' || NVL(MAX(data_scale), 0) || ')'
              END
         WHEN 'VARCHAR2'
         THEN 'VARCHAR2(' || MAX(data_length) || ')'
         -- other types (DATE, CLOB, ...) are emitted by name instead of producing
         -- a statement with no type. NOTE(review): length-bearing types such as
         -- CHAR or RAW would still need their size appended.
         ELSE MAX(data_type)
       END
    AS sql
From User_tab_columns
Where Table_name IN ('EMPLOYEES', 'EMPLOYEES_A')
GROUP BY COLUMN_NAME
HAVING COUNT(CASE table_name WHEN 'EMPLOYEES' THEN 1 END) = 1
AND COUNT(CASE table_name WHEN 'EMPLOYEES_A' THEN 1 END) = 0;
Or, for multiple tables:
-- Multi-table variant: for every base table / "_A" table pair in the IN list,
-- generates an ALTER TABLE <base>_A ADD statement for each column the base
-- table has and its "_A" twin lacks.
-- In LIKE, '_' matches any single character, so the underscore is escaped;
-- otherwise any table name ending in 'A' (e.g. 'DATA') would count as an "_A" table.
select 'ALTER TABLE ' || MAX(table_name) || '_A ADD "'
    || Column_name || '" '
    || CASE MAX(data_type)
         WHEN 'NUMBER'
         THEN CASE
                -- unconstrained NUMBER: no precision or scale recorded
                WHEN MAX(data_precision) IS NULL AND MAX(data_scale) IS NULL
                THEN 'NUMBER'
                -- scale without precision, e.g. a column declared INTEGER
                WHEN MAX(data_precision) IS NULL
                THEN 'NUMBER(*,' || MAX(data_scale) || ')'
                ELSE 'NUMBER(' || MAX(data_precision) || ',' || NVL(MAX(data_scale), 0) || ')'
              END
         WHEN 'VARCHAR2'
         THEN 'VARCHAR2(' || MAX(data_length) || ')'
         -- other types are emitted by name.
         -- NOTE(review): length-bearing types such as CHAR still need their size.
         ELSE MAX(data_type)
       END
    AS sql
From User_tab_columns
Where Table_name IN ('EMPLOYEES', 'EMPLOYEES_A', 'SOMETHING', 'SOMETHING_A')
GROUP BY
    -- group an "_A" table together with its base table by stripping the suffix
    CASE
      WHEN table_name LIKE '%\_A' ESCAPE '\'
      THEN SUBSTR(table_name, 1, LENGTH(table_name) - 2)
      ELSE table_name
    END,
    COLUMN_NAME
-- keep groups where only the base table has the column, so MAX(table_name)
-- above is the base table's name
HAVING COUNT(CASE WHEN table_name NOT LIKE '%\_A' ESCAPE '\' THEN 1 END) = 1
AND COUNT(CASE WHEN table_name LIKE '%\_A' ESCAPE '\' THEN 1 END) = 0;
fiddle

Sort data before concatenating using STUFF FOR XML

I have the following query that I am using for an SSRS Report:
-- One row per judge of event 1278, with BoutsJudged holding a comma-separated
-- list of the distinct fight numbers on which the person sat as judge 1, 2 or 3.
-- The list is built by concatenating ',' + number via FOR XML PATH('') and then
-- removing the leading comma with STUFF. The subquery has no ORDER BY.
SELECT
    ROW_NUMBER() OVER (ORDER BY Judge.EventJudgeID) AS JudgeRow,
    Judge.EventID,
    Judge.Judge_PersonID,
    STUFF(
        (
            SELECT DISTINCT
                ',' + CAST(Fights.FightNumber AS VARCHAR(MAX)) AS [text()]
            FROM dbo.tblFights AS Fights
            INNER JOIN dbo.tblFightJudge AS FRJudge
                ON Fights.FightID = FRJudge.fightid
            WHERE Judge.Judge_PersonID IN (FRJudge.judge1id, FRJudge.judge2id, FRJudge.judge3id)
            FOR XML PATH('')
        ),
        1, 1, ''
    ) AS BoutsJudged,
    Persons.LastName + ' ' + Persons.FirstName AS JudgeName,
    Events.EventName,
    Events.EventDate
FROM dbo.tblEventJudge AS Judge
INNER JOIN dbo.tblPersons AS Persons
    ON PersonID = Judge_PersonID
INNER JOIN dbo.tblEvents AS Events
    ON Events.EventID = Judge.EventID
WHERE Judge.EventID = 1278;
The problem is that the STUFF command returns the following string:
1,10,11,12,13,14,15,16,17,18,19,2,3,4,5,6,7,8,9
How can I make it sort the numbers before concatenating it into a string?
Try this
-- Same report as the question's query, but the fight numbers are de-duplicated
-- in a derived table (X) first, so the outer SELECT can ORDER BY the raw
-- FightNumber before it is cast to text and concatenated.
-- NOTE(review): the order is numeric only if FightNumber is a numeric column -- confirm.
SELECT ROW_NUMBER() OVER ( ORDER BY Judge.EventJudgeID ) AS JudgeRow ,
       Judge.EventID ,
       Judge.Judge_PersonID ,
       -- STUFF takes the FOR XML subquery as a parenthesised scalar expression
       STUFF(( SELECT ',' + CAST(X.FightNumber AS VARCHAR(MAX)) AS [text()]
               FROM ( SELECT DISTINCT Fights.FightNumber
                      FROM dbo.tblFights Fights
                      INNER JOIN dbo.tblFightJudge FRJudge
                          ON Fights.FightID = FRJudge.fightid
                      WHERE ( Judge.Judge_PersonID = FRJudge.judge1id
                              OR Judge.Judge_PersonID = FRJudge.judge2id
                              OR Judge.Judge_PersonID = FRJudge.judge3id
                            )
                    ) X
               -- only the derived table's alias is in scope here, not Fights
               ORDER BY X.FightNumber
               FOR
               XML PATH('')
             ), 1, 1, '') AS BoutsJudged ,
       Persons.LastName + ' ' + Persons.FirstName AS JudgeName ,
       Events.EventName ,
       Events.EventDate
FROM dbo.tblEventJudge Judge
INNER JOIN dbo.tblPersons Persons ON PersonID = Judge_PersonID
INNER JOIN dbo.tblEvents Events ON Events.EventID = Judge.EventID
WHERE Judge.EventID = 1278;
You can check below sqls,
Before :
-- "Before": DISTINCT is applied to the already-cast text and there is no
-- ORDER BY inside the concatenating subquery.
SELECT
    *,
    STUFF(
        (
            SELECT DISTINCT ',' + CAST(high AS varchar(MAX))
            FROM master..spt_values
            WHERE type = 'p'
              AND number < 20
            FOR XML PATH('')
        ),
        1, 1, ''
    )
FROM master..spt_values
WHERE type = 'p'
  AND number < 20
After :
-- "After": de-duplicate the raw values in a derived table, then order by the
-- uncast column while concatenating.
SELECT
    *,
    STUFF(
        (
            SELECT ',' + CAST(high AS varchar(MAX))
            FROM (
                SELECT DISTINCT high
                FROM master..spt_values
                WHERE type = 'p'
                  AND number < 20
            ) x
            ORDER BY high
            FOR XML PATH('')
        ),
        1, 1, ''
    )
FROM master..spt_values
WHERE type = 'p'
  AND number < 20
I apologize for this solution being pedantic, but I have a hard time parsing code and need to see things in steps. Also, Microsoft adds a feature to do this in the 2012 release, but this code should work in most releases. First, use a database open to users in most SQLServers...
-- Switch to master and peek at three rows of column metadata.
USE MASTER;
SELECT TOP 3
    TABLE_NAME,
    COLUMN_NAME,
    ORDINAL_POSITION
FROM INFORMATION_SCHEMA.COLUMNS;
/*TABLE_NAME COLUMN_NAME ORDINAL_POSITION
spt_fallback_db xserver_name 1
spt_fallback_db xdttm_ins 2
spt_fallback_db xdttm_last_ins_upd 3
*/
Now, breaking down this approach (to sorting a list within a column)...
(1) Adding FOR XML PATH('') to a 1 column query pivots it to one row, but adds XML tags for the column header...
-- A named column under FOR XML PATH('') is wrapped in an element named after the column.
SELECT TOP 3
    COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
FOR XML PATH('');
/*<COLUMN_NAME>xserver_name</COLUMN_NAME><COLUMN_NAME>xdttm_ins</COLUMN_NAME><COLUMN_NAME>xdttm_last_ins_upd</COLUMN_NAME>*/
(2) Concatenation nullifies the column header, eliminating the tags. Any string will work; I want comma space...
-- The concatenated expression has no column name, so no element tags are emitted.
SELECT TOP 3
    ', ' + COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
FOR XML PATH('');
/*, xserver_name, xdttm_ins, xdttm_last_ins_upd*/
(3) Other columns will need their own SELECT, so FOR XML must be a subquery, and ORDER BY is a legal prefix in a FOR XML subquery ;)...
-- Per table: its column names as one ', '-prefixed string, ordered by ordinal
-- position inside the correlated FOR XML subquery.
SELECT TOP 2
    t.TABLE_NAME,
    (
        SELECT ', ' + c.COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS AS c
        WHERE c.TABLE_NAME = t.TABLE_NAME
        ORDER BY c.ORDINAL_POSITION
        FOR XML PATH('')
    ) AS LIST_OF_COLUMNS
FROM INFORMATION_SCHEMA.TABLES AS t
ORDER BY t.TABLE_NAME;
/*TABLE_NAME LIST_OF_COLUMNS
spt_fallback_db , xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_dbid, name, dbid, status, version
spt_fallback_dev , xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_low, xfallback_drive, low, high, status, name, phyname
*/
(4) Finally, SUBSTRING is more familiar to me than STUFF for removing a known prefix...
-- Same list as the previous step, with the leading ', ' removed by starting the
-- substring at character 3 (two prefix characters + 1).
SELECT TOP 2
    t.TABLE_NAME,
    SUBSTRING(
        (
            SELECT ', ' + c.COLUMN_NAME
            FROM INFORMATION_SCHEMA.COLUMNS AS c
            WHERE c.TABLE_NAME = t.TABLE_NAME
            ORDER BY c.ORDINAL_POSITION
            FOR XML PATH('')
        ),
        3,      -- first character after the 2-character prefix
        99999
    ) AS LIST_OF_COLUMNS
FROM INFORMATION_SCHEMA.TABLES AS t
ORDER BY t.TABLE_NAME;
/*TABLE_NAME LIST_OF_COLUMNS
spt_fallback_db xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_dbid, name, dbid, status, version
spt_fallback_dev xserver_name, xdttm_ins, xdttm_last_ins_upd, xfallback_low, xfallback_drive, low, high, status, name, phyname
*/
Pedantically yours - Jim Gettman

Postgres substring error

I am receiving an error when creating a view converted from code at website http://pratchev.blogspot.com/2007/02/passing-variable-to-in-list.html. ERROR: function pg_catalog.substring(text,bigint,integer) does not exist; #7. Appreciate your help.
Code:
-- Question's original attempt: a recursive CTE walks tblbulksalesforce from the
-- root rows (parentid IS NULL) downwards, accumulating each row's ancestor ids
-- in Parents as comma-terminated text ('12,34,') and counting the depth in steps.
-- The final SELECT then tries to split Parents back into ids with a
-- numbers-table technique ported from SQL Server.
WITH recursive Hierarchy(ChildId, SubRepInitials, ParentId, Parents, steps)
AS
(
-- anchor: top-level rows start with an empty ancestor list
SELECT salesforceid, salesforceinitials, parentid, CAST('' AS TEXT), 0 as steps
FROM tblbulksalesforce AS FirstGeneration
WHERE parentid IS NULL AND salesforceinitials IS NOT NULL
UNION ALL
-- recursive step: append this row's parent id to the list inherited from the parent
SELECT NextGeneration.salesforceid, NextGeneration.salesforceinitials, Parent.ChildId,
CAST(CASE WHEN Parent.Parents = ''
THEN(CAST(NextGeneration.parentid AS TEXT) || ',')
ELSE(Parent.Parents || CAST(NextGeneration.parentid AS TEXT) || ',')
END AS TEXT), Parent.steps +1 as steps
FROM tblbulksalesforce AS NextGeneration
INNER JOIN Hierarchy AS Parent ON NextGeneration.parentid = Parent.ChildId
WHERE NextGeneration.salesforceinitials IS NOT NULL
)
-- NOTE(review): ISNULL is a SQL Server function; PostgreSQL uses COALESCE.
SELECT ISNULL(h.ParentId,h.ChildId) AS ParentId, h.ChildId
, h.SubRepInitials, h.Parents, steps
,Generation0.salesforceinitials AS RepInitials
,parent.salesforceinitials AS RepInitialsParent
FROM Hierarchy AS h
LEFT JOIN tblbulksalesforce AS parent ON parent.RecordID = h.ParentId
LEFT JOIN tblbulksalesforce AS Generation0 ON Generation0.RecordID IN (
-- NOTE(review): strpos takes two arguments (string, substring); the
-- three-argument call is a leftover of SQL Server's CHARINDEX.
(SELECT SUBSTRING(string, 2, strpos(',', string, 2) - 2)
-- n comes from ROW_NUMBER(), which is bigint; this SUBSTRING(text, bigint, integer)
-- call is the signature named in the reported error.
FROM (SELECT SUBSTRING(list, n, character_length(list))
FROM (SELECT ',' || h.Parents || ',') AS L(list),
(SELECT ROW_NUMBER() OVER (ORDER BY parentid)
FROM Hierarchy) AS Nums(n)
WHERE n <= character_length(list)) AS D(string)
WHERE character_length(string) > 1 AND SUBSTRING(string, 1, 1) = ',')
) OR Generation0.RecordID = h.ChildId;
Please see Postgres docs for how to use strpos() and substr(). Note that substring() is a completely different function with a different format for its arguments.
Try this:
-- Recursive CTE: walks tblbulksalesforce from the root rows (parentid IS NULL)
-- downwards, accumulating each row's ancestor ids in Parents as
-- comma-terminated text ('12,34,') and counting the depth in steps.
WITH RECURSIVE Hierarchy(ChildId, SubRepInitials, ParentId, Parents, steps)
AS
(
    -- anchor: top-level rows start with an empty ancestor list
    SELECT salesforceid, salesforceinitials, parentid, CAST('' AS TEXT), 0 AS steps
    FROM tblbulksalesforce AS FirstGeneration
    WHERE parentid IS NULL AND salesforceinitials IS NOT NULL
    UNION ALL
    -- recursive step: append this row's parent id to the inherited list
    SELECT NextGeneration.salesforceid, NextGeneration.salesforceinitials, Parent.ChildId,
           CAST(CASE WHEN Parent.Parents = ''
                     THEN (CAST(NextGeneration.parentid AS TEXT) || ',')
                     ELSE (Parent.Parents || CAST(NextGeneration.parentid AS TEXT) || ',')
                END AS TEXT), Parent.steps + 1 AS steps
    FROM tblbulksalesforce AS NextGeneration
    INNER JOIN Hierarchy AS Parent ON NextGeneration.parentid = Parent.ChildId
    WHERE NextGeneration.salesforceinitials IS NOT NULL
)
-- COALESCE replaces SQL Server's ISNULL, which PostgreSQL does not provide
SELECT COALESCE(h.ParentId, h.ChildId) AS ParentId, h.ChildId
     , h.SubRepInitials, h.Parents, steps
     , Generation0.salesforceinitials AS RepInitials
     , parent.salesforceinitials AS RepInitialsParent
FROM Hierarchy AS h
LEFT JOIN tblbulksalesforce AS parent ON parent.RecordID = h.ParentId
-- The subquery yields text fragments, so RecordID is compared as text.
-- A single pair of parentheses keeps the subquery a set for IN rather than a
-- scalar expression. NOTE(review): confirm RecordID's type; the cast is a
-- no-op if it is already text.
LEFT JOIN tblbulksalesforce AS Generation0 ON CAST(Generation0.RecordID AS TEXT) IN (
    -- For every position n where the padded list has a comma, return the text
    -- between that comma and the next one. strpos(string, substring) searches
    -- from the start, so the search is run on the tail after the leading comma;
    -- the comma's offset in that tail, minus one, is the id's length.
    SELECT SUBSTR(string, 2, strpos(SUBSTR(string, 2), ',') - 1)
    FROM (SELECT SUBSTR(list, n, character_length(list))
          FROM (SELECT ',' || h.Parents || ',') AS L(list),
               -- ROW_NUMBER() is bigint; substr() needs integer positions
               (SELECT CAST(ROW_NUMBER() OVER (ORDER BY parentid) AS INTEGER)
                FROM Hierarchy) AS Nums(n)
          WHERE n <= character_length(list)) AS D(string)
    WHERE character_length(string) > 1 AND SUBSTR(string, 1, 1) = ','
) OR Generation0.RecordID = h.ChildId;

Percentage of Values for Top 3 from a Character Field

I have an unusual situation. Please consider the following code:
-- Rebuild the #CharacterTest fixture: 25 rows, of which 20 carry a letter and
-- 5 are blank, NULL or empty.
IF OBJECT_ID('tempdb..#CharacterTest') IS NOT NULL
    DROP TABLE #CharacterTest
CREATE TABLE #CharacterTest
(
    ID int IDENTITY(1, 1) NOT NULL,
    CharField varchar(50) NULL
)
INSERT INTO #CharacterTest (CharField)
VALUES
    ('A'), ('A'), ('A'), ('A'),
    ('B'), ('B'), ('B'),
    ('C'), ('C'),
    ('D'), ('D'),
    ('F'), ('G'), ('H'), ('I'), ('J'), ('K'), ('L'), ('M'), ('N'),
    -- blank, NULL and empty values
    (' '), (' '), (' '),
    (NULL),
    ('');
I would like a query which gives me a character string like this:
A (16%), B (12%), C(8%)
Please notice the following:
I don't want to have empty strings, strings with all blanks, or nulls listed in the top 3, but I do want the percentage of values calculated using the entire record count for the table.
Ties can be ignored, so if there were 22 values in the list with 8% frequency, it's alright to simply return whichever one is first.
Percentages can be rounded to whole numbers.
I'd like to find the easiest way to write this query while still retaining T-SQL compatibility back to SQL Server 2005. What is the best way to do this? Window Functions?
I'd go for.
-- Builds 'A (16%), B (12%), C (8%)': the three most frequent non-blank values,
-- each with its share of ALL rows in #CharacterTest.
WITH T1
AS (SELECT [CharField],
           -- COUNT(*) OVER () is the full row count, blanks and NULLs included
           100.0 * COUNT(*) OVER (PARTITION BY [CharField]) /
           COUNT(*) OVER () AS Pct
    FROM #CharacterTest),
T2
AS (SELECT DISTINCT TOP 3 [CharField], Pct
    FROM T1
    WHERE [CharField] <> '' --Excludes all blank or NULL as well
    -- CharField breaks ties so the chosen three are deterministic
    ORDER BY Pct DESC, [CharField])
-- ROUND(Pct, 0) rounds to a whole number before the INT cast; the cast alone
-- would truncate. STUFF removes the leading two-character ', ' separator.
SELECT STUFF((SELECT ', ' + [CharField] + ' (' + CAST(CAST(ROUND(Pct, 0) AS INT) AS VARCHAR(3)) + '%)'
              FROM T2
              ORDER BY Pct DESC, [CharField]
              FOR XML PATH('')), 1, 2, '') AS Result
My first attempt would probably be this. Not saying that it's the best way to handle it, but that it would work.
-- Top three non-blank values with their share of all rows.
-- T-SQL local variables take the @ sigil; # names a temporary table.
DECLARE @TotalCount INT
-- the denominator is every row, blanks and NULLs included
SELECT @TotalCount = COUNT(*) FROM #CharacterTest AS ct
-- "* 1.0" forces decimal division; the result is a fraction (0.16), not 16
SELECT TOP(3) CharField, COUNT(*) * 1.0 / @TotalCount AS OverallPercentage
FROM #CharacterTest AS ct
WHERE CharField IS NOT NULL AND REPLACE(CharField, ' ', '') <> ''
GROUP BY CharField
ORDER BY COUNT(*) desc
DROP TABLE #CharacterTest
This should get the character string you need:
-- Builds the 'A (16%), B (12%), C (8%)' string by accumulating into a variable.
-- T-SQL local variables take the @ sigil; # names a temporary table.
declare @output varchar(200);
with cte as (
    select CharField
    -- integer division: whole-number percentage of ALL rows in the table
    , (count(*) * 100) / (select count(*) from #CharacterTest) as CharPct
    -- CharField breaks ties so the ranking is deterministic
    , row_number() over (order by count(*) desc, CharField) as RowNum
    from #CharacterTest
    -- drops empty and all-blank values; NULLs fail the predicate too
    where replace(CharField, ' ', '') not like ''
    group by CharField
)
-- coalesce skips the separator on the first row, while @output is still NULL
select @output = coalesce(@output + ', ', '') + CharField + ' (' + cast(CharPct as varchar(11)) + '%)'
from cte
where RowNum <= 3
order by RowNum;
select @output;
-- Returns:
-- A (16%), B (12%), C (8%)
I would draw attention to storing a single character in a varchar(50) column, however.

TSQL not generating a new value per row

I'm trying to anonymize all the data in my database, so I'm renaming all the people in it. I asked a similar question earlier, and was told to use NewID to force the creation of a new value per updated row, but in this situation it doesn't seem to be working.
What am I doing wrong?
-- Question's original script: load lists of first and last names from text
-- files into temp tables, then overwrite tblTenant.TenantName with a
-- 'LASTNAME, FIRSTNAME' pair picked by a pseudo-random ID.
-- Staging tables for the imported name lists
CREATE TABLE #FirstName
(
ID int,
FirstName nvarchar(255) NULL,
Gender nvarchar(255) NULL
)
CREATE TABLE #LastName (
ID int,
LastName nvarchar(255)
)
-- BULK INSERT the comma-separated name files, one record per line
BULK INSERT #FirstName
FROM 'C:\Users\jhollon\Desktop\tmp\names\firstnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
BULK INSERT #LastName
FROM 'C:\Users\jhollon\Desktop\tmp\names\lastnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
/*SELECT FirstName FROM #FirstName WHERE ID = (
SELECT RandomNumber FROM (
SELECT ABS(CHECKSUM(NewID())) % 1500 AS RandomNumber FROM tblTenant WHERE Sex = '1'
) AS A
);*/
-- Tenants with Sex = '2': first-name ID drawn from 1501..2000, last-name ID from 1..200.
-- NOTE(review): the subquery does not reference the row being updated; the
-- thread reports that every row therefore receives the same name.
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1501)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '2';
-- Tenants with Sex = '1': first-name ID drawn from 1..500, last-name ID from 1..200.
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '1';
-- Remove the staging tables
DROP TABLE #FirstName;
DROP TABLE #LastName;
Correct. The subquery is evaluated once which is as advertised ("cachable scalar subquery")
Try this which uses NEWID as a derived table
-- Give every tenant a randomly chosen 'LastName, FIRSTNAME' pair.
-- Each CROSS APPLY picks one random row (TOP 1 ... ORDER BY NEWID()); the WHERE
-- clause references T.ID, which ties the subquery to the outer row. The thread
-- presents this correlation as what makes the pick differ per tenant.
UPDATE T
SET TenantName = ln.LastName + ', ' + fn.FirstName
FROM tblTenant AS T
CROSS APPLY
(
    SELECT TOP 1 UPPER(FirstName) AS FirstName
    FROM #FirstName
    WHERE CHECKSUM(NEWID()) <> T.ID
    ORDER BY NEWID()
) AS fn
CROSS APPLY
(
    SELECT TOP 1 LastName
    FROM #LastName
    WHERE CHECKSUM(NEWID()) <> T.ID
    ORDER BY NEWID()
) AS ln
I'm not sure I understand your question, but if you want the ID to be unique values, you can make it an identity column.
Ex:
[ID] [int] IDENTITY(1,1) NOT NULL
The code below demonstrates that without an inner to outer correlation, that the old name is not guaranteed to differ from the new name when using the CROSS APPLY answer above.
WHERE F.Id <> T.Id ORDER BY NEWID() would be better within the FirstName CROSS APPLY
-- Demo: the CROSS APPLY random pick filters on CHECKSUM(NEWID()) <> T.ID, which
-- never compares the candidate name with the tenant's current name, so a tenant
-- can be handed back the name it already has.
USE tempdb
GO
-- Drop leftovers from a previous run so the script can be re-executed in the
-- same session (SELECT ... INTO fails if the temp table already exists).
IF OBJECT_ID('tblTenant') IS NOT NULL
    DROP TABLE tblTenant
IF OBJECT_ID('tempdb..#FirstNames') IS NOT NULL
    DROP TABLE #FirstNames
IF OBJECT_ID('tempdb..#LastNames') IS NOT NULL
    DROP TABLE #LastNames
GO
CREATE TABLE tblTenant
(
    Id int,
    FirstName nvarchar(20),
    LastName nvarchar(20),
    Gender bit
)
INSERT INTO tblTenant (Id, FirstName, LastName, Gender)
VALUES (1, 'Bob' , 'Marley', 1),
       (2, 'Boz' , 'Skaggs', 1)
-- Candidate pools are the tenants' own names
SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant
SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant
-- There is a probability > 0 that a tenant's new name = tenants old name
SELECT
    OldFirst = T.FirstName,
    OldLast = T.LastName,
    NewFirst = F.FirstName,
    NewLast = L.LastName
FROM
    tblTenant T
CROSS APPLY
(
    SELECT TOP 1 UPPER(FirstName) AS FirstName
    FROM #FirstNames
    WHERE CHECKSUM(NEWID()) <> T.ID
    ORDER BY NEWID()
) F
CROSS APPLY
(
    SELECT TOP 1 LastName
    FROM #LastNames
    WHERE CHECKSUM(NEWID()) <> T.ID
    ORDER BY NEWID()
) L