Saving Substrings from a Cross Apply into a Variable - tsql

I need to transform a column returning '12 x 18 x 18 in' into 3 different columns by just grabbing the 12 18 18.
I need to store these into variables to do an insert further down because this is part of a large stored procedure doing data translation from one DB to another.
Using an example from a previous question I created this:
-- Parse a dimensions string such as '12 x 18 x 18 in' and store the three
-- numbers in variables for use later in the procedure.
-- Assumes the value has the form '<n> x <n> x <n> <two-character unit>'.
DECLARE
    @HeightT VARCHAR(10),
    @LengthT VARCHAR(10),
    @WidthT  VARCHAR(10);

SELECT TOP (1)
    -- The raw pieces keep the blanks around each 'x'; trim them before storing.
    @HeightT = LTRIM(RTRIM(ParsedData.Height)),
    @LengthT = LTRIM(RTRIM(ParsedData.Length)),
    @WidthT  = LTRIM(RTRIM(ParsedData.Width))
FROM DB.dbo.[Table] AS sh
-- Appending 'xxx in' guarantees each CHARINDEX below finds an 'x'.
CROSS APPLY (SELECT str = sh.Dimensions + 'xxx in') AS f1
CROSS APPLY (SELECT p1 = CHARINDEX('x', f1.str)) AS ap1
CROSS APPLY (SELECT p2 = CHARINDEX('x', f1.str, ap1.p1 + 1)) AS ap2
CROSS APPLY (SELECT p3 = CHARINDEX('x', f1.str, ap2.p2 + 1)) AS ap3
CROSS APPLY (
    SELECT
        Height = SUBSTRING(f1.str, 1, ap1.p1 - 1),
        Length = SUBSTRING(f1.str, ap1.p1 + 1, ap2.p2 - ap1.p1 - 1),
        -- "- 3" rather than "- 1": also drops the two-character unit ("in").
        Width  = SUBSTRING(f1.str, ap2.p2 + 1, ap3.p3 - ap2.p2 - 3)
) AS ParsedData
WHERE sh.ID = '1111111';
-From How to split a comma-separated value to columns
Is it possible to grab the Height, Length and Width and store them in those variables so I can then use them in an insert later on in the stored procedure?
*Note this does work as far as just selecting the parsed values, but this is not what I need. The cross apply is the part giving me difficulties.
Answer:
Using T I's suggestion I made this:
-- Stage the Dimensions value (e.g. '12 x 18 x 18 in') and split it into three
-- INT variables. Both 'x' separators are located from the left, so the result
-- does not depend on how many digits each number has.
-- NOTE: a value without two 'x' separators still raises an invalid-length error.
DECLARE @tbl TABLE (col VARCHAR(255));

INSERT INTO @tbl (col)
SELECT Dimensions
FROM DB.dbo.[Table]
WHERE ID = '1111111';

DECLARE @Height INT, @Length INT, @Width INT, @Temp VARCHAR(255);

SELECT TOP (1)
    -- Everything before the first 'x'.
    @Height = CAST(LEFT(col, CHARINDEX('x', col) - 1) AS INT),
    -- Everything between the first and second 'x'.
    @Length = CAST(SUBSTRING(col,
                             CHARINDEX('x', col) + 1,
                             CHARINDEX('x', col, CHARINDEX('x', col) + 1) - CHARINDEX('x', col) - 1) AS INT),
    -- Everything after the second 'x', e.g. '18 in'.
    @Temp = LTRIM(SUBSTRING(col, CHARINDEX('x', col, CHARINDEX('x', col) + 1) + 1, 255))
FROM @tbl;

-- Keep only the leading number of '18 in'; the appended ' ' covers a value with no unit.
SET @Width = CAST(LEFT(@Temp, CHARINDEX(' ', @Temp + ' ') - 1) AS INT);

SELECT Height = @Height, Length = @Length, Width = @Width;

This can be done without cross apply.
-- Split 'H x W x D' into three INT variables without CROSS APPLY.
-- CAST(... AS int) tolerates the blanks left around each number, so only the
-- 'x' itself is removed; this also handles input without blanks ('18x18x12').
-- A trailing unit such as ' in' must be stripped before using this snippet.
DECLARE @tbl TABLE (col VARCHAR(255));

INSERT INTO @tbl (col)
VALUES ('18 x 18 x 12');

DECLARE @height INT, @width INT, @depth INT;

SELECT TOP (1)
    -- Text before the first 'x'.
    @height = CAST(LEFT(col, CHARINDEX('x', col) - 1) AS int),
    -- Remove everything up to and including the first 'x', then take the text
    -- before the next 'x'.
    @width = CAST(LEFT(STUFF(col, 1, CHARINDEX('x', col), ''),
                       CHARINDEX('x', STUFF(col, 1, CHARINDEX('x', col), '')) - 1) AS int),
    -- Text after the last 'x', found by searching the reversed string.
    @depth = CAST(REVERSE(LEFT(REVERSE(col), CHARINDEX('x', REVERSE(col)) - 1)) AS int)
FROM @tbl;

SELECT height = @height, width = @width, depth = @depth;

Related

Sum of all digits boiled down into a single digit

I have a table as under
numbers
794
709090
what I need is a sum of all the digits such as 7+9+4 = 20; 2+0=2 or 7+0+9+0+9+0=25; 2+5=7.
I tried with the below script but somehow not working:
-- For each number, sum its decimal digits and reduce that sum to a single digit.
DECLARE @t TABLE (numbers INT);

INSERT INTO @t (numbers)
SELECT 794 UNION ALL SELECT 709090;

DECLARE @maxValue INT;
SELECT @maxValue = MAX(numbers) FROM @t;

;WITH cte AS (
    SELECT
        t.numbers,
        -- SUBSTRING past the end of a shorter number yields '', which casts to 0.
        SUM(CAST(SUBSTRING(CAST(t.numbers AS VARCHAR(11)), x.number, 1) AS INT)) AS SUMOFDIGITS
    FROM @t AS t
    CROSS APPLY (
        -- Digit positions 1..N, where N is the digit count of the largest value.
        -- LEN of the string form is used; DATALENGTH of an INT is always 4 bytes.
        SELECT DISTINCT v.number
        FROM master..spt_values AS v
        WHERE v.number > 0
          AND v.number <= LEN(CAST(@maxValue AS VARCHAR(11)))
    ) AS x
    GROUP BY t.numbers  -- one sum per input number, not one sum for the whole table
)
SELECT
    numbers,
    SUMOFDIGITS,
    -- Digital root of the digit sum; correct for sums of any digit count.
    finalsum = (SUMOFDIGITS - 1) % 9 + 1
FROM cte;
-- Closed-form digital root: repeatedly summing the decimal digits of @a until
-- one digit remains equals (@a - 1) % 9 + 1. Valid for @a >= 0 (0 yields 0).
DECLARE @a INT = 709090;

SELECT (@a - 1) % 9 + 1 AS digital_root;

reuse table data in round robin manner

Let us say I have some data I would like to repeat N times. A naive approach would be this:
-- Naive (row-by-row) approach: repeat the rows of dbo.Data round-robin until
-- @RowsRequired rows have been written to #TempData.
IF OBJECT_ID('dbo.Data', 'U') IS NOT NULL
    DROP TABLE dbo.Data;

-- Schema-qualified so the table created is the one the check above drops.
CREATE TABLE dbo.Data
(
    DataId INT NOT NULL PRIMARY KEY,
    DataValue NVARCHAR(MAX) NOT NULL
);

INSERT INTO dbo.Data (DataId, DataValue)
SELECT 1, 'Value1' UNION ALL
SELECT 2, 'Value2' UNION ALL
SELECT 3, 'Value3' UNION ALL
SELECT 4, 'Value4' UNION ALL
SELECT 5, 'Value5';

DECLARE @RowsRequired INT;
DECLARE @Counter INT;
DECLARE @NumberOfRows INT;

SET @RowsRequired = 22;

IF OBJECT_ID('tempdb..#TempData') IS NOT NULL
    DROP TABLE #TempData;

CREATE TABLE #TempData
(
    Id INT IDENTITY(1,1),
    DataValue NVARCHAR(MAX)
);

SELECT @NumberOfRows = COUNT(*) FROM dbo.Data;

SET @Counter = 1;

-- The loop looks rows up by DataId = @Counter, so it relies on DataId being
-- the contiguous range 1..@NumberOfRows.
WHILE @RowsRequired > 0
BEGIN
    INSERT INTO #TempData (DataValue)
    SELECT DataValue
    FROM dbo.Data
    WHERE DataId = @Counter;

    SET @Counter = @Counter + 1;
    SET @RowsRequired = @RowsRequired - 1;

    -- Wrap around to the first row once the last one has been used.
    IF (@Counter > @NumberOfRows)
    BEGIN
        SET @Counter = 1;
    END
END

SELECT Id, DataValue
FROM #TempData
ORDER BY Id;
Here @RowsRequired determines how many rows are required. Could this be rephrased in a set-based form? Thanks.
Here is a SQLFiddle with the code.
Try this instead:
-- Set-based round robin: make @RowsRequired copies of every Data row, number
-- the copies per DataId ("sort" = pass number), then take the first
-- @RowsRequired rows ordered by pass and DataId.
-- NOTE: the number of copies is capped by the row count of master..spt_values.
DECLARE @RowsRequired INT = 22;

;WITH CTE AS
(
    SELECT
        DataId,
        DataValue,
        ROW_NUMBER() OVER (PARTITION BY DataId ORDER BY DataId) AS sort
    FROM Data
    CROSS JOIN
    (
        SELECT TOP (@RowsRequired) 0 AS d
        FROM master..spt_values
    ) AS d
)
SELECT TOP (@RowsRequired)
    -- DataId breaks ties within a pass so the numbering is deterministic and
    -- follows the round-robin order.
    ROW_NUMBER() OVER (ORDER BY sort, DataId) AS Id,
    DataValue
FROM CTE
ORDER BY sort, DataId;
I tried this and worked for me.
-- Set-based round robin: number the copies of each Data row (rno = pass
-- number), keep enough passes to cover @requiredrows, and return the first
-- @requiredrows rows in pass/DataId order.
DECLARE @requiredrows INT;
SET @requiredrows = 22;

DECLARE @foreachrow INT;
-- Full passes needed per row; NULLIF avoids a divide-by-zero on an empty table.
SELECT @foreachrow = @requiredrows / NULLIF(COUNT(*), 0) FROM Data;

SELECT TOP (@requiredrows)
    A.DataId,
    A.DataValue,
    A.rno
FROM
(
    SELECT
        d.DataId,
        d.DataValue,
        ROW_NUMBER() OVER (PARTITION BY d.DataId ORDER BY v.number) AS rno
    FROM Data AS d
    CROSS JOIN master..spt_values AS v
) AS A
-- One extra pass covers the remainder when the division is not exact.
WHERE A.rno <= @foreachrow + 1
-- TOP without ORDER BY returns an arbitrary subset; order by pass, then row.
ORDER BY A.rno, A.DataId;
Hope it will help.

T-SQL Get percentage of character match of 2 strings

Let's say I have a set of 2 words:
Alexander and Alecsander OR Alexander and Alegzander
Alexander and Aleaxnder, or any other combination. In general we are talking about human error in typing of a word or a set of words.
What I want to achieve is to get the percentage of matching of the characters of the 2 strings.
Here is what I have so far:
-- Percentage of distinct characters shared by two strings.
-- dbo.SplitStringIntoCharacters is defined elsewhere; per the note below it
-- returns one row per character (spaces included).
DECLARE @table1 TABLE
(
    nr INT
    , ch CHAR(1)
);
DECLARE @table2 TABLE
(
    nr INT
    , ch CHAR(1)
);

INSERT INTO @table1 (nr, ch)
SELECT nr, ch FROM [dbo].[SplitStringIntoCharacters] ('WORD w'); --> return a table of characters(spaces included)

INSERT INTO @table2 (nr, ch)
SELECT nr, ch FROM [dbo].[SplitStringIntoCharacters] ('WORD 5');

DECLARE @resultsTable TABLE
(
    ch1 CHAR(1)
    , ch2 CHAR(1)
);

INSERT INTO @resultsTable (ch1, ch2)
SELECT DISTINCT t1.ch AS ch1, t2.ch AS ch2
FROM @table1 t1
FULL JOIN @table2 t2 ON t1.ch = t2.ch; --> returns both matches and mismatches

SELECT ch1, ch2 FROM @resultsTable;

DECLARE @nrOfMatches INT, @nrOfMismatches INT, @nrOfRowsInResultsTable INT;

-- A row with both sides present is a character found in both strings.
SELECT @nrOfMatches = COUNT(1) FROM @resultsTable WHERE ch1 IS NOT NULL AND ch2 IS NOT NULL;
-- A row with one side NULL is a character found in only one string.
SELECT @nrOfMismatches = COUNT(1) FROM @resultsTable WHERE ch1 IS NULL OR ch2 IS NULL;
SELECT @nrOfRowsInResultsTable = COUNT(1) FROM @resultsTable;

-- Integer percentage; NULLIF yields NULL instead of a divide-by-zero error
-- when both inputs are empty.
SELECT @nrOfMatches * 100 / NULLIF(@nrOfRowsInResultsTable, 0);
The SELECT * FROM @resultsTable will return the following:
ch1 ch2
NULL 5
[blank] [blank]
D D
O O
R R
W W
Ok, here is my solution so far:
-- Example call; the function is declared RETURNS INT, so the result is a whole-number percentage.
SELECT [dbo].[GetPercentageOfTwoStringMatching]('valentin123456' ,'valnetin123456')
returns 86%
-- Returns how closely two strings match as a whole-number percentage (0-100):
-- 100 minus the share of the longer string's length that
-- [dbo].[LEVENSHTEIN] reports as differing.
--   @string1, @string2 : strings to compare
--   returns            : INT percentage, or NULL when either input is NULL
CREATE FUNCTION [dbo].[GetPercentageOfTwoStringMatching]
(
    @string1 NVARCHAR(100)
    ,@string2 NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
    -- A NULL input cannot be scored; without this guard it would score 100.
    IF @string1 IS NULL OR @string2 IS NULL
        RETURN NULL;

    DECLARE @string1Length INT = LEN(@string1)
          , @string2Length INT = LEN(@string2);

    DECLARE @maxLengthNumber INT =
        CASE WHEN @string1Length > @string2Length THEN @string1Length ELSE @string2Length END;

    -- Two empty strings are identical; returning here also avoids dividing by zero.
    IF @maxLengthNumber = 0
        RETURN 100;

    DECLARE @levenShteinNumber INT = [dbo].[LEVENSHTEIN](@string1, @string2);

    -- Integer division: the bad-character share is truncated, as before.
    DECLARE @percentageOfBadCharacters INT = @levenShteinNumber * 100 / @maxLengthNumber;
    DECLARE @percentageOfGoodCharacters INT = 100 - @percentageOfBadCharacters;

    -- Return the result of the function
    RETURN @percentageOfGoodCharacters;
END
-- =============================================
-- Create date: 2011.12.14
-- Description: http://blog.sendreallybigfiles.com/2009/06/improved-t-sql-levenshtein-distance.html
--
-- Approximates the edit distance between @left and @right in a single pass:
-- both strings are walked in parallel and, on a mismatch, a one-character
-- look-ahead decides whether to skip a character on the right (insertion) or
-- on the left (deletion). Every mismatch adds 1 to the result.
-- This is a heuristic, not the full dynamic-programming Levenshtein algorithm.
-- =============================================
CREATE FUNCTION [dbo].[LEVENSHTEIN](@left VARCHAR(100),
                                    @right VARCHAR(100))
RETURNS INT
AS
BEGIN
    DECLARE @difference INT = 0,
            @lenLeft INT = LEN(@left),
            @lenRight INT = LEN(@right),
            @leftIndex INT = 1,
            @rightIndex INT = 1,
            @left_char CHAR(1),
            @right_char CHAR(1);

    -- If one side is empty, the distance is the length of the other side.
    IF @lenLeft = 0
        RETURN @lenRight;

    IF @lenRight = 0
        RETURN @lenLeft;

    -- Continue until BOTH strings are consumed. Bounding the loop on the left
    -- index alone left trailing right-hand characters uncounted whenever a
    -- deletion moved the left index ahead, making the result asymmetric.
    WHILE @leftIndex <= @lenLeft OR @rightIndex <= @lenRight
    BEGIN
        SET @left_char = SUBSTRING(@left, @leftIndex, 1);
        SET @right_char = SUBSTRING(@right, @rightIndex, 1);

        IF @left_char <> @right_char
        BEGIN
            -- Would an insertion make them re-align?
            IF (@left_char = SUBSTRING(@right, @rightIndex + 1, 1))
                SET @rightIndex = @rightIndex + 1;
            -- Would a deletion make them re-align?
            ELSE IF (SUBSTRING(@left, @leftIndex + 1, 1) = @right_char)
                SET @leftIndex = @leftIndex + 1;

            SET @difference = @difference + 1;
        END

        SET @leftIndex = @leftIndex + 1;
        SET @rightIndex = @rightIndex + 1;
    END

    RETURN @difference;
END
Ultimately, you appear to be looking to solve for the likelihood that two strings are a "fuzzy" match to one another.
SQL provides efficient, optimized built-in functions that will do that for you, and likely with better performance than what you have written. The two functions you are looking for are SOUNDEX and DIFFERENCE.
While neither of them solves exactly what you asked for - i.e. they do not return a percentage match - I believe they solve what you are ultimately trying to achieve.
SOUNDEX returns a 4-character code which is the first letter of the word plus a 3-number code that represents the sound pattern of the word. Consider the following:
-- SOUNDEX codes for several spellings; the expected output of each SELECT is
-- listed, in order, in the comment block that follows.
SELECT SOUNDEX('Alexander')
SELECT SOUNDEX('Alegzander')
SELECT SOUNDEX('Owleksanndurr')
SELECT SOUNDEX('Ulikkksonnnderrr')
SELECT SOUNDEX('Jones')
/* Results:
A425
A425
O425
U425
J520
*/
What you will notice is that the three-digit number 425 is the same for all of the ones that roughly sound alike. So you could easily match them up and say "You typed 'Owleksanndurr', did you perhaps mean 'Alexander'?"
In addition, there's the DIFFERENCE function, which compares the SOUNDEX discrepancy between two strings and gives it a score.
-- DIFFERENCE scores for 'Alexander' against progressively less similar
-- strings; the expected output of each SELECT is listed, in order, below.
SELECT DIFFERENCE( 'Alexander','Alexsander')
SELECT DIFFERENCE( 'Alexander','Owleksanndurr')
SELECT DIFFERENCE( 'Alexander', 'Jones')
SELECT DIFFERENCE( 'Alexander','ekdfgaskfalsdfkljasdfl;jl;asdj;a')
/* Results:
4
3
1
1
*/
As you can see, the higher the score (between 0 and 4), the more likely the strings are a match: 4 means the SOUNDEX codes are identical or nearly so, while 0 means little or no similarity.
The advantage of SOUNDEX over DIFFERENCE is that if you really need to do frequent fuzzy matching, you can store and index the SOUNDEX data in a separate (indexable) column, whereas DIFFERENCE can only calculate the SOUNDEX at the time of comparison.

SQL Server 2008 T-SQL UDF Split() Tailoring

I'm using SQL Server 2008 and have a large table with only one column of data. The data is a random string with very little consistency. Example: Name Account 445566 0010020056893010445478008 AFD 369. I've been working with a split function that a Stack Overflow user suggested. It works great, but the function puts the split strings into one column. I need a row of individual columns. The present result is one column with the values Name, Account, 445566,... in it, but the result I'm looking for is col1 Name, col2 Account, col3 445566,...
If anyone could provide some insight on how to tailor this script or its usage to get the desired result it would be much appreciated.
-- Splits @String on the single-character @Delimiter.
--   @String    : text to split; NULL is treated as ''
--   @Delimiter : one-character separator; NULL is treated as ''
--   returns    : one row per piece, with Ordinal numbering the pieces from 1
-- Consecutive delimiters produce empty pieces; a remainder that compares
-- equal to '' after the last delimiter is not returned.
CREATE FUNCTION [dbo].[Split]
(
    @String varchar(max)
    ,@Delimiter char(1)
)
RETURNS @Results table
(
    Ordinal int
    ,StringValue varchar(max)
)
AS
BEGIN
    SET @String = ISNULL(@String, '');
    SET @Delimiter = ISNULL(@Delimiter, '');

    DECLARE
        @TempString varchar(max) = @String
        ,@Ordinal int = 0
        ,@CharIndex int = 0;

    SET @CharIndex = CHARINDEX(@Delimiter, @TempString);

    WHILE @CharIndex <> 0
    BEGIN
        SET @Ordinal += 1;

        -- The piece before the delimiter.
        INSERT @Results (Ordinal, StringValue)
        VALUES (@Ordinal, LEFT(@TempString, @CharIndex - 1));

        -- Keep everything after the delimiter. DATALENGTH is used as an upper
        -- bound because LEN ignores trailing spaces and could cut the
        -- remainder short.
        SET @TempString = SUBSTRING(@TempString, @CharIndex + 1, DATALENGTH(@TempString));
        SET @CharIndex = CHARINDEX(@Delimiter, @TempString);
    END

    -- Whatever follows the last delimiter is the final piece.
    IF @TempString <> ''
    BEGIN
        SET @Ordinal += 1;

        INSERT @Results (Ordinal, StringValue)
        VALUES (@Ordinal, @TempString);
    END

    RETURN;
END
--The usage:
-- One output row per non-blank, space-separated piece of M.TheColumn.
SELECT
    M.TheColumn,
    S.Ordinal,
    S.StringValue
FROM
    mytable AS M
CROSS APPLY
    [dbo].[Split] (M.TheColumn, ' ') AS S
-- Consecutive spaces produce empty pieces; drop them.
WHERE RTRIM(S.StringValue) <> ''
If you know that you have 6 columns in the string you can use a split functions that looks like this and of course modify the function to whatever number of columns you want. A function can not return a dynamic number of columns.
-- Inline table-valued function that splits @String on @Delimiter into exactly
-- six columns (Col1..Col6) in a single row.
-- Six delimiters are appended to the input so every CHARINDEX below finds a
-- match; pieces the input does not contain come back as empty strings.
create function dbo.Split6(@String varchar(max), @Delimiter char(1))
returns table as return
(
    select
        -- Each column is the text between two consecutive delimiter positions.
        substring(T.Col, 1, S1.Pos-1) as Col1,
        substring(T.Col, S1.Pos+1, S2.Pos-S1.Pos-1) as Col2,
        substring(T.Col, S2.Pos+1, S3.Pos-S2.Pos-1) as Col3,
        substring(T.Col, S3.Pos+1, S4.Pos-S3.Pos-1) as Col4,
        substring(T.Col, S4.Pos+1, S5.Pos-S4.Pos-1) as Col5,
        substring(T.Col, S5.Pos+1, S6.Pos-S5.Pos-1) as Col6
    from (select @String+replicate(@Delimiter, 6)) as T(Col)
    -- S1..S6: position of the 1st..6th delimiter, each search starting just
    -- after the previous hit.
    cross apply (select charindex(@Delimiter, T.Col, 1)) as S1(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S1.Pos+1)) as S2(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S2.Pos+1)) as S3(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S3.Pos+1)) as S4(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S4.Pos+1)) as S5(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S5.Pos+1)) as S6(Pos)
)
Test:
-- Exercise dbo.Split6 with a full six-piece row, an empty string, and two
-- short rows.
declare @T table (Col varchar(100))

insert into @T (Col) values
('Name Account 445566 0010020056893010445478008 AFD 369'),
(''),
('1 2'),
('1 3')

select S.Col1, S.Col2, S.Col3, S.Col4, S.Col5, S.Col6
from @T as T
cross apply
dbo.Split6(T.Col, ' ') as S
Result:
Col1 Col2 Col3 Col4 Col5 Col6
---- ------- ------ ------------------------- ---- ----
Name Account 445566 0010020056893010445478008 AFD 369
1 2
1 3
You might try using a PIVOT.
http://msdn.microsoft.com/en-us/library/ms177410.aspx

How to create a date lookup table to speed up stored procs?

I want to reduce the time it takes for one of my stored procs that currently uses the following logic to calculate the date field, both in Select and Group portion:
left(datename(month, a.QXP_REPORT_DATE), 3) + ' ''' + right(datename(year, a.QXP_REPORT_DATE), 2)
Would a simple lookup table take less time? If so, then how would I populate for the following fields for all dates in the last 2 years?
-- Date lookup table: one row per calendar day (at midnight) for the last two
-- years, with the three-letter month name and two-digit year precomputed.
CREATE TABLE #CALENDAR(
    FULLDATE DATETIME,
    MONTHNAME NVARCHAR(3),
    sYEAR SMALLINT
)

INSERT INTO #CALENDAR (FULLDATE, MONTHNAME, sYEAR)
SELECT
    d.FULLDATE,
    LEFT(DATENAME(month, d.FULLDATE), 3),
    RIGHT(DATENAME(year, d.FULLDATE), 2)
FROM
(
    -- Today at midnight minus 0..N days, where N is the number of days in the
    -- last two years. master..spt_values (type 'P') is used as the number
    -- source -- presumably 0..2047; confirm on the target server.
    SELECT FULLDATE = DATEADD(day, -v.number, DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0))
    FROM master..spt_values AS v
    WHERE v.type = 'P'
      AND v.number <= DATEDIFF(day, DATEADD(year, -2, GETDATE()), GETDATE())
) AS d
I'm starting to think maybe a function call would be better than a lookup table. Here is all of my SQL stored proc:
-- Body of the complaint-trending drill-down report, with the procedure's
-- parameters declared as local variables for ad-hoc testing.
-- Counts distinct exceptions per product code, short description and
-- month/year label over the 12 months before @FirstMonthDate through
-- @LastMonthDate.
DECLARE
    @FirstMonthDate DATETIME,
    @LastMonthDate DATETIME,
    @TheLevel INT,
    @ProductGroup VARCHAR(255),
    @TheCategory VARCHAR(255),
    @ListNumber VARCHAR(50)
--AS
-- SET NOCOUNT ON;
--ComplaintTrendingDrillDown3p '3/1/10', '3/31/11 23:59:59', 3 , 'RealTime IVD', 'Procedure Not Followed', ''

-- 'YYYYMMDD' literals are read the same way under every language/DATEFORMAT setting.
SET @FirstMonthDate = '20110301'
SET @LastMonthDate = '20110331 23:59:59'
SET @TheLevel = 3
SET @ProductGroup = 'RealTime IVD'
SET @TheCategory = 'Procedure Not followed'
--SET @ListNumber = '2G31-90'

DECLARE @SelectedLevels TABLE (LevelId INT NOT NULL PRIMARY KEY)
DECLARE @OneYearAgo DATETIME
SET @OneYearAgo = DATEADD(year, -1, @FirstMonthDate)

-- Expand the requested level into the set of complaint levels to include:
-- 3 -> levels 1 and 2; 5 -> levels 0, 1 and 2; anything else -> that level only.
IF @TheLevel = 3
BEGIN
    INSERT INTO @SelectedLevels (LevelId) VALUES (1)
    INSERT INTO @SelectedLevels (LevelId) VALUES (2)
END
ELSE IF @TheLevel = 5
BEGIN
    INSERT INTO @SelectedLevels (LevelId) VALUES (0)
    INSERT INTO @SelectedLevels (LevelId) VALUES (1)
    INSERT INTO @SelectedLevels (LevelId) VALUES (2)
END
ELSE
BEGIN
    INSERT INTO @SelectedLevels (LevelId) VALUES (@TheLevel)
END

SELECT
    COUNT(DISTINCT a.QXP_EXCEPTION_NO) AS QXP_EXCEPTION_NO,
    PRODUCT_CODE_STD,
    a.qxp_short_desc,
    m.MonthYear,
    -- 1 when the row is in the requested category, or when no category was requested.
    CASE WHEN a.QXP_SHORT_DESC = @TheCategory OR ISNULL(@TheCategory, '') = '' THEN 1 ELSE 0 END AS SELECTED_CATEGORY
FROM ALL_COMPLAINTS a
INNER JOIN @SelectedLevels F ON a.[LEVEL] = F.LevelId
LEFT OUTER JOIN MANUAL.PRODUCTS b ON a.EPA_PRD_CODE = b.LIST_NUMBER
LEFT OUTER JOIN SMARTSOLVE.V_CXP_CUSTOMER_PXP c ON a.QXP_ID = c.QXP_ID
-- Build the "Mon 'YY" label once instead of repeating the expression in
-- SELECT, GROUP BY and ORDER BY.
CROSS APPLY
(
    SELECT MonthYear = LEFT(DATENAME(month, a.QXP_REPORT_DATE), 3) + ' ''' +
                       RIGHT(DATENAME(year, a.QXP_REPORT_DATE), 2)
) AS m
WHERE a.QXP_REPORT_DATE >= @OneYearAgo
    AND a.QXP_REPORT_DATE <= @LastMonthDate
    AND a.QXP_SHORT_DESC <> 'Design Control'
    -- Keeps complaints with no matching row in c (outer join) as well.
    AND (c.QXP_EXCEPTION_TYPE <> 'Non-Diagnostic' OR c.QXP_EXCEPTION_TYPE IS NULL)
    AND PRODUCT_GROUP = @ProductGroup
    AND (PRODUCT_CODE_STD = @ListNumber OR ISNULL(@ListNumber, '') = '')
    -- NOTE(review): restricts the output to May only and applies a function to
    -- the date column; looks like a leftover test filter -- confirm.
    AND LEFT(DATENAME(month, a.QXP_REPORT_DATE), 3) = 'may'
GROUP BY PRODUCT_CODE_STD, m.MonthYear, a.qxp_short_desc
-- Sorts by the text label (alphabetical, not chronological), as before.
ORDER BY m.MonthYear, product_code_std, qxp_short_desc
Execution plan recommendations:
-- Missing-index suggestion copied from the execution plan. The index name is
-- still the template placeholder and must be replaced before this can run.
-- Keyed on QXP_REPORT_DATE, QXP_UDF_STRING_8 and QXP_XRS_DESCRIPTION, with
-- the listed INCLUDE columns.
CREATE NONCLUSTERED INDEX [<Name of Missing Index, sysname,>]
ON [SMARTSOLVE].[V_CXP_CUSTOMER_PXP] ([QXP_REPORT_DATE],[QXP_UDF_STRING_8],[QXP_XRS_DESCRIPTION])
INCLUDE ([QXP_ID],[QXP_EXCEPTION_NO],[QXP_BASE_EXCEPTION],[QXP_OCCURENCE_DATE],[QXP_COORD_ID],[QXP_ROOT_CAUSE],[QXP_DESCRIPTION],[QXP_QEI_ID],[QXP_EXCEPTION_TYPE],[QXP_UDF_STRING_2],[QXP_UDF_STRING_5],[CXP_ID],[CXP_AWARE_DATE],[QXP_XSV_CODE],[QXP_COORD_NAME],[QXP_ORU_NAME],[QXP_RESOLUTION_DESC],[QXP_CLOSED_DATE],[CXP_CLIENT_CODE],[CXP_CLIENT_NAME])