one column split to more column sql server 2008? - tsql

Table name: Table1
id name
1 1-aaa-14 milan road
2 23-abcde-lsd road
3 2-mnbvcx-welcoome street
I want the result like this:
Id name name1 name2
1 1 aaa 14 milan road
2 23 abcde lsd road
3 2 mnbvcx welcoome street

This function ought to give you what you need.
--Drop Function Dbo.Part
-- Returns the @Part-th (1-based) token of @Value, where tokens are separated
-- by the single character @Sep (default '-').
--   @Value : the delimited string to split
--   @Part  : 1-based index of the token wanted
--   @Sep   : one-character separator
-- Returns an empty string when @Value has fewer than @Part tokens.
-- Note: Len() does not count trailing spaces, so trailing spaces of the
-- last token are not returned.
Create Function Dbo.Part
(@Value Varchar(8000)
,@Part Int
,@Sep Char(1)='-'
)Returns Varchar(8000)
As Begin
    Declare @Start Int      -- position of the first character of the current token
    Declare @Finish Int     -- position of the separator that ends the current token (0 = none)
    Set @Start=1
    Set @Finish=CharIndex(@Sep,@Value,@Start)
    -- Step over one token per iteration until the wanted one is reached
    -- or the string runs out of separators.
    While (@Part>1 And @Finish>0)Begin
        Set @Start=@Finish+1
        Set @Finish=CharIndex(@Sep,@Value,@Start)
        Set @Part=@Part-1
    End
    If @Part>1 Set @Start=Len(@Value)+1 -- Not found
    If @Finish=0 Set @Finish=Len(@Value)+1 -- Last token on line
    Return SubString(@Value,@Start,@Finish-@Start)
End
Usage:
-- Split every Name of Dbo.Table1 into its first three dash-separated tokens.
-- Passing Default for the third argument uses the function's default separator.
SELECT
    ID,
    Dbo.Part(Name, 1, DEFAULT) AS Name,
    Dbo.Part(Name, 2, DEFAULT) AS Name1,
    Dbo.Part(Name, 3, DEFAULT) AS Name2
FROM Dbo.Table1
It's rather compute-intensive, so if Table1 is very long you ought to write the results to another table, which you could refresh from time to time (perhaps once a day, at night).
Better yet, you could create a trigger, which automatically updates Table2 whenever a change is made to Table1. Assuming that column ID is primary key:
-- Holds the pre-split tokens of each Table1 row, keyed by the same ID.
CREATE TABLE Dbo.Table2
(
    ID    INT CONSTRAINT PK_Table2 PRIMARY KEY,
    Name  VARCHAR(8000),
    Name1 VARCHAR(8000),
    Name2 VARCHAR(8000)
)
-- Keeps Dbo.Table2 in step with Dbo.Table1: for every row removed or changed
-- (pseudo-table Deleted) the old Table2 row is removed, and for every row
-- added or changed (pseudo-table Inserted) a freshly split row is written.
-- A trigger fires once per statement, so Deleted/Inserted may hold many rows;
-- the delete therefore matches with In rather than a scalar "=" comparison,
-- which fails as soon as the subquery returns more than one ID.
Create Trigger Trigger_Table1 on Dbo.Table1 After Insert,Update,Delete
As Begin
    If Exists(Select * From Deleted)
        Delete From Dbo.Table2 Where ID In (Select ID From Deleted)
    If Exists(Select * From Inserted)
        Insert Dbo.Table2(ID, Name, Name1, Name2)
        Select ID
        ,Dbo.Part(Name,1,Default)
        ,Dbo.Part(Name,2,Default)
        ,Dbo.Part(Name,3,Default)
        From Inserted
End
Now, do your data manipulation (Insert, Update, Delete) on Table1, but do your Select statements on Table2 instead.

The below solution uses a recursive CTE for splitting the strings, and PIVOT for displaying the parts in their own columns.
-- Inline sample data standing in for the real Table1.
WITH Table1 (id, name) AS (
    SELECT 1, '1-aaa-14 milan road'
    UNION ALL
    SELECT 2, '23-abcde-lsd road'
    UNION ALL
    SELECT 3, '2-mnbvcx-welcoome street'
),
-- One row per part: where the part starts and where the dash that ends it is.
-- A dash is appended to name so the last part is also terminated by a dash.
cutpositions AS (
    SELECT
        t.id,
        t.name,
        1 AS rownum,
        1 AS startpos,
        CHARINDEX('-', t.name + '-') AS nextdash
    FROM Table1 AS t
    UNION ALL
    SELECT
        c.id,
        c.name,
        c.rownum + 1,
        c.nextdash + 1,
        CHARINDEX('-', c.name + '-', c.nextdash + 1)
    FROM cutpositions AS c
    WHERE c.nextdash < LEN(c.name)   -- stop once the terminating dash has been reached
),
-- The text of each part, numbered by its position within the name.
parts AS (
    SELECT
        id,
        rownum,
        SUBSTRING(name, startpos, nextdash - startpos) AS part
    FROM cutpositions
)
SELECT
    x.id,
    x.[1] AS name,
    x.[2] AS name1,
    x.[3] AS name2
    /* add more columns here */
FROM parts
PIVOT ( MAX(part) FOR rownum IN ([1], [2], [3] /* extend the list here */) ) AS x
Without additional modifications this query can split names consisting of up to 100 parts (that's the default maximum recursion depth, which can be changed), but can only display no more than 3 of them. You can easily extend it to however many parts you want it to display, just follow the instructions in the comments.

-- Split Name at its first two dashes into Name / Name1 / Name2.
-- D1.Pos and D2.Pos are the positions of the first and second dash.
-- The search runs over Name + '--' so that a dash is always found: a value
-- with fewer than two dashes yields empty trailing parts instead of a
-- negative length being passed to substring (which raises an error).
select T.id,
       substring(T.Name, 1, D1.Pos - 1) as Name,
       substring(T.Name, D1.Pos + 1, D2.Pos - D1.Pos - 1) as Name1,
       substring(T.Name, D2.Pos + 1, len(T.Name)) as Name2
from Table1 as T
  cross apply (select charindex('-', T.Name + '--', 1)) as D1(Pos)
  cross apply (select charindex('-', T.Name + '--', D1.Pos + 1)) as D2(Pos)
Testing performance of suggested solutions
Setup:
-- Test table for the performance comparison.
CREATE TABLE Table1
(
    id   INT IDENTITY PRIMARY KEY,
    Name VARCHAR(50)
)
GO
-- Three sample rows per batch; "GO 10000" makes the client repeat the batch
-- 10000 times.
INSERT INTO Table1 (Name)
SELECT '1-aaa-14 milan road'
UNION ALL
SELECT '23-abcde-lsd road'
UNION ALL
SELECT '2-mnbvcx-welcoome street'
GO 10000
Result:

if you always will have 2 dashes, you can do the following by using PARSENAME
--testing table
-- Sample data in a temp table.
CREATE TABLE #test (id INT, NAME VARCHAR(1000));

INSERT INTO #test (id, NAME)
VALUES (1, '1-aaa-14 milan road'),
       (2, '23-abcde-lsd road'),
       (3, '2-mnbvcx-welcoome street');

-- PARSENAME splits on dots and numbers the parts from the right,
-- so the dashes are turned into dots first.
WITH dotted AS (
    SELECT id, REPLACE(NAME, '-', '.') AS NAME
    FROM #test
)
SELECT
    id,
    PARSENAME(NAME, 3) AS name,
    PARSENAME(NAME, 2) AS name1,
    PARSENAME(NAME, 1) AS name2
FROM dotted
if you have dots in the name column you have to first replace them and then replace them back to dots in the end
example, by using a tilde to substitute the dot
-- A value that itself contains a dot.
INSERT INTO #test (id, NAME)
VALUES (3, '5-mnbvcx-welcoome street.');

-- Hide real dots behind '~' before turning dashes into dots for PARSENAME,
-- then turn '~' back into '.' in each extracted part.
SELECT
    masked.id,
    REPLACE(PARSENAME(masked.NAME, 3), '~', '.') AS name,
    REPLACE(PARSENAME(masked.NAME, 2), '~', '.') AS name1,
    REPLACE(PARSENAME(masked.NAME, 1), '~', '.') AS name2
FROM (
    SELECT id, REPLACE(REPLACE(NAME, '.', '~'), '-', '.') AS NAME
    FROM #test
) AS masked

Related

left join all orders on left on 1 row

I have a table Orders, it has the following columns:
OrderID, ClientID, BankNumber, Adres, Name;
I want to write a query that returns one row per distinct ClientID, with its Name and Adres, followed by all of that client's orders and the corresponding bank account numbers on the same row. This is my example:
ClientID Adres Name order1 Banknumber Order2 Banknumber order3 Banknumber
First, a query cannot return a result set with an unbounded number of columns, but you can combine a client's orders and show them in one column.
if you are on SQL Azure or SQL2017 you can also use STRING_AGG like this:
-- One row per customer, with all of that customer's orders folded into a
-- single comma-separated list of "orderID-banknumber" entries.
-- Requires STRING_AGG (SQL Server 2017+ / Azure SQL Database).
select customer.Id, customer.Name, orderSummary.orderData
from Customers as customer
cross apply (
    -- CONCAT converts its arguments to text, so a numeric orderID is not
    -- added arithmetically to '-' (assumes orderID may be numeric -- confirm).
    -- The cast to varchar(max) lifts the 8000-byte limit on the aggregated result.
    -- An aggregate without GROUP BY always returns one row, so customers
    -- without orders are kept, with orderData = NULL.
    select STRING_AGG(cast(CONCAT(o.orderID, '-', o.banknumber) as varchar(max)), ', ') as orderData
    from orders as o
    where o.customerId = customer.Id
) as orderSummary
You can look at this post for more answers
How to concatenate text from multiple rows into a single text string in SQL server?
And Subquery from Microsoft:
https://technet.microsoft.com/en-us/library/ms189575(v=sql.105).aspx
here is a working sample.
Hope it works for you.
You need to build the Order1, order2, order3...and BankNumber1,BankNumber2...dynamically. I have hard coded in my example
-- Drop the work table only when it is left over from an earlier run;
-- an unconditional drop raises an error the first time the script runs.
if object_id('tempdb..#t1') is not null
    drop table #t1
create table #t1(OrderID Int, ClientID int, BankNumber varchar(50), [Address] varchar(50), Name varchar(50))
-- Sample data: three clients with two orders each.
insert into #t1 (OrderID, ClientID, BankNumber, [Address], Name)
select 1,11,'111','xyz1','xyz'
union all
select 2,22,'112','xyz2','xyzz'
union all
select 3,33,'113','xyz3','xyzzz'
union all
select 100,11,'111','xyz1','xyz'
union all
select 200,22,'112','xyz2','xyzz'
union all
select 300,33,'113','xyz3','xyzzz'
-- Number each client's orders 1, 2, ... in OrderID order.
;with cte as
(
select OrderID,ClientID,BankNumber,Address,Name,ROW_NUMBER()over (partition by clientid order by orderid asc) RN
from #t1
)
-- Pivot once on the order sequence and once on the bank-number sequence,
-- then collapse the partially filled rows to one row per client with max().
-- The column lists ([Order 1],[Order 2], ...) are hard coded for two orders;
-- build them dynamically to support more.
select ClientID
,max([Order 1]) Order1
,max([Order 2]) Order2
,max([BankNumber 1]) BankNumber1
,max([BankNumber 2]) BankNumber2
from
(
select ClientID,Address,Name,OrderID,BankNumber,'Order '+cast(rn as varchar(10)) OrderSeq
,'BankNumber '+cast(rn as varchar(10)) BankNumberSeq
from cte
) as ST
pivot(max(OrderID) for OrderSeq in ([Order 1],[Order 2])) as pt1
pivot(max(BankNumber) for BankNumberSeq in ([BankNumber 1],[BankNumber 2])) as pt2
group by ClientID

Find exact FK matches

Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 1
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
-- Remove work tables left over from an earlier run. Each drop is guarded so
-- the script does not raise errors the first time it is executed.
if object_id('tempdb..#temp_sID_wordCount') is not null drop table #temp_sID_wordCount
if object_id('tempdb..#temp_count_wordID_sID') is not null drop table #temp_count_wordID_sID
if object_id('tempdb..#temp3_wordID_sID_forThatCount') is not null drop table #temp3_wordID_sID_forThatCount
if object_id('tempdb..#temp3_sID1_sID2') is not null drop table #temp3_sID1_sID2
if object_id('tempdb..#temp3_sID1_sID2_keep') is not null drop table #temp3_sID1_sID2_keep
-- number of wordIDs per sID
create table #temp_sID_wordCount (sID int primary key, ccount int not null)
-- (word count, wordID, sID) triples, clustered by word count
create table #temp_count_wordID_sID (ccount int not null, wordID int not null, sID int not null, primary key (ccount, wordID, sID))
-- the (wordID, sID) rows for the word count currently being processed
create table #temp3_wordID_sID_forThatCount (wordID int not null, sID int not null, primary key(wordID, sID))
-- accumulated result: pairs of sIDs with identical wordID sets
create table #temp3_sID1_sID2_keep (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
-- candidate pairs for the word count currently being processed
create table #temp3_sID1_sID2 (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
-- Word count per sID.
insert into #temp_sID_wordCount (sID, ccount)
select src.sID, count(*)
from [FTSindexWordOnce] as src with (nolock)
group by src.sID
order by src.sID;

select count(*) from #temp_sID_wordCount where ccount <= 100; -- 701,966

-- Every (wordID, sID) row tagged with the word count of its sID,
-- restricted to sIDs that have between 1 and 10 words.
truncate table #temp_count_wordID_sID
insert into #temp_count_wordID_sID (ccount, wordID, sID)
select wc.ccount, src.wordID, src.sID
from #temp_sID_wordCount as wc
join [FTSindexWordOnce] as src with (nolock)
  on src.sID = wc.sID
where wc.ccount between 1 and 10
order by wc.ccount, src.wordID, src.sID;

select count(*) from #temp_sID_wordCount; -- 34,860,090
truncate table #temp3_sID1_sID2_keep
-- Walk the distinct word counts (the first 10) one at a time: two sIDs can
-- only have identical wordID sets if they have the same number of words.
declare cur cursor for
select top 10 ccount from #temp_count_wordID_sID group by ccount order by ccount
open cur
declare @count int
fetch next from cur into @count
while (@@FETCH_STATUS = 0)
begin
    --print (@count)
    --select count(*), @count from #temp_sID_wordCount where #temp_sID_wordCount.ccount = @count
    truncate table #temp3_wordID_sID_forThatCount
    truncate table #temp3_sID1_sID2
    -- wordID and sID for that unique word count
    -- they can only be exact if they have the same word count
    insert into #temp3_wordID_sID_forThatCount
    select #temp_count_wordID_sID.wordID
         , #temp_count_wordID_sID.sID
    from #temp_count_wordID_sID
    where #temp_count_wordID_sID.ccount = @count
    order by #temp_count_wordID_sID.wordID, #temp_count_wordID_sID.sID
    -- select count(*) from #temp3_wordID_sID_forThatCount
    -- this has some duplicates
    -- sID1 is the group
    -- A pair matches when the number of shared wordIDs equals the word count.
    -- Because of "<=", every sID is also paired with itself.
    insert into #temp3_sID1_sID2
    select w1.sID, w2.sID
    from #temp3_wordID_sID_forThatCount as w1 with (nolock)
    join #temp3_wordID_sID_forThatCount as w2 with (nolock)
      on w1.wordID = w2.wordID
     and w1.sID <= w2.sID
    group by w1.sID, w2.sID
    having count(*) = @count
    order by w1.sID, w2.sID
    -- get rid of the groups of 1 (an sID that matched only itself)
    delete #temp3_sID1_sID2
    where sID1 in (select sID1 from #temp3_sID1_sID2 group by sID1 having count(*) = 1)
    -- get rid of the double dips (groups whose sID1 already appears as a
    -- member, sID2, of a group with a smaller sID1)
    delete #temp3_sID1_sID2
    where #temp3_sID1_sID2.sID1 in
      (select distinct s1del.sID1 -- these are the double dips
       from #temp3_sID1_sID2 as s1base with (nolock)
       join #temp3_sID1_sID2 as s1del with (nolock)
         on s1del.sID1 > s1base.sID1
        and s1Del.sID1 = s1base.sID2)
    -- keep what is left for this word count
    insert into #temp3_sID1_sID2_keep
    select #temp3_sID1_sID2.sID1
         , #temp3_sID1_sID2.sID2
    from #temp3_sID1_sID2 with (nolock)
    order by #temp3_sID1_sID2.sID1, #temp3_sID1_sID2.sID2
    fetch next from cur into @count
end
close cur
deallocate cur
-- Final result: all kept (sID1, sID2) pairs, in group order.
select keep.sID1, keep.sID2
from #temp3_sID1_sID2_keep as keep with (nolock)
order by keep.sID1, keep.sID2
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
-- Pairs of sIDs whose wordID sets are equal: both have the same number of
-- wordIDs, and the number of wordIDs they share equals that number.
;with tmp1 as (
    select sID, count(wordID) as cnt
    from [Table]
    group by sID
)
select s1.sID, s2.sID
from tmp1 as s1
inner join tmp1 as s2
    on s2.cnt = s1.cnt
   and s2.sID < s1.sID        -- report each pair once, larger sID first
cross apply (
    -- number of wordIDs the two sIDs have in common
    select count(*)
    from [Table] as d1
    inner join [Table] as d2 on d2.wordID = d1.wordID
    where d1.sID = s1.sID and d2.sID = s2.sID
) as shared(cnt)
where shared.cnt = s1.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
-- One SHA-1 hash per sID, computed over the ordered, '|'-terminated list of
-- its wordIDs; sIDs with equal wordID sets get equal hashes.
;with subsets as (
    select distinct sID
    from [Table]
)
select
    subsets.sID,
    hashbytes(
        'sha1',
        (
            -- FOR XML PATH('') concatenates the rows into a single string
            select cast(t.wordID as varchar(10)) + '|'
            from [Table] as t
            where t.sID = subsets.sID
            order by t.wordID
            for xml path('')
        )
    )
from subsets
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.

How to use Common Table Expression with parameters?

I have a stored procedure with 2 CTEs. The second CTE has a parameter
WITH path_sequences
AS
(
),
WITH categories
AS
(
... WHERE CategoryId = #CategoryId
// I dont know how to get this initial parameter inside the CTE
)
SELECT * FROM path_sequences p
JOIN categories c
ON p.CategoryId = c.CategoryId
The initial parameter that I need to get inside the second CTE is p.CategoryId. How do I do that without having to create another stored procedure to contain the second CTE?
Thanks for helping
You can create table valued function
-- Inline table-valued function wrapping the "categories" CTE so that the
-- category can be passed in as a parameter.
--   @CategoryID : the category the CTE is filtered on
-- The "..." parts are placeholders for the original CTE body and column list.
create function ftCategories
(
    @CategoryID int
)
returns table
as return
with categories as (
    ... WHERE CategoryId = @CategoryID
)
select Col1, Col2 ...
from categories
and use it as
-- CROSS APPLY calls the function once per path_sequences row, passing that
-- row's CategoryId as the parameter.
SELECT *
FROM path_sequences AS p
    CROSS APPLY ftCategories(p.CategoryId) AS c
I have created simple query using your code. You can use it like -
-- A variable declared before the WITH clause is visible inside every CTE
-- of the statement, so it can act as the CTE's parameter.
DECLARE @CategoryId INT
SET @CategoryId = 1
;WITH path_sequences
AS
(
    SELECT 1 CategoryId
),
categories
AS
(
    -- returns its row only when the variable equals 1
    SELECT 1 CategoryId WHERE 1 = @CategoryId
)
SELECT * FROM path_sequences p
JOIN categories c
ON p.CategoryId = c.CategoryId
This syntax is for External Aliases:
-- CTES With External Aliases:
WITH Sales_CTE (SalesPersonID, SalesOrderID, SalesYear)
AS
-- Define the CTE query.
(
SELECT SalesPersonID, SalesOrderID, YEAR(OrderDate) AS SalesYear
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL
)
The only way to add parameters is to use scope variables like so:
--Declare a variable:
-- The variable must carry the same name where it is declared and where it
-- is used, and the statement before WITH must end with a semicolon.
DECLARE @CategoryId INT;
WITH
MyCTE1 (exName1, exName2)
AS
(
    SELECT <SELECT LIST>
    FROM <TABLE LIST>
    --Use the variable as 'a parameter'
    WHERE CategoryId = @CategoryId
)
-- A CTE definition must be followed by a statement that uses it.
SELECT exName1, exName2
FROM MyCTE1
First remove the second WITH, separate each cte with just a comma. Next you can add parameters like this:
-- Template: two CTEs in one WITH clause, separated by a comma, with the
-- "parameter" supplied by a variable declared before the statement.
DECLARE @CategoryId INT; -- <~~ Parameter outside of CTEs
WITH
MyCTE1 (col1, col2) -- <~~ were poorly named param1 and param2 previously
AS
(
    SELECT blah blah
    FROM blah
    WHERE CategoryId = @CategoryId -- same name as in the DECLARE above
),
MyCTE2 (col1, col2) -- <~~ were poorly named param1 and param2 previously
AS
(
    -- placeholder: the second CTE's query goes here
)
SELECT *
FROM MyCTE2
INNER JOIN MyCTE1 ON ...etc....
EDIT (and CLARIFICATION):
I have renamed the columns from param1 and param2 to col1 and col2 (which is what I meant originally).
My example assumes that each SELECT has exactly two columns. The columns are optional if you want to return all of the columns from the underlying query AND those names are unique. If you have more or less columns than what is being SELECTed you will need to specify names.
Here is another example:
Table:
-- Employees; ManagerId holds the Id of the employee's manager, NULL when
-- there is none.
CREATE TABLE Employee (
    Id        INT         NOT NULL IDENTITY PRIMARY KEY CLUSTERED,
    FirstName VARCHAR(50) NOT NULL,
    LastName  VARCHAR(50) NOT NULL,
    ManagerId INT         NULL
)
Fill table with some rows:
-- Seed rows. The statements are kept separate and in this order so the
-- identity column numbers them 1..7; ManagerId 5 then refers to the fifth
-- row inserted ('Darth', 'Vader').
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Donald', 'Duck', 5);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Micky', 'Mouse', 5);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Daisy', 'Duck', 5);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Fred', 'Flintstone', 5);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Darth', 'Vader', null);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Bugs', 'Bunny', null);
INSERT INTO Employee (FirstName, LastName, ManagerId) VALUES ('Daffy', 'Duck', null);
CTEs:
-- Lists everybody who reports to the manager identified by @ManagerId.
DECLARE @ManagerId INT = 5;
WITH
-- The manager's own row, exposed under the aliases col1..col4.
-- Columns are listed explicitly so the alias mapping does not depend on the
-- table's column order.
MyCTE1 (col1, col2, col3, col4)
AS
(
    SELECT e.Id, e.FirstName, e.LastName, e.ManagerId
    FROM Employee e
    WHERE e.Id = @ManagerId
),
-- Employees whose ManagerId matches the manager's Id (MyCTE1.col1),
-- exposed under the aliases colx, coly, colz, cola.
MyCTE2 (colx, coly, colz, cola)
AS
(
    SELECT e.Id, e.FirstName, e.LastName, e.ManagerId
    FROM Employee e
    INNER JOIN MyCTE1 mgr ON mgr.col1 = e.ManagerId
)
SELECT
    empsWithMgrs.colx,
    empsWithMgrs.coly,
    empsWithMgrs.colz,
    empsWithMgrs.cola
FROM MyCTE2 empsWithMgrs
Notice in the CTEs the columns are being aliased. MyCTE1 exposes columns as col1, col2, col3, col4 and MyCTE2 references MyCTE1.col1 when it references it. Notice the final select uses MyCTE2's column names.
Results:
For anyone still struggling with this: the only thing you need to do is terminate your variable declaration with a semicolon before the CTE. Nothing else is required.
-- Minimal example: the DECLARE ends with a semicolon, so the WITH that
-- follows is parsed as the start of a CTE.
DECLARE @test AS INT = 42;
WITH x
AS (SELECT @test AS 'Column')
SELECT *
FROM x
Results:
Column
-----------
42
(1 row affected)

PostgreSQL join to denormalize a table with generate_series

I've this table:
-- name plus the number of times that name should be repeated
CREATE TABLE "mytable" (
    name  text,
    count integer
);
INSERT INTO mytable (name, count)
VALUES ('john', 4),
       ('mark', 2),
       ('albert', 3);
and I would like to "denormalize" the rows in this way:
-- repeat each name once for every series value a that is <= its count
SELECT name
FROM mytable
JOIN generate_series(1, 4) AS tmp(a)
    ON a <= count
so that I get, for each name, a number of rows equal to its count column: 4 rows with john, 2 with mark and 3 with albert.
But I can't use the generate_series() function this way if I don't know the highest count (in this case 4). Is there a way to do this without knowing MAX(count)?
-- generate_series in the select list yields one output row per series value
-- for each row of mytable
SELECT
    name,
    generate_series(1, count)
FROM mytable;
Set returning functions can be used in the select list and will do a cross join with the row retrieved from the base table.
I think this is an undocumented behaviour that might go away in the future, but I'm not sure about that (I recall some discussion regarding this on the mailing list)
SQLFiddle example
-- IF EXISTS keeps the script runnable when the table is not there yet.
DROP TABLE IF EXISTS ztable;
CREATE TABLE ztable (zname varchar, zvalue INTEGER NOT NULL);
INSERT INTO ztable(zname, zvalue) VALUES( 'one', 1), ( 'two', 2 ), ( 'three', 3) , ( 'four', 4 );

-- Repeat every zname zvalue times, without knowing the largest zvalue.
WITH RECURSIVE zzz AS (
    -- first copy of each name; names with zvalue < 1 get no rows at all
    SELECT 1::integer AS rnk , t0.zname
    FROM ztable t0
    WHERE t0.zvalue >= 1
    UNION
    -- next copy of the same name while its count has not been reached;
    -- joining on zname keeps the recursion from generating rows across names
    SELECT 1+rr.rnk , t1.zname
    FROM ztable t1
    JOIN zzz rr ON rr.zname = t1.zname AND rr.rnk < t1.zvalue
)
SELECT zzz.zname
FROM zzz
;

Finding duplicate rows but skip the last result?

I am trying to find duplicate rows in my DB, like this:
-- emailid values that occur more than once, with their number of occurrences.
-- The selected column has to be the grouped column (emailid); selecting
-- email while grouping by emailid is not a valid grouped query.
SELECT emailid, COUNT(emailid) AS NumOccurrences
FROM users
GROUP BY emailid
HAVING COUNT(emailid) > 1
This returns the emailid and the number of matches found. Now what I want do is compare the ID column to another table I have and set a column there with the count.
The other table has a column named duplicates, which should contain the amount of duplicates from the select. So let's say we have 3 rows with the same emailid. The duplicates column has a "3" in all 3 rows. What I want is a "2" in the first 2 and nothing or 0 in the last of the 3 matching ID rows.
Is this possible?
Update:
I managed to have a temporary table now, which looks like this:
mailid | rowcount | AmountOfDups
643921 | 1 | 3
643921 | 2 | 3
643921 | 3 | 3
Now, how could I decide that only the first 2 should be updated (by mailid) in the other table? The other table has mailid as well.
SELECT ...
ROW_NUMBER() OVER (PARTITION BY email ORDER BY emailid DESC) AS RN
FROM ...
...is a great starting point for such a problem. Never underestimate the power of ROW_NUMBER()!
Using Sql Server 2005+ you could try something like (full example)
-- Sample data in a table variable: 'a' occurs three times, 'b' twice, 'c' once.
DECLARE @Table TABLE(
    ID INT IDENTITY(1,1),
    Email VARCHAR(20)
)
INSERT INTO @Table (Email) SELECT 'a'
INSERT INTO @Table (Email) SELECT 'b'
INSERT INTO @Table (Email) SELECT 'c'
INSERT INTO @Table (Email) SELECT 'a'
INSERT INTO @Table (Email) SELECT 'b'
INSERT INTO @Table (Email) SELECT 'a'
-- Emails that occur more than once, with their number of occurrences.
; WITH Duplicates AS (
    SELECT Email,
           COUNT(ID) TotalDuplicates
    FROM @Table
    GROUP BY Email
    HAVING COUNT(ID) > 1
)
-- Every row of a duplicated email, numbered 1..n within its email by ID.
, Counts AS (
    SELECT t.ID,
           ROW_NUMBER() OVER(PARTITION BY t.Email ORDER BY t.ID) EmailID,
           d.TotalDuplicates
    FROM @Table t INNER JOIN
         Duplicates d ON t.Email = d.Email
)
-- The last row of each email (EmailID = TotalDuplicates) gets 0; every
-- earlier row gets the number of other occurrences.
SELECT ID,
       CASE
           WHEN EmailID = TotalDuplicates
           THEN 0
           ELSE TotalDuplicates - 1
       END Dups
FROM Counts