Group by occurrence of row - tsql

have a table like below where I have to take sum of col2 based on group by. But it belongs to a chain, if chain breaks then sum will be limited to sequence.
DECLARE #TabVar TABLE
(
ID INT IDENTITY(1,1), col1 varchar(20), Col2 INT
)
INSERT INTO #TabVar
VALUES ('a',2),('a',3),('b',4),('b',2),('a',6),('a',3),('b',3)
SELECT * FROM #TabVar
Expected output:
COL1 SUM(COL2)
A 5
B 6
A 9
B 3
I have tried to do it with Ranking functions but ranking is done using Order by which accumulate total of Col1

You can use a ROW_NUMBER() with PARTITION BY and then a GROUP BY to achieve this.
Whenever your chain breaks, id - ROW_NUMBER()over(partition by col1 order by id) will have a different value for the same col1 value. You can then use this along with col1 to group your data and do a SUM. Something like this
Sample Data
DECLARE #TabVar TABLE
(
ID INT IDENTITY(1,1), col1 varchar(20), Col2 INT
)
INSERT INTO #TabVar
VALUES ('a',2),('a',3),('b',4),('b',2),('a',6),('a',3),('b',3)
Query
SELECT Col1,SUM(Col2) sumcol
FROM
(
SELECT id - ROW_NUMBER()over(partition by col1 order by id) grpcol,Col1,Col2,id
FROM #TabVar
)T
GROUP BY grpcol,Col1
ORDER BY MAX(ID)
Output
Col1 sumcol
a 5
b 6
a 9
b 3
Edit
Incase your IDs are not consecutive in live environment, you can use this
SELECT Col1,SUM(Col2) sumcol
FROM
(
SELECT ROW_NUMBER()over(order by id) - ROW_NUMBER()over(partition by col1 order by id) grpcol, Col1,Col2,id
FROM #TabVar
)T
GROUP BY grpcol,Col1
ORDER BY MAX(ID)

Related

Delete duplicate rows with different values in columns

I didn't find my case on the Internet. Tell me how i can delete duplicates if the values are in different columns.
I have a table with a lot of values, for example:
|Id1|Id2|
|89417980|89417978|
|89417980|89417979|
|89417978|89417980|
|89417979|89417980|
I need to exclude duplicates and leave in the answer only:
|Id1|Id2|
|89417980|89417978|
|89417980|89417979|
min/max does not work here, as the values may be different.
I tried to union/join tables on a table/exclude results with temporary tables, but in the end I come to the beginning.
Assuming id1 and id2 are primary keys columns you could try this
DECLARE #tbl table (id1 int, id2 int )
INSERT INTO #tbl
SELECT 89417980, 89417978
UNION SELECT 89417980, 89417979
UNION SELECT 89417978, 89417980
UNION SELECT 89417979, 89417980
SELECT * FROM #tbl
;WITH CTE AS (--Get comparable value as "cs"
SELECT
IIF(id1 > id2, CHECKSUM(id1, id2), CHECKSUM(id2,id1)) as cs
, id1
, id2
, ROW_NUMBER() OVER (order by id1, id2) as rn
FROM #tbl
)
, CTE2 AS ( --Get rows to keep
SELECT MAX (rn) as rn
FROM CTE
GROUP BY cs
HAVING COUNT(*) > 1
)
DELETE tbl -- Delete all except the rows to keep
FROM #tbl tbl
WHERE NOT EXISTS(SELECT 1
FROM CTE2
JOIN CTE ON CTE.rn = CTE2.rn
WHERE CTE.id1 = tbl.id1
AND CTE.id2 = tbl.id2
)
SELECT * FROM #tbl

left join all orders on left on 1 row

I have a table Orders, it has the following columns:
OrderID, ClientID, BankNumber, Adres, Name;
I want to write a query that gives me this result: distinct clientid name and adres on one row with all the belonging orders and corespondating bankaccount numbers on one row: This is my example.
ClientID Adres Name order1 Banknumber Order2 Banknumber order3 Banknumber
First you cannot query something and come up with a results set with infinite number of columns, but you could combine orders and show them in 1 column.
if you are on SQL Azure or SQL2017 you can also use STRING_AGG like this:
select customer.Id, customer.Name, orderSummary.orderData
(select STRING_AGG(orderID+'-'+banknumber+', ') as orderData from orders where customerId = customer.Id) orderSummary
from Customers as customer
You can look at this post for more answers
How to concatenate text from multiple rows into a single text string in SQL server?
And Subquery from Microsoft:
https://technet.microsoft.com/en-us/library/ms189575(v=sql.105).aspx
here is a working sample.
Hope it works for you.
You need to build the Order1, order2, order3...and BankNumber1,BankNumber2...dynamically. I have hard coded in my example
drop table #t1
create table #t1(OrderID Int, ClientID int, BankNumber varchar(50), [Address] varchar(50), Name varchar(50))
insert into #t1
select 1,11,'111','xyz1','xyz'
union all
select 2,22,'112','xyz2','xyzz'
union all
select 3,33,'113','xyz3','xyzzz'
union all
select 100,11,'111','xyz1','xyz'
union all
select 200,22,'112','xyz2','xyzz'
union all
select 300,33,'113','xyz3','xyzzz'
;with cte as
(
select OrderID,ClientID,BankNumber,Address,Name,ROW_NUMBER()over (partition by clientid order by orderid asc) RN
from #t1
)
select ClientID
,max([Order 1]) Order1
,max([Order 2]) Order2
,max([BankNumber 1]) BankNumber1
,max([BankNumber 2]) BankNumber2
from
(
select ClientID,Address,Name,OrderID,BankNumber,'Order '+cast(rn as varchar(10)) OrderSeq
,'BankNumber '+cast(rn as varchar(10)) BankNumberSeq
from cte
) as ST
pivot(max(OrderID) for OrderSeq in ([Order 1],[Order 2])) as pt1
pivot(max(BankNumber) for BankNumberSeq in ([BankNumber 1],[BankNumber 2])) as pt2
group by ClientID

Remove Duplicates

I have a table like below:
SuppID AreaID SuppNo SupName SupPrice
------------------------------------------------
1 3 526 ANC 100
1 3 985 JTT 200
3 4 100 HIK 300
In the above table, for same SuppID(1) and same AreaID(3), different SuppNo are there (526 & 985) in two different rows.
In this scenario , I'd like to make those two rows into a single row with SuppNo field as blank.
Also my output result should display rows with all the columns.
Any Help?
This should get you started:
DECLARE #TABLE TABLE (SuppID INT, AreaID INT, SuppNo VARCHAR(5), SupName VARCHAR(5), SupPrice INT)
INSERT INTO #TABLE
SELECT 1,3,'526','ANC',100 UNION
SELECT 1,3,'985','JTT',200 UNION
SELECT 3,4,'100','HIK',300
-- select data before updates
SELECT * FROM #TABLE
-- add a row count by AreaID/SuppID
;WITH T1 AS
(
SELECT *
,SUM(1) OVER(PARTITION BY AREAID,SUPPID) AS ROWCNT
FROM #TABLE
)
-- set the SuppNo blank on rows that have more than 1 match
UPDATE T1 SET SuppNo='' WHERE ROWCNT>1
-- add a row # by AreaID/SuppID
;WITH T2 AS
(
SELECT *
,ROW_NUMBER() OVER(PARTITION BY AREAID,SUPPID ORDER BY AREAID,SUPPID) AS ROWID
FROM #TABLE
)
-- delete duplicate rows
DELETE
FROM T2
WHERE ROWID>1
-- select data after updates
SELECT * FROM #TABLE

Most effective way to get value if select count(*) = 1 with grouping

Lets say I have table with ID int, VALUE string:
ID | VALUE
1 abc
2 abc
3 def
4 abc
5 abc
6 abc
If I do select value, count(*) group by value I should get
VALUE | COUNT
abc 5
def 1
Now the tricky part, if there is count == 1 I need to get that ID from first table. Should I be using CTE? creating resultset where I will add ID string == null and run update b.ID = a.ID where count == 1 ?
Or is there another easier way?
EDIT:
I want to have result table like this:
ID VALUE count
null abc 5
3 def 1
If your ID values are unique, you can simply check to see if the max(id) = min(id). If so, then use either one, otherwise you can return null. Like this:
Select Case When Min(id) = Max(id) Then Min(id) Else Null End As Id,
Value, Count(*) As [Count]
From YourTable
Group By Value
Since you are already performing an aggregate, including the MIN and Max function is not likely to take any extra (noticeable) time. I encourage you to give this a try.
The way I would do it would indeed be a CTE:
using #group AS (SELECT value, Count(*) as count from MyTable GROUP BY value HAVING count = 1)
SELECT MyTable.ID, #group.value, #group.count from MyTable
JOIN #group ON #group.value = MyTable.value
When using group by, after the group by statement you can use a having clause.
So
SELECT [ID]
FROM table
GROUP BY [VALUE]
HAVING COUNT(*) = 1
Edit: with regards to your edited question: this uses some fun joins and unions
CREATE TABLE #table
(ID int IDENTITY,
VALUE varchar(3))
INSERT INTO #table (VALUE)
VALUES('abc'),('abc'),('def'),('abc'),('abc'),('abc')
SELECT * FROM (
SELECT Null as ID,VALUE, COUNT(*) as [Count]
FROM #table
GROUP BY VALUE
HAVING COUNT(*) > 1
UNION ALL
SELECT t.ID,t.VALUE,p.Count FROM
#table t
JOIN
(SELECT VALUE, COUNT(*) as [Count]
FROM #table
GROUP BY VALUE
HAVING COUNT(*) = 1) p
ON t.VALUE=p.VALUE
) a
DROP TABLE #table
maybe not the most efficient but something like this works:
SELECT MAX(Id) as ID,Value FROM Table WHERE COUNT(*) = 1 GROUP BY Value

Finding duplicate rows but skip the last result?

I am trying to find duplicate rows in my DB, like this:
SELECT email, COUNT(emailid) AS NumOccurrences
FROM users
GROUP BY emailid HAVING ( COUNT(emailid) > 1 )
This returns the emailid and the number of matches found. Now what I want do is compare the ID column to another table I have and set a column there with the count.
The other table has a column named duplicates, which should contain the amount of duplicates from the select. So let's say we have 3 rows with the same emailid. The duplicates column has a "3" in all 3 rows. What I want is a "2" in the first 2 and nothing or 0 in the last of the 3 matching ID rows.
Is this possible?
Update:
I managed to have a temporary table now, which looks like this:
mailid | rowcount | AmountOfDups
643921 | 1 | 3
643921 | 2 | 3
643921 | 3 | 3
Now, how could I decide that only the first 2 should be updated (by mailid) in the other table? The other table has mailid as well.
SELECT ...
ROW_NUMBER() OVER (PARTITION BY email ORDER BY emailid DESC) AS RN
FROM ...
...is a great starting point for such a problem. Never underestimate the power of ROW_NUMBER()!
Using Sql Server 2005+ you could try something like (full example)
DECLARE #Table TABLE(
ID INT IDENTITY(1,1),
Email VARCHAR(20)
)
INSERT INTO #Table (Email) SELECT 'a'
INSERT INTO #Table (Email) SELECT 'b'
INSERT INTO #Table (Email) SELECT 'c'
INSERT INTO #Table (Email) SELECT 'a'
INSERT INTO #Table (Email) SELECT 'b'
INSERT INTO #Table (Email) SELECT 'a'
; WITH Duplicates AS (
SELECT Email,
COUNT(ID) TotalDuplicates
FROM #Table
GROUP BY Email
HAVING COUNT(ID) > 1
)
, Counts AS (
SELECT t.ID,
ROW_NUMBER() OVER(PARTITION BY t.Email ORDER BY t.ID) EmailID,
d.TotalDuplicates
FROM #Table t INNER JOIN
Duplicates d ON t.Email = d.Email
)
SELECT ID,
CASE
WHEN EmailID = TotalDuplicates
THEN 0
ELSE TotalDuplicates - 1
END Dups
FROM Counts