-- Just a brief of business scenario is table has been created for a good receipt.
-- So here we have good expected line with PurchaseOrder(PO) in first few line.
-- And then we receive each expected line physically and that time these quantity may be different
-- due to business case like quantity may damage and short quantity like that.
-- So we maintain a status for that eg: OK, Damage, also we have to calculate short quantity
-- based on total of expected quantity of each item and total of received line.
if object_id('DEV..Temp','U') is not null
drop table Temp
CREATE TABLE Temp
(
ID INT IDENTITY(1,1) PRIMARY KEY CLUSTERED,
Item VARCHAR(32),
PO VARCHAR(32) NULL,
ExpectedQty INT NULL,
ReceivedQty INT NULL,
[STATUS] VARCHAR(32) NULL,
BoxName VARCHAR(32) NULL
)
-- Please see first few line with PO data will be the expected lines,
-- and then rest line will be received line
INSERT INTO TEMP (Item,PO,ExpectedQty,ReceivedQty,[STATUS],BoxName)
SELECT 'ITEM01','PO-01','30',NULL,NULL,NULL UNION ALL
SELECT 'ITEM01','PO-02','20',NULL,NULL,NULL UNION ALL
SELECT 'ITEM02','PO-01','40',NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-01','50',NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-02','30',NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-03','20',NULL,NULL,NULL UNION ALL
SELECT 'ITEM04','PO-01','30',NULL,NULL,NULL UNION ALL
SELECT 'ITEM01',NULL,NULL,'20','OK','box01' UNION ALL
SELECT 'ITEM01',NULL,NULL,'25','OK','box02' UNION ALL
SELECT 'ITEM01',NULL,NULL,'5','DAMAGE','box03' UNION ALL
SELECT 'ITEM02',NULL,NULL,'38','OK','box04' UNION ALL
SELECT 'ITEM02',NULL,NULL,'2','DAMAGE','box05' UNION ALL
SELECT 'ITEM03',NULL,NULL,'30','OK','box06' UNION ALL
SELECT 'ITEM03',NULL,NULL,'30','OK','box07' UNION ALL
SELECT 'ITEM03',NULL,NULL,'10','DAMAGE','box09' UNION ALL
SELECT 'ITEM04',NULL,NULL,'25','OK','box10'
-- Below Table is my expected result based on above data.
-- I need to show those data following way.
-- So I appreciate if you can give me an appropriate query for it.
-- Note: first row is blank and it is actually my table header. :)
-- Conditions : any of row, we cant have ReceivedQty, DamageQty and ShortQty
-- values more than ExpectedQty value. Item03 has this scenario
-- Query should run in SQL 2000 DB
SELECT ''as'ITEM', ''as'PO#', ''as'ExpectedQty',''as'ReceivedQty',''as'DamageQty' ,''as'ShortQty' UNION ALL
SELECT 'ITEM01','PO-01','30','30','0' ,'0' UNION ALL
SELECT 'ITEM01','PO-02','20','15','5' ,'0' UNION ALL
SELECT 'ITEM02','PO-01','40','38','2' ,'0' UNION ALL
SELECT 'ITEM03','PO-01','50','50','0' ,'0' UNION ALL
SELECT 'ITEM03','PO-02','30','20','10' ,'10' UNION ALL
SELECT 'ITEM03','PO-03','20','0','0','20' UNION ALL
SELECT 'ITEM04','PO-01','30','25','0' ,'5'
Using this solution as a starting point, I've eventually ended up with this:
SELECT
Item,
PO,
ExpectedQty,
ReceivedQty = CASE
WHEN RemainderQty >= 0 THEN ExpectedQty
WHEN RemainderQty < -ExpectedQty THEN 0
ELSE RemainderQty + ExpectedQty
END,
DamageQty = CASE
WHEN RemainderQty >=0 OR ExpectedQty < -TotalRemainderQty THEN 0
WHEN RemainderQty < -ExpectedQty AND TotalRemainderQty > 0 THEN ExpectedQty
WHEN RemainderQty < -ExpectedQty AND TotalRemainderQty < -DamagedQty THEN ExpectedQty + TotalRemainderQty
WHEN RemainderQty > -DamagedQty THEN -RemainderQty
ELSE DamagedQty
END,
ShortQty = CASE
WHEN TotalRemainderQty >= 0 THEN 0
WHEN TotalRemainderQty < -ExpectedQty THEN ExpectedQty
ELSE -TotalRemainderQty
END
FROM (
SELECT
a.Item,
a.PO,
a.ExpectedQty,
b.DamagedQty,
RemainderQty = b.ReceivedQty - a.RunningTotalQty,
TotalRemainderQty = b.ReceivedQty + b.DamagedQty - a.RunningTotalQty
FROM (
SELECT
a.Item,
a.PO,
a.ExpectedQty,
RunningTotalQty = SUM(a2.ExpectedQty)
FROM (SELECT Item, PO, ExpectedQty FROM Temp WHERE STATUS IS NULL) AS a
INNER JOIN (SELECT Item, PO, ExpectedQty FROM Temp WHERE STATUS IS NULL) AS a2
ON a.Item = a2.Item AND a.PO >= a2.PO
GROUP BY
a.Item,
a.PO,
a.ExpectedQty
) a
LEFT JOIN (
SELECT
Item,
ReceivedQty = SUM(CASE STATUS WHEN 'OK' THEN ReceivedQty ELSE 0 END),
DamagedQty = SUM(CASE STATUS WHEN 'DAMAGE' THEN ReceivedQty ELSE 0 END)
FROM Temp
GROUP BY Item
) b ON a.Item = b.Item
) s;
Related
I'm trying to combine range.
WITH a AS (
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29'
union
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43'
), b AS (
SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
FROM a
)
, c AS (
SELECT *, count(step) OVER (ORDER BY begat) AS grp
FROM b
)
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM c
GROUP BY grp
ORDER BY 1
Result
1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-22 12:24:16";"2017-09-22 13:18:29"
5 "2017-09-28 09:48:54";"2017-09-28 13:39:13"
positions 3,4 intersect (endata> next begat)
How do I make the union of all the intersections into one large interval
I need result
1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-28 09:48:54";"2017-09-28 13:39:13"
Hey I would suggest using the following process :
1- Identify when a row is new, so you give a value of 1 to values that do not overlap (CTE b)
2- Sequence together the rows that have overlaps with others. This way you can see have a common identifier that will allow you to MAX and MIN begat and endat (CTE c)
3- For each sequence, give the MIN of begat and the MAX of endat so you will have your final values
WITH a AS (
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29'
union
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43'
)
, b AS (
SELECT
begat
, endat
, (begat > MAX(endat) OVER w IS TRUE)::INT is_new
FROM a
WINDOW w AS (ORDER BY begat ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
)
, c AS (
SELECT
begat
, endat
, SUM((is_new)) OVER (ORDER BY begat) seq
FROM b
)
SELECT
MIN(begat) beg_at
, MAX(endat) end_at
FROM c
GROUP BY seq
EDITED
If you need speed you can use a psql function:
create or replace function append_ranges_in_a() returns setof a
language plpgsql
as
$BODY$
declare
v_current a%rowtype;
v_new a%rowtype;
v_first boolean:=true;
begin
for v_current in select begat, endat from a order by begat, endat
loop
if v_first then
v_first := false;
v_new.begat := v_current.begat;
elsif v_new.endat < v_current.begat then
return next v_new;
v_new.begat := v_current.begat;
end if;
v_new.endat := greatest(v_current.endat,v_new.endat);
end loop;
return next v_new;
return;
end;
$BODY$;
select * from append_ranges_in_a()
I test it with ~ 400000 rows:
delete from a;
insert into a (begat, endat)
select time::text, (time+'1 day'::interval)::text
from (select t+(round(random()*23.0)||' hours')::interval as time
from generate_series('1401-01-01'::timestamp,'2018-08-21'::timestamp,'1 day'::interval) t
) t;
select count(*) from a;
select * from append_ranges_in_a() offset 100000 limit 10
and it is twice fast as O(n^2) pure SQL version.
OLD slow solution:
You can use a recursive WITH query https://www.postgresql.org/docs/current/static/queries-with.html to construct the result row by row.
I create the table
The first row is the candidate first row (ending where ending), but the row is not "ready"
Then I look at the next row (step) and if it is not intersecting I add a ready row,
Also I add a not ready row with the current (last) observed range
When I do not have more rows I calculate the last row
I retain ready rows and the last row
Here is the code
CREATE TABLE a as
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29'
union
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43';
WITH RECURSIVE t(begat, endat, ready, step) AS (
select * from (
select *,false,1 from a order by begat, endat limit 1) a
UNION ALL
SELECT new_rows.*
FROM (SELECT * FROM t ORDER BY begat DESC limit 1) t,
lateral (SELECT * FROM a ORDER BY begat, endat OFFSET step LIMIT 1) a,
lateral (
SELECT t.begat, t.endat, true as ready, step WHERE t.endat < a.begat
UNION SELECT CASE WHEN t.endat < a.begat THEN a.begat ELSE t.begat END, greatest(a.endat, t.endat), false, step+1
) new_rows
)
select begat, endat
from (
select begat, endat, ready, row_number() over (order by begat desc, endat desc)=1 is_last
from t
order by begat, endat) t
where ready or is_last;
i using range type
https://www.postgresql.org/docs/9.3/static/rangetypes.html
WITH tmp AS (
-- preparation range type
select begat, coalesce( endat, 'infinity' ) as endAt, tsrange( begat, coalesce( endat, 'infinity' ) ) as rg
from (
select '2017-09-11 17:13:03'::timestamp as begat ,'2017-09-12 12:24:09'::timestamp as endat union
select '2017-09-19 15:34:40','2017-09-20 11:04:45' union
select '2017-09-20 08:32:00','2017-09-22 13:28:37' union
select '2017-09-20 13:52:43','2017-09-20 14:14:43' union
select '2017-09-21 12:24:16','2017-09-21 13:28:29' union
select '2017-09-22 12:24:16','2017-09-22 13:28:29' union
select '2017-09-22 12:34:16','2017-09-23 13:28:29' union
select '2017-09-22 12:25:16','2017-09-24 13:28:29' union
select '2017-09-28 09:48:54','2017-09-28 13:39:13' union
select '2017-09-28 14:22:16','2017-09-28 15:52:15' union
select '2017-10-05 12:17:45','2017-10-06 12:35:38' union
select '2017-10-06 16:20:44','2017-10-07 10:11:09' union
select '2017-10-07 20:38:32','2017-10-09 14:42:29' union
select '2017-10-12 18:22:14','2017-10-12 20:52:45'
) a
),a as (
-- group intersecting range
select l.*
from tmp l left join tmp r on l.begAt > r.begAt and r.rg #> l.rg
where r.begAt is null
),
b AS (
SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
FROM a
)
, c AS (
SELECT *, count(step) OVER (ORDER BY begat) AS grp
FROM b
)
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM c
GROUP BY grp
ORDER BY 1
I have the following issue. I have products with 3 different states. Parent, Child and products which are orphans. I am setting Parents as 1, Children as 2 and Orphans as 0. I am struggling to get the Orphan to set to 0. I realise that counting the amount of Parent PLU's is where I am going wrong but I do not know how to resolve this issue. Any help would be appreciated. (As you maybe able to tell, I am a noob and constructive criticism would be appreciated)
Kind Regards,
Jason.
Picture of results from query
declare #OrderID int = 1635
declare #Store char(3) = '001'
declare #SortedBy smallint = 2
DECLARE #tbl TABLE (DetailID int, OrderID int, PLU nvarchar(35), ParentPLU nvarchar(35))
INSERT INTO #tbl (DetailID, OrderID, PLU, ParentPLU)
SELECT DetailID, OrderDetails.OrderID, OrderDetails.PLU, OrderDetails.ParentPLU
FROM OrderDetails
INNER JOIN PLU
ON PLU.PLU = OrderDetails.PLU
WHERE OrderDetails.OrderID = #OrderID
AND OrderDetails.OrderStore = #Store
SELECT DetailID, OrderID, PLU, ParentPLU,
CASE WHEN ( SELECT COUNT(DISTINCT ParentPLU)
FROM #tbl
WHERE ParentPLU IN (SELECT PLU FROM #tbl)
) > 0 AND ParentPLU = '' THEN 1
WHEN ( SELECT COUNT(DISTINCT ParentPLU)
FROM #tbl
WHERE ParentPLU IN (SELECT PLU FROM #tbl)
) > 0 THEN 2
ELSE
0
END AS ParentChild,
ROW_NUMBER() OVER (ORDER BY
CASE WHEN #SortedBy = 1 THEN OrderID END ASC,
CASE WHEN #SortedBy = 2 THEN DetailID END ASC
) AS ID
FROM #tbl
You can use coalesce to get your desired result. First subquery checks for parent state, second for children. If both are null, then it is orphan
select
DetailID, OrderID, PLU, ParentPLU
, coalesce((
select
distinct 1
from
#tbl b
where
a.PLU = b.ParentPlu
)
, (
select
distinct 2
from
#tbl b
where
b.PLU = a.ParentPlu
), 0)
from
#tbl a
I need to get a tree of related nodes given a certain node, but not necessary top node. I've got a solution using two CTEs, since I am struggling to squeeze it all into one CTE :). Might somebody have a sleek solution to avoid using two CTEs? Here is some code that I was playing with:
DECLARE #temp AS TABLE (ID INT, ParentID INT)
INSERT INTO #temp
SELECT 1 ID, NULL AS ParentID
UNION ALL
SELECT 2, 1
UNION ALL
SELECT 3, 2
UNION ALL
SELECT 4, 3
UNION ALL
SELECT 5, 4
UNION ALL
SELECT 6, NULL
UNION ALL
SELECT 7, 6
UNION ALL
SELECT 8, 7
DECLARE #startNode INT = 4
;WITH TheTree (ID,ParentID)
AS (
SELECT ID, ParentID
FROM #temp
WHERE ID = #startNode
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
;WITH Up(ID,ParentID)
AS (
SELECT t.id, t.ParentID
FROM #temp t
WHERE t.ID = #startNode
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN Up c ON t.id = c.ParentID
)
--SELECT * FROM Up
,TheTree (ID,ParentID)
AS (
SELECT ID, ParentID
FROM Up
WHERE ParentID is null
UNION ALL
SELECT t.id, t.ParentID
FROM #temp t
JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT * FROM TheTree
thanks
Meh. This avoids using two CTEs, but the result is a brute force kludge that hardly qualifies as "sleek" as it won’t be efficient if your table is at all sizeable. It will:
Recursively build all possible hierarchies
As you build them, flag the target NodeId as you find it
Return only the targeted tree
I threw in column “TreeNumber” on the off-chance the TargetId appears in multiple hierarchies, or if you’d ever have multiple values to check in one pass. “Depth” was added to make the output a bit more legible.
A more complex solution like #John’s might do, and more and subtler tricks could be done with more detailed table sturctures.
DECLARE #startNode INT = 4
;WITH cteAllTrees (TreeNumber, Depth, ID, ParentID, ContainsTarget)
AS (
SELECT
row_number() over (order by ID) TreeNumber
,1
,ID
,ParentID
,case
when ID = #startNode then 1
else 0
end ContainsTarget
FROM #temp
WHERE ParentId is null
UNION ALL
SELECT
tr.TreeNumber
,tr.Depth + 1
,t.id
,t.ParentID
,case
when tr.ContainsTarget = 1 then 1
when t.ID = #startNode then 1
else 0
end ContainsTarget
FROM #temp t
INNER JOIN cteAllTrees tr
ON t.ParentID = tr.ID
)
SELECT
TreeNumber
,Depth
,ID
,ParentId
from cteAllTrees
where TreeNumber in (select TreeNumber from cteAllTrees where ContainsTarget = 1)
order by
TreeNumber
,Depth
,ID
Here is a technique where you can select the entire hierarchy, a specific node with all its children, and even a filtered list and how they roll.
Note: See the comments next to the DECLAREs
Declare #YourTable table (id int,pt int,name varchar(50))
Insert into #YourTable values
(1,null,'1'),(2,1,'2'),(3,1,'3'),(4,2,'4'),(5,2,'5'),(6,3,'6'),(7,null,'7'),(8,7,'8')
Declare #Top int = null --<< Sets top of Hier Try 2
Declare #Nest varchar(25) = '|-----' --<< Optional: Added for readability
Declare #Filter varchar(25) = '' --<< Empty for All or try 4,6
;with cteP as (
Select Seq = cast(1000+Row_Number() over (Order by name) as varchar(500))
,ID
,pt
,Lvl=1
,name
From #YourTable
Where IsNull(#Top,-1) = case when #Top is null then isnull(pt,-1) else ID end
Union All
Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.name)) as varchar(500))
,r.ID
,r.pt
,p.Lvl+1
,r.name
From #YourTable r
Join cteP p on r.pt = p.ID)
,cteR1 as (Select *,R1=Row_Number() over (Order By Seq) From cteP)
,cteR2 as (Select A.Seq,A.ID,R2=Max(B.R1) From cteR1 A Join cteR1 B on (B.Seq like A.Seq+'%') Group By A.Seq,A.ID )
Select Distinct
A.R1
,B.R2
,A.ID
,A.pt
,A.Lvl
,name = Replicate(#Nest,A.Lvl-1) + A.name
From cteR1 A
Join cteR2 B on A.ID=B.ID
Join (Select R1 From cteR1 where IIF(#Filter='',1,0)+CharIndex(concat(',',ID,','),concat(',',#Filter+','))>0) F on F.R1 between A.R1 and B.R2
Order By A.R1
Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 1
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
drop table #temp_sID_wordCount
drop table #temp_count_wordID_sID
drop table #temp3_wordID_sID_forThatCount
drop table #temp3_sID1_sID2
drop table #temp3_sID1_sID2_keep
create table #temp_sID_wordCount (sID int primary key, ccount int not null)
create table #temp_count_wordID_sID (ccount int not null, wordID int not null, sID int not null, primary key (ccount, wordID, sID))
create table #temp3_wordID_sID_forThatCount (wordID int not null, sID int not null, primary key(wordID, sID))
create table #temp3_sID1_sID2_keep (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
create table #temp3_sID1_sID2 (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
insert into #temp_sID_wordCount
select sID, count(*) as ccount
FROM [FTSindexWordOnce] with (nolock)
group by sID
order by sID;
select count(*) from #temp_sID_wordCount where ccount <= 100; -- 701,966
truncate table #temp_count_wordID_sID
insert into #temp_count_wordID_sID
select #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID
from #temp_sID_wordCount
join [FTSindexWordOnce] with (nolock)
on [FTSindexWordOnce].sID = #temp_sID_wordCount.sID
and ccount >= 1 and ccount <= 10
order by #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID;
select count(*) from #temp_sID_wordCount; -- 34,860,090
truncate table #temp3_sID1_sID2_keep
declare cur cursor for
select top 10 ccount from #temp_count_wordID_sID group by ccount order by ccount
open cur
declare #count int, #sIDcur int
fetch next from cur into #count
while (##FETCH_STATUS = 0)
begin
--print (#count)
--select count(*), #count from #temp_sID_wordCount where #temp_sID_wordCount.ccount = #count
truncate table #temp3_wordID_sID_forThatCount
truncate table #temp3_sID1_sID2
-- wordID and sID for that unique word count
-- they can only be exact if they have the same word count
insert into #temp3_wordID_sID_forThatCount
select #temp_count_wordID_sID.wordID
, #temp_count_wordID_sID.sID
from #temp_count_wordID_sID
where #temp_count_wordID_sID.ccount = #count
order by #temp_count_wordID_sID.wordID, #temp_count_wordID_sID.sID
-- select count(*) from #temp3_wordID_sID_forThatCount
-- this has some duplicates
-- sID1 is the group
insert into #temp3_sID1_sID2
select w1.sID, w2.sID
from #temp3_wordID_sID_forThatCount as w1 with (nolock)
join #temp3_wordID_sID_forThatCount as w2 with (nolock)
on w1.wordID = w2.wordID
and w1.sID <= w2.sID
group by w1.sID, w2.sID
having count(*) = #count
order by w1.sID, w2.sID
-- get rid of the goups of 1
delete #temp3_sID1_sID2
where sID1 in (select sID1 from #temp3_sID1_sID2 group by sID1 having count(*) = 1)
-- get rid of the double dips
delete #temp3_sID1_sID2
where #temp3_sID1_sID2.sID1 in
(select distinct s1del.sID1 -- these are the double dips
from #temp3_sID1_sID2 as s1base with (nolock)
join #temp3_sID1_sID2 as s1del with (nolock)
on s1del.sID1 > s1base.sID1
and s1Del.sID1 = s1base.sID2)
insert into #temp3_sID1_sID2_keep
select #temp3_sID1_sID2.sID1
, #temp3_sID1_sID2.sID2
from #temp3_sID1_sID2 with (nolock)
order by #temp3_sID1_sID2.sID1, #temp3_sID1_sID2.sID2
fetch next from cur into #count
end
close cur
deallocate cur
select *
FROM #temp3_sID1_sID2_keep with (nolock)
order by 1,2
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
;with tmp1 as (select sID, cnt = count(wordID) from [Table] group by sID)
select s1.sID, s2.sID
from tmp1 s1
cross join tmp1 s2
cross apply (
select count(1)
from [Table] d1
join [Table] d2 on d2.wordID = d1.wordID
where d1.sID = s1.sID and d2.sID = s2.sID
) c(cnt)
where s1.cnt = s2.cnt
and s1.sID > s2.sID
and s1.cnt = c.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
;with a as (
select distinct sID from [Table]
)
select sID,
hashbytes('sha1', (
select cast(wordID as varchar(10)) + '|'
from [Table]
where sID = a.sID
order by wordID
for xml path('')))
from a
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.
Lets say I have table with ID int, VALUE string:
ID | VALUE
1 abc
2 abc
3 def
4 abc
5 abc
6 abc
If I do select value, count(*) group by value I should get
VALUE | COUNT
abc 5
def 1
Now the tricky part, if there is count == 1 I need to get that ID from first table. Should I be using CTE? creating resultset where I will add ID string == null and run update b.ID = a.ID where count == 1 ?
Or is there another easier way?
EDIT:
I want to have result table like this:
ID VALUE count
null abc 5
3 def 1
If your ID values are unique, you can simply check to see if the max(id) = min(id). If so, then use either one, otherwise you can return null. Like this:
Select Case When Min(id) = Max(id) Then Min(id) Else Null End As Id,
Value, Count(*) As [Count]
From YourTable
Group By Value
Since you are already performing an aggregate, including the MIN and Max function is not likely to take any extra (noticeable) time. I encourage you to give this a try.
The way I would do it would indeed be a CTE:
using #group AS (SELECT value, Count(*) as count from MyTable GROUP BY value HAVING count = 1)
SELECT MyTable.ID, #group.value, #group.count from MyTable
JOIN #group ON #group.value = MyTable.value
When using group by, after the group by statement you can use a having clause.
So
SELECT [ID]
FROM table
GROUP BY [VALUE]
HAVING COUNT(*) = 1
Edit: with regards to your edited question: this uses some fun joins and unions
CREATE TABLE #table
(ID int IDENTITY,
VALUE varchar(3))
INSERT INTO #table (VALUE)
VALUES('abc'),('abc'),('def'),('abc'),('abc'),('abc')
SELECT * FROM (
SELECT Null as ID,VALUE, COUNT(*) as [Count]
FROM #table
GROUP BY VALUE
HAVING COUNT(*) > 1
UNION ALL
SELECT t.ID,t.VALUE,p.Count FROM
#table t
JOIN
(SELECT VALUE, COUNT(*) as [Count]
FROM #table
GROUP BY VALUE
HAVING COUNT(*) = 1) p
ON t.VALUE=p.VALUE
) a
DROP TABLE #table
maybe not the most efficient but something like this works:
SELECT MAX(Id) as ID,Value FROM Table WHERE COUNT(*) = 1 GROUP BY Value