Using CTE instead of Cursor - tsql

I have the following table structure.
I just want to update SubId to all the rows where it is null and where the RawLineNumber is ascending by 1 and also the SeqNumber ascending by 1.
RawlineNumber Claimid SubId SeqNumber
1 6000 A100 1
2 6000 NULL 2
3 6000 NULL 3
10 6000 A200 1
11 6000 NULL 2
25 6000 A300 1
26 6000 NULL 2
27 6000 NULL 3
I want to update
SubId of RawLineNumber 2 and 3 with A100,
SubId of RawLineNumber 11 with A200,
SubId of RawLineNumber 26 and 27 with A300.
I have a cursor which does the job but can I have a CTE to take care of it ?

UPDATE m
SET subid = q.subid
FROM mytable m
CROSS APPLY
(
SELECT TOP 1 subid
FROM mytable mi
WHERE mi.rawLineNumber < m.rawLineNumber
AND mi.subid IS NOT NULL
ORDER BY
rawLineNumber DESC
) q
WHERE m.subid IS NULL

Since a recusive solution was requested, I decided to write one. Also it works for gaps in Seqnumbers and RawlineNumber
declare #t table (RawlineNumber int, Claimid int, SubId varchar(5), SeqNumber int)
insert #t values(1, 6000, 'A100', 1)
insert #t values(2, 6000, NULL, 2)
insert #t values(3, 6000, NULL, 3)
insert #t values(10, 6000, 'A200', 1)
insert #t values(11, 6000, NULL, 2)
insert #t values(25, 6000, 'A300', 1)
insert #t values(26, 6000, NULL, 2)
insert #t values(27, 6000, NULL, 3)
;with cte as
(
select Rawlinenumber, SeqNumber, SubId
from #t where SubId is not null and SeqNumber = 1
union all
select t.Rawlinenumber, t.SeqNumber, c.SubId
from cte c
join
#t t
on c.Rawlinenumber + 1 = t.Rawlinenumber
and c.SeqNumber + 1 = t.SeqNumber
where t.SubId is null and t.SeqNumber > 1
)
update t
set SubId = c.SubId
from #t t join cte c
on c.Rawlinenumber = t.Rawlinenumber
where t.SeqNumber > 1
select * from #t

A not-so simple SQL script should achieve what you want:
update my_table t1 set t1.subid =
(select t2.subid from my_table t2
where t2.rawlinenumber < t1.rawlinenumber
and t2.seqnumber = 1
and t2.rawlinenumber = (
select max(t3.rawlinenumber)
from my_table t3
where t3.seq_number = 1
and t3.rawlinenumber <= t2.rawlinenumber)
where t1.subid is null;
The inner subselect (T3) gives us the last row having seqnumber = 1 before the current line,
the outer subselect gives us the SubID for this row (using windowing functions would be more efficient, but since you didn't mention a specific RDBMS, I stick with this :-) )

Related

SQL deduct some numbers from rows conditionally

I have a table (showing temp table)
CREATE TABLE #TempTable
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
This table has this sample data:
INSERT INTO #TempTable (CustID, RODate, Operation)
VALUES (10, DATEADD(MONT, -2, GETDATE()), 2),
(10, DATEADD(MONT, -1, GETDATE()), 3),
(10, GETDATE(), 5)
So table have below data
TempID CustID RODate Operation
-----------------------------------------------------------
1 10 2019-03-17 2
2 10 2019-04-17 3
3 10 2019-05-17 5
Requirement is I will get one integer variable in parameter which is #noOfOperation, let's say its value is 10
I will also get no of months in parameter, let's say it's 3
I have to query the table to return data for last 3 months only (excluding current month (date asc)
Then I have to deduct #noOfOperation from the table and update.
Deduction will be based on availability in operation column.
For example: in this case first we will deduct from 2019-03-17
10 - 2 = 8 (operation column for this row becomes 0)
Next we will deduct from 2019-04-17
8 - 3 = 5 (operation column for this row becomes 0)
Similarly for 2019-05-17
5-5 = 0 (operation column for this row becomes 0)
I have to check if #noOfOperation is less than or more than the number of operation of individual months then do the above accordingly
Check this out, The idea is to use the accumulated operation and then subtracted the needed value as below:
declare #TempTable TABLE
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
INSERT INTO #TempTable (CustID, RODate, Operation)
VALUES (10, DATEADD(MONTH, -2, GETDATE()), 5),
(10, DATEADD(MONTH, -1, GETDATE()), 6),
(10, GETDATE(), 7)
select * from #TempTable
Declare #noOfOperation int =8
Declare #noOfMonths int =3
Declare #StartDate date,#DateEnd date,#avNoOfOperation int
--get the range you are working for
select
#StartDate=cast(cast(year(dateadd(Month,-#noOfMonths+1,getdate())) as varchar(4))+'-'+cast(Month(dateadd(Month,-#noOfMonths+1,getdate())) as varchar(2))+'-01' as date)
,#DateEnd=dateadd(day,-1,cast(cast(year(getdate()) as varchar(4))+'-'+cast(Month(getdate()) as varchar(2))+'-01' as date)) ;
--get the total of avaliable operation, for validating before subtracting
select #avNoOfOperation=sum(t.Operation) from #TempTable t where cast(t.RODate as date) between #StartDate and #DateEnd
--review the variables if needed
--select #StartDate [#StartDate],#DateEnd [#DateEnd],#avNoOfOperation [#avNoOfOperation]
if(#avNoOfOperation>=#noOfOperation and #noOfOperation>0)
begin
--only here we can start subtracting
;with DataIncluded as (
select *,#noOfOperation [noOfOperation],sum(Operation) over (order by RODate) [AcOp] from #TempTable t where cast(t.RODate as date) between #StartDate and #DateEnd
),SubtractDataSet as (
select *,AcOp-#noOfOperation [leftOp],
case when (AcOp-#noOfOperation)<=0 then 0 else
case when (AcOp-#noOfOperation)<Operation then AcOp-#noOfOperation else Operation end end [UpOp]
from DataIncluded
)
Update #TempTable
set A.Operation=B.[UpOp]
From #TempTable A
inner join SubtractDataSet B on A.TempID=B.TempID
end
select * from #TempTable
Note: Im not using the current month so my output is different then the one you suggested. if the inputs was as follow:
TempID CustID RODate Operation
1 10 2019-03-17 5
2 10 2019-04-17 6
3 10 2019-05-17 7
The output would be :-
TempID CustID RODate Operation
1 10 2019-03-17 0
2 10 2019-04-17 3
3 10 2019-05-17 7
--Change the value of #OperationsToBeDeducted, to see different results
declare #OperationsToBeDeducted int
declare #OperationsRemaining int
declare #RODate date
set #OperationsToBeDeducted = 4
declare #TempID int
set #TempID = 1
DROP TABLE IF EXISTS #TempOperation
create table #TempOperation
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
insert into #TempOperation (CustID,RODate,Operation)
values
(10,DATEADD(month, -3, getdate()),2),
(10,DATEADD(month, -2, getdate()), 2),
(10,DATEADD(month, -1, getdate()),3)
DROP TABLE IF EXISTS #TempOperation2
create table #TempOperation2
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
insert into #TempOperation2 select CustID,RODate, Operation from #TempOperation
select * from #TempOperation2 order by RODate asc
declare #maxID int;
select #maxID = max(TempID) from #TempOperation2
while (#TempID <= #maxID)
begin
set #OperationsRemaining = 0
select #OperationsRemaining = Operation, #RODate = RODate from #TempOperation2 where TempID = #TempID
if(#OperationsToBeDeducted is not null and #OperationsRemaining is not null and
#OperationsRemaining > 0 and #OperationsRemaining > #OperationsToBeDeducted)
begin
update #TempOperation set Operation = #OperationsRemaining - #OperationsToBeDeducted where TempID = #TempID
set #OperationsToBeDeducted = 0
end
else if(#OperationsToBeDeducted is not null and #OperationsRemaining is not null and
#OperationsRemaining > 0 and #OperationsRemaining <= #OperationsToBeDeducted)
begin
set #OperationsToBeDeducted = #OperationsToBeDeducted - #OperationsRemaining
update #TempOperation set Operation = #OperationsRemaining - #OperationsRemaining where TempID = #TempID
end
SET #TempID = #TempID + 1
end
select * from #TempOperation order by RODate asc
DROP TABLE #TempOperation
DROP TABLE #TempOperation2

sql compare columns to get result

I have the following issue. I have products with 3 different states. Parent, Child and products which are orphans. I am setting Parents as 1, Children as 2 and Orphans as 0. I am struggling to get the Orphan to set to 0. I realise that counting the amount of Parent PLU's is where I am going wrong but I do not know how to resolve this issue. Any help would be appreciated. (As you maybe able to tell, I am a noob and constructive criticism would be appreciated)
Kind Regards,
Jason.
Picture of results from query
declare #OrderID int = 1635
declare #Store char(3) = '001'
declare #SortedBy smallint = 2
DECLARE #tbl TABLE (DetailID int, OrderID int, PLU nvarchar(35), ParentPLU nvarchar(35))
INSERT INTO #tbl (DetailID, OrderID, PLU, ParentPLU)
SELECT DetailID, OrderDetails.OrderID, OrderDetails.PLU, OrderDetails.ParentPLU
FROM OrderDetails
INNER JOIN PLU
ON PLU.PLU = OrderDetails.PLU
WHERE OrderDetails.OrderID = #OrderID
AND OrderDetails.OrderStore = #Store
SELECT DetailID, OrderID, PLU, ParentPLU,
CASE WHEN ( SELECT COUNT(DISTINCT ParentPLU)
FROM #tbl
WHERE ParentPLU IN (SELECT PLU FROM #tbl)
) > 0 AND ParentPLU = '' THEN 1
WHEN ( SELECT COUNT(DISTINCT ParentPLU)
FROM #tbl
WHERE ParentPLU IN (SELECT PLU FROM #tbl)
) > 0 THEN 2
ELSE
0
END AS ParentChild,
ROW_NUMBER() OVER (ORDER BY
CASE WHEN #SortedBy = 1 THEN OrderID END ASC,
CASE WHEN #SortedBy = 2 THEN DetailID END ASC
) AS ID
FROM #tbl
You can use coalesce to get your desired result. First subquery checks for parent state, second for children. If both are null, then it is orphan
select
DetailID, OrderID, PLU, ParentPLU
, coalesce((
select
distinct 1
from
#tbl b
where
a.PLU = b.ParentPlu
)
, (
select
distinct 2
from
#tbl b
where
b.PLU = a.ParentPlu
), 0)
from
#tbl a

Remove Duplicates

I have a table like below:
SuppID AreaID SuppNo SupName SupPrice
------------------------------------------------
1 3 526 ANC 100
1 3 985 JTT 200
3 4 100 HIK 300
In the above table, for same SuppID(1) and same AreaID(3), different SuppNo are there (526 & 985) in two different rows.
In this scenario , I'd like to make those two rows into a single row with SuppNo field as blank.
Also my output result should display rows with all the columns.
Any Help?
This should get you started:
DECLARE #TABLE TABLE (SuppID INT, AreaID INT, SuppNo VARCHAR(5), SupName VARCHAR(5), SupPrice INT)
INSERT INTO #TABLE
SELECT 1,3,'526','ANC',100 UNION
SELECT 1,3,'985','JTT',200 UNION
SELECT 3,4,'100','HIK',300
-- select data before updates
SELECT * FROM #TABLE
-- add a row count by AreaID/SuppID
;WITH T1 AS
(
SELECT *
,SUM(1) OVER(PARTITION BY AREAID,SUPPID) AS ROWCNT
FROM #TABLE
)
-- set the SuppNo blank on rows that have more than 1 match
UPDATE T1 SET SuppNo='' WHERE ROWCNT>1
-- add a row # by AreaID/SuppID
;WITH T2 AS
(
SELECT *
,ROW_NUMBER() OVER(PARTITION BY AREAID,SUPPID ORDER BY AREAID,SUPPID) AS ROWID
FROM #TABLE
)
-- delete duplicate rows
DELETE
FROM T2
WHERE ROWID>1
-- select data after updates
SELECT * FROM #TABLE

reuse table data in round robin manner

Let us say I have some data I would like to repeat N times. A naive approach would be this:
IF OBJECT_ID('dbo.Data', 'U') IS NOT NULL
DROP TABLE dbo.Data
CREATE TABLE Data
(
DataId INT NOT NULL PRIMARY KEY,
DataValue NVARCHAR(MAX) NOT NULL
)
INSERT INTO Data (DataId, DataValue)
SELECT 1, 'Value1' UNION ALL
SELECT 2, 'Value2' UNION ALL
SELECT 3, 'Value3' UNION ALL
SELECT 4, 'Value4' UNION ALL
SELECT 5, 'Value5'
DECLARE #RowsRequired INT
DECLARE #Counter INT
DECLARE #NumberOfRows INT
SET #RowsRequired = 22
IF OBJECT_ID('tempdb..#TempData') IS NOT NULL DROP TABLE #TempData
CREATE TABLE #TempData
(
Id INT IDENTITY(1,1),
DataValue NVARCHAR(MAX)
)
SELECT #NumberOfRows = COUNT(*) FROM Data
SET #Counter = 1
WHILE #RowsRequired > 0
BEGIN
INSERT INTO #TempData
SELECT DataValue FROM Data WHERE DataId = #Counter
SET #Counter = #Counter + 1
SET #RowsRequired = #RowsRequired - 1
IF(#Counter > #NumberOfRows)
BEGIN
SET #Counter = 1
END
END
SELECT * FROM #TempData
Here #RowsRequired determines how many rows are required. Could this be rephrased in a set based form? Thanks.
Here is a SQLFiddle with the code.
Try this instead:
DECLARE #RowsRequired INT = 22
;WITH CTE AS
(
SELECT DataId, DataValue, ROW_NUMBER() over (PARTITION BY DataId ORDER BY DataId) sort
FROM DATA
CROSS JOIN
(
SELECT TOP (#RowsRequired) 0 d
FROM master..spt_values
) d
)
SELECT TOP (#RowsRequired) ROW_NUMBER() over (order by sort), DataValue
FROM CTE
ORDER BY sort, 1
I tried this and worked for me.
declare #requiredrows int
set #requiredrows = 22;
declare #foreachrow int
select #foreachrow = #requiredrows / Count(*) from Data;
select top (#requiredrows) * from
(
select *, ROW_NUMBER() over(partition by dataId order by number) rno
from Data
Cross Join master..spt_values
) A
where rno <= #foreachrow + 1
Hope it will help.

Find exact FK matches

Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 1
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
drop table #temp_sID_wordCount
drop table #temp_count_wordID_sID
drop table #temp3_wordID_sID_forThatCount
drop table #temp3_sID1_sID2
drop table #temp3_sID1_sID2_keep
create table #temp_sID_wordCount (sID int primary key, ccount int not null)
create table #temp_count_wordID_sID (ccount int not null, wordID int not null, sID int not null, primary key (ccount, wordID, sID))
create table #temp3_wordID_sID_forThatCount (wordID int not null, sID int not null, primary key(wordID, sID))
create table #temp3_sID1_sID2_keep (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
create table #temp3_sID1_sID2 (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
insert into #temp_sID_wordCount
select sID, count(*) as ccount
FROM [FTSindexWordOnce] with (nolock)
group by sID
order by sID;
select count(*) from #temp_sID_wordCount where ccount <= 100; -- 701,966
truncate table #temp_count_wordID_sID
insert into #temp_count_wordID_sID
select #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID
from #temp_sID_wordCount
join [FTSindexWordOnce] with (nolock)
on [FTSindexWordOnce].sID = #temp_sID_wordCount.sID
and ccount >= 1 and ccount <= 10
order by #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID;
select count(*) from #temp_sID_wordCount; -- 34,860,090
truncate table #temp3_sID1_sID2_keep
declare cur cursor for
select top 10 ccount from #temp_count_wordID_sID group by ccount order by ccount
open cur
declare #count int, #sIDcur int
fetch next from cur into #count
while (##FETCH_STATUS = 0)
begin
--print (#count)
--select count(*), #count from #temp_sID_wordCount where #temp_sID_wordCount.ccount = #count
truncate table #temp3_wordID_sID_forThatCount
truncate table #temp3_sID1_sID2
-- wordID and sID for that unique word count
-- they can only be exact if they have the same word count
insert into #temp3_wordID_sID_forThatCount
select #temp_count_wordID_sID.wordID
, #temp_count_wordID_sID.sID
from #temp_count_wordID_sID
where #temp_count_wordID_sID.ccount = #count
order by #temp_count_wordID_sID.wordID, #temp_count_wordID_sID.sID
-- select count(*) from #temp3_wordID_sID_forThatCount
-- this has some duplicates
-- sID1 is the group
insert into #temp3_sID1_sID2
select w1.sID, w2.sID
from #temp3_wordID_sID_forThatCount as w1 with (nolock)
join #temp3_wordID_sID_forThatCount as w2 with (nolock)
on w1.wordID = w2.wordID
and w1.sID <= w2.sID
group by w1.sID, w2.sID
having count(*) = #count
order by w1.sID, w2.sID
-- get rid of the goups of 1
delete #temp3_sID1_sID2
where sID1 in (select sID1 from #temp3_sID1_sID2 group by sID1 having count(*) = 1)
-- get rid of the double dips
delete #temp3_sID1_sID2
where #temp3_sID1_sID2.sID1 in
(select distinct s1del.sID1 -- these are the double dips
from #temp3_sID1_sID2 as s1base with (nolock)
join #temp3_sID1_sID2 as s1del with (nolock)
on s1del.sID1 > s1base.sID1
and s1Del.sID1 = s1base.sID2)
insert into #temp3_sID1_sID2_keep
select #temp3_sID1_sID2.sID1
, #temp3_sID1_sID2.sID2
from #temp3_sID1_sID2 with (nolock)
order by #temp3_sID1_sID2.sID1, #temp3_sID1_sID2.sID2
fetch next from cur into #count
end
close cur
deallocate cur
select *
FROM #temp3_sID1_sID2_keep with (nolock)
order by 1,2
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
;with tmp1 as (select sID, cnt = count(wordID) from [Table] group by sID)
select s1.sID, s2.sID
from tmp1 s1
cross join tmp1 s2
cross apply (
select count(1)
from [Table] d1
join [Table] d2 on d2.wordID = d1.wordID
where d1.sID = s1.sID and d2.sID = s2.sID
) c(cnt)
where s1.cnt = s2.cnt
and s1.sID > s2.sID
and s1.cnt = c.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
;with a as (
select distinct sID from [Table]
)
select sID,
hashbytes('sha1', (
select cast(wordID as varchar(10)) + '|'
from [Table]
where sID = a.sID
order by wordID
for xml path('')))
from a
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.