SQL deduct some numbers from rows conditionally - tsql

I have a table (showing temp table)
-- Temp table for the example: operation counts per customer and date.
-- Operation holds the amount still available to deduct from a row.
CREATE TABLE #TempTable (
    TempID    INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    CustID    INT                NOT NULL,
    RODate    DATE               NULL,
    Operation INT                NULL
);
This table has this sample data:
-- Seed three rows for customer 10: two months ago, one month ago, and today.
-- The datepart must be spelled MONTH (or MM / M); "MONT" is not a valid datepart.
INSERT INTO #TempTable (CustID, RODate, Operation)
VALUES
    (10, DATEADD(MONTH, -2, GETDATE()), 2),
    (10, DATEADD(MONTH, -1, GETDATE()), 3),
    (10, GETDATE(), 5);
So the table has the data below:
TempID CustID RODate Operation
-----------------------------------------------------------
1 10 2019-03-17 2
2 10 2019-04-17 3
3 10 2019-05-17 5
The requirement is that I will receive an integer parameter, @noOfOperation; let's say its value is 10.
I will also receive the number of months as a parameter; let's say it's 3.
I have to query the table to return data for the last 3 months only (excluding the current month), ordered by date ascending.
Then I have to deduct @noOfOperation from the table and update it.
The deduction will be based on what is available in the Operation column.
For example: in this case first we will deduct from 2019-03-17
10 - 2 = 8 (operation column for this row becomes 0)
Next we will deduct from 2019-04-17
8 - 3 = 5 (operation column for this row becomes 0)
Similarly for 2019-05-17
5-5 = 0 (operation column for this row becomes 0)
I have to check whether @noOfOperation is less than or more than the number of operations of each individual month, and then do the above accordingly.

Check this out. The idea is to compute the accumulated (running-total) Operation and then subtract the required value from it, as below:
-- Deducts @noOfOperation from the Operation column, oldest row first, using a
-- running total. Only rows inside the date window are touched, and nothing is
-- changed unless the window holds at least @noOfOperation operations in total.

-- Sample data lives in a table variable so the script is self-contained.
DECLARE @TempTable TABLE
(
    TempID    INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    CustID    INT  NOT NULL,
    RODate    DATE NULL,
    Operation INT  NULL
);

INSERT INTO @TempTable (CustID, RODate, Operation)
VALUES
    (10, DATEADD(MONTH, -2, GETDATE()), 5),
    (10, DATEADD(MONTH, -1, GETDATE()), 6),
    (10, GETDATE(), 7);

-- State before the deduction.
SELECT TempID, CustID, RODate, Operation
FROM @TempTable
ORDER BY TempID;

DECLARE @noOfOperation INT = 8;   -- total number of operations to deduct
DECLARE @noOfMonths INT = 3;
DECLARE @StartDate DATE, @DateEnd DATE, @avNoOfOperation INT;

-- Date window: first day of the month (@noOfMonths - 1) months back, up to the
-- last day of the previous month (the current month is excluded).
-- NOTE(review): this spans @noOfMonths - 1 full months; use "- @noOfMonths"
-- if the window is meant to cover @noOfMonths months before the current one.
SELECT
    @StartDate = DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) - @noOfMonths + 1, 0),
    @DateEnd   = DATEADD(DAY, -1, DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0));

-- Total available inside the window; used to validate before subtracting.
SELECT @avNoOfOperation = SUM(t.Operation)
FROM @TempTable AS t
WHERE t.RODate BETWEEN @StartDate AND @DateEnd;   -- RODate is DATE, so no cast is needed

-- Review the variables if needed:
-- SELECT @StartDate AS [@StartDate], @DateEnd AS [@DateEnd], @avNoOfOperation AS [@avNoOfOperation];

IF (@avNoOfOperation >= @noOfOperation AND @noOfOperation > 0)
BEGIN
    -- Only here can we start subtracting.
    ;WITH DataIncluded AS (
        -- Running total in date order. ROWS framing plus the TempID tie-breaker
        -- keeps the total per-row even when two rows share the same RODate.
        SELECT
            t.TempID,
            t.Operation,
            SUM(t.Operation) OVER (
                ORDER BY t.RODate, t.TempID
                ROWS UNBOUNDED PRECEDING
            ) AS AcOp
        FROM @TempTable AS t
        WHERE t.RODate BETWEEN @StartDate AND @DateEnd
    ),
    SubtractDataSet AS (
        -- AcOp - @noOfOperation is what is left after the deduction has consumed
        -- everything up to and including this row.
        SELECT
            d.TempID,
            CASE
                WHEN d.AcOp - @noOfOperation <= 0 THEN 0                                 -- fully consumed
                WHEN d.AcOp - @noOfOperation < d.Operation THEN d.AcOp - @noOfOperation  -- partly consumed
                ELSE d.Operation                                                         -- untouched
            END AS UpOp
        FROM DataIncluded AS d
    )
    UPDATE A
    SET Operation = B.UpOp
    FROM @TempTable AS A
    INNER JOIN SubtractDataSet AS B
        ON A.TempID = B.TempID;
END;

-- State after the deduction.
SELECT TempID, CustID, RODate, Operation
FROM @TempTable
ORDER BY TempID;
Note: I'm not using the current month, so my output is different from the one you suggested. If the inputs were as follows:
TempID CustID RODate Operation
1 10 2019-03-17 5
2 10 2019-04-17 6
3 10 2019-05-17 7
The output would be :-
TempID CustID RODate Operation
1 10 2019-03-17 0
2 10 2019-04-17 3
3 10 2019-05-17 7

-- Change the value of @OperationsToBeDeducted to see different results.
-- Deducts that many operations from #TempOperation, oldest RODate first,
-- never taking a row below zero. Rows whose Operation is NULL or <= 0 are
-- skipped, and a NULL or non-positive amount changes nothing.
DECLARE @OperationsToBeDeducted INT = 4;

DROP TABLE IF EXISTS #TempOperation;

CREATE TABLE #TempOperation
(
    TempID    INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    CustID    INT  NOT NULL,
    RODate    DATE NULL,
    Operation INT  NULL
);

INSERT INTO #TempOperation (CustID, RODate, Operation)
VALUES
    (10, DATEADD(MONTH, -3, GETDATE()), 2),
    (10, DATEADD(MONTH, -2, GETDATE()), 2),
    (10, DATEADD(MONTH, -1, GETDATE()), 3);

-- State before the deduction.
SELECT TempID, CustID, RODate, Operation
FROM #TempOperation
ORDER BY RODate ASC;

-- TakenBefore = operations available on all earlier rows, i.e. how much of the
-- deduction has already been absorbed by the time this row is reached.
;WITH Running AS (
    SELECT
        TempID,
        Operation,
        SUM(Operation) OVER (
            ORDER BY RODate, TempID
            ROWS UNBOUNDED PRECEDING
        ) - Operation AS TakenBefore
    FROM #TempOperation
    WHERE Operation > 0
)
UPDATE t
SET Operation = CASE
                    -- the remaining amount swallows this row completely
                    WHEN @OperationsToBeDeducted - r.TakenBefore >= r.Operation THEN 0
                    -- the remaining amount is smaller than this row: partial deduction
                    ELSE r.Operation - (@OperationsToBeDeducted - r.TakenBefore)
                END
FROM #TempOperation AS t
INNER JOIN Running AS r
    ON r.TempID = t.TempID
-- only rows reached while something is still left to deduct
WHERE @OperationsToBeDeducted > r.TakenBefore;

-- State after the deduction.
SELECT TempID, CustID, RODate, Operation
FROM #TempOperation
ORDER BY RODate ASC;

DROP TABLE #TempOperation;

Related

POSTGRES INSERT/UPDATE ON CONFLICT using WITH CTE

I have a table like below. I am trying to merge into this table based on the value in a CTE. But when I try to update the table when there is a conflict, it cannot get the value in CTE
-- Per-account configuration values. Exactly one of date_value / number_value /
-- string_value is expected to carry the value for a given row.
CREATE TABLE IF NOT EXISTS master_config_details
(
    master_config_id INT          NOT NULL,
    account_id       INT          NOT NULL,
    date_value       TIMESTAMP(3) NULL,
    number_value     BIGINT       NULL,
    string_value     VARCHAR(50)  NULL,
    row_status       SMALLINT     NOT NULL,
    created_date     TIMESTAMP(3) NOT NULL,
    modified_date    TIMESTAMP(3) NULL,
    CONSTRAINT pk_master_config_details PRIMARY KEY (master_config_id, account_id, row_status)
);

-- Seed row. row_status is 1 so that the key (1, 11, 1) already exists and the
-- upsert that follows actually hits the ON CONFLICT path.
INSERT INTO master_config_details (
    master_config_id,
    account_id,
    date_value,
    number_value,
    string_value,
    row_status,
    created_date,
    modified_date
)
VALUES (1, 11, NULL, 100, NULL, 1, '2020-11-18 12:01:18', '2020-11-18 12:02:31');

SELECT
    master_config_id,
    account_id,
    date_value,
    number_value,
    string_value,
    row_status,
    created_date,
    modified_date
FROM master_config_details;
Now using a cte I want to insert/update records in this table. Below is the code I am using to do the same. When the record already exist in the table I want to update the table based on the data_type_id value in the cte (cte_input_data.data_type_id ) but it fails with the error.
SQL Error [42703]: ERROR: column excluded.data_type_id does not exist
what it should achieve is
if cte_input_data.data_type_id = 1 update master_config_details set date_value = cte.value
if cte_input_data.data_type_id = 2 update master_config_details set number_value = cte.value
if cte_input_data.data_type_id = 3 update master_config_details set string_value = cte.value
The below code should do an update to the table master_config_details.number_value = 22 as there is already a record in that combination (master_config_id, account_id, row_status) which is (1,11,1) ( run this to see the record select * from master_config_details;) but its throwing an error instead
SQL Error [42703]: ERROR: column excluded.data_type_id does not exist
-- Upsert of a single configuration value (the failing statement from the question).
-- The CTE supplies the key columns, a data_type_id discriminator and the value as text.
WITH cte_input_data AS (
select
1 AS master_config_id
,11 AS account_id
,2 AS data_type_id
,'22' AS value
,1 AS row_status)
-- No column list: values are matched to master_config_details columns by position.
INSERT INTO master_config_details
SELECT
cte.master_config_id
,cte.account_id
-- Route the text value into the column selected by data_type_id; the other two stay NULL.
,CASE WHEN cte.data_type_id = 1 THEN cte.value::timestamp(3) ELSE NULL END AS date_time_value
,CASE WHEN cte.data_type_id = 2 THEN cte.value::integer ELSE NULL END AS number_value
,CASE WHEN cte.data_type_id = 3 THEN cte.value ELSE NULL END AS string_value
-- row_status is a literal here; the CTE's row_status column is not used.
,1
,NOW() AT TIME ZONE 'utc'
,NOW() AT TIME ZONE 'utc'
FROM cte_input_data cte
ON CONFLICT (master_config_id,account_id,row_status)
-- This is where the statement fails: data_type_id is a column of the CTE, not of
-- master_config_details, so excluded.data_type_id raises error 42703.
-- NOTE(review): date_time_value is only a SELECT alias above and is likewise not
-- a column of master_config_details (the table column is date_value).
DO UPDATE SET
date_value = CASE WHEN excluded.data_type_id = 1 THEN excluded.date_time_value::timestamp(3) ELSE NULL END
,number_value = CASE WHEN excluded.data_type_id = 2 THEN excluded.number_value::integer ELSE NULL END
,string_value = CASE WHEN excluded.data_type_id = 3 THEN excluded.string_value ELSE NULL END
,modified_date = NOW() AT TIME ZONE 'utc';
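The special `excluded` table is used to reference the values originally proposed for insertion. It has the same columns as the target table.
So you are getting this error because `data_type_id` does not exist in your target table, and therefore not in the special `excluded` table either; it exists only in your CTE. For the same reason `excluded.date_time_value` must be written as `excluded.date_value`: `excluded` uses the target table's column names, not the aliases from your SELECT list.
As a workaround you can select the value from the CTE with a nested SELECT in the ON CONFLICT clause, e.g. `CASE WHEN (SELECT data_type_id FROM cte_input_data) = 2 THEN excluded.number_value ELSE NULL END`. Alternatively, because the proposed row already has NULL in the two columns that do not apply, you can simply assign `date_value = excluded.date_value, number_value = excluded.number_value, string_value = excluded.string_value`.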

How to fill column basing on two other columns

I have table LessonHour with empty Number column.
-- Lesson-hour slots per school. Number is the column to be filled in
-- (the position of the lesson hour within its school).
CREATE TABLE [dbo].[LessonHour]
(
    [Id]        [uniqueidentifier] NOT NULL,
    [StartTime] [time](7)          NOT NULL,
    [EndTime]   [time](7)          NOT NULL,
    [SchoolId]  [uniqueidentifier] NOT NULL,
    [Number]    [int]              NULL
);
How can I fill up the table with Number for each LessonHour so it would be the number of lesson hour in order?
The LessonHours cannot cross each other. Every school has defined its own lesson hour schema.
Example set of data
http://pastebin.com/efWCtUbv
What I'd do:
Order by SchoolId and StartTime.
Use a cursor to write the next number into each row, starting again from 1 every time the SchoolId changes.
Edit:
Solution with cursor
-- Numbers the active lesson hours of every school 1, 2, 3, ... in StartTime
-- order by walking a cursor over a snapshot of the rows.

-- Snapshot of the active lesson hours.
SELECT -- TOP 20
    LH.[Id],
    LH.[StartTime],
    LH.[EndTime],
    LH.[SchoolId]
INTO #LH
FROM LessonHour AS LH
INNER JOIN RowStatus AS RS
    ON LH.RowStatusId = RS.Id
WHERE RS.IsActive = 1;

SELECT Id, StartTime, EndTime, SchoolId
FROM #LH
ORDER BY SchoolId, StartTime;

DECLARE @id UNIQUEIDENTIFIER, @st TIME(7), @et TIME(7), @sid UNIQUEIDENTIFIER;
-- Seeded with a fresh GUID so the very first row always counts as a "new school".
DECLARE @prev_sid UNIQUEIDENTIFIER = NEWID();
DECLARE @i INT = 1;

-- The cursor is only read forward once, so LOCAL FAST_FORWARD is sufficient.
-- Columns are listed explicitly because FETCH ... INTO binds them by position.
DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT Id, StartTime, EndTime, SchoolId
    FROM #LH
    ORDER BY SchoolId, StartTime;

OPEN cur;
FETCH NEXT FROM cur INTO @id, @st, @et, @sid;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- PRINT @prev_sid
    -- Restart the numbering whenever the school changes.
    IF @sid <> @prev_sid
    BEGIN
        SET @i = 1;
    END;

    UPDATE LessonHour
    SET Number = @i
    WHERE Id = @id;

    PRINT @i;

    SET @i = @i + 1;
    SET @prev_sid = @sid;

    FETCH NEXT FROM cur INTO @id, @st, @et, @sid;
END;

CLOSE cur;
DEALLOCATE cur;

DROP TABLE #LH;
This is the result I was after http://pastebin.com/iZ8cnA6w
Merging the information from the StackOverflow questions SQL Update with row_number() and
How do I use ROW_NUMBER()?:
-- Rank every school's lesson hours by start time and write the rank back
-- into Number by updating through the CTE.
WITH numbered AS (
    SELECT
        number,
        ROW_NUMBER() OVER (
            PARTITION BY schoolid
            ORDER BY starttime ASC
        ) AS seq
    FROM lessonhour
)
UPDATE numbered
SET number = seq
Would this work?
-- LessonHour variant in which Number is a computed column derived from
-- DATEDIFF(hour, StartTime, EndTime) rather than a stored value.
CREATE TABLE dbo.LessonHour (
    Id        UNIQUEIDENTIFIER NOT NULL,
    StartTime TIME(7)          NOT NULL,
    EndTime   TIME(7)          NOT NULL,
    SchoolId  UNIQUEIDENTIFIER NOT NULL,
    Number    AS DATEDIFF(HOUR, StartTime, EndTime)
)
So if I understand the question correctly you require a calculated column which takes in the values of [StartTime] and [EndTime] and returns the number of hours for that lesson as an int. The above table definition should do the trick.

reuse table data in round robin manner

Let us say I have some data I would like to repeat N times. A naive approach would be this:
-- Naive round robin: copy rows from dbo.Data into #TempData one at a time,
-- wrapping back to the first row until @RowsRequired rows have been written.
IF OBJECT_ID('dbo.Data', 'U') IS NOT NULL
    DROP TABLE dbo.Data;

CREATE TABLE dbo.Data
(
    DataId    INT           NOT NULL PRIMARY KEY,
    DataValue NVARCHAR(MAX) NOT NULL
);

INSERT INTO dbo.Data (DataId, DataValue)
VALUES
    (1, N'Value1'),
    (2, N'Value2'),
    (3, N'Value3'),
    (4, N'Value4'),
    (5, N'Value5');

DECLARE @RowsRequired INT;
DECLARE @Counter INT;
DECLARE @NumberOfRows INT;

SET @RowsRequired = 22;

IF OBJECT_ID('tempdb..#TempData') IS NOT NULL
    DROP TABLE #TempData;

CREATE TABLE #TempData
(
    Id        INT IDENTITY(1, 1),
    DataValue NVARCHAR(MAX)
);

SELECT @NumberOfRows = COUNT(*) FROM dbo.Data;

SET @Counter = 1;

-- Relies on DataId being the contiguous range 1..@NumberOfRows.
WHILE @RowsRequired > 0
BEGIN
    INSERT INTO #TempData (DataValue)
    SELECT DataValue
    FROM dbo.Data
    WHERE DataId = @Counter;

    SET @Counter = @Counter + 1;
    SET @RowsRequired = @RowsRequired - 1;

    -- Wrap around after the last source row.
    IF (@Counter > @NumberOfRows)
    BEGIN
        SET @Counter = 1;
    END;
END;

SELECT Id, DataValue
FROM #TempData
ORDER BY Id;
Here @RowsRequired determines how many rows are required. Could this be rephrased in a set-based form? Thanks.
Here is a SQLFiddle with the code.
Try this instead:
-- Set-based round robin: replicate every Data row @RowsRequired times, number
-- the copies of each row (sort = which round the copy belongs to), then take
-- the first @RowsRequired rows round by round.
DECLARE @RowsRequired INT = 22;

;WITH CTE AS
(
    SELECT
        d.DataId,
        d.DataValue,
        ROW_NUMBER() OVER (PARTITION BY d.DataId ORDER BY d.DataId) AS sort
    FROM Data AS d
    CROSS JOIN
    (
        -- master..spt_values is used only as a convenient row source.
        SELECT TOP (@RowsRequired) 0 AS d
        FROM master..spt_values
    ) AS copies
)
SELECT TOP (@RowsRequired)
    -- DataId is the tie-breaker inside a round, so both the generated Id and
    -- the output order are deterministic.
    ROW_NUMBER() OVER (ORDER BY sort, DataId) AS Id,
    DataValue
FROM CTE
ORDER BY sort, DataId;
I tried this and worked for me.
-- Round robin via CROSS JOIN: every Data row is repeated, the repeats are
-- numbered per DataId (rno), and only as many rounds as needed are kept.
DECLARE @requiredrows INT = 22;
DECLARE @foreachrow INT;

-- Full rounds needed per source row. NULLIF avoids a divide-by-zero when Data
-- is empty; the query below then returns no rows.
SELECT @foreachrow = @requiredrows / NULLIF(COUNT(*), 0)
FROM Data;

SELECT TOP (@requiredrows)
    A.DataId,
    A.DataValue,
    A.rno
FROM
(
    SELECT
        d.DataId,
        d.DataValue,
        ROW_NUMBER() OVER (PARTITION BY d.DataId ORDER BY v.number) AS rno
    FROM Data AS d
    CROSS JOIN master..spt_values AS v
) AS A
-- one extra round covers the remainder of the integer division
WHERE A.rno <= @foreachrow + 1
-- TOP without ORDER BY returns an arbitrary subset; order round by round
ORDER BY A.rno, A.DataId;
Hope it will help.

Find exact FK matches

Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 2
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
-- Finds groups of sIDs that own exactly the same set of wordIDs.
-- Two sIDs can only match if they have the same word count, so the work is
-- done one word count at a time (cursor over the distinct counts).
-- Result: #temp3_sID1_sID2_keep (sID1 = group representative, sID2 = member).

-- Clean up leftovers from a previous run; IF EXISTS keeps the first run from failing.
DROP TABLE IF EXISTS #temp_sID_wordCount;
DROP TABLE IF EXISTS #temp_count_wordID_sID;
DROP TABLE IF EXISTS #temp3_wordID_sID_forThatCount;
DROP TABLE IF EXISTS #temp3_sID1_sID2;
DROP TABLE IF EXISTS #temp3_sID1_sID2_keep;

CREATE TABLE #temp_sID_wordCount (sID INT PRIMARY KEY, ccount INT NOT NULL);
CREATE TABLE #temp_count_wordID_sID (ccount INT NOT NULL, wordID INT NOT NULL, sID INT NOT NULL, PRIMARY KEY (ccount, wordID, sID));
CREATE TABLE #temp3_wordID_sID_forThatCount (wordID INT NOT NULL, sID INT NOT NULL, PRIMARY KEY (wordID, sID));
CREATE TABLE #temp3_sID1_sID2_keep (sID1 INT NOT NULL, sID2 INT NOT NULL, PRIMARY KEY (sID1, sID2));
CREATE TABLE #temp3_sID1_sID2 (sID1 INT NOT NULL, sID2 INT NOT NULL, PRIMARY KEY (sID1, sID2));

-- Number of words per sID.
INSERT INTO #temp_sID_wordCount (sID, ccount)
SELECT sID, COUNT(*) AS ccount
FROM [FTSindexWordOnce] WITH (NOLOCK)
GROUP BY sID
ORDER BY sID;

SELECT COUNT(*) FROM #temp_sID_wordCount WHERE ccount <= 100; -- 701,966

TRUNCATE TABLE #temp_count_wordID_sID;

-- (word count, wordID, sID) for the sIDs whose word count is in the range under test.
INSERT INTO #temp_count_wordID_sID (ccount, wordID, sID)
SELECT wc.ccount, w.wordID, w.sID
FROM #temp_sID_wordCount AS wc
INNER JOIN [FTSindexWordOnce] AS w WITH (NOLOCK)
    ON w.sID = wc.sID
    AND wc.ccount >= 1 AND wc.ccount <= 10
ORDER BY wc.ccount, w.wordID, w.sID;

SELECT COUNT(*) FROM #temp_sID_wordCount; -- 34,860,090

TRUNCATE TABLE #temp3_sID1_sID2_keep;

-- One pass per distinct word count; the cursor is read forward only.
DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT TOP 10 ccount
    FROM #temp_count_wordID_sID
    GROUP BY ccount
    ORDER BY ccount;

OPEN cur;

DECLARE @count INT;

FETCH NEXT FROM cur INTO @count;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- PRINT (@count)
    -- SELECT COUNT(*), @count FROM #temp_sID_wordCount WHERE #temp_sID_wordCount.ccount = @count
    TRUNCATE TABLE #temp3_wordID_sID_forThatCount;
    TRUNCATE TABLE #temp3_sID1_sID2;

    -- wordID and sID for that unique word count;
    -- they can only be exact matches if they have the same word count
    INSERT INTO #temp3_wordID_sID_forThatCount (wordID, sID)
    SELECT c.wordID, c.sID
    FROM #temp_count_wordID_sID AS c
    WHERE c.ccount = @count
    ORDER BY c.wordID, c.sID;

    -- SELECT COUNT(*) FROM #temp3_wordID_sID_forThatCount

    -- Pairs sharing all @count words. This has some duplicates; sID1 is the group.
    -- "<=" also produces the self pair (sID, sID), which makes groups of one detectable below.
    INSERT INTO #temp3_sID1_sID2 (sID1, sID2)
    SELECT w1.sID, w2.sID
    FROM #temp3_wordID_sID_forThatCount AS w1
    INNER JOIN #temp3_wordID_sID_forThatCount AS w2
        ON w1.wordID = w2.wordID
        AND w1.sID <= w2.sID
    GROUP BY w1.sID, w2.sID
    HAVING COUNT(*) = @count
    ORDER BY w1.sID, w2.sID;

    -- get rid of the groups of 1
    DELETE #temp3_sID1_sID2
    WHERE sID1 IN (SELECT sID1 FROM #temp3_sID1_sID2 GROUP BY sID1 HAVING COUNT(*) = 1);

    -- get rid of the double dips: groups whose sID1 already appears as a member
    -- (sID2) of a group with a smaller sID1
    DELETE #temp3_sID1_sID2
    WHERE #temp3_sID1_sID2.sID1 IN
        (SELECT DISTINCT s1del.sID1 -- these are the double dips
         FROM #temp3_sID1_sID2 AS s1base
         INNER JOIN #temp3_sID1_sID2 AS s1del
             ON s1del.sID1 > s1base.sID1
             AND s1del.sID1 = s1base.sID2);

    INSERT INTO #temp3_sID1_sID2_keep (sID1, sID2)
    SELECT p.sID1, p.sID2
    FROM #temp3_sID1_sID2 AS p
    ORDER BY p.sID1, p.sID2;

    FETCH NEXT FROM cur INTO @count;
END;

CLOSE cur;
DEALLOCATE cur;

SELECT sID1, sID2
FROM #temp3_sID1_sID2_keep
ORDER BY sID1, sID2;
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
-- Pairs of sIDs with identical wordID sets: both sides have the same number of
-- words, and the number of words they share equals that number.
;WITH set_sizes AS (
    SELECT sID, COUNT(wordID) AS cnt
    FROM [Table]
    GROUP BY sID
)
SELECT hi.sID, lo.sID
FROM set_sizes AS hi
CROSS JOIN set_sizes AS lo
CROSS APPLY (
    -- how many wordIDs the two sIDs have in common
    SELECT COUNT(1)
    FROM [Table] AS a
    INNER JOIN [Table] AS b
        ON b.wordID = a.wordID
    WHERE a.sID = hi.sID
      AND b.sID = lo.sID
) AS shared(cnt)
WHERE hi.sID > lo.sID          -- report each pair once, larger sID first
  AND hi.cnt = lo.cnt
  AND shared.cnt = hi.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
-- One hash per sID over its ordered, '|'-separated list of wordIDs; sIDs with
-- the same word set get the same hash and can then be grouped on it.
;WITH a AS (
    SELECT DISTINCT sID
    FROM [Table]
)
SELECT
    a.sID,
    -- SHA2_256 replaces SHA1, which is deprecated in HASHBYTES.
    -- NOTE(review): before SQL Server 2016 HASHBYTES accepts at most 8000 bytes
    -- of input, which long word lists can exceed - confirm the target version.
    HASHBYTES('SHA2_256', (
        -- VARCHAR(11) holds any INT including the minus sign
        SELECT CAST(t.wordID AS VARCHAR(11)) + '|'
        FROM [Table] AS t
        WHERE t.sID = a.sID
        ORDER BY t.wordID
        FOR XML PATH('')
    )) AS wordset_hash   -- named so the result can be grouped on or selected INTO a table
FROM a
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.

Using CTE instead of Cursor

I have the following table structure.
I just want to update SubId to all the rows where it is null and where the RawLineNumber is ascending by 1 and also the SeqNumber ascending by 1.
RawlineNumber Claimid SubId SeqNumber
1 6000 A100 1
2 6000 NULL 2
3 6000 NULL 3
10 6000 A200 1
11 6000 NULL 2
25 6000 A300 1
26 6000 NULL 2
27 6000 NULL 3
I want to update
SubId of RawLineNumber 2 and 3 with A100,
SubId of RawLineNumber 11 with A200,
SubId of RawLineNumber 26 and 27 with A300.
I have a cursor which does the job but can I have a CTE to take care of it ?
-- Fill each missing subid from the closest preceding row (by rawLineNumber)
-- that has one.
UPDATE m
SET subid = prev.subid
FROM mytable AS m
CROSS APPLY
(
    SELECT TOP 1 src.subid
    FROM mytable AS src
    WHERE src.subid IS NOT NULL
      AND src.rawLineNumber < m.rawLineNumber
    ORDER BY src.rawLineNumber DESC
) AS prev
WHERE m.subid IS NULL
Since a recursive solution was requested, I decided to write one. Also, it works for gaps in SeqNumber and RawlineNumber.
-- Recursive fill: start from every row that opens a group (SeqNumber = 1 with
-- a SubId) and carry that SubId forward to the row whose RawlineNumber and
-- SeqNumber are both exactly one higher.
DECLARE @t TABLE (RawlineNumber INT, Claimid INT, SubId VARCHAR(5), SeqNumber INT);

INSERT INTO @t (RawlineNumber, Claimid, SubId, SeqNumber)
VALUES
    (1,  6000, 'A100', 1),
    (2,  6000, NULL,   2),
    (3,  6000, NULL,   3),
    (10, 6000, 'A200', 1),
    (11, 6000, NULL,   2),
    (25, 6000, 'A300', 1),
    (26, 6000, NULL,   2),
    (27, 6000, NULL,   3);

;WITH cte AS
(
    -- anchor: the first line of every group
    SELECT Rawlinenumber, SeqNumber, SubId
    FROM @t
    WHERE SubId IS NOT NULL
      AND SeqNumber = 1

    UNION ALL

    -- step: the directly following line inherits the SubId
    SELECT t.Rawlinenumber, t.SeqNumber, c.SubId
    FROM cte AS c
    INNER JOIN @t AS t
        ON c.Rawlinenumber + 1 = t.Rawlinenumber
        AND c.SeqNumber + 1 = t.SeqNumber
    WHERE t.SubId IS NULL
      AND t.SeqNumber > 1
)
UPDATE t
SET SubId = c.SubId
FROM @t AS t
INNER JOIN cte AS c
    ON c.Rawlinenumber = t.Rawlinenumber
WHERE t.SeqNumber > 1
-- lift the default recursion limit of 100 so long groups do not abort the update
OPTION (MAXRECURSION 0);

SELECT RawlineNumber, Claimid, SubId, SeqNumber
FROM @t
ORDER BY RawlineNumber;
A not-so simple SQL script should achieve what you want:
-- Fill every missing subid with the subid of the last group-opening row
-- (seqnumber = 1) that precedes the row being updated.
-- Inside the subqueries my_table is aliased (t2, t3), so the unaliased name
-- my_table refers to the row being updated.
update my_table
set subid =
    (select t2.subid
     from my_table t2
     where t2.seqnumber = 1
       and t2.rawlinenumber = (
           -- the last row with seqnumber = 1 before the current line
           select max(t3.rawlinenumber)
           from my_table t3
           where t3.seqnumber = 1
             and t3.rawlinenumber < my_table.rawlinenumber))
where subid is null;
The inner subselect (T3) gives us the last row having seqnumber = 1 before the current line,
the outer subselect gives us the SubID for this row (using windowing functions would be more efficient, but since you didn't mention a specific RDBMS, I stick with this :-) )