Optimize SQL statement for this Query? - firebird

I have two tables T1 and T2.
T1 has the columns ID,F1,F2,F3,F4,F5,F6,F7,F8
T2 has the columns ID,T1_ID,F1,F2,F3,F4,F5,F6,F7,F8,SUM
Examples Data for T1 and T2
T1
ID,F1,F2,F3,F4,F5,F6,F7,F8
1, 1, 2, 3, 0, 0, 5, 0, 0
2, 0, 0, 0, 1, 0, 4, 5, 0
3, 4, 1, 3, 2, 0, 0, 0, 5
4, 1 ,3, 4, 0, 0 ,0, 0, 0
5, 7, 2, 1, 3, 0, 0, 0, 0
.
.
.
T2
ID,T1_ID,F1,F2,F3,F4,F5,F6,F7,F8,SUM
1, 1, 2, 3, 5, 0, 0, 3, 0, 0,100
2, 5, 9, 8, 8, 1, 0, 0, 0, 0,200
3, 2, 0, 0, 0, 5, 0, 6, 6, 0,300
4, 1 ,3, 4, 2, 0 ,0, 3, 0, 0,255
5, 4, 8, 8, 8, 0, 0, 0, 0, 0,155
.
.
Select * from T2 where T1.F1....T1.F8 have (1 and 2 and 3)
query must return records 1,2,4
1, 1, 2, 3, 5, 0, 0, 3, 0, 0,100
2, 5, 9, 8, 8, 1, 0, 0, 0, 0,200
4, 1 ,3, 4, 2, 0 ,0, 3, 0, 0,255
I create this query
Select T2.ID,T2.F1,T2.F2,T2.F3,T2.F4.T2.F5,T2.F6,T2.F7,T2.F8,T2.SUM,T1.ID
from T2
join T1 on T1.ID = T2.T1_ID
where
(CASE WHEN ( T1_ID.F1 = 1 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 2 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
or
(CASE WHEN ( T1_ID.F1 = 1 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F2 = 2 ) THEN T2.F2 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
or
(CASE WHEN ( T1_ID.F1 = 1 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F3 = 2 ) THEN T2.F3 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
or
(CASE WHEN ( T1_ID.F1 = 1 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F4 = 2 ) THEN T2.F4 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
.
.
.
or
(CASE WHEN ( T1_ID.F2 = 1 ) THEN T2.F2 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 2 ) THEN T2.F1 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
or
(CASE WHEN ( T1_ID.F2 = 1 ) THEN T2.F2 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F2 = 2 ) THEN T2.F2 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
or
(CASE WHEN ( T1_ID.F2 = 1 ) THEN T2.F2 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F3 = 2 ) THEN T2.F3 between 0 and 1000 end)
and(CASE WHEN ( T1_ID.F1 = 3 ) THEN T2.F1 between 0 and 1000 end)
.
.
.
This statement is far too big.
How can I optimize it?

-- Returns every T2 row whose parent T1 row (T1.ID = T2.T1_ID) contains all of
-- the values 1, 2 and 3 somewhere in its columns F1..F8.
--
-- The derived table N turns each T1 row into eight (ID, F) pairs, one per
-- column, so "the value may sit in any column" becomes a plain IN filter plus
-- a count of distinct hits instead of one OR branch per column permutation.
SELECT *
FROM T2
WHERE EXISTS (
    SELECT N.ID
    FROM (
        SELECT T1.ID, T1.F1 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F2 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F3 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F4 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F5 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F6 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F7 AS F FROM T1
        UNION ALL
        SELECT T1.ID, T1.F8 AS F FROM T1
        -- No UNION ALL after the last branch: a set operator with nothing
        -- following it is a syntax error.
    ) N
    WHERE N.F IN (1, 2, 3)
      AND N.ID = T2.T1_ID
    GROUP BY N.ID
    -- All three searched values must be present. DISTINCT keeps a value that
    -- appears in several columns from being counted more than once.
    HAVING COUNT(DISTINCT N.F) = 3
);

Related

Divide by 0 Error Inside Windows Function

I am trying to calculate the % of closed claims divided by total claims per month using Windows functions. But some months will have zero total claims which results in the Divide by zero error encountered message.
I have tried writing a CASE statement to handle where the total claims equal zero to be set the percentage to zero but I have not had any luck so far.
Below is a screenshot of what I am expecting:
And the TSQL below are my attempts at error handling the divide by zero message: I've added a NULLIF on the bottom of the divider and an attempt at a CASE statement to set the value = 0 when the bottom divider is zero - both result in the same error. Any suggestions on how to get around this error?
-- Sample data for the question: one 'Closed' and one 'Total' row per claim
-- month. Several months have a Total of 0, which is what triggers the
-- divide-by-zero error in the queries that follow.
CREATE TABLE #ClaimCounts
(
    Year INT,
    ClaimStatus VARCHAR (50),
    LossMonth DATE,
    ClaimMonth DATE,
    ClaimCount INT
);
INSERT INTO #ClaimCounts
(
    Year,
    ClaimStatus,
    LossMonth,
    ClaimMonth,
    ClaimCount
)
VALUES
(2008, 'Closed', '20080630', '20080131', 0),
(2008, 'Total', '20080630', '20080131', 0),
(2008, 'Closed', '20080630', '20080229', 0),
(2008, 'Total', '20080630', '20080229', 0),
(2008, 'Closed', '20080630', '20080331', 0),
(2008, 'Total', '20080630', '20080331', 0),
(2008, 'Closed', '20080630', '20080430', 0),
(2008, 'Total', '20080630', '20080430', 0),
(2008, 'Closed', '20080630', '20080531', 0),
(2008, 'Total', '20080630', '20080531', 0),
(2008, 'Closed', '20080630', '20080630', 0),
(2008, 'Total', '20080630', '20080630', 6),
-- NOTE(review): 20090731 breaks the otherwise monthly 2008 sequence;
-- presumably 20080731 was intended. Left as posted -- confirm.
(2008, 'Closed', '20080630', '20090731', 2),
(2008, 'Total', '20080630', '20090731', 5),
(2008, 'Closed', '20080630', '20080831', 1),
(2008, 'Total', '20080630', '20080831', 1),
-- LossMonth was posted as '200806308' on the next 'Closed' row and on the
-- November 'Closed' row; a 9-digit literal cannot be converted to DATE, so
-- both now use '20080630' like every other row.
(2008, 'Closed', '20080630', '20080930', 3),
(2008, 'Total', '20080630', '20080930', 3),
(2008, 'Closed', '20080630', '20081031', 2),
(2008, 'Total', '20080630', '20081031', 3),
(2008, 'Closed', '20080630', '20081130', 0),
(2008, 'Total', '20080630', '20081130', 0);
-- The asker's three attempts at "closed / total" per (Year, ClaimMonth).
-- All three still raise the divide-by-zero error; the comments say why.
SELECT Year,
ClaimStatus,
LossMonth,
ClaimMonth,
ClaimCount,
-- Attempt 1: no guard at all. The divisor is the windowed sum of the 'Total'
-- counts, which is 0 for any month whose Total row is 0.
SUM ( CASE WHEN ClaimStatus = 'Closed' THEN ISNULL ( ClaimCount, 0 ) * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth) /
SUM ( CASE WHEN ClaimStatus = 'Total' THEN ClaimCount * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth) AS PercentageClosedClaims1 ,
-- Attempt 2: NULLIF is applied to each row's ClaimCount inside the SUM, not to
-- the SUM itself. Rows with another status still contribute 0 through ELSE 0,
-- so the windowed sum (the actual divisor) can still be 0.
SUM ( CASE WHEN ClaimStatus = 'Closed' THEN ISNULL ( ClaimCount, 0 ) * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth) /
SUM ( CASE WHEN ClaimStatus = 'Total' THEN NULLIF(ClaimCount, 0) * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth) AS PercentageClosedClaims2 ,
-- Attempt 3: the outer CASE only short-circuits on rows that are themselves
-- 'Total' with ClaimCount = 0. Other rows in the same partition (e.g. the
-- 'Closed' row) fall through to ELSE and divide by the same zero sum.
CASE WHEN ClaimStatus = 'Total' AND ClaimCount = 0 THEN 0
ELSE SUM ( CASE WHEN ClaimStatus = 'Closed' THEN ISNULL ( ClaimCount, 0 ) * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth) /
SUM ( CASE WHEN ClaimStatus = 'Total' THEN ISNULL ( ClaimCount, 0 ) * 1.0 ELSE 0 END ) OVER (PARTITION BY Year, ClaimMonth)
END AS PercentageClosedClaims3
FROM #ClaimCounts;
-- Remove the temp table created for this example.
DROP TABLE IF EXISTS #ClaimCounts;
Here is a working option. The IsNull() is optional. I personally don't mind NULL values.
-- Closed-claim ratio per (Year, ClaimMonth) without the divide-by-zero error.
-- NULLIF wraps the whole windowed divisor, so a zero total turns the division
-- into NULL; the outer ISNULL then maps that NULL back to 0.
SELECT
    Year,
    ClaimStatus,
    LossMonth,
    ClaimMonth,
    ClaimCount,
    ISNULL(
        SUM(CASE WHEN ClaimStatus = 'Closed' THEN ISNULL(ClaimCount, 0) * 1.0 ELSE 0 END)
            OVER (PARTITION BY Year, ClaimMonth)
        / NULLIF(
            SUM(CASE WHEN ClaimStatus = 'Total' THEN ClaimCount * 1.0 ELSE 0 END)
                OVER (PARTITION BY Year, ClaimMonth),
            0),
        0) AS PercentageClosedClaims1,
    ISNULL(
        SUM(CASE WHEN ClaimStatus = 'Closed' THEN ISNULL(ClaimCount, 0) * 1.0 ELSE 0 END)
            OVER (PARTITION BY Year, ClaimMonth)
        / NULLIF(
            SUM(CASE WHEN ClaimStatus = 'Total' THEN NULLIF(ClaimCount, 0) * 1.0 ELSE 0 END)
                OVER (PARTITION BY Year, ClaimMonth),
            0),
        0) AS PercentageClosedClaims2,
    ISNULL(
        SUM(CASE WHEN ClaimStatus = 'Closed' THEN ISNULL(ClaimCount, 0) * 1.0 ELSE 0 END)
            OVER (PARTITION BY Year, ClaimMonth)
        / NULLIF(
            SUM(CASE WHEN ClaimStatus = 'Total' THEN ISNULL(ClaimCount, 0) * 1.0 ELSE 0 END)
                OVER (PARTITION BY Year, ClaimMonth),
            0),
        0) AS PercentageClosedClaims3
FROM #ClaimCounts;

Tree structure in tsql - How get data by the special class in branches

I have structure like that(as example):
ID ClassId Name Parent
--------------------------------------
1 12 Boss
2 13 Manager1 1
3 13 Manager2 1
4 13 Manager3 1
5 14 SubManager1 3
6 15 UnderSubManager1 5
7 16 Worker1 2
8 16 Worker2 6
9 14 SubManager2 4
10 16 Worker3 9
So we have these branches:
Boss->Manager1->Worker1
Boss->Manager2->SubManager1->UnderSubManager1->Worker2
Boss->Manager3->SubManager2->Worker3
I need a query that gives me this result:
Boss->Manager1->worker1
Boss->Manager2->worker2
Boss->Manager3->worker3
I tried to do this with a CTE using ClassId, but with poor results :(
Assuming you want to show the 2 top levels (Boss, and ManagerX), and then the lowest level (WorkerX) -
-- Fixture: the hierarchy from the question (Parent references ID).
create table #tmp (ID int, ClassID int, Name varchar(32), Parent int)
go
insert into #tmp (ID, ClassID, Name, Parent)
values
(1, 12, 'Boss', null)
, (2, 13, 'Manager1', 1)
, (3, 13, 'Manager2', 1)
, (4, 13, 'Manager3', 1)
-- SubManager1 reports to Manager2 (ID 3) in the question's data. It was
-- posted here with parent 2, which attached Worker2 to Manager1's branch.
, (5, 14, 'SubManager1', 3)
, (6, 15, 'UnderSubManager1', 5)
, (7, 16, 'Worker1', 2)
, (8, 16, 'Worker2', 6)
, (9, 14, 'SubManager2', 4)
, (10, 16, 'Worker3', 9)
go
-- Recursive walk from the root (Parent is null) down to every node.
-- Path only grows when the node's ClassID is 12, 13 or 16, so rows with
-- ClassID 14 and 15 are traversed but add nothing to the printed path.
with cte as (
-- Anchor: the root row(s).
select t.ID, t.ClassID, t.Name, t.Parent
, Path = cast(case when t.ClassID in (12, 13) then t.Name else '' end as varchar(max))
, NestLevel = 0
, IsWorker = case t.ClassID when 16 then 1 else 0 end
from #tmp t
where t.Parent is null
union all
-- Recursive step: children of rows already in the CTE.
select t.ID, t.ClassID, t.Name, t.Parent
, Path = cte.Path + cast(case when t.ClassID in (12, 13, 16) then '->' + t.Name else '' end as varchar(max))
, NestLevel = cte.NestLevel + 1
, IsWorker = case t.ClassID when 16 then 1 else 0 end
from #tmp t
inner join cte on t.Parent = cte.ID
)
-- Only rows with ClassID 16 (IsWorker = 1) are returned.
select cte.Path
from cte
where cte.IsWorker = 1
order by cte.Path
drop table #tmp
go
The result:
Boss->Manager1->Worker1
Boss->Manager2->Worker2
Boss->Manager3->Worker3

Improve performance on CTE with sub-queries

I have a table with this structure:
WorkerID Value GroupID Sequence Validity
1 '20%' 1 1 2018-01-01
1 '10%' 1 1 2017-06-01
1 'Yes' 1 2 2017-06-01
1 '2018-01-01' 2 1 2017-06-01
1 '17.2' 2 2 2017-06-01
2 '10%' 1 1 2017-06-01
2 'No' 1 2 2017-06-01
2 '2016-03-01' 2 1 2017-06-01
2 '15.9' 2 2 2017-06-01
This structure was created so that the client can create customized data for a worker. For example, Group 1 can be something like "Salary", and a Sequence is one value that belongs to that Group, such as "Overtime Compensation". The column Value is a VARCHAR(150) field; the correct validation and conversion is done in another part of the application.
The Validity column exist mainly for historical reasons.
Now I would like to show, for the different workers, the information in a grid where each row should be one worker (displaying the one with the most recent Validity):
Worker 1_1 1_2 2_1 2_2
1 20% Yes 2018-01-01 17.2
2 10% No 2016-03-01 15.9
To accomplish this I created a CTE that looks like this:
-- The asker's original approach: one scalar subquery per output column, each
-- picking the Value with the latest Validity for one (GroupID, Sequence) pair.
-- NOTE(review): as posted, the outer SELECT has no FROM clause, no subquery
-- filters on the worker, and the CTE is not followed by a statement that uses
-- it -- presumably those parts were trimmed from the post. Confirm against
-- the real query before reusing.
-- NOTE(review): aliases such as 1_1 start with a digit; presumably they are
-- bracketed ([1_1]) in the real query -- confirm.
WITH CTE_worker_grid
AS
(
SELECT
worker,
/* 1 */
(
-- GroupID 1, Sequence 1: most recent Value
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 1_1,
(
-- GroupID 1, Sequence 2: most recent Value
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 1_2,
/* 2 */
(
-- GroupID 2, Sequence 1: most recent Value
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 2_1,
(
-- GroupID 2, Sequence 2: most recent Value
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 2_2
)
GO
This produces the correct result but it's very slow as it creates this grid for over 18'000 worker with almost 30 Groups and up to 20 Sequences in each Group.
How could one speed up the process of a CTE of this magnitude? Should CTE even be used? Can the sub-queries be changed or re-factored out to speed up the execution?
Use a PIVOT!
+----------+---------+---------+------------+---------+
| WorkerId | 001_001 | 001_002 | 002_001 | 002_002 |
+----------+---------+---------+------------+---------+
| 1 | 20% | Yes | 2018-01-01 | 17.2 |
| 2 | 10% | No | 2016-03-01 | 15.9 |
+----------+---------+---------+------------+---------+
SQL Fiddle: http://sqlfiddle.com/#!18/6e768/1
-- Demo table mirroring the question's structure.
CREATE TABLE WorkerAttributes
(
    WorkerID INT NOT NULL
    -- The question states Value is VARCHAR(150); with 50, longer values
    -- would fail to insert.
    , [Value] VARCHAR(150) NOT NULL
    , GroupID INT NOT NULL
    , [Sequence] INT NOT NULL
    , Validity DATE NOT NULL
);
INSERT INTO WorkerAttributes
    (WorkerID, [Value], GroupID, [Sequence], Validity)
VALUES
    (1, '20%', 1, 1, '2018-01-01')
    , (1, '10%', 1, 1, '2017-06-01')
    , (1, 'Yes', 1, 2, '2017-06-01')
    , (1, '2018-01-01', 2, 1, '2017-06-01')
    , (1, '17.2', 2, 2, '2017-06-01')
    , (2, '10%', 1, 1, '2017-06-01')
    , (2, 'No', 1, 2, '2017-06-01')
    , (2, '2016-03-01', 2, 1, '2017-06-01')
    , (2, '15.9', 2, 2, '2017-06-01')
;
-- Step 1: number the versions of each (WorkerID, GroupID, Sequence) value,
-- newest Validity first, so VersionNumber = 1 is the current value.
WITH CTE_WA_RANK
AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY WA.WorkerID, WA.GroupID, WA.[Sequence]
            ORDER BY WA.Validity DESC
        ) AS VersionNumber
        , WA.WorkerID
        , WA.GroupID
        , WA.[Sequence]
        , WA.[Value]
    FROM
        WorkerAttributes AS WA
),
-- Step 2: keep only the current value and build the pivot key
-- 'GGG_SSS' (zero-padded GroupID and Sequence, three digits each).
CTE_WA
AS
(
    SELECT
        WA_RANK.WorkerID
        , RIGHT('000' + CAST(WA_RANK.GroupID AS VARCHAR(3)), 3)
          + '_'
          + RIGHT('000' + CAST(WA_RANK.[Sequence] AS VARCHAR(3)), 3) AS SMART_KEY
        , WA_RANK.[Value]
    FROM
        CTE_WA_RANK AS WA_RANK
    WHERE
        WA_RANK.VersionNumber = 1
)
-- Step 3: one row per worker, one column per key. CTE_WA exposes exactly
-- WorkerID, SMART_KEY and Value, so PIVOT groups by WorkerID alone.
-- Every key that should become a column has to be listed in the IN (...) list.
SELECT
    PVT.WorkerID
    , PVT.[001_001]
    , PVT.[001_002]
    , PVT.[002_001]
    , PVT.[002_002]
FROM
    CTE_WA AS WA
PIVOT
(
    -- After the VersionNumber = 1 filter there is one value per key and
    -- worker; MAX is only there because PIVOT requires an aggregate.
    MAX([Value])
    FOR
    SMART_KEY IN
    (
        [001_001]
        , [001_002]
        , [002_001]
        , [002_002]
    )
) AS PVT;

sql window function to detect change in column values

I'd like to detect changes in column values in this (example) db
WITH events(id, row,event) AS (
VALUES
(1,1, 0 )
,(1,2, 0 )
,(1,3, 1 )
,(1,4, 0 )
,(1,5, 1 )
,(2,1, 0 )
,(2,2, 1 )
,(3,1, 0 )
,(3,2, 0 )
)
select * from events
What I am looking for is code for a new column 'code' which switches to 1 AFTER
the event column shows a 1. Within the same id, the code then stays 1.
For this example the new column will look like this:
WITH events2(id, row,event, code) AS (
VALUES
(1,1, 0, 0 )
,(1,2, 0, 0 )
,(1,3, 1, 0 )
,(1,4, 0, 1 ) -- notice the switch here
,(1,5, 1, 1 ) --
,(2,1, 0, 0 )
,(2,2, 1, 0 )
,(3,1, 0, 0 )
,(3,2, 0, 0 )
)
select * from events2
I have a hunch that the answer will be related to the answer on this question : PostgreSQL window function: partition by comparison
Somehow I cannot figure this out myself..
Peter
COALESCE over a scalar subquery:
-- oevent = 1 when an earlier row (smaller zrow) of the same id had zevent > 0,
-- otherwise 0.
WITH events(id, zrow, zevent) AS (
    VALUES
        (1, 1, 0), (1, 2, 0), (1, 3, 1), (1, 4, 0), (1, 5, 1)
      , (2, 1, 0), (2, 2, 1)
      , (3, 1, 0), (3, 2, 0)
)
SELECT ev.id, ev.zrow, ev.zevent
     , COALESCE((
           SELECT 1
           FROM events ex
           WHERE ex.id = ev.id
             AND ex.zrow < ev.zrow
             AND ex.zevent > 0
           -- A scalar subquery may return at most one row. Without LIMIT 1
           -- this raises an error as soon as an id has two or more earlier
           -- events (e.g. a sixth row for id 1).
           LIMIT 1
       ), 0) AS oevent
FROM events ev
;
Or, avoid the COALESCE() by typecasting the boolean EXISTS() to INTEGER:
-- Same flag computed with EXISTS: the boolean "an earlier row of this id had
-- event > 0" is cast to integer, giving 1 or 0 directly.
WITH events(id, zrow, event) AS (
    VALUES
        (1, 1, 0), (1, 2, 0), (1, 3, 1), (1, 4, 0), (1, 5, 1)
      , (2, 1, 0), (2, 2, 1)
      , (3, 1, 0), (3, 2, 0)
)
SELECT
    cur.id,
    cur.zrow,
    cur.event,
    CAST(EXISTS (
        SELECT 1
        FROM events AS earlier
        WHERE earlier.id = cur.id
          AND earlier.zrow < cur.zrow
          AND earlier.event > 0
    ) AS integer) AS oevent
FROM events AS cur
;
Find the MAX() value over the previous records within the same group (frame):
-- Window-function variant: for each row, take MAX(event) over all earlier rows
-- of the same id (the frame ends one row before the current one). The first
-- row of an id has an empty frame, so MAX is NULL there and is mapped to 0.
WITH events(id, zrow, event) AS (
    VALUES
        (1, 1, 0), (1, 2, 0), (1, 3, 1), (1, 4, 0), (1, 5, 1)
      , (2, 1, 0), (2, 2, 1)
      , (3, 1, 0), (3, 2, 0)
),
seen_before AS (
    SELECT
        e.id,
        e.zrow,
        e.event,
        MAX(e.event) OVER earlier_rows AS lagged
    FROM events AS e
    WINDOW earlier_rows AS (
        PARTITION BY e.id
        ORDER BY e.zrow
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    )
)
SELECT
    s.id,
    s.zrow,
    s.event,
    COALESCE(s.lagged, 0) AS oevent
FROM seen_before AS s
;
The same without the extra CTE:
-- Same window computation in a single SELECT: the COALESCE wraps the windowed
-- MAX directly, so no intermediate CTE is needed.
WITH events(id, zrow, event) AS (
    VALUES
        (1, 1, 0), (1, 2, 0), (1, 3, 1), (1, 4, 0), (1, 5, 1)
      , (2, 1, 0), (2, 2, 1)
      , (3, 1, 0), (3, 2, 0)
)
SELECT
    ev.id,
    ev.zrow,
    ev.event,
    COALESCE(MAX(ev.event) OVER earlier_rows, 0) AS lagged
FROM events AS ev
WINDOW earlier_rows AS (
    PARTITION BY ev.id
    ORDER BY ev.zrow
    ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
)
;
Another way to perform the self-join would be to use a recursive query.

How to average 3 values in Sql Server?

I have three variables :-
@ScoreA DECIMAL(10,7)
@ScoreB DECIMAL(10,7)
@ScoreC DECIMAL(10,7)
@FinalScore DECIMAL(10, 7)
I wish to get the average of the three scores. BUT 1, 2 or all 3 values might be zero.
Eg. scenarios:
A = 1.4, B=3.5, C=5.0; FinalScore = 3.3
A = 0.0, B=0.0, C=0.0; FinalScore = 0.0
A = 1.1, B=0.0, C=0.0; FinalScore = 1.1
A = 0.0, B=2.0, C=4.8; FinalScore = 3.4
Cheers!
-- Average of the non-zero scores; 0.0 when no score is positive.
-- T-SQL variables use the @ sigil (a leading # denotes a temporary table).
IF @A > 0 OR @B > 0 OR @C > 0
    SELECT ((@A + @B + @C) /
            -- Divisor = number of non-zero scores. The IF above guarantees
            -- at least one, so this is never 0.
            (0 +
             CASE WHEN @A = 0 THEN 0 ELSE 1 END +
             CASE WHEN @B = 0 THEN 0 ELSE 1 END +
             CASE WHEN @C = 0 THEN 0 ELSE 1 END ))
ELSE
    SELECT 0.0
EDIT
Modified the query so that it now handles divide-by-zero scenarios.
EDIT2
Here is "the trick with the AVG(..) function" :) with Common Table Expression
-- Average of the positive scores using AVG over a three-row CTE.
-- UNION ALL is required: plain UNION removes duplicate rows, so two equal
-- scores would be counted once (2, 2, 5 would average to 3.5 instead of 3).
WITH T(I) AS (SELECT @A UNION ALL SELECT @B UNION ALL SELECT @C)
-- AVG over zero rows is NULL; COALESCE returns 0 when no score is positive.
SELECT COALESCE(AVG(I), 0) FROM T
WHERE I > 0
-- Single-expression variant: sum of the scores divided by the number of
-- non-zero scores. T-SQL variables use the @ sigil.
SELECT ((@A + @B + @C) /
        -- When all three scores are 0 this first term adds 1, so the divisor
        -- is 1 instead of 0 and the result is 0.
        (CASE WHEN (@A = 0.0 AND @B = 0.0 AND @C = 0.0) THEN 1 ELSE 0 END
         + CASE WHEN @A = 0 THEN 0 ELSE 1 END
         + CASE WHEN @B = 0 THEN 0 ELSE 1 END
         + CASE WHEN @C = 0 THEN 0 ELSE 1 END
        )
       )
For me this is easier to read and understand:
-- Average of the positive scores via a table variable.
DECLARE
    @ScoreA DECIMAL(10,7),
    @ScoreB DECIMAL(10,7),
    @ScoreC DECIMAL(10,7),
    @FinalScore DECIMAL(10, 7)
SET @ScoreA = 1.4
SET @ScoreB = 3.5
SET @ScoreC = 5.0
DECLARE
    @AVG TABLE (value DECIMAL(10,7))
-- Only positive scores are inserted, so zeros do not affect the average.
-- UNION ALL (not UNION) keeps equal scores as separate rows; UNION would
-- collapse them and skew the average.
INSERT INTO @AVG
SELECT @ScoreA WHERE @ScoreA > 0
UNION ALL
SELECT @ScoreB WHERE @ScoreB > 0
UNION ALL
SELECT @ScoreC WHERE @ScoreC > 0
-- AVG over an empty table is NULL; report 0 in that case.
SELECT COALESCE(AVG(value), 0) FROM @AVG