Clustered Index Cost is high using Sql Query - tsql

I have given a task to optimize the below sql query. It is currently performing badly. I ran the execution plan for it, and i see it is doing Clustered Index Scan, and it's cost is very high. How can i reduce the cost or it is possible to change it to use Index seek instead?
SELECT TOP 20 CustomerPrimaryExtID,
Max(POSTimeStamp) AS TransactionDate,
ExtLocationCode,
0 AS RedemptionAmount,
0 AS RedemptionCount,
TerminalNum,
LogixTransNum,
POSTransNum AS TransNum,
0 AS DetailRecords,
CustomerTypeID,
PresentedCustomerID,
PresentedCardTypeID,
HHID,
Replayed,
0 AS TransContext,
isnull(TransTotal, 0) AS TransTotal
FROM TransHist AS TH WITH(nolock)
WHERE ( ( ( CustomerPrimaryExtID IN ( '' )
AND HHID IS NULL )
OR HHID = '0000000250000013408'
AND CustomerTypeID <> 1 )
OR ( CustomerPrimaryExtID = '0000000250000013408'
AND CustomerTypeID = 1 ) )
AND NOT EXISTS (SELECT LogixTransNum
FROM TransRedemption AS TR2 with(nolock)
WHERE ( ( ( CustomerPrimaryExtID IN ( '' )
AND HHID IS NULL )
OR HHID = '0000000250000013408'
AND CustomerTypeID <> 1 )
OR ( CustomerPrimaryExtID = '0000000250000013408'
AND CustomerTypeID = 1 ) )
AND TH.LogixTransNum = TR2.LogixTransNum)
GROUP BY CustomerPrimaryExtID,
HHID,
CustomerTypeID,
PresentedCustomerID,
PresentedCardTypeID,
LogixTransNum,
POSTransNum,
TerminalNum,
ExtLocationCode,
Replayed,
TransTotal
ORDER BY TransactionDate DESC

One thing to note: CustomerTypeID <> 1 will not use indexes as effectively as CustomerTypeID > 1, see the below test example. Also, WHERE NOT EXISTS or NOT IN are also performance killers.
CREATE TABLE NumberCrazy(
IntegersAreCool INT
)
-- Run this 20+ times
INSERT INTO NumberCrazy
VALUES (1)
-- Run this twice
DECLARE #begin INT = 1, #max INT = 2000, #r INT
WHILE #begin <= #max
BEGIN
SET #r = (RAND()*(#begin))
INSERT INTO NumberCrazy
SELECT #r
SET #begin = #begin + 1
END
CREATE CLUSTERED INDEX [IX_DaInts] ON [dbo].[NumberCrazy]
(
[IntegersAreCool] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
SET STATISTICS IO ON
SELECT *
FROM NumberCrazy
WHERE IntegersAreCool > 1
SELECT *
FROM NumberCrazy
WHERE IntegersAreCool <> 1
Results:
Table 'NumberCrazy'. Scan count 1, logical reads 12, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'NumberCrazy'. Scan count 2, logical reads 15, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
NOTE THE EXECUTION PLAN (Looks like the mess you have above this).

Related

Why using same field when filtering cause different execution time? (different index usage)

When I run query and filter by agreement_id it is slow,
but when I filter by an alias id it is fast. (Look at the end of the query)
Why using same field when filtering cause different execution time?
Links to explain analyze:
slow1, slow2
fast1, fast2
Difference start at #20: Where different indexes are used:
Index Cond: (o.sys_period #> sys_time()) VS Index Cond: (o.agreement_id = 38)
PS. It would be nice if I can contact to developer of this feature (I have one more similar problem)
UPD I did some experiments. when I remove window functions from my query it works fast in any case. So why window function stop index usage in some cases? How to escape/workaround that?
dbfiddle with minimal test case
Server version is v13.1
Full query:
WITH gconf AS
-- https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-SELECT
NOT MATERIALIZED -- force it to be merged into the parent query
-- it gives a net savings because each usage of the WITH query needs only a small part of the WITH query's full output.
( SELECT
ocd.*,
tstzrange( '2021-05-01', '2021-05-01', '[]') AS acc_period,
(o).agreement_id AS id, -- Required to passthrough WINDOW FUNCTION
(o).id AS order_id,
(ic).consumed_period AS consumed_period,
dense_rank() OVER ( PARTITION BY (o).agreement_id, (o).id ORDER BY (ic).consumed_period ) AS nconf,
row_number() OVER ( wconf ORDER BY (c).sort_order NULLS LAST ) AS nitem,
(sum( ocd.item_cost ) OVER wconf)::numeric( 10, 2) AS conf_cost,
max((ocd.ic).consumed) OVER wconf AS consumed,
CASE WHEN true
THEN (sum( ocd.item_suma ) OVER wconf)::numeric( 10, 2 )
ELSE (sum( ocd.item_cost ) OVER wconf)::numeric( 10, 2 )
END AS conf_suma
FROM order_cost_details( tstzrange( '2021-05-01', '2021-05-01', '[]') ) ocd
WHERE true OR (ocd.ic).consumed_period #> lower( tstzrange( '2021-05-01', '2021-05-01', '[]') )
WINDOW wconf AS ( PARTITION BY (o).agreement_id, (o).id, (ic).consumed_period )
),
gorder AS (
SELECT *,
(conf_suma/6)::numeric( 10, 2 ) as conf_nds,
sum( conf_suma ) FILTER (WHERE nitem = 1) OVER worder AS order_suma
FROM gconf
WINDOW worder AS ( PARTITION BY gconf.id, (o).id )
-- TODO: Ask PG developers: Why changing to (o).agreement_id slows down query?
-- WINDOW worder AS ( PARTITION BY (o).agreement_id, (o).id )
)
SELECT
u.id, consumed_period, nconf, nitem,
(c).id as item_id,
COALESCE( (c).sort_order, pd.sort_order ) as item_order,
COALESCE( st.display, st.name, rt.display, rt.name ) as item_name,
COALESCE( item_qty, (c).amount/rt.unit ) as item_qty,
COALESCE( (p).label, rt.label ) as measure,
item_price, item_cost, item_suma,
conf_cost, consumed, conf_suma, conf_nds, order_suma,
(order_suma/6)::numeric( 10, 2 ) as order_nds,
sum( conf_suma ) FILTER (WHERE nitem = 1 ) OVER wagreement AS total_suma,
sum( (order_suma/6)::numeric( 10, 2 ) ) FILTER (WHERE nitem = 1 AND nconf = 1) OVER wagreement AS total_nds,
pkg.id as package_id,
pkg.link_1c_id as package_1c_id,
COALESCE( pkg.display, pkg.name ) as package,
acc_period
FROM gorder u
LEFT JOIN resource_type rt ON rt.id = (c).resource_type_id
LEFT JOIN service_type st ON st.id = (c).service_type_id
LEFT JOIN package pkg ON pkg.id = (o).package_id
LEFT JOIN package_detail pd ON pd.package_id = (o).package_id
AND pd.resource_type_id IS NOT DISTINCT FROM (c).resource_type_id
AND pd.service_type_id IS NOT DISTINCT FROM (c).service_type_id
-- WHERE (o).agreement_id = 38 -- slow
WHERE u.id = 38 -- fast
WINDOW wagreement AS ( PARTITION BY (o).agreement_id )
As problem workaround we can additionally SELECT an alias for column used at PARTITION BY expression. Then PG apply optimization and use index.
The answer to the question could be: PG does not apply optimization if composite type is used. Notice as it works:
PARTITION | FILTER | IS USED?
------------------------------
ALIAS | ORIG | NO
ALIAS | ALIAS | YES
ORIG | ALIAS | NO
ORIG | ORIG | NO
See this dbfiddle
create table agreement ( ag_id int, name text, cost numeric(10,2) );
create index ag_idx on agreement (ag_id);
insert into agreement (ag_id, name, cost) values ( 1, '333', 22 ),
(1,'333', 33), (1, '333', 7), (2, '555', 18 ), (2, '555', 2), (3, '777', 4);
select * from agreement;
create function initial ()
returns table( agreement_id int, ag agreement ) language sql stable AS $$
select ag_id, t from agreement t;
$$;
select * from initial() t;
explain( analyze, costs, buffers, verbose ) with totals_by_ag as (
select
*,
sum( (t.ag).cost ) over ( partition by agreement_id ) as total
from initial() t
)
select * from totals_by_ag t
where (t.ag).ag_id = 1; -- index is NOT USED
explain( analyze, costs, buffers, verbose ) with totals_by_ag as (
select
*,
sum( (t.ag).cost ) over ( partition by agreement_id ) as total
from initial() t
)
select * from totals_by_ag t
where agreement_id = 1; -- index is used when alias for column is used
explain( analyze, costs, buffers, verbose ) with totals_by_ag as (
select
*,
sum( (t.ag).cost ) over ( partition by (t.ag).ag_id ) as total --renamed
from initial() t
)
select * from totals_by_ag t
where agreement_id = 1; -- index is NOT USED because grouping by original column
explain( analyze, costs, buffers, verbose ) with totals_by_ag as (
select
*,
sum( (t.ag).cost ) over ( partition by (t.ag).ag_id ) as total --renamed
from initial() t
)
select * from totals_by_ag t
where (t.ag).ag_id = 1; -- index is NOT USED even if at both cases original column

PostgreSQL - CTE slowing the query

I have a concern regarding the use of multiple WITH clauses in a query because
in some condition, it is slowing down the performance of the query like the below example,
So first WITH clause taking the 0.345 sec to fetch the 98948 records and second WITH clause taking the 13 sec to fetch the 68199 records even its less record as compare to first one so the only difference is that we have used the aggregate function in the second WITH clause to calculate the sum of charges.
Can anybody please help us to understand why the second query taking too much time.
1.This clause taking the 0.318 sec to fetch the 98948 record,
WITH delinquency_lease_details AS (
SELECT
dp.cid,
dp.id AS delinquency_policy_id,
p.id,
dp.threshold_amount,
dp.small_balance_amount,
dp.delinquency_threshold_type_id,
cl.id,
cl.primary_customer_id AS customer_id,
cl.lease_status_type_id,
cl.occupancy_type_id,
COALESCE( cl.building_name || ' - ' || cl.unit_number_cache, cl.building_name, cl.unit_number_cache ) AS unit_number, func_format_customer_name ( cl.name_first, cl.name_last, cl.company_name ) customer_name, cl.property_name, TZ.time_zone_name AS property_timezone
FROM
cached_leases cl
JOIN lease_details ld ON ( ld.cid = cl.cid AND ld.lease_id = cl.id )
JOIN delinquency_policies dp ON ( dp.cid = ld.cid AND ld.delinquency_policy_id = dp.id )
JOIN properties p ON ( p.cid = lp.cid AND p.id = lp.property_id )
JOIN time_zones TZ ON ( TZ.id = p.time_zone_id )
WHERE
cl.cid = 1111
AND cl.lease_status_type_id IN ( 4, 5 )
AND cl.occupancy_type_id IN ( 1, 2, 3, 4, 6, 9, 10, 11 )
AND cl.termination_list_type_id IS NULL
)
SELECT * FROM delinquency_lease_details;
2. This clause taking the 13 sec to fetch the 68199 records and if I just run the query without WITH clause then it is taking 0.564 seconds,
WITH delinquent_balance AS (
SELECT
dld.cid,
dld.id,
min( c.post_date ) AS min_post_date,
sum( c.transaction_amount_due ) AS delinquent_amount
FROM
cached_leases dld
JOIN charges at ON ( at.cid = dld.cid AND at.lease_id = dld.id AND c.is_temporary = FALSE AND c.is_deleted = FALSE )
JOIN charge_codes cc ON ( c.ar_code_id = cc.id AND c.cid = cc.cid AND cc.ledger_filter_id = 27 )
WHERE
dld.cid = 1111
AND ( ( c.transaction_amount_due > 0 AND c.post_date < CURRENT_DATE ) OR c.transaction_amount_due < 0 )
AND NOT EXISTS (
select
1
from
repayment_charges
WHERE
cid = c.cid
AND property_id = c.property_id
AND charge_id = c.id
AND is_active = true
)
GROUP BY
dld.cid,
dld.id
) select * from delinquent_balance;
As per this link, the WITH clause is the optimization barrier for Postgres database so it's really a cause then what we should use in place of WITH clause for complex queries because I have used the 10 WITH clauses in query and it is slowing down the performance of the query but out of that I have given two clauses only to get the some conclusion because the second clause taking more time as compared to another one.

Avoid Sort operator in index plan of an update query which does not have an ORDER BY keyword

I have an update query:-
Update PM.Contractual_Allowances Set Provider_ID = 3 Where Tenant_ID = 1 and Carrier_ID = 203
For this above query I am getting execution plan as below :
I am trying to understand that why do I get the sort operator when I dont have an ORDER BY clause and what I can do to avoid it.
Below is the plan in text (aka 'Showplan_text'):
|--Sequence
|--Index Update(OBJECT:([Ntier_Master].[PM].[Contractual_Allowances].[IX_Contractual_Allowances_Provider_ID_Tenant_ID]), SET:([Contractual_Allowance_ID1043] = [Ntier_Master].[PM].[Contractual_Allowances].[Contractual_Allowance_ID],[Provider_ID1044] = [Ntier_Master].[PM].[Contractual_Allowances].[Provider_ID],[Tenant_ID1045] = [Ntier_Master].[PM].[Contractual_Allowances].[Tenant_ID]) WITH ORDERED PREFETCH ACTION:([Act1042]))
| |--Sort(ORDER BY:([Ntier_Master].[PM].[Contractual_Allowances].[Provider_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Tenant_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Contractual_Allowance_ID] ASC, [Act1042] ASC))
| |--Filter(WHERE:(NOT [Expr1038]))
| |--Table Spool
| |--Split
| |--Clustered Index Update(OBJECT:([Ntier_Master].[PM].[Contractual_Allowances].[PK_Contractual_Allowances_Contractual_Allowance_ID]), SET:([Ntier_Master].[PM].[Contractual_Allowances].[Provider_ID] = [Expr1033],[Ntier_Master].[PM].[Contractual_Allowances].[Contractual_Allowance_TS] = [Expr1003]))
| |--Compute Scalar(DEFINE:([Expr1038]=[Expr1038], [Expr1039]=[Expr1039]))
| |--Compute Scalar(DEFINE:([Expr1038]=CASE WHEN [Expr1007] THEN (1) ELSE (0) END, [Expr1039]=CASE WHEN [Expr1007] THEN (1) ELSE (0) END))
| |--Compute Scalar(DEFINE:([Expr1033]=(3)))
| |--Compute Scalar(DEFINE:([Expr1007]=CASE WHEN [Ntier_Master].[PM].[Contractual_Allowances].[Provider_ID] = (3) THEN (1) ELSE (0) END))
| |--Compute Scalar(DEFINE:([Expr1003]=gettimestamp((10))))
| |--Clustered Index Scan(OBJECT:([Ntier_Master].[PM].[Contractual_Allowances].[PK_Contractual_Allowances_Contractual_Allowance_ID]), WHERE:([Ntier_Master].[PM].[Contractual_Allowances].[Tenant_ID]=(1) AND [Ntier_Master].[PM].[Contractual_Allowances].[Carrier_ID]=(203)) ORDERED FORWARD)
|--Index Update(OBJECT:([Ntier_Master].[PM].[Contractual_Allowances].[IX_Contractual_Allowances_Carrier_ID_Location_ID_Department_ID_Tenant_ID]), SET:([Contractual_Allowance_ID1046] = [Ntier_Master].[PM].[Contractual_Allowances].[Contractual_Allowance_ID],[Modifiers1047] = [Ntier_Master].[PM].[Contractual_Allowances].[Modifiers],[Carrier_ID1048] = [Ntier_Master].[PM].[Contractual_Allowances].[Carrier_ID],[Procedure_Code_ID1049] = [Ntier_Master].[PM].[Contractual_Allowances].[Procedure_Code_ID],[Location_ID1050] = [Ntier_Master].[PM].[Contractual_Allowances].[Location_ID],[Provider_ID1051] = [Ntier_Master].[PM].[Contractual_Allowances].[Provider_ID],[Department_ID1052] = [Ntier_Master].[PM].[Contractual_Allowances].[Department_ID],[Tenant_ID1053] = [Ntier_Master].[PM].[Contractual_Allowances].[Tenant_ID]) WITH ORDERED PREFETCH ACTION:([Act1042]))
|--Sort(ORDER BY:([Ntier_Master].[PM].[Contractual_Allowances].[Carrier_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Location_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Department_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Tenant_ID] ASC, [Ntier_Master].[PM].[Contractual_Allowances].[Contractual_Allowance_ID] ASC, [Act1042] ASC))
|--Filter(WHERE:(NOT [Expr1039]))
|--Table Spool
CREATE NONCLUSTERED INDEX IX_Contractual_Allowances_Location_ID_Tenant_ID ON PM.Contractual_Allowances ( Location_ID ASC , Tenant_ID ASC ) WITH ( PAD_INDEX = OFF ,FILLFACTOR = 100 ,SORT_IN_TEMPDB = OFF , IGNORE_DUP_KEY = OFF , STATISTICS_NORECOMPUTE = OFF , DROP_EXISTING = ON , ONLINE = OFF , ALLOW_ROW_LOCKS = ON , ALLOW_PAGE_LOCKS = ON ) ON [PRIMARY ]
CREATE NONCLUSTERED INDEX IX_Contractual_Allowances_Provider_ID_Tenant_ID ON PM.Contractual_Allowances ( Provider_ID ASC , Tenant_ID ASC ) WITH ( PAD_INDEX = OFF ,FILLFACTOR = 100 ,SORT_IN_TEMPDB = OFF , IGNORE_DUP_KEY = OFF , STATISTICS_NORECOMPUTE = OFF , DROP_EXISTING = ON , ONLINE = OFF , ALLOW_ROW_LOCKS = ON , ALLOW_PAGE_LOCKS = ON ) ON [PRIMARY ]
CREATE NONCLUSTERED INDEX IX_Contractual_Allowances_Carrier_ID_Current_Effective_Date_Tenant_ID ON PM.Contractual_Allowances ( Carrier_ID ASC , Current_Effective_Date ASC , Tenant_ID ASC ) WITH ( PAD_INDEX = OFF ,FILLFACTOR = 100 ,SORT_IN_TEMPDB = OFF , IGNORE_DUP_KEY = OFF , STATISTICS_NORECOMPUTE = OFF , DROP_EXISTING = ON , ONLINE = OFF , ALLOW_ROW_LOCKS = ON , ALLOW_PAGE_LOCKS = ON ) ON [PRIMARY ]
CREATE NONCLUSTERED INDEX IX_Contractual_Allowances_Carrier_ID_Location_ID_Department_ID_Tenant_ID ON PM.Contractual_Allowances ( Carrier_ID ASC , Location_ID ASC , Department_ID ASC , Tenant_ID ASC ) INCLUDE ( Modifiers , Procedure_Code_ID , Provider_ID ) WITH ( PAD_INDEX = OFF ,FILLFACTOR = 100 ,SORT_IN_TEMPDB = OFF , IGNORE_DUP_KEY = OFF , STATISTICS_NORECOMPUTE = OFF , DROP_EXISTING = ON , ONLINE = OFF , ALLOW_ROW_LOCKS = ON , ALLOW_PAGE_LOCKS = ON ) ON [PRIMARY ]
CREATE NONCLUSTERED INDEX IX_Contractual_Allowances_Procedure_Code_ID_Tenant_ID ON PM.Contractual_Allowances ( Procedure_Code_ID ASC , Tenant_ID ASC ) WITH ( PAD_INDEX = OFF ,FILLFACTOR = 100 ,SORT_IN_TEMPDB = OFF , IGNORE_DUP_KEY = OFF , STATISTICS_NORECOMPUTE = OFF , DROP_EXISTING = ON , ONLINE = OFF , ALLOW_ROW_LOCKS = ON , ALLOW_PAGE_LOCKS = ON ) ON [PRIMARY ]
ORDER BY clauses are not the only thing that cause a sort. GROUP BY, DISTINCT, window ranking functions (e.g. ROW_NUMBER, RANK), window aggregate functions (e.g. SUM() OVER (PARTITION BY ...), window frame functions (e.g. LAG and LEAD) are among many things that cause a sort.
When we add indexes we're pre-sorting data so that it does not need to be sorted when queried. Sorts in the exectution plan mean that (1) there was not an index available to handle the sort or (2) the was an index that could handle the sort but the optimizer chose not to use it. Consider the following sample data:
if object_id('tempdb..#sometable') is not null drop table #sometable;
create table #sometable (col1 int, col2 int);
insert #sometable values(1,10),(1,20),(2,15),(2,50),(3,10);
Next, run these queries with "Include Actual Execution Plan" turned on.
select col1, max(col2)
from #sometable
group by col1;
select distinct col1
from #sometable;
select col1, col2, avg(col2) over (partition by col1)
from #sometable;
select col1, col2, avg(col2) over (partition by col1 order by (select null))
from #sometable;
Note the execution plans:
You can get more info about what was sorted by holding the mouse over the sort operator:
Here it's it needs to sort col1 so let's add this index, run the queries and examine the execution plan:
--alter table #sometable
create clustered index uq_sometable on #sometable(col1);
Now the new execution plans:
As you can see, the sorts are all gone. There's so much more to this topic but hopefully this helps you understand how you can get sorts without an ORDER BY and how to get rid of them.

Check for equal amounts of negative numbers as positive numbers

I have a table with two columns: intGroupID, decAmount
I want to have a query that can basically return the intGroupID as a result if for every positive(+) decAmount, there is an equal and opposite negative(-) decAmount.
So a table of (id=1,amount=1.0),(1,2.0),(1,-1.0),(1,-2.0) would return back the intGroupID of 1, because for each positive number there exists a negative number to match.
What I know so far is that there must be an equal number of decAmounts (so I enforce a count(*) % 2 = 0) and the sum of all rows must = 0.0. However, some cases that get by that logic are:
ID | Amount
1 | 1.0
1 | -1.0
1 | 2.0
1 | -2.0
1 | 3.0
1 | 2.0
1 | -4.0
1 | -1.0
This has a sum of 0.0 and has an even number of rows, but there is not a 1-for-1 relationship of positives to negatives. I need a query that can basically tell me if there is a negative amount for each positive amount, without reusing any of the rows.
I tried counting the distinct absolute values of the numbers and enforcing that it is less than the count of all rows, but it's not catching everything.
The code I have so far:
DECLARE #tblTest TABLE(
intGroupID INT
,decAmount DECIMAL(19,2)
);
INSERT INTO #tblTest (intGroupID ,decAmount)
VALUES (1,-1.0),(1,1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0);
DECLARE #intABSCount INT = 0
,#intFullCount INT = 0;
SELECT #intFullCount = COUNT(*) FROM #tblTest;
SELECT #intABSCount = COUNT(*) FROM (
SELECT DISTINCT ABS(decAmount) AS absCount FROM #tblTest GROUP BY ABS(decAmount)
) AS absCount
SELECT t1.intGroupID
FROM #tblTest AS t1
/* Make Sure Even Number Of Rows */
INNER JOIN
(SELECT COUNT(*) AS intCount FROM #tblTest
)
AS t2 ON t2.intCount % 2 = 0
/* Make Sure Sum = 0.0 */
INNER JOIN
(SELECT SUM(decAmount) AS decSum FROM #tblTest)
AS t3 ON decSum = 0.0
/* Make Sure Count of Absolute Values < Count of Values */
WHERE
#intABSCount < #intFullCount
GROUP BY t1.intGroupID
I think there is probably a better way to check this table, possibly by finding pairs and removing them from the table and seeing if there's anything left in the table once there are no more positive/negative matches, but I'd rather not have to use recursion/cursors.
Create TABLE #tblTest (
intA INT
,decA DECIMAL(19,2)
);
INSERT INTO #tblTest (intA,decA)
VALUES (1,-1.0),(1,1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0), (5,-5.0),(5,5.0) ;
SELECT * FROM #tblTest;
SELECT
intA
, MIN(Result) as IsBalanced
FROM
(
SELECT intA, X,Result =
CASE
WHEN count(*)%2 = 0 THEN 1
ELSE 0
END
FROM
(
---- Start thinking here --- inside-out
SELECT
intA
, x =
CASE
WHEN decA < 0 THEN
-1 * decA
ELSE
decA
END
FROM #tblTest
) t1
Group by intA, X
)t2
GROUP BY intA
Not tested but I think you can get the idea
This returns the id that do not conform
The not is easier to test / debug
select pos.*, neg.*
from
( select id, amount, count(*) as ccount
from tbl
where amount > 0
group by id, amount ) pos
full outer join
( select id, amount, count(*) as ccount
from tbl
where amount < 0
group by id, amount ) neg
on pos.id = neg.id
and pos.amount = -neg.amount
and pos.ccount = neg.ccount
where pos.id is null
or neg.id is null
I think this will return a list of id that do conform
select distinct(id) from tbl
except
select distinct(isnull(pos.id, neg.id))
from
( select id, amount, count(*) as ccount
from tbl
where amount > 0
group by id, amount ) pos
full outer join
( select id, amount, count(*) as ccount
from tbl
where amount < 0
group by id, amount ) neg
on pos.id = neg.id
and pos.amount = -neg.amount
and pos.ccount = neg.ccount
where pos.id is null
or neg.id is null
Boy, I found a simpler way to do this than my previous answers. I hope all my crazy edits are saved for posterity.
This works by grouping all numbers for an id by their absolute value (1, -1 grouped by 1).
The sum of the group determines if there are an equal number of pairs. If it is 0 then it is equal, any other value for the sum means there is an imbalance.
The detection of evenness by the COUNT aggregate is only necessary to detect an even number of zeros. I assumed that 0's could exist and they should occur an even number of times. Remove it if this isn't a concern, as 0 will always pass the first test.
I rewrote the query a bunch of different ways to get the best execution plan. The final result below only has one big heap sort which was unavoidable given the lack of an index.
Query
WITH tt AS (
SELECT intGroupID,
CASE WHEN SUM(decAmount) > 0 OR COUNT(*) % 2 = 1 THEN 1 ELSE 0 END unequal
FROM #tblTest
GROUP BY intGroupID, ABS(decAmount)
)
SELECT tt.intGroupID,
CASE WHEN SUM(unequal) != 0 THEN 'not equal' ELSE 'equals' END [pair]
FROM tt
GROUP BY intGroupID;
Tested Values
(1,-1.0),(1,1.0),(1,2),(1,-2), -- should work
(2,-1.0),(2,1.0),(2,2),(2,2), -- fail, two positive twos
(3,1.0),(3,1.0),(3,-1.0), -- fail two 1's , one -1
(4,1),(4,2),(4,-.5),(4,-2.5), -- fail: adds up the same sum, but different values
(5,1),(5,-1),(5,0),(5,0), -- work, test zeros
(6,1),(6,-1),(6,0), -- fail, test zeros
(7,1),(7,-1),(7,-1),(7,1),(7,1) -- fail, 3 x 1
Results
A pairs
_ _____
1 equal
2 not equal
3 not equal
4 not equal
5 equal
6 not equal
7 not equal
The following should return "disbalanced" groups:
;with pos as (
select intGroupID, ABS(decAmount) m
from TableName
where decAmount > 0
), neg as (
select intGroupID, ABS(decAmount) m
from TableName
where decAmount < 0
)
select distinct IsNull(p.intGroupID, n.intGroupID) as intGroupID
from pos p
full join neg n on n.id = p.id and abs(n.m - p.m) < 1e-8
where p.m is NULL or n.m is NULL
to get unpaired elements, select satement can be changed to following:
select IsNull(p.intGroupID, n.intGroupID) as intGroupID, IsNull(p.m, -n.m) as decAmount
from pos p
full join neg n on n.id = p.id and abs(n.m - p.m) < 1e-8
where p.m is NULL or n.m is NULL
Does this help?
-- Expected result - group 1 and 3
declare #matches table (groupid int, value decimal(5,2))
insert into #matches select 1, 1.0
insert into #matches select 1, -1.0
insert into #matches select 2, 2.0
insert into #matches select 2, -2.0
insert into #matches select 2, -2.0
insert into #matches select 3, 3.0
insert into #matches select 3, 3.5
insert into #matches select 3, -3.0
insert into #matches select 3, -3.5
insert into #matches select 4, 4.0
insert into #matches select 4, 4.0
insert into #matches select 4, -4.0
-- Get groups where we have matching positive/negatives, with the same number of each
select mat.groupid, min(case when pos.PositiveCount = neg.NegativeCount then 1 else 0 end) as 'Match'
from #matches mat
LEFT JOIN (select groupid, SUM(1) as 'PositiveCount', Value
from #matches where value > 0 group by groupid, value) pos
on pos.groupid = mat.groupid and pos.value = ABS(mat.value)
LEFT JOIN (select groupid, SUM(1) as 'NegativeCount', Value
from #matches where value < 0 group by groupid, value) neg
on neg.groupid = mat.groupid and neg.value = case when mat.value < 0 then mat.value else mat.value * -1 end
group by mat.groupid
-- If at least one pair within a group don't match, reject
having min(case when pos.PositiveCount = neg.NegativeCount then 1 else 0 end) = 1
You can compare your values this way:
declare #t table(id int, amount decimal(4,1))
insert #t values(1,1.0),(1,-1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0),(2,-1.0),(2,1.0)
;with a as
(
select count(*) cnt, id, amount
from #t
group by id, amount
)
select id from #t
except
select b.id from a
full join a b
on a.cnt = b.cnt and a.amount = -b.amount
where a.id is null
For some reason i can't write comments, however Daniels comment is not correct, and my solution does accept (6,1),(6,-1),(6,0) which can be correct. 0 is not specified in the question and since it is a 0 value it can be handled eather way. My answer does NOT accept (3,1.0),(3,1.0),(3,-1.0)
To Blam: No I am not missing
or b.id is null
My solution is like yours, but not exactly identical

Check value for every month in a year in T-sql using cursor

I need a example of a cursor for my meter system, where the system reads the meter every month.
The cursor needs to check, that every meter has a reading registered in the current year. For meters with missing readings, an estimated value is added, such that the daily consumption is like the daily comsumption in the previous period plus 15%. In no previous period exiss, the above Kwh value is used.
How about something like this. (The MonthSeed table could become a real table in your database)
declare #MonthSeed table (MonthNumber int)
insert into #MonthSeed values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)
-- assumes declared table "Reading" with fields ( Id int, [Date] datetime, MeterNo varchar(50), Consumption int )
select
m.MeterNo,
r.Date,
calculatedConsumption = isnull(r.Consumption, -- read consumption
isnull((select max(r2.Consumption) Consumption from Reading r2 where datepart(month, r2.Date) = (m.MonthNumber - 1) and r2.MeterNo = m.MeterNo) * 1.15, -- previous consumption + 15%
9999)) -- default consumption
from
(select distinct
MeterNo,
MonthNumber
from
Reading, #MonthSeed) m
left join
Reading r on r.MeterNo = m.MeterNo and datepart(month, r.Date) = m.monthNumber
EDIT FOLLOWING COMMENTS - EXAMPLE OF ADDING MISSING READINGS
As commented need to include an insert before the select insert into Reading (MeterNo, Date, Consumption) and making use of the left join to the reading table include a check for the reading id to be null ie missing where r.Id is null.
I noticed that this would result in null date entries when inserting into the reading table. So I included a date aggregate in the main sub-select Date = dateadd(month, monthnumber, #seeddate); the main select was amended to show a date for missing entries isnull(r.Date, m.Date),
I've calculated the #SeedDate to be the 1st of the current month one year ago but you may want to pass in an earlier date.
declare #MonthSeed table (MonthNumber int)
insert into #MonthSeed values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)
-- assumes declared table "Reading" with fields ( Id int, [Date] datetime, MeterNo varchar(50), Consumption int )
declare #SeedDate datetime = (select dateadd(month, datediff(month, 0, getdate())-12, 0)) -- this month, last year
insert into Reading (MeterNo, Date, Consumption)
select
m.MeterNo,
isnull(r.Date, m.Date),
calculatedConsumption =
isnull(r.Consumption, -- read consumption
isnull(1.15 * (select max(r2.Consumption) Consumption
from Reading r2
where datepart(month, r2.Date) = (m.MonthNumber - 1)
and r2.MeterNo = m.MeterNo), -- previous consumption + 15%
9999)) -- default consumption
from
(select distinct
MeterNo,
MonthNumber,
Date = dateadd(month, monthnumber, #seeddate)
from
Reading
cross join
#MonthSeed) m
left join
Reading r on r.MeterNo = m.MeterNo and datepart(month, r.Date) = m.monthNumber
where
r.Id is null
select * from Reading
(The following assumes SQL Server 2005 or later.)
Scrounge around in here and see if there's anything of value:
declare #StartDate as Date = '2012-01-01'
declare #Now as Date = GetDate()
declare #DefaultConsumption as Int = 2000 -- KWh.
declare #MeterReadings as Table
( MeterReadingId Int Identity, ReadingDate Date, MeterNumber VarChar(10), Consumption Int )
insert into #MeterReadings ( ReadingDate, MeterNumber, Consumption ) values
( '2012-01-13', 'E154', 2710 ),
( '2012-01-19', 'BR549', 650 ),
( '2012-02-15', 'E154', 2970 ),
( '2012-02-19', 'BR549', 618 ),
( '2012-03-16', 'BR549', 758 ),
( '2012-04-11', 'E154', 2633 ),
( '2012-04-20', 'BR549', 691 )
; with Months ( Month ) as (
select #StartDate as [Month]
union all
select DateAdd( mm, 1, Month )
from Months
where Month < #Now
),
MeterNumbers ( MeterNumber ) as (
select distinct MeterNumber
from #MeterReadings )
select M.Month, MN.MeterNumber,
MR.MeterReadingId, MR.ReadingDate, MR.Consumption,
Coalesce( MR.Consumption, #DefaultConsumption ) as [BillableConsumption],
( select Max( ReadingDate ) from #MeterReadings where MeterNumber = MN.MeterNumber and ReadingDate < M.Month ) as [PriorReadingDate],
( select Consumption from #MeterReadings where MeterNumber = MN.MeterNumber and ReadingDate =
( select Max( ReadingDate ) from #MeterReadings where MeterNumber = MN.MeterNumber and ReadingDate < M.Month ) ) as [PriorConsumption],
( select Consumption from #MeterReadings where MeterNumber = MN.MeterNumber and ReadingDate =
( select Max( ReadingDate ) from #MeterReadings where MeterNumber = MN.MeterNumber and ReadingDate < M.Month ) ) * 1.15 as [PriorConsumptionPlus15Percent]
from Months as M cross join
MeterNumbers as MN left outer join
#MeterReadings as MR on MR.MeterNumber = MN.MeterNumber and DateAdd( dd, 1 - DatePart( dd, MR.ReadingDate ), MR.ReadingDate ) = M.Month
order by M.Month, MN.MeterNumber