Update row using value from row before - tsql

i'm looking for update row using value from row before.
I have something like this:
Group by
Value1
Value2
Value2 - expected result
1
0
20
20
1
3
x
23
1
5
x
28
1
2
x
30
2
0
30
30
2
5
x
35
2
2
x
37
Value2 = Value2 from row before + value 1 but column "Group by" is importand. If Value2 before is in another group then: Value2 = Value2
Can sameone explain mi how do this update statement? I tried using CTE with LAG function but i always fall in infinite loop.
Code for create table:
create table test
(
[GroupBy] int
, [Date] date
, [Value1] int
, [Value2] int
)
Inserting data:
INSERT INTO test ([GroupBy], [Date] [Value1], [Value2])
VALUES
(1, '2022-01-01', 0, 20),
(1, '2022-01-02', 3, NULL),
(1, '2022-01-03', 5, NULL),
(1, '2022-01-04', 2, NULL),
(2, '2022-01-01', 0, 30),
(2, '2022-01-02', 5, NULL),
(2, '2022-01-03', 2, NULL)
Primary key by: [GroupBy], [Date]

Check using LAG function
-- Using LAG function
-- https://learn.microsoft.com/sql/t-sql/functions/lag-transact-sql?view=sql-server-ver15&WT.mc_id=DP-MVP-5001699
;With MyCTE as (
SELECT
t.[date], t.GroupBy,t.Value1,Value2 = ISNULL(t.Value2,0),
PreValue2 = ISNULL(LAG(t.Value2,1,0) OVER (PARTITION BY t.GroupBy ORDER BY [date]),0)
FROM test t
)
SELECT [date], GroupBy, Value1, Value2, PreValue2, [Value1+PreValue2] = Value1+PreValue2
FROM MyCTE
Following the comment, maybe the original request was not well describe and what you need is not "Value2 from row before + value 1" but "SUM of all Value2 from all rows before + value 1"
In this case, check this solution
-- Value1 + total of all previous Value2
;With MyCTE as (
SELECT
t.[date], t.GroupBy,t.Value1,Value2 = ISNULL(t.Value2,0)
,TotalPreValue2 = SUM(ISNULL(t.Value2,0))
OVER (PARTITION BY t.GroupBy ORDER BY [date] ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW)
FROM test t
)
SELECT [date], GroupBy, Value1, Value2, TotalPreValue2, [Value1+TotalPreValue2] = Value1+TotalPreValue2
FROM MyCTE
GO
And if you need something else like "sum of all Value2 from ALL previous rows before + sum of all value1 from, previous rows" then check this
-- total of all previous Value1 + total of all previous Value2
;With MyCTE as (
SELECT
t.[date], t.GroupBy,t.Value1,Value2 = ISNULL(t.Value2,0)
,TotalPreValue2 = SUM(ISNULL(t.Value2,0))
OVER (PARTITION BY t.GroupBy ORDER BY [date] ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW)
,TotalPreValue1 = SUM(ISNULL(t.Value1,0))
OVER (PARTITION BY t.GroupBy ORDER BY [date] ROWS BETWEEN UNBOUNDED PRECEDING and CURRENT ROW)
FROM test t
)
SELECT [date], GroupBy, Value1, Value2, TotalPreValue2, TotalPreValue1, [TotalPreValue1+TotalPreValue2] = TotalPreValue1+TotalPreValue2
FROM MyCTE

Related

postgres aggregate subset from group by rows

I'm trying to evaluate user loyalty bonuses balance when bonuses burns after half-year inactivity. I want my sum consist of ord's 4, 5 and 6 for user 1.
create table transactions (
user int,
ord int, -- transaction date replacement
amount int,
lag interval -- after previous transaction
);
insert into transactions values
(1, 1, 10, '1h'::interval),
(1, 2, 10, '.5y'::interval),
(1, 3, 10, '1h'::interval),
(1, 4, 10, '.5y'::interval),
(1, 5, 10, '.1h'::interval),
(1, 6, 10, '.1h'::interval),
(2, 1, 10, '1h'::interval),
(2, 2, 10, '.5y'::interval),
(2, 3, 10, '.1h'::interval),
(2, 4, 10, '.1h'::interval),
(3, 1, 10, '1h'::interval),
;
select user, sum(
amount -- but starting from last '.5y'::interval if any otherwise everything counts
) from transactions group by user
user | sum(amount)
--------------------
1 | 30 -- (4+5+6), not 50, not 60
2 | 30 -- (2+3+4), not 40
3 | 10
try this:
with cte as(
select *,
case when (lead(lag) over (partition by user_ order by ord)) >= interval '.5 year'
then 1 else 0 end "flag" from test
),
cte1 as (
select *,
case when flag=(lag(flag,1) over (partition by user_ order by ord)) then 0 else 1 end "flag1" from cte
)
select distinct on (user_) user_, sum(amount) over (partition by user_,grp order by ord) from (
select *, sum(flag1) over (partition by user_ order by ord) "grp" from cte1) t1
order by user_ , ord desc
DEMO
Though it is very complicated and slow but resolve your problem
Is this what you're looking for ?
with last_5y as(
select "user", max(ord) as ord
from transactions
where lag = '.5y'::interval group by "user"
) select t.user, sum(amount)
from transactions t, last_5y t2
where t.user = t2.user and t.ord >= t2.ord
group by t.user

Optimising T-SQL reporting performance

I have the table bellow, I need to delete opposite rows between two dates by pairs based on PerCode Value,
In fact, we delete rows inside the date range that have the same PerCode and have equal and opposite values.
The problem is that begin date and end date are provided by users as parameters while reporting but the query take too much time if i try to delete these at runtime.
Example:
Begin date = 01/01/2018
End date = 31/12/2018
I should delete rows 3 and 4.
Do u have any idea how to do that while optimising performance (the table have 200 Millions of rows)
+----+------------+---------+---------+-----------+
| Id | Date | PerCode | Value | IsDeleted |
+----+------------+---------+---------+-----------+
| 1 | 01/10/2017 | C1 | 10 | |
| 2 | 01/01/2018 | C1 | -10 | |
| 3 | 15/02/2018 | C2 | 20 | 1 |
| 4 | 10/03/2018 | C2 | -20 | 1 |
| 5 | 01/12/2018 | C3 | 15 | |
| 6 | 01/02/2019 | C3 | -15 | |
+----+------------+---------+---------------------+
I had a quick go at this, using a table variable to allow me to knock together a query using your test data. However, this might not perform well when used over 2 million rows?
DECLARE #table TABLE (id INT, [date] DATE, percode CHAR(2), [value] INT, isdeleted BIT);
INSERT INTO #table
SELECT 1, '20171001', 'C1', 10, NULL
UNION ALL
SELECT 2, '20180101', 'C1', -10, NULL
UNION ALL
SELECT 3, '20180215', 'C2', 20, NULL
UNION ALL
SELECT 4, '20180310', 'C2', -20, NULL
UNION ALL
SELECT 5, '20181201', 'C3', 15, NULL
UNION ALL
SELECT 6, '20190201', 'C3', -15, NULL;
DECLARE #date_from DATE = '20180101';
DECLARE #date_to DATE = '20181231';
WITH ordered AS (
SELECT
id,
percode,
[value],
ROW_NUMBER() OVER (PARTITION BY percode, [value] ORDER BY [value]) AS order_id
FROM
#table
WHERE
[date] BETWEEN #date_from AND #date_to
AND ISNULL(isdeleted, 0) != 1),
matches AS (
SELECT
m1.id AS match_1_id,
m2.id AS match_2_id
FROM
ordered m1
INNER JOIN ordered m2 ON m1.percode = m2.percode AND m1.[value] = m2.[value] * -1 AND m1.order_id = m2.order_id)
UPDATE
t
SET
isdeleted = 1
FROM
#table t
INNER JOIN matches m ON m.match_1_id = t.id OR m.match_2_id = t.id;
SELECT * FROM #table;
Results:
id date percode value isdeleted
1 2017-10-01 C1 10 NULL
2 2018-01-01 C1 -10 NULL
3 2018-02-15 C2 20 1
4 2018-03-10 C2 -20 1
5 2018-12-01 C3 15 NULL
6 2019-02-01 C3 -15 NULL
How does it work? Well I broke the task down into steps:
make a list of all rows in the date period specified, where they aren't already deleted;
for each row of data assign it a running count number, grouped by the percode and the value. So the first C1 10 would be number #1, then the second C1 10 would be number #2, etc.;
to find matches it's simply a case of finding any value that has the same percode, the equal and opposite value to another value group, and the same running count number;
where there's a match set the isdeleted flag to 1.
Here is my code but this is not performant over 200 millions rows in real time.
and in real life Percode is concatenation of 5 columns (date, varchar(13), varchar(2),varchar(1) and varchar(50)) and Value is 4 numeric columns.
I am searching for other ideas.
--DECLARE #table TABLE (id INT, [date] DATE, percode CHAR(2), [value] INT, isdeleted BIT);
Select * INTO #MasterTable FROM
(
SELECT 1 id, '20171001' [date], 'C1' percode, 10 [value], NULL isdeleted
UNION ALL
SELECT 2, '20180101', 'C1', -10, NULL
UNION ALL
SELECT 3, '20180215', 'C2', 20, NULL
UNION ALL
SELECT 4, '20180310', 'C2', -20, NULL
UNION ALL
SELECT 5, '20181201', 'C3', 15, NULL
UNION ALL
SELECT 6, '20190201', 'C3', -15, NULL
) T ;
DECLARE #date_from DATE = '20180101';
DECLARE #date_to DATE = '20181231';
select F.id
Into #TmpTable
from
(
select Id, PerCode, Value
,ROW_NUMBER() over (partition by PerCode, Value order by (select 0)) Rn2
from
#MasterTable ) F
inner join (
select
PerCode
, Rn1
from (
select
PerCode
,Value
,ROW_NUMBER() over (partition by PerCode, Value order by (select 0)) Rn1
FROM #MasterTable
where
[date] BETWEEN #date_from AND #date_to
) A
group by PerCode , Rn1
having sum(Value) = 0 and count(*)>1
) B on F.PerCode = B.PerCode
and F.Rn2 = B.Rn1
update R
set IsDeleted = 1
from #MasterTable R
inner join #TmpTable P
on R.id = P.id
select * from #MasterTable
drop table #MasterTable ;
drop table #TmpTable;

How to pivot 1 column and 2 rows into 2 columns and 1 row in Db2 SQL

How to achieve 1 column 2 rows to 2 columns 1 row on DB2 please?
eg :
select value from <tablename> WHERE name='VAR' ORDER BY effectivedate DESC FETCH FIRST 2 ROWS ONLY;
which gives
VAR
----
12
57
But I want to get
VAR1,VAR2
-----------
12 ,57
Thanks very much!
Typically, the best way to "pivot" rows to columns is to aggregate over CASE statements.
For example
SELECT MAX(CASE WHEN RN = 1 THEN value END) AS VAR1
, MAX(CASE WHEN RN = 2 THEN value END) AS VAR2
, MAX(CASE WHEN RN = 3 THEN value END) AS VAR3
, MAX(CASE WHEN RN = 4 THEN value END) AS VAR4
FROM (
SELECT *, ROW_NUMBER() OVER(ORDER BY effectivedate DESC) AS RN
FROM a_table T
)
will return this
VAR1 VAR2 VAR3 VAR4
---- ---- ---- ----
12 57 1 NULL
using the table and data in my other answer
which would return
Use substring and aliases
SELECT
SUBSTR(VAR, 1,LOCATE(' ',VAR)-1) as VAR1
, SUBSTR(VAR, LOCATE(' ',VAR)+1) as VAR2
FROM YOURTABLE;
Basically breaking on space, if you have fixed length you can use without locate.
There are many ways to do this. If you are on Db2 LUW 11.1 or above, this will work
SELECT * FROM TABLE(VALUES
( ( select value from a_table WHERE name='VAR' ORDER BY effectivedate DESC FETCH FIRST 1 ROW ONLY)
, ( select value from a_table WHERE name='VAR' ORDER BY effectivedate DESC OFFSET 1 ROW FETCH NEXT 1 ROW ONLY )
)) AS t(VAR1, VAR2)
and with this table and data
create TABLE a_table( value int, name char(3), effectivedate date);
INSERT INTO a_table values (12,'VAR','2018-01-10'),(57,'VAR', '2018-01-09'),(1,'VAR','2018-01-08');
will return this result
VAR1 VAR2
---- ----
12 57

Postgresql dense ranking to start at 2 if there is an initial tie at 1

So i have a table and a query that ranks the cost of items and doesn't allows ties with position 1, if there is a tie at position 1 the ranking starts at 2.
Here is the schema with a sample data
CREATE TABLE applications
(id int, name char(10), cost int);
INSERT INTO applications
(id, name, cost)
VALUES
(1, 'nfhfjs', 10),
(2, 'oopdld', 20),
(3, 'Wedass', 14),
(4, 'djskck', 22),
(5, 'laookd', 25),
(6, 'mfjjf', 25),
(7, 'vfhgg', 28),
(8, 'nvopq', 29),
(9, 'nfhfj', 56),
(10, 'voapp', 56);
Here is the query
WITH start_tie AS (
SELECT
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
lead(cost,1) OVER (ORDER BY cost DESC) as next_app_cost
FROM
applications LIMIT 1
)
SELECT
*,
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
(CASE start_tie.cost_rank WHEN start_tie.next_app_cost THEN cost_rank+1 ELSE cost_rank END) AS right_cost_rank
FROM
applications;
my expected result is
id name cost cost_rank
10 voapp 56 2
9 nfhfj 56 2
8 nvopq 29 3
7 vfhgg 28 4
6 mfjjf 25 5
5 laookd 25 5
4 djskck 22 6
2 oopdld 20 7
3 Wedass 14 8
1 nfhfjs 10 9
Please modify the query to achieve the result.
SQL FIDDLE
All you need to do is to check if the highest cost is the same as the second-highest cost. And if that is the case, add 1 to all rank values:
with start_tie as (
select case
when cost = lead(cost) over (order by cost desc) then 1
else 0
end as tie_offset
from applications
order by cost desc
limit 1
)
select *,
dense_rank() over (order by cost desc) + (select tie_offset from start_tie) cost_rank
from applications;
Example: http://rextester.com/EKSLJK65530
If the number of ties defines the offset to be used for the "new" ranking, the offset could be calculated using this:
with start_tie as (
select count(*) - 1 as tie_offset
from applications a1
where cost = (select max(cost) from applications)
)
select *,
dense_rank() over(order by cost desc) + (select tie_offset from start_tie) cost_rank
from applications;
No tie at first, means more than one with rank 1
replace r.cost_rank+x.c-1 with r.cost_rank+1 if fixed start at 2 rank to regardless of how many are in tie ranks are
WITH r AS (
SELECT
*
,DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank
FROM
applications
), x as (select count(*) as c from r where cost_rank=1)
SELECT
r.*, (CASE WHEN 1<x.c THEN r.cost_rank+x.c-1 ELSE r.cost_rank END) as fixed
FROM
r,x;

GROUP BY for MAX and NULL

I want max startdate but there is a NULL data, it will be null.
Sample data is as follows:
DECLARE #Tbl TABLE (Id INT, StartDate DATETIME)
INSERT INTO #Tbl
VALUES (1, NULL),
(1, '2016.07.30'),
(1, '2016.07.05'),
(1, '2016.07.05'),
(2, '2016.07.07'),
(2, '2016.07.05'),
(3, '2016.07.05'),
(3, NULL)
My Query:
SELECT Id, MAX(StartDate) AS StartDate
FROM #Tbl
GROUP BY Id
Output:
Id StartDate
----------- ----------
1 2016-07-30
2 2016-07-07
3 2016-07-05
Desired Output:
Id StartDate
----------- ----------
1 NULL
2 2016-07-07
3 NULL
To solve this problem we can use a count function that behave different in two cases:
when we use count(*) then all rows are count (also with null value)
when we use count(someFieldName) then only rows with not null value are count
You can see this different behaviour on this example using sample data from the question
select Id, count(*) as count_all, count(StartDate) as count_StartDate
from #Tbl
group by Id;
On the output we can see this
Id count_all count_StartDate
1 4 3
2 2 2
3 2 1
We can use this different behaviour to solve problem from question by this query
select Id, case when count(*) = count(StartDate)
then max(StartDate)
else null
end as StartDate
from #Tbl
group by Id
On the output we can see the desired result
Id StartDate
1 NULL
2 2016-07-07 00:00:00.000
3 NULL
Found the result.
SELECT Id, CASE
WHEN MAX(COALESCE(StartDate, '2099.01.01')) = '2099.01.01' THEN NULL
ELSE MAX(StartDate) END AS StartDate
FROM #Tbl
GROUP BY Id