Forming a Tsql query that ranks and categorizes date field - tsql

I have this dataset:
-- Sample data: three rows share the latest date, followed by two older dates.
CREATE TABLE #date_example
(
    date_val DATETIME,
    rownum   INT
);

INSERT INTO #date_example (date_val, rownum)
VALUES
    ('3/1/14', 1),
    ('3/1/14', 2),
    ('3/1/14', 3),
    ('2/1/14', 4),
    ('1/3/14', 5);

-- Every rownum value is unique, so each partition holds exactly one row and
-- ROW_NUMBER() restarts at 1 for all of them.
SELECT
    de.date_val,
    ROW_NUMBER() OVER (PARTITION BY de.rownum ORDER BY de.date_val DESC) AS [RowNum]
FROM #date_example AS de
ORDER BY de.date_val DESC;
With output:
date_val RowNum
2014-03-01 00:00:00.000 1
2014-03-01 00:00:00.000 1
2014-03-01 00:00:00.000 1
2014-02-01 00:00:00.000 1
2014-01-03 00:00:00.000 1
But I want instead output:
date_val RowNum
2014-03-01 00:00:00.000 1
2014-03-01 00:00:00.000 1
2014-03-01 00:00:00.000 1
2014-02-01 00:00:00.000 2
2014-01-03 00:00:00.000 3
So I want the RowNum to be a ranking which includes ties. How can I do this?

I found the answer from another post:
-- Rank the dates from newest to oldest, giving tied dates the same number.
-- DENSE_RANK() numbers the next distinct date consecutively (1,1,1,2,3);
-- RANK() would leave gaps after ties (1,1,1,4,5), which is not the wanted output.
SELECT
    date_val,
    DENSE_RANK() OVER (ORDER BY date_val DESC) AS 'RowNum'
FROM #date_example
ORDER BY date_val DESC;

Related

BigQuery SQL: Group rows with shared ID that occur within 7 days of each other, and return values from most recent occurrence

I have a table of datestamped events that I need to bundle into 7-day groups, starting with the earliest occurrence of each event_id.
The final output should return each bundle's start and end date and 'value' column of the most recent event from each bundle.
There is no predetermined start date, and the '7-day' windows are arbitrary, not 'week of the year'.
I've tried a ton of examples from other posts but none quite fit my needs or use things I'm not sure how to refactor for BigQuery
Sample Data;
Event_Id
Event_Date
Value
1
2022-01-01
010203
1
2022-01-02
040506
1
2022-01-03
070809
1
2022-01-20
101112
1
2022-01-23
131415
2
2022-01-02
161718
2
2022-01-08
192021
3
2022-02-12
212223
Expected output;
Event_Id
Start_Date
End_Date
Value
1
2022-01-01
2022-01-03
070809
1
2022-01-20
2022-01-23
131415
2
2022-01-02
2022-01-08
192021
3
2022-02-12
2022-02-12
212223
You might consider below.
-- cumsumbin: JavaScript UDF that turns an array of day gaps into a bin index.
-- It folds over the array with a running sum `c`; whenever adding the next gap
-- would push the sum above 6, the counter `bin` is incremented and the running
-- sum restarts at 0. The final value of `bin` is returned.
-- NOTE(review): each element is wrapped in Number(), presumably because the
-- INT64 values (and the NULL gap of a first event) do not arrive as plain JS
-- numbers -- confirm against the BigQuery JS UDF type mapping.
CREATE TEMP FUNCTION cumsumbin(a ARRAY<INT64>) RETURNS INT64
LANGUAGE js AS """
bin = 0;
a.reduce((c, v) => {
if (c + Number(v) > 6) { bin += 1; return 0; }
else return c += Number(v);
}, 0);
return bin;
""";
-- Inline copy of the sample rows from the question (event_id, event_date, value).
WITH sample_data AS (
select 1 event_id, DATE '2022-01-01' event_date, '010203' value union all
select 1 event_id, '2022-01-02' event_date, '040506' value union all
select 1 event_id, '2022-01-03' event_date, '070809' value union all
select 1 event_id, '2022-01-20' event_date, '101112' value union all
select 1 event_id, '2022-01-23' event_date, '131415' value union all
select 2 event_id, '2022-01-02' event_date, '161718' value union all
select 2 event_id, '2022-01-08' event_date, '192021' value union all
select 3 event_id, '2022-02-12' event_date, '212223' value
),
-- binning: tag every event with the bin number computed by cumsumbin.
binning AS (
-- The gaps aggregated over w1 (same event_id, ordered by date) are passed to
-- cumsumbin. NOTE(review): relies on the default window frame ending at the
-- current row so each row only sees the gaps up to itself -- confirm.
SELECT *, cumsumbin(ARRAY_AGG(diff) OVER w1) bin
FROM (
-- diff: days since the previous event of the same event_id (NULL for the first).
SELECT *, DATE_DIFF(event_date, LAG(event_date) OVER w0, DAY) AS diff
FROM sample_data
WINDOW w0 AS (PARTITION BY event_id ORDER BY event_date)
) WINDOW w1 AS (PARTITION BY event_id ORDER BY event_date)
)
-- One output row per (event_id, bin): earliest date as start_date, plus the
-- date and value of the latest event in the bin (ARRAY_AGG ordered DESC, LIMIT 1).
SELECT event_id,
MIN(event_date) start_date,
ARRAY_AGG(
STRUCT(event_date AS end_date, value) ORDER BY event_date DESC LIMIT 1
)[OFFSET(0)].*
FROM binning GROUP BY event_id, bin;

Show every week of the Year even if there is no data

I have a query that pulls data by week and groups it together, but it does not display weeks that don't have any data. I want to show all weeks even if they have no data, perhaps as NULL.
Here is the query; it would be awesome if someone could help me with this.
-- One row per week that has at least one interruption in the filtered range.
-- week_index is the number of week boundaries between date 0 and StartDate;
-- adding it to day -1 / day +5 gives the first and last day of that week.
SELECT
    DATEADD(week, w.week_index, -1) AS [WeekOf],
    DATEADD(week, w.week_index, 5)  AS [to],
    DATEPART(wk, o.StartDate)       AS [WeekNumber]
FROM [DESOutage].[dbo].[OPSInterruption] AS o
CROSS APPLY (
    SELECT DATEDIFF(week, 0, o.StartDate) AS week_index
) AS w
WHERE o.StartDate > '2020-01-01'
  AND o.EndDate < '2020-02-01'
GROUP BY
    DATEADD(week, w.week_index, -1),
    DATEPART(wk, o.StartDate),
    DATEADD(week, w.week_index, 5)
***************Output***************
As you can see, weeks 2 and 4 are missing because no data is returned for them. I would still like to see weeks 2 and 4 in the output, perhaps with 0 as the result.
WeekOf to WeekNumber
2019-12-29 00:00:00.000 2020-01-04 00:00:00.000 1
2020-01-12 00:00:00.000 2020-01-18 00:00:00.000 3
2020-01-26 00:00:00.000 2020-02-01 00:00:00.000 5
You probably need a calendar table. Here is a quick way of generating one, with an untested implementation of your code. I am assuming that the StartDate may contain a time component thus the need to coalesce the dates.
-- Build a one-row-per-day calendar and LEFT JOIN the outage table onto it, so
-- that weeks without any interruption still appear (with a count of 0).
-- T-SQL local variables use the @ prefix; # denotes temporary tables.
DECLARE @StartYear DATETIME = '20200101';
DECLARE @Days INT = 366;

WITH
E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),                                          -- 1*10^1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b),        -- 1*10^2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b),        -- 1*10^4 or 10,000 rows
E8(N) AS (SELECT 1 FROM E4 a, E4 b),        -- 1*10^8 or 100,000,000 rows
-- TOP limits the row generator to the number of days actually needed.
Tally(N) AS (SELECT TOP (@Days) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E8),
Calendar AS (
    SELECT StartOfDay = DATEADD(dd, N - 1, @StartYear),
           NextDay    = DATEADD(dd, N, @StartYear)
    FROM Tally
)
SELECT DATEADD(week, DATEDIFF(week, 0, COALESCE(x.StartDate, c.StartOfDay)), -1) AS 'WeekOf'
     , DATEADD(week, DATEDIFF(week, 0, COALESCE(x.StartDate, c.StartOfDay)), +5) AS 'to'
     , DATEPART(wk, COALESCE(x.StartDate, c.StartOfDay)) AS 'WeekNumber'
     -- COUNT ignores the NULLs produced by unmatched calendar days, so weeks
     -- without data report 0.
     , COUNT(x.StartDate) AS 'Interruptions'
FROM Calendar c
-- LEFT JOIN keeps calendar days that have no interruption; an INNER JOIN would
-- drop them and the empty weeks would vanish again.
LEFT JOIN [DESOutage].[dbo].[OPSInterruption] x
    -- Half-open day range: includes rows at exactly midnight and rows in the
    -- last second of the day, regardless of fractional seconds.
    ON  x.StartDate >= c.StartOfDay
    AND x.StartDate <  c.NextDay
    -- Same end-date restriction as the original query; kept in ON so that it
    -- filters matches without removing calendar days.
    AND x.EndDate < '2020-02-01'
-- >= so that January 1st itself is part of the reporting range.
WHERE c.StartOfDay >= '2020-01-01' AND c.StartOfDay < '2020-02-01'
GROUP BY DATEADD(week, DATEDIFF(week, 0, COALESCE(x.StartDate, c.StartOfDay)), -1),
         DATEPART(wk, COALESCE(x.StartDate, c.StartOfDay)),
         DATEADD(week, DATEDIFF(week, 0, COALESCE(x.StartDate, c.StartOfDay)), +5)
ORDER BY DATEADD(week, DATEDIFF(week, 0, COALESCE(x.StartDate, c.StartOfDay)), -1);

Repeating value of previous row in a join

I have one table including accounts and their balance. I would like to report the balance for each day while for missing days report the last day.
Table accounts:
AccountName Date Balance
thomas 2008-10-09 1000
thomas 2008-10-20 5000
david 2008-02-18 2000
david 2008-03-10 200000
Let's say we want the report for 2008-10; I need to get something like this:
thomas 2008-10-01 0
...
thomas 2008-10-09 1000
thomas 2008-10-10 1000
...
thomas 2008-10-20 5000
...
thomas 2008-10-31 5000
I went this far:
-- First attempt: generate every day in the range with a recursive CTE and
-- outer-join the accounts onto it. Days without an account row come back as
-- NULLs, which is the problem described in the question.
-- T-SQL local variables use the @ prefix; # denotes temporary tables.
DECLARE @StartDate datetime = '2008/10/9';
DECLARE @EndDate datetime = '2008/10/20';
WITH theDates AS
(
    -- Anchor: the first day of the range.
    SELECT @StartDate AS theDate
    UNION ALL
    -- Recursive step: add one day until the end date is reached.
    SELECT DATEADD(day, 1, theDate)
    FROM theDates
    WHERE DATEADD(day, 1, theDate) <= @EndDate
)
select * from accounts a
right outer join theDates d on a.date = d.theDate
order by d.theDate
Results:
AccountNo Date Balance theDate
----------- ---------- -------- ----------
thomas 2008-10-09 1000 2008-10-09
NULL NULL NULL 2008-10-10
NULL NULL NULL 2008-10-11
NULL NULL NULL 2008-10-12
NULL NULL NULL 2008-10-13
NULL NULL NULL 2008-10-14
NULL NULL NULL 2008-10-15
NULL NULL NULL 2008-10-16
NULL NULL NULL 2008-10-17
NULL NULL NULL 2008-10-18
NULL NULL NULL 2008-10-19
thomas 2008-10-20 5000 2008-10-20
Any idea?
Update:
I ended up using a cursor. This version works perfectly, including the situation where an account has no entry.
-- For every day in the range, store each account's balance for that day; when
-- the account has no row on that day, fall back to its most recent earlier
-- balance, or 0 when there is none.
-- T-SQL local variables and table variables use the @ prefix, and the cursor
-- status function is @@FETCH_STATUS.
DECLARE @Date datetime;
DECLARE @result table (accountname nvarchar(50), balance int, date datetime);
DECLARE @StartDate datetime = '2008/10/1';
DECLARE @EndDate datetime = '2008/10/29';

-- The cursor walks the generated list of days one at a time.
DECLARE cur CURSOR FOR
    WITH theDates AS
    (
        SELECT @StartDate AS theDate
        UNION ALL
        SELECT DATEADD(day, 1, theDate)
        FROM theDates
        WHERE DATEADD(day, 1, theDate) <= @EndDate
    )
    SELECT theDate FROM theDates;

OPEN cur;
FETCH NEXT FROM cur INTO @Date;

WHILE @@FETCH_STATUS = 0
BEGIN
    INSERT INTO @result (accountname, balance, date)
    SELECT
        b.accountName,
        ISNULL(
            a.balance,
            -- No row for this day: latest balance before it, else 0.
            ISNULL(
                (SELECT TOP 1 balance
                 FROM accounts
                 WHERE date < @Date AND accountName = b.accountName
                 ORDER BY date DESC),
                0)
        ),
        @Date
    -- Start from the full list of account names so every account gets a row.
    FROM (SELECT DISTINCT accountname FROM accounts) b
    LEFT JOIN (SELECT * FROM accounts WHERE date = @Date) a
        ON a.accountname = b.accountname;

    FETCH NEXT FROM cur INTO @Date;
END

CLOSE cur;
DEALLOCATE cur;

SELECT * FROM @result;
Try this:
-- Give every balance row a validity interval [Date, CloseDate) and join it to
-- the generated days, so each day picks up the balance in force on that day.
-- T-SQL local variables use the @ prefix; # denotes temporary tables.
DECLARE @StartDate datetime = '2008/10/9';
DECLARE @EndDate datetime = '2008/10/20';
WITH theDates AS
(
    SELECT @StartDate AS theDate
    UNION ALL
    SELECT DATEADD(day, 1, theDate)
    FROM theDates
    WHERE DATEADD(day, 1, theDate) <= @EndDate
),
acc AS (
    SELECT a.AccountName,
           a.Balance,
           a.Date,
           -- The latest balance of an account stays open until today.
           ISNULL(c.CloseDate, CAST(GETDATE() AS date)) AS CloseDate
    FROM accounts a
    -- CloseDate is the next entry of the SAME account. Without the AccountName
    -- predicate a balance would be closed by another account's next entry.
    -- With the sample data this means david's latest balance (200000) is now
    -- also reported for each day, in addition to the rows for thomas.
    CROSS APPLY (SELECT MIN(b.Date) AS CloseDate
                 FROM accounts b
                 WHERE b.AccountName = a.AccountName
                   AND b.Date > a.Date) c
)
SELECT a.AccountName, a.Balance, a.Date, d.theDate
FROM acc a
INNER JOIN theDates d
    ON  a.Date <= d.theDate
    AND a.CloseDate > d.theDate
ORDER BY a.AccountName, d.theDate
-- Lift the default limit of 100 recursion levels for longer date ranges.
OPTION (MAXRECURSION 0)
Results:
AccountName Balance Date theDate
----------- ----------- ------------------- -----------------------
thomas 1000 2008-10-09 00:00:00 2008-10-09 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-10 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-11 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-12 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-13 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-14 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-15 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-16 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-17 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-18 00:00:00.000
thomas 1000 2008-10-09 00:00:00 2008-10-19 00:00:00.000
thomas 5000 2008-10-20 00:00:00 2008-10-20 00:00:00.000
You can try using the aggregate functions MIN and MAX to build a calendar table and then OUTER JOIN to it:
-- Build one calendar row per account and day between that account's first and
-- last entry, then outer-join the balances onto the calendar.
WITH theDates AS
(
    -- Anchor: each account's first and last entry date.
    SELECT AccountName, MIN(Date) AS StartDt, MAX(Date) AS EndDt
    FROM accounts
    GROUP BY AccountName
    UNION ALL
    -- Recursive step: advance one day until the account's last date.
    SELECT AccountName, DATEADD(day, 1, StartDt), EndDt
    FROM theDates
    WHERE DATEADD(day, 1, StartDt) <= EndDt
)
SELECT d.AccountName,
       d.StartDt AS [date],
       -- Days without an entry for the account show 0.
       ISNULL(a.Balance, 0) AS Balance
-- The calendar must be the preserved side of the outer join; starting from
-- accounts would only ever return days that already have an entry.
FROM theDates d
LEFT JOIN accounts a
    -- Match on the account as well, so one account's balance is not attached
    -- to another account's calendar day.
    ON  a.AccountName = d.AccountName
    AND a.Date = d.StartDt
ORDER BY d.AccountName, d.StartDt
-- Lift the default limit of 100 recursion levels for accounts whose entries
-- span more than 100 days.
OPTION (MAXRECURSION 0)

How to find gap date and minimum date in the same query?

I have a table customer_history which log customer_id and modification_date.
When customer_id is not modified there is no entry in the table
I can find when a customer_id hasn't been modified (= last_date_with_no_modification) by looking for the dates that are missing (a Gaps and Islands problem).
But in the same query if no date is missing the value last_date_with_no_modification should
be DATEADD(DAY,-1,min(modification_date)) for the customer_id.
I don't know how to add this last condition in my SQL query?
I use following tables:
"Customer_history" table:
customer_id modification_date
1 2017-12-20
1 2017-12-19
1 2017-12-17
2 2017-12-20
2 2017-12-18
2 2017-12-17
2 2017-12-15
3 2017-12-20
3 2017-12-19
"#tmp_calendar" table:
date
2017-12-15
2017-12-16
2017-12-17
2017-12-18
2017-12-19
2017-12-20
Query used to get the gap date:
-- Pair every modification with the previous one for the same customer.
WITH ordered_history AS
(
    SELECT
        ch.customer_id,
        ch.modification_date AS GapEnd,
        LAG(ch.modification_date) OVER (
            PARTITION BY ch.customer_id
            ORDER BY ch.modification_date
        ) AS GapStart
    FROM customer_history ch
),
-- GapDays is the number of unmodified days strictly between the two dates.
CTE_GAP AS
(
    SELECT
        oh.customer_id,
        oh.GapStart,
        oh.GapEnd,
        DATEDIFF(DAY, oh.GapStart, oh.GapEnd) - 1 AS GapDays
    FROM ordered_history oh
)
-- For customers with at least one gap: the day after the latest gap start.
-- Customers without any gap produce no row here.
SELECT
    cg.customer_id,
    DATEADD(DAY, 1, MAX(cg.GapStart)) AS last_date_with_no_modification
FROM CTE_GAP cg
INNER JOIN #tmp_calendar c
    ON  c.date >= DATEADD(DAY, 1, cg.GapStart)
    AND c.date <= DATEADD(DAY, -1, cg.GapEnd)
WHERE cg.GapDays > 0
GROUP BY cg.customer_id
Result:
customer_id last_date_with_no_modification
1 2017-12-18
2 2017-12-19
3 2017-12-19 (Row missing)
How to get customer_id 3?
Something like this should work:
-- Return one row per customer: the day after the latest gap start when the
-- customer has a gap, otherwise the day before the first modification.
WITH CTE_GAP
AS
(
    -- Pair every modification with the previous one for the same customer.
    -- Reads the same customer_history table as the rest of the query.
    SELECT
        ch.customer_id,
        LAG(ch.modification_date) OVER (PARTITION BY ch.customer_id ORDER BY ch.modification_date) AS GapStart,
        ch.modification_date AS GapEnd,
        (DATEDIFF(DAY, LAG(ch.modification_date) OVER (PARTITION BY ch.customer_id ORDER BY ch.modification_date), ch.modification_date) - 1) AS GapDays
    FROM customer_history ch
),
-- Customers that have at least one gap.
LD AS
(
    SELECT
        cg.customer_id,
        DATEADD(DAY, 1, MAX(cg.GapStart)) AS last_date_with_no_modification
    FROM CTE_GAP cg
    CROSS JOIN #tmp_calendar c
    WHERE cg.GapDays > 0
      AND c.date BETWEEN DATEADD(DAY, 1, cg.GapStart) AND DATEADD(DAY, -1, cg.GapEnd)
    GROUP BY cg.customer_id
),
-- Fallback for every customer: the day before the first modification.
-- Grouping by customer_id yields exactly one row per customer, so the final
-- query needs no DISTINCT to remove duplicates.
LD_NO_GAP AS
(
    SELECT
        customer_id,
        DATEADD(DAY, -1, MIN(modification_date)) AS last_date_with_no_modification
    FROM customer_history
    GROUP BY customer_id
)
SELECT
    LD_NO_GAP.customer_id,
    ISNULL(LD.last_date_with_no_modification, LD_NO_GAP.last_date_with_no_modification) AS last_date_with_no_modification
FROM LD_NO_GAP
LEFT JOIN LD
    ON LD.customer_id = LD_NO_GAP.customer_id

Comparison of two rows in TSQL

Good Day
I have the following result set returned:
financeYearEnd FromDate ToDate ClientPortfolioCode
2013-12-31 00:00:00.000 2014-01-01 2014-01-31 C1
2013-12-31 00:00:00.000 2014-01-01 2014-01-31 C2
2012-12-31 00:00:00.000 2013-12-01 2013-12-31 C1
2012-12-31 00:00:00.000 2013-12-01 2013-12-31 C2
What I need to do is the following:
I need to compare the financeYearEnd of all the C1 Fields (there will always only be two), and see if they are different to each other
2013-12-31 00:00:00.000 2014-01-01 2014-01-31 C1
2012-12-31 00:00:00.000 2013-12-01 2013-12-31 C1
As seen, the financeYearEnd does differ, so I need to store that result as a row in a temporary Table.
This needs to be done for all distinct ClientPortfolioCodes(Which will always appear in groups of two)
How can this be achieved?
I have tried select distinct .. - didn't work. It returned all my rows
EDIT -
-- Select the capital-account rows of the requested period and of the period
-- one month earlier, for the portfolios listed in #ClientPortfolios.
-- @FromDate / @ToDate are local variables declared outside this snippet
-- (T-SQL variables use the @ prefix; # denotes temporary tables).
WITH cteCompareTopTwoYears
AS (
    -- TOP is capped at two rows per listed portfolio.
    SELECT TOP (
            SELECT COUNT(*) * 2
            FROM #ClientPortFolios
            ) FinancialYearEnd AS financeYearEnd
        ,FromDate
        ,ToDate
        ,CA.ClientPortfolioCode
    FROM rpt.F3_fn_ClientPortfolios_CapitalAccount_IncludingYTD CA
    WHERE (
            -- Previous month's period ...
            (
                CA.FromDate = (DATEADD(m, - 1, @FromDate))
                AND CA.ToDate = (DATEADD(m, - 1, @ToDate))
                )
            -- ... or the requested period itself.
            OR (
                CA.FromDate = @FromDate
                AND CA.ToDate = @ToDate
                )
            )
        AND (
            CA.ClientPortFolioCode IN (
                SELECT ClientPortfolioCode
                FROM #ClientPortfolios
                )
            )
    ORDER BY FromDate DESC
    )
SELECT *
FROM cteCompareTopTwoYears c
Perhaps with the help of ROW_NUMBER, e.g.:
-- Return a row only for portfolios whose financeYearEnd values differ.
-- DENSE_RANK gives equal financeYearEnd values the same number, so rn exceeds 1
-- only when a portfolio has more than one distinct financeYearEnd. ROW_NUMBER
-- would number the second row 2 even when both values are identical.
WITH CTE AS
(
    SELECT financeYearEnd, FromDate, ToDate, ClientPortfolioCode,
           rn = DENSE_RANK() OVER (PARTITION BY ClientPortfolioCode
                                   ORDER BY financeYearEnd ASC)
    FROM dbo.TableName
)
SELECT financeYearEnd, FromDate, ToDate, ClientPortfolioCode
FROM CTE
WHERE rn > 1
Demo