TSQL get unique (not overlapping) datetime ranges - tsql

This question is similar to "TSQL get overlapping periods from datetime ranges", but it asks for a different result.
This is the table:
-- Sample schema: one row per time range, tagged with an activity type.
CREATE TABLE period (
    id        INT,
    starttime DATETIME,
    endtime   DATETIME,
    type      VARCHAR(64)
);

-- Rows 1-4 are the base 'Work' ranges; rows 5-7 overlap them as noted.
INSERT INTO period (id, starttime, endtime, type)
VALUES
    (1, '2013-04-07 8:00',  '2013-04-07 13:00', 'Work'),
    (2, '2013-04-07 14:00', '2013-04-07 17:00', 'Work'),
    (3, '2013-04-08 8:00',  '2013-04-08 13:00', 'Work'),
    (4, '2013-04-08 14:00', '2013-04-08 17:00', 'Work'),
    (5, '2013-04-07 10:00', '2013-04-07 11:00', 'Holyday'),   -- 1h overlapping with 1
    (6, '2013-04-08 10:00', '2013-04-08 20:00', 'Transfer'),  -- 6h overlapping with 3 and 4
    (7, '2013-04-08 11:00', '2013-04-08 12:00', 'Test');      -- 1h overlapping with 3 and 6
I need a table of the unique, non-overlapping datetime ranges, i.e. with overlapping ranges merged into one.
For the example above, the result would be:
'2013-04-07 08:00','2013-04-07 13:00'
'2013-04-07 14:00','2013-04-07 17:00'
'2013-04-08 08:00','2013-04-08 20:00'
It does not matter much if the result still contains some time fragmentation, such as:
'2013-04-08 08:00','2013-04-08 13:00'
'2013-04-08 12:00','2013-04-08 20:00'
--EDIT--
Another example:
-- Second sample: same schema, five ranges on a single day with empty type.
CREATE TABLE period (
    id        INT,
    starttime DATETIME,
    endtime   DATETIME,
    type      VARCHAR(64)
);

-- Rows 1-2 overlap each other, rows 3-4 overlap each other, row 5 stands alone.
INSERT INTO period (id, starttime, endtime, type)
VALUES
    (1, '2013-06-13 8:30',  '2013-06-13 12:30', ''),
    (2, '2013-06-13 8:38',  '2013-06-13 12:38', ''),
    (3, '2013-06-13 13:18', '2013-06-13 17:45', ''),
    (4, '2013-06-13 13:30', '2013-06-13 17:30', ''),
    (5, '2013-06-13 20:00', '2013-06-13 23:59', '');
this should return:
2013-06-13 08:30 - 2013-06-13 12:38
2013-06-13 13:18 - 2013-06-13 17:45
2013-06-13 20:00 - 2013-06-13 23:59

But you have only one non-overlapping period in that data, or did I misunderstand the question?
-- Returns every period that is not overlapped by any other period.
--
-- Two periods overlap when each one starts strictly before the other ends,
-- so ranges that merely touch (one ends exactly when the next starts) do not
-- count as overlapping.
--
-- NOT EXISTS replaces the earlier "count of non-overlapping partners + 1 =
-- table size" test, which returned no rows at all when the table held a
-- single period (the self-join produced nothing to count).
SELECT
    p.id,
    p.starttime,
    p.endtime,
    p.type
FROM period AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM period AS other
    WHERE other.id <> p.id
      AND other.endtime > p.starttime
      AND other.starttime < p.endtime
);
Result:
'2013-04-07 14:00','2013-04-07 17:00'
Update: Ok, so you want to merge overlapping ranges. Try this:
-- Merges overlapping (or touching) periods into distinct, non-overlapping
-- ranges and returns one (starttime, endtime) row per merged range.
--
-- The previous version grouped overlapping pairs by day-of-month, which fused
-- separate overlap clusters that fall on the same day (e.g. 08:30-12:38 and
-- 13:18-17:45 became 08:30-17:45) and mixed up equal day numbers from
-- different months. This version works on the ranges themselves:
--   1. island_start: start times that no earlier-starting period reaches;
--   2. island_end:   end times that no later-ending period covers;
--   3. each island start is paired with the nearest island end at or after it.
SELECT
    island_start.starttime,
    MIN(island_end.endtime) AS endtime
FROM (
    SELECT DISTINCT p.starttime
    FROM period AS p
    WHERE NOT EXISTS (
        SELECT 1
        FROM period AS q
        WHERE q.starttime < p.starttime
          AND q.endtime >= p.starttime
    )
) AS island_start
INNER JOIN (
    SELECT DISTINCT p.endtime
    FROM period AS p
    WHERE NOT EXISTS (
        SELECT 1
        FROM period AS q
        WHERE q.starttime <= p.endtime
          AND q.endtime > p.endtime
    )
) AS island_end
    ON island_end.endtime >= island_start.starttime
GROUP BY island_start.starttime
ORDER BY island_start.starttime;

I found this solution. I don't think it is the best way, but it seems to work.
-- Builds the merged, non-overlapping ranges of `period` one row at a time.
--
-- @union_unique holds the merged ranges found so far. For each period id, the
-- loop finds the merged ranges it overlaps (@idset); if there are none the
-- period is added as a new range, otherwise the period and all ranges it
-- touches are replaced by a single range spanning them.
--
-- Changes from the earlier version:
--   * local variables / table variables use the T-SQL '@' sigil ('#' is for
--     temp tables and is not valid in DECLARE ... TABLE / DECLARE ... int);
--   * the join no longer requires both ranges to start on the same calendar
--     day, which left ranges crossing midnight unmerged;
--   * the loop starts at MIN(id) instead of 1 and reads MAX(id) once.
DECLARE @union_unique TABLE (
    id        INT IDENTITY(1, 1) PRIMARY KEY,
    starttime DATETIME,
    endtime   DATETIME
);
DECLARE @idset TABLE (id INT);
DECLARE @i INT;
DECLARE @max_id INT;

-- Both stay NULL for an empty table, so the loop body never runs.
SELECT @i = MIN(id), @max_id = MAX(id) FROM period;

WHILE (@i <= @max_id)
BEGIN
    -- Intentionally unfiltered: @idset is scratch space reset per iteration.
    DELETE FROM @idset;

    -- Merged ranges that overlap or touch the current period.
    -- Assumes starttime <= endtime on every row.
    INSERT INTO @idset (id)
    SELECT DISTINCT u.id
    FROM period AS p
    INNER JOIN @union_unique AS u
        ON p.starttime <= u.endtime
       AND p.endtime >= u.starttime
    WHERE p.id = @i;

    IF NOT EXISTS (SELECT 1 FROM @idset)
        -- No overlap: the period becomes a range of its own
        -- (inserts nothing when no period has this id).
        INSERT INTO @union_unique (starttime, endtime)
        SELECT starttime, endtime
        FROM period
        WHERE id = @i;
    ELSE
    BEGIN
        -- Insert the combined span first, then drop the ranges it replaces;
        -- the new row gets a fresh identity value, so it is not in @idset.
        INSERT INTO @union_unique (starttime, endtime)
        SELECT MIN(starttime), MAX(endtime)
        FROM (
            SELECT starttime, endtime
            FROM @union_unique
            WHERE id IN (SELECT id FROM @idset)
            UNION ALL
            SELECT starttime, endtime
            FROM period
            WHERE id = @i
        ) AS merged;

        DELETE FROM @union_unique
        WHERE id IN (SELECT id FROM @idset);
    END

    SET @i = @i + 1;
END

SELECT id, starttime, endtime
FROM @union_unique
ORDER BY starttime;

Related

TSQL -- quick way to get a count across all tables

If database d1 has tables T1,T2,T3,T4 all with the field "Date1".
What is the best way to get a count of all records across all tables with a date older than 3 days ago?
I know one could do unions, I assume there is no nifty syntax that would omit all tables [like a 'parent' object in C++].
Here best may mean more efficient, or just a pleasing syntax in T-SQL.
This is for SSMS 17.7. Microsoft SQL Server 2014 (SP2)
If you know the table names in advance, a simple query on union all will probably be the simplest way:
-- Counts the rows across T1..T4 whose Date1 is at least three days old.
-- Every branch must be joined with UNION ALL (not UNION) so duplicate dates
-- are kept and each row is counted; the earlier version omitted the operator
-- between the T2, T3 and T4 branches, which is a syntax error.
SELECT COUNT(*)
FROM (
    SELECT Date1 FROM T1
    UNION ALL
    SELECT Date1 FROM T2
    UNION ALL
    SELECT Date1 FROM T3
    UNION ALL
    SELECT Date1 FROM T4
) AS t
WHERE Date1 <= DATEADD(DAY, -3, GETDATE());
If you don't know the table names in advance, you can use information_schema.columns to build the union query dynamically.
Well, you're interested in a parent object, that would be a view, then. You can reuse it for a variety of queries. Alternatively, add more columns if you need them:
-- "Parent" view: stacks the Date1 column of t1..t4 into a single column.
-- UNION ALL keeps duplicates, so every underlying row appears exactly once.
CREATE VIEW parent
AS
    SELECT Date1 FROM t1
    UNION ALL
    SELECT Date1 FROM t2
    UNION ALL
    SELECT Date1 FROM t3
    UNION ALL
    SELECT Date1 FROM t4;
And now, that can be queried in the way you want
-- Count the rows exposed by the view whose Date1 is three or more days back.
SELECT COUNT(*)
FROM parent
WHERE Date1 <= DATEADD(DAY, -3, GETDATE())
Without UNION?
Since a COUNT without a GROUP BY returns 1 value, why not use CROSS JOIN for once?
-- One single-row count per table, glued side by side with CROSS JOIN, plus
-- their sum. Each count covers rows dated before the calendar day that lies
-- three days back.
SELECT
    c1.Cnt AS [T1],
    c2.Cnt AS [T2],
    c3.Cnt AS [T3],
    c4.Cnt AS [T4],
    (c1.Cnt + c2.Cnt + c3.Cnt + c4.Cnt) AS [T1234]
FROM (
    SELECT COUNT(*) AS Cnt
    FROM T1
    WHERE [Date1] < CAST(DATEADD(DAY, -3, GETDATE()) AS DATE)
) AS c1
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T2
    WHERE [Date1] < CAST(DATEADD(DAY, -3, GETDATE()) AS DATE)
) AS c2
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T3
    WHERE [Date1] < CAST(DATEADD(DAY, -3, GETDATE()) AS DATE)
) AS c3
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T4
    WHERE [Date1] < CAST(DATEADD(DAY, -3, GETDATE()) AS DATE)
) AS c4
Or a CROSS APPLY
-- Same per-table counts, but the cutoff date is computed once in a CTE and
-- handed to each counting subquery through CROSS APPLY.
WITH cutoff AS (
    SELECT CAST(DATEADD(DAY, -3, GETDATE()) AS DATE) AS Dt
)
SELECT
    c1.Cnt AS [T1],
    c2.Cnt AS [T2],
    c3.Cnt AS [T3],
    c4.Cnt AS [T4],
    (c1.Cnt + c2.Cnt + c3.Cnt + c4.Cnt) AS [T1234]
FROM cutoff AS d
CROSS APPLY (SELECT COUNT(*) AS Cnt FROM T1 WHERE [Date1] < d.Dt) AS c1
CROSS APPLY (SELECT COUNT(*) AS Cnt FROM T2 WHERE [Date1] < d.Dt) AS c2
CROSS APPLY (SELECT COUNT(*) AS Cnt FROM T3 WHERE [Date1] < d.Dt) AS c3
CROSS APPLY (SELECT COUNT(*) AS Cnt FROM T4 WHERE [Date1] < d.Dt) AS c4
Example snippet for Sql Server:
-- Self-contained demo of the CROSS JOIN count on table variables.
-- Table variables must be declared with the '@' sigil; '#' names a temp
-- table and is not accepted by DECLARE ... TABLE.
DECLARE @T1 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T2 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T3 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T4 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);

-- @T1 gets the last seven days (today included); @T2..@T4 get the 6, 5 and 4
-- most recent of those dates.
INSERT INTO @T1 ([Date1])
VALUES (GETDATE()-6), (GETDATE()-5), (GETDATE()-4), (GETDATE()-3),
       (GETDATE()-2), (GETDATE()-1), (GETDATE()-0);
INSERT INTO @T2 ([Date1]) SELECT TOP 6 [Date1] FROM @T1 ORDER BY [Date1] DESC;
INSERT INTO @T3 ([Date1]) SELECT TOP 5 [Date1] FROM @T1 ORDER BY [Date1] DESC;
INSERT INTO @T4 ([Date1]) SELECT TOP 4 [Date1] FROM @T1 ORDER BY [Date1] DESC;

-- Per-table counts of rows dated before (today - 3 days), and their total.
SELECT
    t1.Cnt AS [T1],
    t2.Cnt AS [T2],
    t3.Cnt AS [T3],
    t4.Cnt AS [T4],
    (t1.Cnt + t2.Cnt + t3.Cnt + t4.Cnt) AS [T1234]
FROM
    (SELECT COUNT(*) AS Cnt FROM @T1 WHERE [Date1] < CAST(GETDATE()-3 AS DATE)) AS t1
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T2 WHERE [Date1] < CAST(GETDATE()-3 AS DATE)) AS t2
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T3 WHERE [Date1] < CAST(GETDATE()-3 AS DATE)) AS t3
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T4 WHERE [Date1] < CAST(GETDATE()-3 AS DATE)) AS t4
Returns:
T1 T2 T3 T4 T1234
3 2 1 0 6
Instead of creating a view, you can use a CTE (Common Table Expression). It works like a view, but it is not persisted in the database. Please try it:
-- Inline alternative to a view: the CTE stacks Date1 from all four tables
-- (duplicates kept) and exists only for this one statement.
WITH all_dates AS (
    SELECT Date1 FROM t1
    UNION ALL
    SELECT Date1 FROM t2
    UNION ALL
    SELECT Date1 FROM t3
    UNION ALL
    SELECT Date1 FROM t4
)
SELECT COUNT(*)
FROM all_dates
WHERE Date1 <= DATEADD(DAY, -3, GETDATE())
It works on SQL Server 2005 and all later versions.

Merge consecutive duplicate records including time range

I have a very similar problem to the question asked here: Merge duplicate temporal records in database
The difference here is, that I need the end date to be an actual date instead of NULL.
So given the following data:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2010/04/30 X Y
1000 2010/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/04/30 X X
1000 2014/05/01 2014/06/01 X X
The desired result is:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/06/01 X X
The proposed solution in the linked thread is this:
-- Collapses consecutive rows with identical Column1/Column2 into one row per
-- run, deriving each run's EndDate from the start of the next run.
with t1 as --tag first row with 1 in a continuous time series
(
-- tag = 0 when the same employee has a row ending the day before this row
-- starts with equal column1 and column2; otherwise (including no such row,
-- where t2 is all NULL) tag = 1.
select t1.*, case when t1.column1=t2.column1 and t1.column2=t2.column2
then 0 else 1 end as tag
from test_table t1
left join test_table t2
on t1.EmployeeId= t2.EmployeeId and dateadd(day,-1,t1.StartDate)= t2.EndDate
)
-- For every tagged row, EndDate is the day before the earliest later tagged
-- row of the same employee; when no later tagged row exists the result is
-- NULL rather than the real end date of the last run.
select t1.EmployeeId, t1.StartDate,
case when min(T2.StartDate) is null then null
else dateadd(day,-1,min(T2.StartDate)) end as EndDate,
t1.Column1, t1.Column2
from (select t1.* from t1 where tag=1 ) as t1 -- to get StartDate
left join (select t1.* from t1 where tag=1 ) as t2 -- to get a new EndDate
on t1.EmployeeId= t2.EmployeeId and t1.StartDate < t2.StartDate
group by t1.EmployeeId, t1.StartDate, t1.Column1, t1.Column2;
However, this does not seem to work when you need the end date instead of just NULL.
Could someone help me with this issue?
How about this?
-- Sample data from the question: six consecutive date ranges for one employee.
CREATE TABLE test_table (
    EmployeeId INT,
    StartDate  DATE,
    EndDate    DATE,
    Column1    CHAR(1),
    Column2    CHAR(1)
);

INSERT INTO test_table (EmployeeId, StartDate, EndDate, Column1, Column2)
VALUES
    (1000, '2009-05-01', '2010-04-30', 'X', 'Y'),
    (1000, '2010-05-01', '2011-04-30', 'X', 'Y'),
    (1000, '2011-05-01', '2012-04-30', 'X', 'X'),
    (1000, '2012-05-01', '2013-04-30', 'X', 'Y'),
    (1000, '2013-05-01', '2014-04-30', 'X', 'X'),
    (1000, '2014-05-01', '2014-06-01', 'X', 'X');
-- Merges consecutive rows (per employee, ordered by StartDate) that carry the
-- same Column1/Column2 into one row spanning the first StartDate and the last
-- EndDate of the run.
--
-- The earlier nested form left its derived tables without aliases, which
-- SQL Server rejects; each step is now a named CTE.
WITH flagged AS (
    -- DIFF = 1 on the first row of each run (values differ from the previous
    -- row, or there is no previous row), 0 on rows that continue a run.
    -- A NULL in either column never compares equal, so it always starts a run.
    SELECT
        t.EmployeeId,
        t.StartDate,
        t.EndDate,
        t.Column1,
        t.Column2,
        CASE
            WHEN t.Column1 = LAG(t.Column1, 1) OVER (PARTITION BY t.EmployeeId ORDER BY t.StartDate)
             AND t.Column2 = LAG(t.Column2, 1) OVER (PARTITION BY t.EmployeeId ORDER BY t.StartDate)
            THEN 0
            ELSE 1
        END AS DIFF
    FROM test_table AS t
),
numbered AS (
    -- Running count of run starts = run number. The explicit ROWS frame keeps
    -- the sum strictly row-by-row (the default RANGE frame would lump rows
    -- with equal StartDate together).
    SELECT
        f.EmployeeId,
        f.StartDate,
        f.EndDate,
        f.Column1,
        f.Column2,
        f.DIFF,
        SUM(f.DIFF) OVER (
            PARTITION BY f.EmployeeId
            ORDER BY f.StartDate
            ROWS UNBOUNDED PRECEDING
        ) AS RN
    FROM flagged AS f
),
spanned AS (
    -- Stamp every row with the latest EndDate of its run.
    SELECT
        n.EmployeeId,
        n.StartDate,
        MAX(n.EndDate) OVER (PARTITION BY n.EmployeeId, n.RN) AS EndDate,
        n.Column1,
        n.Column2,
        n.DIFF
    FROM numbered AS n
)
-- Keep only the first row of each run; it carries the run's StartDate.
SELECT
    EmployeeId,
    StartDate,
    EndDate,
    Column1,
    Column2
FROM spanned
WHERE DIFF = 1
;
This is another solution (taken from How do I group on continuous ranges). It is simpler to code and also caters for NULL values (i.e. treats NULL = NULL unlike the simple LAG() comparison). However it might not be quite as efficient on large volumes of data due to the GROUP BY
-- Row-number difference technique: within one employee, rows of an unbroken
-- run of equal Column1/Column2 share the same (overall position - position
-- among rows with those values), so grouping on that difference yields one
-- row per run.
WITH positioned AS (
    SELECT
        src.EmployeeId,
        src.StartDate,
        src.EndDate,
        src.Column1,
        src.Column2,
        ROW_NUMBER() OVER (PARTITION BY src.EmployeeId ORDER BY src.StartDate) AS seq_overall,
        ROW_NUMBER() OVER (PARTITION BY src.EmployeeId, src.Column1, src.Column2 ORDER BY src.StartDate) AS seq_in_values
    FROM test_table AS src
)
SELECT
    EmployeeId,
    MIN(StartDate) AS StartDate,
    MAX(EndDate) AS EndDate,
    Column1,
    Column2
FROM positioned
GROUP BY
    EmployeeId,
    Column1,
    Column2,
    seq_overall - seq_in_values

Creating a Void Function in PostgreSQL

I am getting an error on this CREATE FUNCTION code in PostgreSQL. The error says it is happening around line 2, at DELETE, but if I remove that line it happens at WITH instead, so I think it is a problem with the format of my CREATE FUNCTION statement.
-- retention_data(shopId): rebuilds the `retention` rows for one shop.
--
-- Deletes the shop's existing retention rows, then inserts one row per
-- non-empty customer email computed from that shop's orders of roughly the
-- last 365 days: latest purchase date, total sales, number of orders and
-- days since the last order.
--
-- Parameters:
--   shopId  shop whose retention data is rebuilt.
-- Returns: nothing (void).
--
-- A PL/pgSQL function body must be a block, so the statements are wrapped in
-- BEGIN ... END; without it the parser fails at the first statement.
create or replace function retention_data(shopId integer) returns void as $$
begin
    -- Start from a clean slate for this shop.
    delete from retention where shop_id = shopId;

    -- ret_grid_step1: one row per order (order, customer, email, order day)
    -- with item count and sales; when an order has several customer names or
    -- emails only the alphabetically first of each (rank 1) is kept.
    with ret_grid_step1 as (
        select * from (
            select
                order_id as order_name,
                cust_name as cust_name,
                email as email,
                date(order_date) as created_at,
                count(*) as num_items_in_order,
                sum(total_price) as sales,
                rank() over (partition by order_id order by cust_name asc) as rnk_shipping_name,
                rank() over (partition by order_id order by email asc) as rnk_email
            from orders
            where shop_id = shopId
              and order_date is not null
              and order_date > now()::date - 365
              and order_date < now()::date + 1
            group by 1,2,3,4
        ) x
        where rnk_shipping_name = 1 and rnk_email = 1
    )
    insert into retention(shop_id, cust_name, email, last_purchase_dt, total_sales, num_orders, days_since_last_order)
    select
        shopId as shop_id,
        coalesce(b.cust_name,'null') as cust_name,
        a.email,
        a.last_purchase_dt,
        total_sales,
        num_orders,
        current_date - last_purchase_dt as days_since_last_order
    from (
        -- a: per-email totals.
        select
            email,
            max(created_at) as last_purchase_dt,
            count(*) as num_orders,
            sum(sales) as total_sales
        from ret_grid_step1
        group by 1
    ) as a
    left join (
        -- b: customer name(s) per email, ranked so the most recent order
        -- day comes first.
        select
            email,
            cust_name,
            rank() over (partition by email order by created_at desc) as rnk
        from ret_grid_step1
        --where cust_name is not null
        group by 1,2,created_at
    ) as b
    on a.email = b.email
    -- Filtering on b.rnk here drops emails with no match in b, so the
    -- left join effectively acts as an inner join.
    where b.rnk = 1
      and a.email <> '';
end;
$$ language plpgsql;

TSQL- Calculation with previous record in SQL Server 2008 R2

-- Create Sample data
-- Script outline: builds two sample tables from AdventureWorks2012 and then
-- runs a self join that pairs each row with the row numbered one before it.
Use AdventureWorks2012;
IF OBJECT_ID('TempTable1', 'U') IS NOT NULL
DROP TABLE TempTable1
-- Grab data from AdventureWorks2012
-- One row per sales order detail line; JResourceNumber numbers the rows of
-- each ProductID in DueDate order.
SELECT SOD.ProductId as ResourceID,
SOH.DueDate as DueDate,
SOD.OrderQty as DayIncrement,
Row_Number() over (PARTITION BY SOD.ProductID ORDER By SOH.DueDate) as JResourceNumber
INTO TempTable1
FROM Sales.SalesOrderHeader SOH
INNER JOIN Sales.SalesOrderDetail SOD
ON SOH.SalesOrderID = SOD.SalesOrderID
ORDER by ResourceID, JResourceNumber
-- Reduce records to 2637
-- Keeps the first 10 rows per resource, numbers all kept rows with
-- JRowNumber and seeds SchedDate with the current date/time.
-- NOTE(review): JRowNumber is ordered by ResourceID only, so the order of
-- rows within one resource is not fixed by this query - confirm that is
-- acceptable for the "previous row" lookup below.
IF OBJECT_ID('TempTable', 'U') IS NOT NULL
DROP TABLE TempTable
SELECT ResourceID, DueDate, DayIncrement, Row_Number() over (ORDER By ResourceID) as JRowNumber, GETDATE() as SchedDate
INTO TempTable
FROM TempTable1
WHERE JResourceNumber <= 10
--END create sample data
--
-- Calculate a rolling available date for ResourceId = Prev.SchedDate + Curr.DayIncrement
--
-- NOTE(review): FinalTable is dropped if it exists, but the SELECT below has
-- no INTO clause, so it only returns a result set - confirm whether
-- "INTO FinalTable" was intended.
IF OBJECT_ID('FINALTABLE', 'U') IS NOT NULL
DROP TABLE FinalTable
SELECT Curr.ResourceID, Curr.DueDate, Curr.DayIncrement, Curr.JRowNumber as CurrRowNumber, Prev.JRowNumber as PrevJRowNumber,
Prev.SchedDate as ShouldBePrevSchedDate,
-- Same resource as the previous row: previous SchedDate plus this row's
-- DayIncrement in days; otherwise (first row of a resource, or no previous
-- row) the current date/time.
Case
WHEN Curr.ResourceID = Prev.ResourceID THEN DATEADD(DAY, Curr.DayIncrement, Prev.SchedDate)
ELSE GETDATE()
END AS SchedDate
-- This is the self join
FROM TempTable Curr
LEFT JOIN TempTable Prev ON Prev.JRowNumber = Curr.JRowNumber - 1

Combining multiple CTE in TSQL

I have two CTEs and I want to combine them. I have tried many variations, but I keep getting syntax errors. First part:
-- First fragment from the question: a table declaration followed by a
-- recursive CTE (the statement that would consume the CTE is not shown).
-- NOTE(review): the '#' prefixes are presumably T-SQL '@' variable sigils
-- (#Start -> @Start, etc.) mangled in transcription - confirm.
declare #TimeRanges as TABLE (SessionStart datetime, SessionEnd datetime);
-- Anchor row: the first window starts at #Start and is #TimeRange long.
-- Recursive step: shift the previous window forward by #TimeRange, as long
-- as the previous window ended before #Finish.
with TimeRanges as (
select #Start as StartTime, #Start + #TimeRange as EndTime
union all
select StartTime + #TimeRange, EndTime + #TimeRange
from TimeRanges
where EndTime < #Finish )
Here is the second part:
-- Treats every call as two events (+1 at session start, -1 at session close),
-- computes for each event the running total of all events up to that moment,
-- and returns the single event with the highest running total.
;WITH cte AS (
    SELECT SessionStartTime AS changetime, 1 AS CC
    FROM Calls
    UNION ALL
    SELECT SessionCloseTime, -1
    FROM Calls
)
SELECT TOP 1
    v.changetime,
    v.rt
FROM (
    SELECT ev.changetime, ev.CC, running.rt
    FROM cte AS ev
    CROSS APPLY (
        SELECT SUM(prior.cc) AS rt
        FROM cte AS prior
        WHERE prior.changetime <= ev.changetime
    ) AS running
) AS v
ORDER BY v.rt DESC
What I want to do:
-- The question's attempt at combining both CTEs inside a procedure body
-- (the CREATE PROCEDURE header is not shown). Kept as posted: it fails with
-- the two "Invalid ..." errors quoted right after it.
-- NOTE(review): the '#' prefixes are presumably T-SQL '@' sigils mangled in
-- transcription - confirm.
#Start datetime,
#Finish datetime,
#TimeRange time
AS
BEGIN
SET NOCOUNT ON;
declare #res int SET #res = 0
declare #TimeRanges as TABLE (SessionStart datetime, SessionEnd datetime);
-- Recursive CTE: consecutive windows of length #TimeRange starting at
-- #Start; its columns are named StartTime and EndTime.
with TimeRanges as
( select #Start as StartTime, #Start + #TimeRange as EndTime
union all
select StartTime + #TimeRange, EndTime + #TimeRange
from TimeRanges
where EndTime < #Finish ),
-- Source of "Invalid column name 'SessionStart'": this CTE reads
-- SessionStart/SessionEnd, but TimeRanges exposes StartTime/EndTime.
cte as
(
select SessionStart as changetime,1 as CC from TimeRanges
union all
select SessionEnd,-1 from TimeRanges
)
select top 1 changetime,rt from
(
select * from cte
cross apply
(select SUM(cc) as rt from cte c where c.changetime<=cte.changetime) rt
) v
order by rt desc
-- Source of "Invalid object name 'TimeRanges'": the statement below is a new
-- statement, and the CTEs declared above only exist for the SELECT that
-- directly follows the WITH clause.
select StartTime, EndTime,cte.rt
from TimeRanges as TR left outer join
dbo.Test as Test on TR.StartTime <= Test.SessionStartTime
and Test.SessionCloseTime < TR.EndTime
where Test.ScenarioID = 24
group by TR.StartTime, TR.EndTime,cte.rt
END
The first CTE splits the time between StartTime and EndTime into ranges of length @TimeRange. For example, with StartTime 11:00, EndTime 11:10 and TimeRange 00:05 (5 minutes), it produces two ranges: 11:00 - 11:05 and 11:05 - 11:10. The second CTE counts something for each of these ranges; the details are not important here. I tried to combine them, but I get these errors:
Invalid column name 'SessionStart'
Invalid object name 'TimeRanges'
Because in the TimeRanges CTE, you've named the columns differently:
-- Corrected combination of the two CTEs.
--   TimeRanges: consecutive windows of length @TimeRange, starting at @Start
--               and extended while the previous window ends before @Finish.
--   cte:        one +1 event at each window start and one -1 event at each
--               window end.
-- The final SELECT returns the event with the highest running total.
--
-- Variables use the T-SQL '@' sigil; '#' would name a temp table and is not
-- valid here. @Start, @Finish and @TimeRange must be declared by the
-- enclosing batch or procedure, and the preceding statement must end with ';'.
WITH TimeRanges AS (
    SELECT @Start AS StartTime, @Start + @TimeRange AS EndTime
    UNION ALL
    SELECT StartTime + @TimeRange, EndTime + @TimeRange
    FROM TimeRanges
    WHERE EndTime < @Finish
),
cte AS (
    -- The column names must match what TimeRanges exposes:
    -- StartTime / EndTime, not SessionStart / SessionEnd.
    SELECT StartTime AS changetime, 1 AS CC FROM TimeRanges
    UNION ALL
    SELECT EndTime, -1 FROM TimeRanges
)
SELECT TOP 1 changetime, rt
FROM (
    SELECT *
    FROM cte
    CROSS APPLY (
        SELECT SUM(cc) AS rt
        FROM cte c
        WHERE c.changetime <= cte.changetime
    ) rt
) v
ORDER BY rt DESC
But you then attempt to refer to the CTE again in your second query. You can't do that - each CTE applies to a single query.
You could repeat it:
-- Second statement with its own copy of the TimeRanges CTE (a CTE is only
-- visible to the single statement it is attached to).
--
-- Returns the distinct time windows that contain at least one dbo.Test
-- session of scenario 24 starting at or after the window start and closing
-- before the window end.
--
-- Fixes: variables use the T-SQL '@' sigil instead of '#', and the references
-- to cte.rt are removed because no CTE named cte exists in this statement.
-- NOTE(review): the WHERE filter on Test.ScenarioID discards the NULL rows
-- produced by the LEFT JOIN, so windows without a matching session are not
-- returned; move the filter into ON if they should be kept - confirm intent.
WITH TimeRanges AS (
    SELECT @Start AS StartTime, @Start + @TimeRange AS EndTime
    UNION ALL
    SELECT StartTime + @TimeRange, EndTime + @TimeRange
    FROM TimeRanges
    WHERE EndTime < @Finish
)
SELECT
    TR.StartTime,
    TR.EndTime
FROM TimeRanges AS TR
LEFT OUTER JOIN dbo.Test AS Test
    ON TR.StartTime <= Test.SessionStartTime
   AND Test.SessionCloseTime < TR.EndTime
WHERE Test.ScenarioID = 24
GROUP BY TR.StartTime, TR.EndTime