Grouping on a column which has multiple Groups of Duplicate data - tsql

I need to group some data based on dates at locations including identifying when a range of dates has no location. I'm some of the way there in that I have managed to produce a list of ALL the dates in the range and the location.
date1 location1
date2 location1
date3 location1
date4 Unknown
date5 Unknown
date6 Unknown
date7 Location2
date8 Location2
date9 Location2
date10 Location2
date11 location1
date12 location1
date13 location1
using a normal group by (showing min(date) and max(date) I would get something like:
Location1,date1,date13
Location2,date7,date10
Unknown, date4,date6
But I want this:
Location1,date1,date3
Unknown,date4,date6
Location2,date7,date9
Location1,date11,date13
I also need to filter out short ranges of Unknown but that's secondary.
I hope this makes sense, it looks like something that should be really easy.

Take a look into the Islands and Gaps problem and Itzik Ben-gan. There is a set based way to get the results you want.
I was looking into using ROW_NUMBER or RANK, but then I stumbled upon LAG and LEAD (introduced in SQL 2012) which are nice. I've got the solution below. It could definitely be simplified, but having it as several CTEs makes my thought process (as flawed as it may be) easier to see. I just slowly transform the data into what I want. Uncomment one select at a time if you want to see what each new CTE produces.
create table Junk
(aDate Datetime,
aLocation varchar(32))
insert into Junk values
('2000', 'Location1'),
('2001', 'Location1'),
('2002', 'Location1'),
('2004', 'Unknown'),
('2005', 'Unknown'),
('2006', 'Unknown'),
('2007', 'Location2'),
('2008', 'Location2'),
('2009', 'Location2'),
('2010', 'Location2'),
('2011', 'Location1'),
('2012', 'Location1'),
('2013', 'Location1'),
('2014', 'Location3')
;WITH StartsMiddlesAndEnds AS
(
select
aLocation,
aDate,
CASE(LAG(aLocation) OVER (ORDER BY aDate, aLocation)) WHEN aLocation THEN 0 ELSE 1 END [isStart],
CASE(LEAD(aLocation) OVER (ORDER BY aDate, aLocation)) WHEN aLocation THEN 0 ELSE 1 END [isEnd]
from Junk
)
--select * from NumberedStartsMiddlesAndEnds
,NumberedStartsAndEnds AS --let's get rid of the rows that are in the middle of consecutive date groups
(
select
aLocation,
aDate,
isStart,
isEnd,
ROW_NUMBER() OVER(ORDER BY aDate, aLocation) i
FROM StartsMiddlesAndEnds
WHERE NOT(isStart = 0 AND isEnd = 0) --it is a middle row
)
--select * from NumberedStartsAndEnds
,CombinedStartAndEnds AS --now let's put the start and end dates in the same row
(
select
rangeStart.aLocation,
rangeStart.aDate [aStart],
rangeEnd.aDate [aEnd]
FROM NumberedStartsAndEnds rangeStart
join NumberedStartsAndEnds rangeEnd ON rangeStart.aLocation = rangeEnd.aLocation
WHERE rangeStart.i = rangeEnd.i - 1 --consecutive rows
and rangeStart.isStart = 1
and rangeEnd.isEnd = 1
)
--select * from CombinedStartAndEnds
,OneDateIntervals AS --don't forget the cases where a single row is both a start and end
(
select
aLocation,
aDate [aStart],
aDate [aEnd]
FROM NumberedStartsAndEnds
WHERE isStart = 1 and isEnd = 1
)
--select * from OneDateIntervals
select aLocation, DATEPART(YEAR, aStart) [start], DATEPART(YEAR, aEnd) [end] from OneDateIntervals
UNION
select aLocation, DATEPART(YEAR, aStart) [start], DATEPART(YEAR, aEnd) [end] from CombinedStartAndEnds
ORDER BY DATEPART(YEAR, aStart)
and it produces
aLocation start end
Location1 2000 2002
Unknown 2004 2006
Location2 2007 2010
Location1 2011 2013
Location3 2014 2014
Don't have 2012? Then you can still get the same StartsMiddlesAndEnds CTE using ROW_NUMBER:
;WITH NumberedRows AS
(
SELECT aLocation, aDate, ROW_NUMBER() OVER (ORDER BY aDate, aLocation) [i] FROM Junk
)
,StartsMiddlesAndEnds AS
(
select
currentRow.aLocation,
currentRow.aDate,
CASE upperRow.aLocation WHEN currentRow.aLocation THEN 0 ELSE 1 END [isStart],
CASE lowerRow.aLocation WHEN currentRow.aLocation THEN 0 ELSE 1 END [isEnd]
from
NumberedRows currentRow
left outer join NumberedRows upperRow on upperRow.i = currentRow.i-1
left outer join NumberedRows lowerRow on lowerRow.i = currentRow.i+1
)
--select * from StartsMiddlesAndEnds

Related

How to find the First, Second, Third and Fourth Saturday in month?

How do I find the first, second, third and fourth saturday of the month?
Ex.: I want to end up with this format...
Blockquote
YYYY, MM, Week#1
Blockquote
YYYY, MM, Week#2
Blockquote
Thanks,
This solution does not depend on Datefirst setting.
declare #d datetime = getdate();
select
dateadd(dd, n, firstSaturday)
from (
select
firstSaturday = dateadd(day, 7-(##datefirst+datepart(weekday, dateadd(day,-1, convert(char(6),#d,112)+'01')))%7, dateadd(day,-1, convert(char(6),#d,112)+'01'))
) t
cross apply (values (0), (7), (14), (21)) q(n)
Here is one way to do it, using a stacked cte to create an inline tally table, with another cte on top of that to generate the months calendar.
Please note that you can change the GETDATE() in the first row code to any date you want (even if it's in the middle of the month) and the code will produce all the Saturdays in that month.
-- Get the current month's start date
DECLARE #MonthStart datetime = DATEADD(MONTH, (DATEDIFF(MONTH, 0, GETDATE())), 0)
;WITH lv0 AS (SELECT 0 g UNION ALL SELECT 0)
,lv1 AS (SELECT 0 g FROM lv0 a CROSS JOIN lv0 b) -- 4
,lv2 AS (SELECT 0 g FROM lv1 a CROSS JOIN lv1 b) -- 16
,lv3 AS (SELECT 0 g FROM lv2 a CROSS JOIN lv2 b) -- 256
,Tally (n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM lv3)
-- gets all the dates in the current month
,CurrentMonth AS (SELECT TOP (32) dateadd(day, n-1, #MonthStart) As TheDate
FROM Tally
WHERE MONTH(dateadd(day, n-1, #MonthStart)) = MONTH(#MonthStart)
ORDER BY n)
-- gets all the Saturday dates of the current month
SELECT TheDate, DATENAME(WEEKDAY, TheDate)
FROM CurrentMonth
WHERE DATEPART(WEEKDAY, TheDate) = 7 -- Depending on server settings!
If you already have a numbers table, you can use it instead of the stacked cte. If you don't know what is a numbers table and why you should have one, read The "Numbers" or "Tally" Table: What it is and how it replaces a loop by Jeff Moden
You can try this.
SET DATEFIRST 1
DECLARE #MonthId INT = 5
DECLARE #FirstDayOfTheMonth DATE = CONCAT(YEAR(GETDATE()), RIGHT(CONCAT('00', #MonthId),2), '01')
DECLARE #SaturdayId INT = 6
SELECT
DATEADD(DAY, #SaturdayId + WK.ID - DATEPART(WEEKDAY, #FirstDayOfTheMonth), #FirstDayOfTheMonth)
FROM ( VALUES(0),(7),(14),(21),(28)) AS WK(ID)
WHERE
MONTH(DATEADD(DAY, #SaturdayId + WK.ID- DATEPART(WEEKDAY, #FirstDayOfTheMonth),#FirstDayOfTheMonth)) = #MonthId

Adding rows to SQL query result

I have a custom query in my Java application that looks like that:
select
to_char(search.timestamp,'Mon') as mon,
COUNT(DISTINCT(search.ip_address))
from
searches
WHERE
searches.city = 1
group by 1;
which should return all months that occur within the database, and number of distinct IP addresses within each month. However, at this point, some months do not have any entries, and they are missing in the SQL query result. How can I make sure that all of the months are displayed there, even if their count is 0?
Got it working with:
select
to_char (gs.m,'Mon') as mon,
count (distinct search.ip_address)
from
generate_series (
date_trunc('month', current_date - interval '11 month'),
current_date,
'1 month'
) gs (m)
left join searches
on date_trunc('month', search.timestamp) = gs.m AND search.city = 1
group by gs.m
order by gs.m;
select
to_char (gs.m,'Mon') as mon,
count (distinct(search.ip_address))
from
searches
right join
generate_series (
date_trunc('month', current_date - interval '1 year'),
current_date,
'1 month'
) gs (m) on date_trunc('month', search.timestamp) = gs.m
where searches.city = 1
group by gs.m
order by gs.m;
Something like this (untested):
select
months.mon
, COUNT(DISTINCT(searchs.ip_address))
from
(select
to_char(searches.timestamp,'Mon') as mon
from
searches
group by 1
) months
left join searches
on to_char(searchs.timestamp,'Mon') = months.mon
and searches.city = 1
group by 1;
And if you wanted the years in there, too, try something like this (untested):
select
months.mon
, COUNT(DISTINCT(searchs.ip_address))
from
(select
extract(year from searches.timestamp) as yr
, to_char(searches.timestamp,'Mon') as mon
, to_char(yr,'9999') || mon yrmon
from
searches
group by 1
) months
left join searches
on to_char(extract(year from searches.timestamp),'9999' ||
to_char(searchs.timestamp,'Mon') = months.yrmon
and searches.city = 1
group by 1;

Last Working Day is showing null while on weekend

Here is my code but its showing null while today is friday. But I would like to get last working day.
-- Insert statements for procedure here
--Below is the param you would pass
DECLARE #dateToEvaluate date=GETDATE();
--Routine
DECLARE #startDate date=CAST('1/1/'+CAST(YEAR(#dateToEvaluate) AS char(4)) AS date); -- let's get the first of the year
WITH
tally(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1 FROM sys.all_columns),
dates AS (
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS dt_id,
DATEADD(DAY,n,#startDate) AS dt,
DATENAME(WEEKDAY,DATEADD(DAY,n,#startdate)) AS dt_name
FROM tally
WHERE n<366 --arbitrary
AND DATEPART(WEEKDAY,DATEADD(DAY,n,#startDate)) NOT IN (6)
AND DATEADD(DAY,n,#startDate) NOT IN (SELECT CAST(HolidayDate AS date) FROM Holiday)),
curr_id(id) AS (SELECT dt_id FROM dates WHERE dt=#dateToEvaluate)
SELECT d.dt
FROM dates AS d
CROSS JOIN
curr_id c
WHERE d.dt_id+1=c.id
The code below will take any date and "walk backward" to find the previous week day (M-F) which is not in the #holidays table.
declare #currentdate datetime = '2015-03-22'
declare #holidays table (holiday datetime)
insert #holidays values ('2015-03-20')
;with cte as (
select
#currentdate k
union all
select
dateadd(day, -1, k)
from cte
where
k = #currentdate
or ((datepart(dw, k) + ##DATEFIRST - 1 - 1) % 7) + 1 > 5 --determine day of week independent of culture
or k in (select holiday from #holidays)
)
select min(k) from cte
The dates table doesn't have any FRIDAY dates in it. Change the NOT IN (6) to NOT IN (1, 7). This will remove Saturday and Sundays from the dates table.

Get First and Last Day of Any Year

I'm currently trying to get the first and last day of any year. I have data from 1950 and I want to get the first day of the year in the dataset to the last day of the year in the dataset (note that the last day of the year might not be December 31rst and same with the first day of the year).
Initially I thought I could use a CTE and call DATEPART with the day of the year selection, but this wouldn't partition appropriately. I also tried a CTE self-join, but since the last day or first day of the year might be different, this also yields inaccurate results.
For instance, using the below actually generates some MINs in the MAX and vice versa, though in theory it should only grab the MAX date for the year and the MIN date for the year:
;WITH CT AS(
SELECT Points
, Date
, DATEPART(DY,Date) DA
FROM Table
WHERE DATEPART(DY,Date) BETWEEN 363 AND 366
OR DATEPART(DY,Date) BETWEEN 1 AND 3
)
SELECT MIN(c.Date) MinYear
, MAX(c.Date) MaxYear
FROM CT c
GROUP BY YEAR(c.Date)
You want something like this for the first day of the year:
dateadd(year, datediff(year,0, c.Date), 0)
and this for the last day of the year:
--first day of next year -1
dateadd(day, -1, dateadd(year, datediff(year,0, c.Date) + 1, 0)
try this
for getting first day ,last day of the year && firstofthe next_year
SELECT
DATEADD(yy, DATEDIFF(yy,0,getdate()), 0) AS Start_Of_Year,
dateadd(yy, datediff(yy,-1, getdate()), -1) AS Last_Day_Of_Year,
DATEADD(yy, DATEDIFF(yy,0,getdate()) + 1, 0) AS FirstOf_the_NextYear
so putting this in your query
;WITH CT AS(
SELECT Points
, Date
, DATEPART(DY,Date) DA
FROM Table
WHERE DATEPART(DY,Date) BETWEEN
DATEPART(day,DATEADD(yy, DATEDIFF(yy,0,getdate()), 0)) AND
DATEPART(day,dateadd(yy, datediff(yy,-1, getdate()), -1))
)
SELECT MIN(c.Date) MinYear
, MAX(c.Date) MaxYear
FROM CT c
GROUP BY YEAR(c.Date)
I should refrain from developing in the evenings because I solved it, and it's actually quite simple:
SELECT MIN(Date)
, MAX(Date)
FROM Table
GROUP BY YEAR(Date)
I can put these values into a CTE and then JOIN on the dates and get what I need:
;WITH CT AS(
SELECT MIN(Date) Mi
, MAX(Date) Ma
FROM Table
GROUP BY YEAR(Date)
)
SELECT c.Mi
, m.Points
, c.Ma
, f.Points
FROM CT c
INNER JOIN Table m ON c.Mi = m.Date
INNER JOIN Table f ON c.Ma = f.Date

How do you get positive and negative rows based on date

I'm trying to get ordinance by the current date from a table. this query does what I want but it seems overkill:
WITH dates
AS (SELECT Month,
FQ,
FY,
MonthDisplay,
CAST (datepart(yyyy, [Month]) AS VARCHAR) + '-' + RIGHT(CAST ((datepart(MM, [Month]) + 100) AS VARCHAR), 2) AS YM,
fh,
LEFT(CONVERT (VARCHAR, [Month], 100), 3) + ' ' + RIGHT(fy, 4) AS MY,
LEFT(CONVERT (VARCHAR, [Month], 100), 3) AS ShortMonthName
FROM Pipeline.DimTime AS dt),
datesafter
AS (SELECT dt.FH,
dt.FQ,
dt.FY,
dt.MY,
dt.Month,
dt.MonthDisplay,
dt.ShortMonthName,
dt.YM,
ROW_NUMBER() OVER ( ORDER BY [Month]) AS RowNum
FROM dates AS dt
WHERE dt.[Month] >= (SELECT TOP 1 DATEADD(MONTH, DATEDIFF(MONTH, 0, ds.SnapshotDate), 0)
FROM dbo.vw_DimSnapshot AS ds
WHERE ds.SnapshotWeek = 'Current')),
datesbefore
AS (SELECT dt.FH,
dt.FQ,
dt.FY,
dt.MY,
dt.Month,
dt.MonthDisplay,
dt.ShortMonthName,
dt.YM,
(ROW_NUMBER() OVER ( ORDER BY [Month] DESC)) * -1 AS RowNum
FROM dates AS dt
WHERE dt.[Month] < (SELECT TOP 1 DATEADD(MONTH, DATEDIFF(MONTH, 0, ds.SnapshotDate), 0)
FROM dbo.vw_DimSnapshot AS ds
WHERE ds.SnapshotWeek = 'Current'))
SELECT *
FROM datesafter
UNION ALL
SELECT *
FROM datesbefore
ORDER BY [month];
I think you can do it in a single query by using datediff. Pass current date as one parameter and the table date as another. This will work if you are ok with skipping missing dates. For example if current date is Nov 21, then Nov 20 will show up as -1 and Nov 18 will show up as -3 even if Nov 19 is missing in the data. I am not sure what your business requirement is, so cannot comment beyond that.
And by the way if you are looking for ordinance based on months instead of day, you can use still use datediff but use the correct datepart (See: http://msdn.microsoft.com/en-us/library/ms189794.aspx).
HTH.
-Tabrez