Parse Text File, Group rows based on Text string marker - tsql

I am importing a large text file that consists of several 'reports'. Each report consists of several rows of data. The only way I know when a new report starts is the line starts with "XX". Then all rows following belong to that master row with XX. I am trying to put in a grouping ID so that I can work with the data and parse it into the database.
CREATE TABLE RawData(
ID int IDENTITY(1,1) NOT NULL
,Grp1 int NULL
,Grp2 int NULL
,Rowdata varchar(max) NULL
)
INSERT INTO RawData(Rowdata) VALUES 'XX Monday'
INSERT INTO RawData(Rowdata) VALUES 'Tues day'
INSERT INTO RawData(Rowdata) VALUES 'We d ne s day'
INSERT INTO RawData(Rowdata) VALUES 'Thurs day'
INSERT INTO RawData(Rowdata) VALUES 'F r i d day'
INSERT INTO RawData(Rowdata) VALUES 'XX January'
INSERT INTO RawData(Rowdata) VALUES 'Feb r u a'
INSERT INTO RawData(Rowdata) VALUES 'XX Sun d a y'
INSERT INTO RawData(Rowdata) VALUES 'Sat ur day'
I need to write a script that will update the Grp1 field based on where the "XX" line is at. When I am finished I'd like the table to look like this:
ID Grp1 Grp2 RowData
1 1 1 XX Monday
2 1 2 Tues day
3 1 3 We d ne s day
4 1 4 Thurs day
5 1 5 F r i d day
6 2 1 XX January
7 2 2 Feb r u a
8 3 1 XX Sun d a y
9 3 2 Sat ur day
I know for Grp2 field I can use the DENSE_RANK. The issue I am having is how do I fill in all the values for Grp1. I can do an update where I see the 'XX', but that does not fill in the values below.
Thank you for any advise/help.

This should do the trick
-- sample data
DECLARE #RawData TABLE
(
ID int IDENTITY(1,1) NOT NULL
,Grp1 int NULL
,Grp2 int NULL
,Rowdata varchar(max) NULL
);
INSERT INTO #RawData(Rowdata)
VALUES ('XX Monday'),('Tues day'),('We d ne s day'),('Thurs day'),('F r i d day'),
('XX January'),('Feb r u a'),('XX Sun d a y'),('Sat ur day');
-- solution
WITH rr AS
(
SELECT ID, thisVal = ROW_NUMBER() OVER (ORDER BY ID)
FROM #rawData
WHERE RowData LIKE 'XX %'
),
makeGrp1 AS
(
SELECT
ID,
Grp1 = (SELECT MAX(thisVal) FROM rr WHERE r.id >= rr.id),
RowData
FROM #rawData r
)
SELECT
ID,
Grp1,
Grp2 = ROW_NUMBER() OVER (PARTITION BY Grp1 ORDER BY ID),
RowData
FROM makeGrp1;
UPDATE: below is the code to update you #RawData table; I just re-read the requirement. I'm leaving the original solution as it will help you bbetter understand how my update works:
-- sample data
DECLARE #RawData TABLE
(
ID int IDENTITY(1,1) NOT NULL
,Grp1 int NULL
,Grp2 int NULL
,Rowdata varchar(max) NULL
);
INSERT INTO #RawData(Rowdata)
VALUES ('XX Monday'),('Tues day'),('We d ne s day'),('Thurs day'),('F r i d day'),
('XX January'),('Feb r u a'),('XX Sun d a y'),('Sat ur day');
-- Solution to update the #RawData Table
WITH rr AS
(
SELECT ID, thisVal = ROW_NUMBER() OVER (ORDER BY ID)
FROM #rawData
WHERE RowData LIKE 'XX %'
),
makeGroups AS
(
SELECT
ID,
Grp1 = (SELECT MAX(thisVal) FROM rr WHERE r.id >= rr.id),
Grp2 = ROW_NUMBER()
OVER (PARTITION BY (SELECT MAX(thisVal) FROM rr WHERE r.id >= rr.id) ORDER BY ID)
FROM #rawData r
)
UPDATE #RawData
SET Grp1 = mg.Grp1, Grp2 = mg.Grp2
FROM makeGroups mg
JOIN #RawData rd ON mg.ID = rd.ID;

;with cte0 as (
Select *,Flag = case when RowData like 'XX%' then 1 else 0 end
From RawData )
Update RawData
Set Grp1 = B.Grp1
,Grp2 = B.Grp2
From RawData U
Join (
Select ID
,Grp1 = Sum(Flag) over (Order by ID)
,Grp2 = Row_Number() over (Partition By (Select Sum(Flag) From cte0 Where ID<=a.ID) Order by ID)
From cte0 A
) B on U.ID=B.ID
Select * from RawData
The Updated RawData looks like this

Related

Capture First Character of Last Group of 1s in a Binary Series Part II: Multiple IDs

I have data something like this:
ID 1 1 1 1 1 1 1 1 1 1 1 1
Month J F M A M J J A S O N D
Status 1 0 0 1 0 1 0 0 1 1 1 1
ID 2 2 2 2 2 2 2 2 2 2 2 2
Month J F M A M J J A S O N D
Status 1 0 1 0 1 0 1 0 1 0 1 1
ID 3 3 3 3 3 3 3 3 3 3 3 3
Month J F M A M J J A S O N D
Status 0 0 0 0 0 0 0 0 0 0 0 1
Using t-SQL, I am trying to capture the month corresponding to the first STATUS = 1 in the last group of 1s for each ID, i.e., September, November and December in this example.
Here is the code I'm using:
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL DROP TABLE #Temp1
;WITH PARTITIONED1 AS
(SELECT t0.ID
, t0.Year_Month
, LAST_VALUE(t0.Year_Month) OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS STATUS
, ROW_NUMBER() OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS rn1
FROM #Temp0 t0
)
SELECT *
INTO #Temp1
FROM PARTITIONED1 p1
ORDER BY t0.ID
, t0.Year_Month
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
SELECT *
INTO #Temp
FROM #Temp1 t1
WHERE t1.rn1 = (SELECT MAX(b.rn1) + 1 FROM #Temp1 b WHERE b.STATUS = 0)
GROUP BY t1.ID
, t1.Year_Month
, t1.rn1
However, this just returns the last instance where STATUS = 1 is achieved overall as the first 1 of the last group of 1s, in this case January.
I've tried using CASE statements and grouping in various combinations (hence the intermediate step reading the data into #Temp1), but have not been able to get results for all three IDs; is anyone able to assist?
Thanks in advance!
Assuming Ju for June and Jl for July:
--Sample Data
IF OBJECT_ID('tempdb..#Temp0') IS NOT NULL DROP TABLE #Temp0
CREATE TABLE #Temp0 (ID INT, Year_Month VARCHAR(1), Status INT)
INSERT INTO #Temp0
VALUES(1,'J',1),(1,'F',0),(1,'M',0),(1,'A',1),(1,'M',0),(1,'J',1),(1,'J',0),(1,'A',0),(1,'S',1),(1,'O',1),(1,'N',1),(1,'D',1),(2,'J',1),(2,'F',0),(2,'M',1),(2,'A',0),(2,'M',1),(2,'J',0),(2,'J',1),(2,'A',0),(2,'S',1),(2,'O',0),(2,'N',1),(2,'D',1),(3,'J',0),(3,'F',0),(3,'M',0),(3,'A',0),(3,'M',0),(3,'J',0),(3,'J',0),(3,'A',0),(3,'S',0),(3,'O',0),(3,'N',0),(3,'D',1);
--Query
WITH A
AS ( SELECT *,
CASE Year_Month
WHEN 'J' THEN 1
WHEN 'F' THEN 2
WHEN 'M' THEN 3
WHEN 'A' THEN 4
WHEN 'M' THEN 5
WHEN 'Ju' THEN 6
WHEN 'Jl' THEN 7
WHEN 'A' THEN 8
WHEN 'S' THEN 9
WHEN 'O' THEN 10
WHEN 'N' THEN 11
WHEN 'D' THEN 12
END
AS MonthNumber
FROM #Temp0 ),
StartingPoints
AS ( SELECT ID,
Year_Month,
MonthNumber,
Status
FROM A
WHERE NOT EXISTS
(
SELECT 1
FROM A
AS B
WHERE B.ID=A.ID
AND B.Status=A.Status-1
) ),
MonthRanking
AS ( SELECT A.*,
ROW_NUMBER( ) OVER( PARTITION BY A.ID ORDER BY A.MonthNumber )
AS rownum
FROM A
INNER JOIN
(
SELECT ID,
MAX( MonthNumber )+1
AS StartOfLastGroup
FROM StartingPoints
GROUP BY ID
)
AS B
ON A.ID=B.ID
AND A.MonthNumber>=B.StartOfLastGroup )
SELECT *
FROM MonthRanking
WHERE rownum=1;
Results:
If Month Names are recorded in Full as in July, June then this would work as well:
WITH StartingPoints
AS (SELECT ID,
Year_Month,
MonthNUmber = MONTH('01-'+Year_Month+'-2010'),
Status
FROM #Temp0
WHERE NOT EXISTS
(
SELECT 1
FROM #Temp0 AS B
WHERE B.ID = #Temp0.ID
AND B.Status = #Temp0.Status - 1
)),
MonthRanking
AS (SELECT A.*,
ROW_NUMBER() OVER(PARTITION BY A.ID ORDER BY MONTH('01-'+A.Year_Month+'-2010')) AS rownum
FROM #Temp0 AS A
INNER JOIN
(
SELECT ID,
MAX(MonthNumber) + 1 AS StartOfLastGroup
FROM StartingPoints
GROUP BY ID
) AS B ON A.ID = B.ID
AND MONTH('01-'+A.Year_Month+'-2010') >= B.StartOfLastGroup)
SELECT *
FROM MonthRanking
WHERE rownum = 1;
Results:
And if we assume that the data is as Iamdave assumes then it simply like so:
WITH StartingPoints
AS (SELECT ID,
Year_Month,
Status
FROM #Temp0
WHERE NOT EXISTS
(
SELECT 1
FROM #Temp0 AS B
WHERE B.ID = #Temp0.ID
AND B.Status = #Temp0.Status - 1
)),
MonthRanking
AS (SELECT A.*,
ROW_NUMBER() OVER(PARTITION BY A.ID ORDER BY Year_Month) AS rownum
FROM #Temp0 AS A
INNER JOIN
(
SELECT ID,
MAX(Year_Month) + 1 AS StartOfLastGroup
FROM StartingPoints
GROUP BY ID
) AS B ON A.ID = B.ID
AND A.Year_Month >= B.StartOfLastGroup)
SELECT *
FROM MonthRanking
WHERE rownum = 1;
Results:
You can do this with a couple derived tables that stack two window functions on top of one another (which can't be done in the same select). I have assumed that your data is slightly different to the table you have provided, based on the column names in your query. If they are not as I have them below, I strongly recommend having a look at how you store your data:
declare #t table(ID int, YearMonth int,StatusValue bit);
insert into #t values (1,201501,1),(1,201502,0),(1,201503,0),(1,201504,1),(1,201505,0),(1,201506,1),(1,201507,0),(1,201508,0),(1,201509,1),(1,201510,1),(1,201511,1),(1,201512,1),(2,201601,1),(2,201602,0),(2,201603,1),(2,201604,0),(2,201605,1),(2,201606,0),(2,201607,1),(2,201608,0),(2,201609,1),(2,201610,0),(2,201611,1),(2,201612,1),(3,201701,0),(3,201702,0),(3,201703,0),(3,201704,0),(3,201705,0),(3,201706,0),(3,201707,0),(3,201708,0),(3,201709,0),(3,201710,0),(3,201711,0),(3,201712,1);
with c as
(
select ID
,YearMonth
,StatusValue
,case when StatusValue = 1
and lead(StatusValue,1,1) over (partition by ID
order by YearMonth desc) = 0
then 1
else 0
end as c
from #t
), sc as
(
select ID
,YearMonth
,StatusValue
,sum(c) over (partition by ID order by YearMonth desc) as sc
from c
where c = 1
)
select ID
,YearMonth
,StatusValue
from sc
where sc = 1
order by ID;
Output:
+----+-----------+-------------+
| ID | YearMonth | StatusValue |
+----+-----------+-------------+
| 1 | 201509 | 1 |
| 2 | 201611 | 1 |
| 3 | 201712 | 1 |
+----+-----------+-------------+

Cumulative sum with group by and join

I'm a little struggled with finding a clean way to do this. Assume that I have the following records in my table named Records:
|Name| |InsertDate| |Size|
john 30.06.2015 1
john 10.01.2016 10
john 12.01.2016 100
john 05.03.2016 1000
doe 01.01.2016 1
How do I get the records for year of 2016 and month is equal to or less than 3 grouped by month(even that month does not exists e.g. month 2 in this case) with cumulative sum of Size including that month? I want to get the result as the following:
|Name| |Month| |Size|
john 1 111
john 2 111
john 3 1111
doe 1 1
As other commenters have already stated, you simply need a table with dates in that you can join from to give you the dates that your source table does not have records for:
-- Build the source data table.
declare #t table(Name nvarchar(10)
,InsertDate date
,Size int
);
insert into #t values
('john','20150630',1 )
,('john','20160110',10 )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1 );
-- Specify the year you want to search for by storing the first day here.
declare #year date = '20160101';
-- This derived table builds a set of dates that you can join from.
-- LEFT JOINing from here is what gives you rows for months without records in your source data.
with Dates
as
(
select #year as MonthStart
,dateadd(day,-1,dateadd(month,1,#year)) as MonthEnd
union all
select dateadd(month,1,MonthStart)
,dateadd(day,-1,dateadd(month,2,MonthStart))
from Dates
where dateadd(month,1,MonthStart) < dateadd(yyyy,1,#year)
)
select t.Name
,d.MonthStart
,sum(t.Size) as Size
from Dates d
left join #t t
on(t.InsertDate <= d.MonthEnd)
where d.MonthStart <= '20160301' -- Without knowing what your logic is for specifying values only up to March, I have left this part for you to automate.
group by t.Name
,d.MonthStart
order by t.Name
,d.MonthStart;
If you have a static date reference table in your database, you don't need to do the derived table creation and can just do:
select d.DateValue
,<Other columns>
from DatesReferenceTable d
left join <Other Tables> o
on(d.DateValue = o.AnyDateColumn)
etc
Here's another approach that utilizes a tally table (aka numbers table) to create the date table. Note my comments.
-- Build the source data table.
declare #t table(Name nvarchar(10), InsertDate date, Size int);
insert into #t values
('john','20150630',1 )
,('john','20160110',10 )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1 );
-- A year is fine, don't need a date data type
declare #year smallint = 2016;
WITH -- dummy rows for a tally table:
E AS (SELECT E FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(e)),
dateRange(totalDays, mn, mx) AS -- Get the range and number of months to create
(
SELECT DATEDIFF(MONTH, MIN(InsertDate), MAX(InsertDate)), MIN(InsertDate), MAX(InsertDate)
FROM #t
),
iTally(N) AS -- Tally Oh! Create an inline Tally (aka numbers) table starting with 0
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1))-1
FROM E a CROSS JOIN E b CROSS JOIN E c CROSS JOIN E d
),
RunningTotal AS -- perform a running total by year/month for each person (Name)
(
SELECT
yr = YEAR(DATEADD(MONTH, n, mn)),
mo = MONTH(DATEADD(MONTH, n, mn)),
Name,
Size = SUM(Size) OVER
(PARTITION BY Name ORDER BY YEAR(DATEADD(MONTH, n, mn)), MONTH(DATEADD(MONTH, n, mn)))
FROM iTally
CROSS JOIN dateRange
LEFT JOIN #t ON MONTH(InsertDate) = MONTH(DATEADD(MONTH, n, mn))
WHERE N <= totalDays
) -- Final output will only return rows where the year matches #year:
SELECT
name = ISNULL(name, LAG(Name, 1) OVER (ORDER BY yr, mo)),
yr, mo,
size = ISNULL(Size, LAG(Size, 1) OVER (ORDER BY yr, mo))
FROM RunningTotal
WHERE yr = #year
GROUP BY yr, mo, name, size;
Results:
name yr mo size
---------- ----------- ----------- -----------
doe 2016 1 1
john 2016 1 111
john 2016 2 111
john 2016 3 1111

how to do dead reckoning on column of table, postgresql

I have a table looks like,
x y
1 2
2 null
3 null
1 null
11 null
I want to fill the null value by conducting a rolling
function to apply y_{i+1}=y_{i}+x_{i+1} with sql as simple as possible (inplace)
so the expected result
x y
1 2
2 4
3 7
1 8
11 19
implement in postgresql. I may encapsulate it in a window function, but the implementation of custom function seems always complex
WITH RECURSIVE t AS (
select x, y, 1 as rank from my_table where y is not null
UNION ALL
SELECT A.x, A.x+ t.y y , t.rank + 1 rank FROM t
inner join
(select row_number() over () rank, x, y from my_table ) A
on t.rank+1 = A.rank
)
SELECT x,y FROM t;
You can iterate over rows using a recursive CTE. But in order to do so, you need a way to jump from row to row. Here's an example using an ID column:
; with recursive cte as
(
select id
, y
from Table1
where id = 1
union all
select cur.id
, prev.y + cur.x
from Table1 cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
You can see the query at SQL Fiddle. If you don't have an ID column, but you do have another way to order the rows, you can use row_number() to get an ID:
; with recursive sorted as
(
-- Specify your ordering here. This example sorts by the dt column.
select row_number() over (order by dt) as id
, *
from Table1
)
, cte as
(
select id
, y
from sorted
where id = 1
union all
select cur.id
, prev.y + cur.x
from sorted cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
Here's the SQL Fiddle link.

TSQL - COUNT number of rows in a different state than current row

It's kind of hard to explain, but from this example it should be clear.
Table TABLE:
Name State Time
--------------------
A 1 1/4/2012
B 0 1/3/2012
C 0 1/2/2012
D 1 1/1/2012
Would like to
select * from TABLE where state=1 order by Time desc
plus an additional column 'Skipped' containing the number of rows after one where state=1 in state 0, in other words the output should look like this:
Name State Time Skipped
A 1 1/4/2012 2 -- 2 rows after A where State != 1
D 1 1/1/2012 0 -- 0 rows after D where State != 1
0 should also be reported in case of 2 consecutive rows are in state = 1, i.e. there is nothing between these rows in a state other than 1.
It seems like CTE are must here, but can't figure out how to count rows where state != 1.
Any help will be appreciated.
(MS Sql Server 2008)
I've used a CTE to establish RowNo, so that you're not dependent on consecutive dates:
WITH CTE_Rows as
(
select name,state,time,
rowno = ROW_NUMBER() over (order by [time])
from MyTable
)
select name,state,time,
gap = isnull(r.rowno - x.rowno - 1,0)
from
CTE_Rows r
outer apply (
select top 1 rowno
from CTE_Rows sub
where sub.rowno < r.rowno and sub.state = 1
order by sub.rowno desc) x
where r.state = 1
If you just want to do it by date, then its simpler - just need an outer apply:
select name,state,r.time,
gap = convert(int,isnull(r.time - x.time - 1,0))
from
MyTable r
outer apply (
select top 1 time
from MyTable sub
where sub.time < r.time and sub.state = 1
order by sub.time desc) x
where r.state = 1
FYI the test data is used was created as follows:
create table MyTable
(Name char(1), [state] tinyint, [Time] datetime)
insert MyTable
values
('E',1,'2012-01-05'),
('A',1,'2012-01-04'),
('B',0,'2012-01-03'),
('C',0,'2012-01-02'),
('D',1,'2012-01-01')
Okay, here you go (it gets a little messy):
SELECT U.CurrentTime,
(SELECT COUNT(*)
FROM StateTable AS T3
WHERE T3.State=0
AND T3.Time BETWEEN U.LastTime AND U.CurrentTime) AS Skipped
FROM (SELECT T1.Time AS CurrentTime,
(SELECT TOP 1 T2.Time
FROM StateTable AS T2
WHERE T2.Time < T1.Time AND T2.State=1
ORDER BY T2.Time DESC) AS LastTime
FROM StateTable AS T1 WHERE T1.State = 1) AS U

Dealing with periods and dates without using cursors

I would like to solve this issue avoiding to use cursors (FETCH).
Here comes the problem...
1st Table/quantity
------------------
periodid periodstart periodend quantity
1 2010/10/01 2010/10/15 5
2st Table/sold items
-----------------------
periodid periodstart periodend solditems
14343 2010/10/05 2010/10/06 2
Now I would like to get the following view or just query result
Table Table/stock
-----------------------
periodstart periodend itemsinstock
2010/10/01 2010/10/04 5
2010/10/05 2010/10/06 3
2010/10/07 2010/10/15 5
It seems impossible to solve this problem without using cursors, or without using single dates instead of periods.
I would appreciate any help.
Thanks
DECLARE #t1 TABLE (periodid INT,periodstart DATE,periodend DATE,quantity INT)
DECLARE #t2 TABLE (periodid INT,periodstart DATE,periodend DATE,solditems INT)
INSERT INTO #t1 VALUES(1,'2010-10-01T00:00:00.000','2010-10-15T00:00:00.000',5)
INSERT INTO #t2 VALUES(14343,'2010-10-05T00:00:00.000','2010-10-06T00:00:00.000',2)
DECLARE #D1 DATE
SELECT #D1 = MIN(P) FROM (SELECT MIN(periodstart) P FROM #t1
UNION ALL
SELECT MIN(periodstart) FROM #t2) D
DECLARE #D2 DATE
SELECT #D2 = MAX(P) FROM (SELECT MAX(periodend) P FROM #t1
UNION ALL
SELECT MAX(periodend) FROM #t2) D
;WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 A CROSS JOIN L0 B),
L2 AS (SELECT 1 AS c FROM L1 A CROSS JOIN L1 B),
L3 AS (SELECT 1 AS c FROM L2 A CROSS JOIN L2 B),
L4 AS (SELECT 1 AS c FROM L3 A CROSS JOIN L3 B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS i FROM L4),
Dates AS(SELECT DATEADD(DAY,i-1,#D1) AS D FROM Nums where i <= 1+DATEDIFF(DAY,#D1,#D2)) ,
Stock As (
SELECT D ,t1.quantity - ISNULL(t2.solditems,0) AS itemsinstock
FROM Dates
LEFT OUTER JOIN #t1 t1 ON t1.periodend >= D and t1.periodstart <= D
LEFT OUTER JOIN #t2 t2 ON t2.periodend >= D and t2.periodstart <= D ),
NStock As (
select D,itemsinstock, ROW_NUMBER() over (order by D) - ROW_NUMBER() over (partition by itemsinstock order by D) AS G
from Stock)
SELECT MIN(D) AS periodstart, MAX(D) AS periodend, itemsinstock
FROM NStock
GROUP BY G, itemsinstock
ORDER BY periodstart
Hopefully a little easier to read than Martin's. I used different tables and sample data, hopefully extrapolating the right info:
CREATE TABLE [dbo].[Quantity](
[PeriodStart] [date] NOT NULL,
[PeriodEnd] [date] NOT NULL,
[Quantity] [int] NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[SoldItems](
[PeriodStart] [date] NOT NULL,
[PeriodEnd] [date] NOT NULL,
[SoldItems] [int] NOT NULL
) ON [PRIMARY]
INSERT INTO Quantity (PeriodStart,PeriodEnd,Quantity)
SELECT '20100101','20100115',5
INSERT INTO SoldItems (PeriodStart,PeriodEnd,SoldItems)
SELECT '20100105','20100107',2 union all
SELECT '20100106','20100108',1
The actual query is now:
;WITH Dates as (
select PeriodStart as DateVal from SoldItems union select PeriodEnd from SoldItems union select PeriodStart from Quantity union select PeriodEnd from Quantity
), Periods as (
select d1.DateVal as StartDate, d2.DateVal as EndDate
from Dates d1 inner join Dates d2 on d1.DateVal < d2.DateVal left join Dates d3 on d1.DateVal < d3.DateVal and d3.DateVal < d2.DateVal where d3.DateVal is null
), QuantitiesSold as (
select StartDate,EndDate,COALESCE(SUM(si.SoldItems),0) as Quantity
from Periods p left join SoldItems si on p.StartDate < si.PeriodEnd and si.PeriodStart < p.EndDate
group by StartDate,EndDate
)
select StartDate,EndDate,q.Quantity - qs.Quantity
from QuantitiesSold qs inner join Quantity q on qs.StartDate < q.PeriodEnd and q.PeriodStart < qs.EndDate
And the result is:
StartDate EndDate (No column name)
2010-01-01 2010-01-05 5
2010-01-05 2010-01-06 3
2010-01-06 2010-01-07 2
2010-01-07 2010-01-08 4
2010-01-08 2010-01-15 5
Explanation: I'm using three Common Table Expressions. The first (Dates) is gathering all of the dates that we're talking about, from the two tables involved. The second (Periods) selects consecutive values from the Dates CTE. And the third (QuantitiesSold) then finds items in the SoldItems table that overlap these periods, and adds their totals together. All that remains in the outer select is to subtract these quantities from the total quantity stored in the Quantity Table
John, what you could do is a WHILE loop. Declare and initialise 2 variables before your loop, one being the start date and the other being end date. Your loop would then look like this:
WHILE(#StartEnd <= #EndDate)
BEGIN
--processing goes here
SET #StartEnd = #StartEnd + 1
END
You would need to store your period definitions in another table, so you could retrieve those and output rows when required to a temporary table.
Let me know if you need any more detailed examples, or if I've got the wrong end of the stick!
Damien,
I am trying to fully understand your solution and test it on a large scale of data, but I receive following errors for your code.
Msg 102, Level 15, State 1, Line 20
Incorrect syntax near 'Dates'.
Msg 102, Level 15, State 1, Line 22
Incorrect syntax near ','.
Msg 102, Level 15, State 1, Line 25
Incorrect syntax near ','.
Damien,
Based on your solution I also wanted to get a neat display for StockItems without overlapping dates. How about this solution?
CREATE TABLE [dbo].[SoldItems](
[PeriodStart] [datetime] NOT NULL,
[PeriodEnd] [datetime] NOT NULL,
[SoldItems] [int] NOT NULL
) ON [PRIMARY]
INSERT INTO SoldItems (PeriodStart,PeriodEnd,SoldItems)
SELECT '20100105','20100106',2 union all
SELECT '20100105','20100108',3 union all
SELECT '20100115','20100116',1 union all
SELECT '20100101','20100120',10
;WITH Dates as (
select PeriodStart as DateVal from SoldItems
union
select PeriodEnd from SoldItems
union
select PeriodStart from Quantity
union
select PeriodEnd from Quantity
), Periods as (
select d1.DateVal as StartDate, d2.DateVal as EndDate
from Dates d1
inner join Dates d2 on d1.DateVal < d2.DateVal
left join Dates d3 on d1.DateVal < d3.DateVal and
d3.DateVal < d2.DateVal where d3.DateVal is null
), QuantitiesSold as (
select StartDate,EndDate,SUM(si.SoldItems) as Quantity
from Periods p left join SoldItems si on p.StartDate < si.PeriodEnd and si.PeriodStart < p.EndDate
group by StartDate,EndDate
)
select StartDate,EndDate, qs.Quantity
from QuantitiesSold qs
where qs.quantity is not null