DB2: How do I display all dates in a range - db2

I want to expand a date range into one row per day using a DB2 (on iSeries) query. For example, I have the following row in a table:
2016-10-01 2016-10-03 600
I want the output as
2016-10-01 200
2016-10-02 200
2016-10-03 200
I tried, but I have not been able to write the query. It should be along the lines of the attempts below.
Table (MYTABLE) has two columns. Below is snapshot
START_DT END_DT
2016-01-01 2016-01-03
On this query
-- Asker's first attempt: a recursive CTE that starts at start_dt and adds one
-- day per step while dt is still before end_dt.
-- NOTE(review): reported to fail with "Column list not valid for table";
-- presumably the recursive CTE needs an explicit column list -- confirm
-- against the DB2 for i documentation.
with temp1 as
(
SELECT start_dt, end_dt, start_dt as dt
FROM mytable
UNION
SELECT start_dt, end_dt, dt + 1 day as dt
FROM temp1
WHERE dt < end_dt
)
SELECT dt
FROM temp1
I am getting the error "Column list not valid for table".
I tried this as well
-- Asker's second attempt: same recursion, now with an explicit column list.
-- NOTE(review): reported to fail with "Keyword not allowed in recursive common
-- table expression"; presumably the keyword is UNION (the answers use
-- UNION ALL) -- confirm against the DB2 for i documentation.
with temp1 (start_dt, end_dt, dt) as
(
SELECT start_dt, end_dt, start_dt as dt
FROM mytable
UNION
SELECT start_dt, end_dt, dt + 1 day as dt
FROM temp1
WHERE dt < end_dt
)
SELECT dt
FROM temp1
This is throwing error "Keyword not allowed in recursive common table expression TEMP1."

I did a test -- this works on 9.7
-- Expand each (start_dt, end_dt, amount) row into one row per day and split
-- the amount evenly across the days of the inclusive range.
with table1 (start_dt, end_dt, amount) as
(
  -- inline sample row standing in for the real table
  values (timestamp('2017-01-01'), timestamp('2017-01-03'), 600)
), this_is_not_a_reserved_word (start_dt, end_dt, d, amount) as
(
  -- Anchor: first day of the range. The divisor is the exact number of
  -- calendar days in the inclusive range; DAYS() is used instead of
  -- TIMESTAMPDIFF(16, ...) because TIMESTAMPDIFF only estimates day counts
  -- (it assumes 30-day months), which is wrong for ranges crossing a month.
  SELECT start_dt, end_dt, start_dt as d,
         amount / (days(end_dt) - days(start_dt) + 1) as amount
  FROM table1
  -- add a row filter on the source table here if one is needed
  UNION ALL
  -- Recursive step: advance one day until end_dt is reached.
  SELECT start_dt, end_dt, d + 1 day, amount
  FROM this_is_not_a_reserved_word
  WHERE d < end_dt
)
SELECT d, amount
FROM this_is_not_a_reserved_word
original answer
Here you go:
-- Expand each range of table1 into one row per day and split the amount
-- evenly across the days of the inclusive range.
-- The recursive CTE declares its column list and uses UNION ALL: the question
-- reports errors for a missing column list and for a disallowed keyword when
-- plain UNION is used.
with this_is_not_a_reserved_word (start_dt, end_dt, dt, amount) as
(
  -- Anchor: first day. Divisor = exact day count of the inclusive range
  -- (end minus start, plus one). DAYS() gives exact calendar days, whereas
  -- TIMESTAMPDIFF(16, ...) is only an estimate based on 30-day months.
  SELECT start_dt, end_dt, start_dt as dt,
         amount / (days(end_dt) - days(start_dt) + 1) as amount
  FROM table1
  WHERE tab_id_id = 518621
  UNION ALL
  -- Recursive step: advance one day until end_dt is reached.
  SELECT start_dt, end_dt, dt + 1 day, amount
  FROM this_is_not_a_reserved_word
  WHERE dt < end_dt
)
SELECT dt, amount
FROM this_is_not_a_reserved_word
If start_dt and end_dt are type date and not timestamp use:
amount / (days(end_dt) - days(start_dt) + 1) as amount -- exact day count of the inclusive range; DAYS() accepts DATE and TIMESTAMP values

try this
-- Walk every range of yourtable day by day: the seed row is the range start,
-- each recursive step moves one day forward and bumps a step counter.
WITH date_walk (start_dt, end_dt, DateCalc, step_no) AS
(
    -- seed: first day of each range, counter at zero
    SELECT src.start_dt,
           src.end_dt,
           src.start_dt,
           0
    FROM yourtable src

    UNION ALL

    -- step: next day, as long as the previous one was before end_dt
    SELECT prev.start_dt,
           prev.end_dt,
           prev.DateCalc + 1 DAY,
           prev.step_no + 1
    FROM date_walk prev
    WHERE prev.DateCalc < prev.end_dt
)
SELECT DateCalc
FROM date_walk

Related

postgresql combining several periods into one

I'm trying to combine range.
-- Asker's attempt: flag rows that start after the previous row ended, then
-- group rows on a running count of those flags and take min/max per group.
WITH a AS (
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29'
union
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43'
), b AS (
-- step is TRUE when the previous row's endat (in begat order) is before this
-- row's begat; "OR NULL" turns FALSE into NULL so count() can skip it.
-- Only the immediately preceding row is compared here.
SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
FROM a
)
, c AS (
-- count(step) ignores NULLs, so grp grows by one at every flagged row.
SELECT *, count(step) OVER (ORDER BY begat) AS grp
FROM b
)
-- One output row per group: earliest begat and latest endat.
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM c
GROUP BY grp
ORDER BY 1
Result
1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-22 12:24:16";"2017-09-22 13:18:29"
5 "2017-09-28 09:48:54";"2017-09-28 13:39:13"
Rows 3 and 4 intersect (the endat of row 3 is later than the begat of row 4).
How do I merge all intersecting ranges into one large interval?
I need result
1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-28 09:48:54";"2017-09-28 13:39:13"
Hey I would suggest using the following process :
1- Identify when a row is new, so you give a value of 1 to values that do not overlap (CTE b)
2- Sequence together the rows that overlap with others. This way you have a common identifier that will allow you to take the MIN of begat and the MAX of endat (CTE c)
3- For each sequence, give the MIN of begat and the MAX of endat so you will have your final values
-- Merge overlapping [begat, endat] ranges into maximal non-overlapping ranges.
WITH a AS (
    -- sample data (ISO-formatted text, so string comparison follows time order)
    select '2017-09-16 07:12:57' as begat, '2017-09-16 11:30:22' as endat
    union
    select '2017-09-18 17:05:21', '2017-09-19 13:18:01'
    union
    select '2017-09-19 15:34:40', '2017-09-22 13:29:37'
    union
    select '2017-09-22 12:24:16', '2017-09-22 13:18:29'
    union
    select '2017-09-28 09:48:54', '2017-09-28 13:39:13'
    union
    select '2017-09-20 13:52:43', '2017-09-20 14:14:43'
)
, b AS (
    -- is_new = 1 when this row starts after every earlier row has ended.
    -- The comparison is NULL for the first row (empty frame); COALESCE maps
    -- that to FALSE, exactly as the former "IS TRUE" test did.
    SELECT
        begat
      , endat
      , COALESCE(begat > MAX(endat) OVER w, FALSE)::INT AS is_new
    FROM a
    WINDOW w AS (ORDER BY begat ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
)
, c AS (
    -- Running total of the flags: rows of one merged range share a seq value.
    SELECT
        begat
      , endat
      , SUM(is_new) OVER (ORDER BY begat) AS seq
    FROM b
)
SELECT
    MIN(begat) AS beg_at
  , MAX(endat) AS end_at
FROM c
GROUP BY seq
-- Without ORDER BY the row order of a grouped result is unspecified.
ORDER BY beg_at
EDITED
If you need speed, you can use a PL/pgSQL function:
-- Merge overlapping ranges of table a in a single ordered pass.
-- Returns one row per merged range; returns no rows when a is empty.
create or replace function append_ranges_in_a() returns setof a
language plpgsql
as
$BODY$
declare
    v_current a%rowtype;        -- row being read
    v_new a%rowtype;            -- merged range currently being built
    v_first boolean := true;    -- true until the first row has been read
begin
    for v_current in select begat, endat from a order by begat, endat
    loop
        if v_first then
            -- first row opens the first merged range
            v_first := false;
            v_new := v_current;
        elsif v_new.endat < v_current.begat then
            -- gap: emit the finished range and start a new one from this row
            -- (both bounds are reset, so no stale endat is carried over)
            return next v_new;
            v_new := v_current;
        else
            -- overlap: extend the current range if this row ends later
            v_new.endat := greatest(v_current.endat, v_new.endat);
        end if;
    end loop;
    -- emit the last open range, but only if at least one row was read;
    -- otherwise an all-NULL row would be returned for an empty table
    if not v_first then
        return next v_new;
    end if;
    return;
end;
$BODY$;
select * from append_ranges_in_a()
I test it with ~ 400000 rows:
-- Benchmark fixture: refill a with one randomly shifted one-day range per
-- calendar day, then run the function on it.
DELETE FROM a;  -- whole table on purpose: the fixture replaces all rows

INSERT INTO a (begat, endat)
SELECT
    r.start_ts::text,
    (r.start_ts + '1 day'::interval)::text
FROM (
    -- each day of the series shifted by a random whole number of hours
    SELECT day_ts + (round(random() * 23.0) || ' hours')::interval AS start_ts
    FROM generate_series(
             '1401-01-01'::timestamp,
             '2018-08-21'::timestamp,
             '1 day'::interval
         ) AS day_ts
) AS r;

SELECT count(*) FROM a;

SELECT * FROM append_ranges_in_a() OFFSET 100000 LIMIT 10
and it is twice as fast as the O(n^2) pure SQL version.
OLD slow solution:
You can use a recursive WITH query https://www.postgresql.org/docs/current/static/queries-with.html to construct the result row by row.
I create the table
The first row is the candidate first row (ending where ending), but the row is not "ready"
Then I look at the next row (step) and if it is not intersecting I add a ready row,
Also I add a not ready row with the current (last) observed range
When I do not have more rows I calculate the last row
I retain ready rows and the last row
Here is the code
-- Sample table of ranges used by the recursive query below.
CREATE TABLE a AS
    SELECT '2017-09-16 07:12:57' AS begat, '2017-09-16 11:30:22' AS endat
    UNION
    SELECT '2017-09-18 17:05:21', '2017-09-19 13:18:01'
    UNION
    SELECT '2017-09-19 15:34:40', '2017-09-22 13:29:37'
    UNION
    SELECT '2017-09-22 12:24:16', '2017-09-22 13:18:29'
    UNION
    SELECT '2017-09-28 09:48:54', '2017-09-28 13:39:13'
    UNION
    SELECT '2017-09-20 13:52:43', '2017-09-20 14:14:43';
-- Build the merged ranges row by row. Each row of t carries a candidate range,
-- a "ready" flag (the range is final) and "step" (how many rows of a have been
-- consumed so far).
WITH RECURSIVE t(begat, endat, ready, step) AS (
-- Anchor: the first row of a in (begat, endat) order, not ready, step = 1.
select * from (
select *,false,1 from a order by begat, endat limit 1) a
UNION ALL
SELECT new_rows.*
-- Of the rows produced by the previous iteration, keep the one with the latest begat.
FROM (SELECT * FROM t ORDER BY begat DESC limit 1) t,
-- Fetch the next unconsumed row of a (the row at offset "step").
lateral (SELECT * FROM a ORDER BY begat, endat OFFSET step LIMIT 1) a,
lateral (
-- No overlap with the next row: emit the current range as ready ...
SELECT t.begat, t.endat, true as ready, step WHERE t.endat < a.begat
-- ... and always emit a not-ready range: it restarts at a.begat when there is
-- no overlap, otherwise it keeps t.begat and extends endat.
UNION SELECT CASE WHEN t.endat < a.begat THEN a.begat ELSE t.begat END, greatest(a.endat, t.endat), false, step+1
) new_rows
)
-- Keep the ready rows plus the last row (greatest begat, then greatest endat).
select begat, endat
from (
select begat, endat, ready, row_number() over (order by begat desc, endat desc)=1 is_last
from t
order by begat, endat) t
where ready or is_last;
I am using a range type:
https://www.postgresql.org/docs/9.3/static/rangetypes.html
-- Merge overlapping ranges: first drop ranges that are fully contained in an
-- earlier-starting range, then group what is left with lag()/count().
WITH tmp AS (
    -- preparation: build a tsrange per row; an open end becomes 'infinity'
    select begat, coalesce( endat, 'infinity' ) as endAt, tsrange( begat, coalesce( endat, 'infinity' ) ) as rg
    from (
        select '2017-09-11 17:13:03'::timestamp as begat ,'2017-09-12 12:24:09'::timestamp as endat union
        select '2017-09-19 15:34:40','2017-09-20 11:04:45' union
        select '2017-09-20 08:32:00','2017-09-22 13:28:37' union
        select '2017-09-20 13:52:43','2017-09-20 14:14:43' union
        select '2017-09-21 12:24:16','2017-09-21 13:28:29' union
        select '2017-09-22 12:24:16','2017-09-22 13:28:29' union
        select '2017-09-22 12:34:16','2017-09-23 13:28:29' union
        select '2017-09-22 12:25:16','2017-09-24 13:28:29' union
        select '2017-09-28 09:48:54','2017-09-28 13:39:13' union
        select '2017-09-28 14:22:16','2017-09-28 15:52:15' union
        select '2017-10-05 12:17:45','2017-10-06 12:35:38' union
        select '2017-10-06 16:20:44','2017-10-07 10:11:09' union
        select '2017-10-07 20:38:32','2017-10-09 14:42:29' union
        select '2017-10-12 18:22:14','2017-10-12 20:52:45'
    ) a
),a as (
    -- Anti-join: keep only the ranges that no earlier-starting range contains.
    -- "@>" is the range containment operator ("#>" is not a range operator).
    select l.*
    from tmp l left join tmp r on l.begAt > r.begAt and r.rg @> l.rg
    where r.begAt is null
),
b AS (
    -- step is TRUE when the previous range ended before this one begins, else NULL
    SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
    FROM a
)
, c AS (
    -- count(step) skips NULLs, so grp grows by one at every gap
    SELECT *, count(step) OVER (ORDER BY begat) AS grp
    FROM b
)
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM c
GROUP BY grp
ORDER BY 1

Merge consecutive duplicate records including time range

I have a very similar problem to the question asked here: Merge duplicate temporal records in database
The difference here is, that I need the end date to be an actual date instead of NULL.
So given the following data:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2010/04/30 X Y
1000 2010/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/04/30 X X
1000 2014/05/01 2014/06/01 X X
The desired result is:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/06/01 X X
The proposed solution in the linked thread is this:
-- Solution quoted from the linked thread. A row is tagged 1 unless the row of
-- the same employee that ends the day before it has the same Column1/Column2.
with t1 as --tag first row with 1 in a continuous time series
(
select t1.*, case when t1.column1=t2.column1 and t1.column2=t2.column2
then 0 else 1 end as tag
from test_table t1
left join test_table t2
on t1.EmployeeId= t2.EmployeeId and dateadd(day,-1,t1.StartDate)= t2.EndDate
)
-- For each tagged row, EndDate is the day before the earliest later tagged
-- StartDate of the same employee; it is NULL when there is no later tagged row.
select t1.EmployeeId, t1.StartDate,
case when min(T2.StartDate) is null then null
else dateadd(day,-1,min(T2.StartDate)) end as EndDate,
t1.Column1, t1.Column2
from (select t1.* from t1 where tag=1 ) as t1 -- to get StartDate
left join (select t1.* from t1 where tag=1 ) as t2 -- to get a new EndDate
on t1.EmployeeId= t2.EmployeeId and t1.StartDate < t2.StartDate
group by t1.EmployeeId, t1.StartDate, t1.Column1, t1.Column2;
However, this does not seem to work when you need the end date instead of just NULL.
Could someone help me with this issue?
How about this?
-- Sample data from the question: one row per employment period.
CREATE TABLE test_table
(
    EmployeeId INT,
    StartDate  DATE,
    EndDate    DATE,
    Column1    CHAR(1),
    Column2    CHAR(1)
)
;
INSERT INTO test_table (EmployeeId, StartDate, EndDate, Column1, Column2)
VALUES
    (1000, '2009-05-01', '2010-04-30', 'X', 'Y'),
    (1000, '2010-05-01', '2011-04-30', 'X', 'Y'),
    (1000, '2011-05-01', '2012-04-30', 'X', 'X'),
    (1000, '2012-05-01', '2013-04-30', 'X', 'Y'),
    (1000, '2013-05-01', '2014-04-30', 'X', 'X'),
    (1000, '2014-05-01', '2014-06-01', 'X', 'X')
;
-- Collapse consecutive rows of an employee that carry the same Column1/Column2
-- into one row spanning from the first StartDate to the last EndDate.
WITH flagged AS (
    -- DIFF = 1 on the first row of every run (including the employee's very
    -- first row, where LAG() yields NULL), 0 on continuation rows
    SELECT src.*
         , CASE WHEN
               Column1 = LAG(Column1, 1) OVER (PARTITION BY EmployeeId ORDER BY StartDate)
           AND Column2 = LAG(Column2, 1) OVER (PARTITION BY EmployeeId ORDER BY StartDate)
           THEN 0 ELSE 1 END AS DIFF
    FROM test_table src
),
numbered AS (
    -- running total of the flags: all rows of one run share the same RN
    SELECT f.*
         , SUM(DIFF) OVER (PARTITION BY EmployeeId ORDER BY StartDate) AS RN
    FROM flagged f
),
spanned AS (
    -- stamp every row with the latest EndDate of its run
    SELECT EmployeeId
         , StartDate
         , MAX(EndDate) OVER (PARTITION BY EmployeeId, RN) AS EndDate
         , Column1
         , Column2
         , DIFF
    FROM numbered
)
-- keep only the first row of each run
SELECT EmployeeId, StartDate, EndDate, Column1, Column2
FROM spanned
WHERE DIFF = 1
;
This is another solution (taken from How do I group on continuous ranges). It is simpler to code and also caters for NULL values (i.e. treats NULL = NULL unlike the simple LAG() comparison). However it might not be quite as efficient on large volumes of data due to the GROUP BY
-- Group consecutive rows with equal Column1/Column2 per employee. The row
-- number within the employee minus the row number within the
-- (employee, Column1, Column2) combination is constant inside one run.
WITH ranked AS (
    SELECT src.*
         , ROW_NUMBER() OVER (PARTITION BY EmployeeId, Column1, Column2 ORDER BY StartDate) AS pos_in_combo
         , ROW_NUMBER() OVER (PARTITION BY EmployeeId ORDER BY StartDate) AS pos_in_employee
    FROM test_table src
)
SELECT EmployeeId
     , MIN(StartDate) AS StartDate
     , MAX(EndDate) AS EndDate
     , Column1
     , Column2
FROM ranked
GROUP BY
       EmployeeId
     , Column1
     , Column2
     , pos_in_employee - pos_in_combo

How to create a table with dates in sequence between range in Hive?

I'm trying to create a table with a date column, and I want to insert a sequence of consecutive dates within a given range.
Here's what I have tried:
-- Asker's attempt: generate dates by subtracting 0..999 days from current_date
-- (three cross-joined digit tables) and insert those inside a date window.
-- NOTE(review): '2016-06-31' is not a calendar date (June has 30 days), and the
-- two SET variables are not referenced by the statements below.
SET StartDate = '2009-01-01';
SET EndDate = '2016-06-31';
CREATE TABLE DateRangeTable(mydate DATE, qty INT);
-- NOTE(review): the SELECT is wrapped in VALUES ( ... ); the BETWEEN bounds
-- are also inverted (lower bound is later than upper bound), so the filter
-- cannot match any row.
INSERT INTO DateRangeTable VALUES (select a.Date, 0
from (
select current_date - INTERVAL (a.a + (10 * b.a) + (100 * c.a)) DAY as Date
from (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as a
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as b
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as c
) AS a where a.Date between '2019-01-01' and '2016-06-30');
This is the similar one:
-- One row per day between the two literal dates: space(n) builds n blanks,
-- split() turns them into n + 1 empty strings, and posexplode() numbers them
-- 0..n; each position i is turned into the date f1 + (start_r - i) days.
SELECT date_add(t.f1, t.start_r - pe.i) AS date_range
FROM (
    SELECT
        '2022-01-01' AS f1,
        datediff('2022-01-07', '2022-01-01') AS start_r,
        0 AS end_r
) t
LATERAL VIEW posexplode(split(space(start_r - end_r), ' ')) pe AS i, s;
You do not need VALUES keyword when using INSERT ... SELECT.
Working example:
-- Fill DateRangeTable with one row per day from start_date to end_date
-- (both inclusive), qty initialised to 0.
set hivevar:start_date=2009-01-01;
-- June has 30 days; 2016-06-31 is not a calendar date.
set hivevar:end_date=2016-06-30;
CREATE TABLE DateRangeTable(mydate DATE, qty INT);
with date_range as
(--this query generates date range
  -- space(n) yields n blanks, split() yields n + 1 empty strings and
  -- posexplode() numbers them 0..n, i.e. one offset per day of the range
  select date_add ('${hivevar:start_date}',s.i) as dt
  from ( select posexplode(split(space(datediff('${hivevar:end_date}','${hivevar:start_date}')),' ')) as (i,x) ) s
)
INSERT INTO TABLE DateRangeTable
-- No extra date filter: the generator already produces exactly
-- start_date..end_date. The former BETWEEN had its bounds inverted and could
-- never match, and a stray ")" before the ";" made the statement invalid.
select d.dt, 0 qty
from date_range d;

How to Calculate Percentage of Revenue based on aggregate column Total sales?

I am trying to add a column that calculate the Percentage of total revenue and I am stuck with the following error:
Error: Msg 207, Level 16, State 1, Line 14 Invalid column name
'Customerkey'.
In that line I’m trying to join Table 1 and Table 3 but MS SQL Server won’t recognize T.Customerkey even though customerkey exists in the dbo.FactInternetSales table.
Also, when I add T.Grand_Tot_Rev in my Group By clause, it returns 0.04 for every row. I know it's wrong because I do not want T.Grand_Tot_Rev to be part of the aggregate, because it should remain constant for every record. How can I achieve what I am looking for? Thank you in advance. By the way, I am using the AdventureWorksDW2012 database.
-- Asker's attempt: per-customer sales, cost and margin plus the share of total revenue.
-- NOTE(review): derived table T selects only Grand_Tot_Rev, so the
-- T.Customerkey reference in "Join 2" has no column to resolve to, which
-- matches the reported "Invalid column name 'Customerkey'" error.
-- NOTE(review): the last select-list line looks like it is missing a comma
-- between T.Grand_Tot_Rev and the parenthesised expression -- confirm.
SELECT fs.CustomerKey ,
M.Total_sales ,
M.Total_cost ,
M.Total_sales - M.Total_cost AS Total_Margin ,
T.Grand_Tot_Rev( M.Total_sales / T.Grand_Tot_Rev ) * 100 AS Prct_Total_Revenue
FROM dbo.FactInternetSales fs , -- Table 1 --
(
SELECT customerkey ,
SUM( SalesAmount )AS Total_Sales ,
SUM( TotalProductCost )Total_cost
FROM dbo.FactInternetSales
GROUP BY customerkey
) M , --Table 2 --
(
SELECT SUM( SalesAmount )AS Grand_Tot_Rev
FROM dbo.FactInternetSales
) T --Table 3 --
WHERE fs.CustomerKey = M.CustomerKey -- Join 1 --
AND M.CustomerKey = T.Customerkey -- Join 2 --
GROUP BY fs.CustomerKey ,
M.Total_sales ,
M.Total_cost ,
T.Grand_Tot_Rev
ORDER BY 2 DESC;
If you want the T.Grand_Tot_Rev as a constant over all rows try removing the second join AND M.CustomerKey = T.Customerkey -- Join 2 -- so the query looks like this:
-- Per-customer totals with the grand total repeated on every row.
-- M: totals per customer; T: a single row with the overall revenue, attached
-- to every customer row through a cross join (there is no key to join on).
SELECT fs.CustomerKey ,
       M.Total_sales ,
       M.Total_cost ,
       M.Total_sales - M.Total_cost AS Total_Margin ,
       T.Grand_Tot_Rev,
       ( M.Total_sales / T.Grand_Tot_Rev ) * 100 AS Prct_Total_Revenue
FROM dbo.FactInternetSales AS fs
INNER JOIN
     (
         SELECT customerkey ,
                SUM( SalesAmount ) AS Total_Sales ,
                SUM( TotalProductCost ) AS Total_cost
         FROM dbo.FactInternetSales
         GROUP BY customerkey
     ) AS M
     ON fs.CustomerKey = M.CustomerKey
CROSS JOIN
     (
         SELECT SUM( SalesAmount ) AS Grand_Tot_Rev
         FROM dbo.FactInternetSales
     ) AS T
-- the grouping collapses the fact rows back to one row per customer
GROUP BY fs.CustomerKey ,
         M.Total_sales ,
         M.Total_cost ,
         T.Grand_Tot_Rev
ORDER BY M.Total_sales DESC;
Another way to write the same query that is a bit more compact and might have slightly better performance:
-- Same report built from two CTEs: per-customer totals (M) and the one-row
-- grand total (T), combined with an explicit cross join.
;WITH
M AS (
    SELECT customerkey ,
           SUM(SalesAmount) AS Total_Sales ,
           SUM(TotalProductCost) AS Total_cost
    FROM dbo.FactInternetSales
    GROUP BY CustomerKey
),
T AS (
    SELECT SUM(SalesAmount) AS Grand_Tot_Rev
    FROM dbo.FactInternetSales
)
SELECT
    M.customerkey ,
    M.Total_Sales ,
    M.Total_cost,
    M.Total_Sales - M.Total_cost AS Total_Margin ,
    T.Grand_Tot_Rev,
    M.Total_Sales / T.Grand_Tot_Rev * 100 AS Prct_Total_Revenue
FROM M
CROSS JOIN T
ORDER BY M.Total_Sales DESC;
To see the really small values you can force a conversion to a wider data type:
-- Revenue share per customer, computed on decimals so that very small
-- percentages keep their fractional digits.
;WITH
T AS (
    -- Explicit precision and scale: a bare "decimal" means decimal(18,0) and
    -- would round the grand total to whole units.
    SELECT CAST(SUM(SalesAmount) AS decimal(19,4)) AS Grand_Tot_Rev
    FROM dbo.FactInternetSales
),
M AS (
    -- decimal(19,4) leaves 15 integer digits; decimal(15,10) left only 5 and
    -- would overflow for any customer total of 100,000 or more.
    SELECT customerkey ,
           CAST(SUM(SalesAmount) AS decimal(19,4)) AS Total_Sales ,
           CAST(SUM(TotalProductCost) AS decimal(19,4)) AS Total_cost
    FROM dbo.FactInternetSales
    GROUP BY CustomerKey
)
SELECT
    customerkey ,
    Total_Sales ,
    Total_cost,
    Total_Sales - Total_cost AS Total_Margin ,
    Grand_Tot_Rev,
    -- NULLIF turns a zero grand total into NULL instead of a divide-by-zero error
    Total_Sales / NULLIF(Grand_Tot_Rev, 0) * 100 AS Prct_Total_Revenue
FROM M
CROSS JOIN T
ORDER BY Total_Sales DESC;

concatenating single column in TSQL

I am using SSMS 2008 and trying to concatenate one of the rows together based on a different field's grouping. I have two columns, people_id and address_desc. They look like this:
address_desc people_id
---------- ------------
Murfreesboro, TN 37130 F15D1135-9947-4F66-B778-00E43EC44B9E
11 Mohawk Rd., Burlington, MA 01803 C561918F-C2E9-4507-BD7C-00FB688D2D6E
Unknown, UN 00000 C561918F-C2E9-4507-BD7C-00FB688D2D6E
Jacksonville, NC 28546 FC7C78CD-8AEA-4C8E-B93D-010BF8E4176D
Memphis, TN 38133 8ED8C601-5D35-4EB7-9217-012905D6E9F1
44 Maverick St., Fitchburg, MA 8ED8C601-5D35-4EB7-9217-012905D6E9F1
Now I want to concatenate the address_desc field / people_id. So the first one here should just display "Murfreesboro, TN 37130" for address_desc. But second person should have just one line instead of two which says "11 Mohawk Rd., Burlington, MA 01803;Unknown, UN 00000" for address_desc.
How do I do this? I tried using CTE, but this was giving me ambiguity error:
-- Asker's attempt: recursive CTE that builds a comma-separated address list
-- per people_id, then keeps the longest list (highest length) per person.
-- NOTE(review): the question reports an ambiguity error for this statement;
-- the columns of dbo.address_view are not visible here, so the cause cannot
-- be confirmed from this snippet.
-- NOTE(review): the recursive member appends c.address_desc (the value carried
-- by the CTE row) rather than p.address_desc (the newly joined row) -- this
-- looks unintended; confirm.
WITH CTE ( people_id, address_list, address_desc, length )
AS ( SELECT people_id, CAST( '' AS VARCHAR(8000) ), CAST( '' AS VARCHAR(8000) ), 0
FROM dbo.address_view
GROUP BY people_id
UNION ALL
SELECT p.people_id, CAST( address_list +
CASE WHEN length = 0 THEN '' ELSE ', ' END + c.address_desc AS VARCHAR(8000) ),
CAST( c.address_desc AS VARCHAR(8000)), length + 1
FROM CTE c
INNER JOIN dbo.address_view p
ON c.people_id = p.people_id
WHERE p.address_desc > c.address_desc )
SELECT people_id, address_list
FROM ( SELECT people_id, address_list,
RANK() OVER ( PARTITION BY people_id ORDER BY length DESC )
FROM CTE ) D ( people_id, address_list, rank )
WHERE rank = 1 ;
Here was my initial SQL query:
-- List every address of the people who have more than one address row.
WITH multi_address_people AS (
    -- people_id values that occur more than once in the view
    SELECT people_id
    FROM dbo.address_view
    GROUP BY people_id
    HAVING COUNT(*) > 1
)
SELECT av.address_desc,
       av.people_id
FROM dbo.address_view AS av
INNER JOIN multi_address_people AS mp
    ON av.people_id = mp.people_id
ORDER BY av.people_id
You can use FOR XML PATH('') like this:
-- Concatenate all address_desc values of a person into one ';'-separated string.
-- Sample data lives in a table variable; table variables are declared with
-- "@" (a "#" name is a temporary table and cannot be used with DECLARE).
DECLARE @TestData TABLE
(
    address_desc NVARCHAR(100) NOT NULL
   ,people_id UNIQUEIDENTIFIER NOT NULL
);
INSERT @TestData (address_desc, people_id)
SELECT 'Murfreesboro, TN 37130', 'F15D1135-9947-4F66-B778-00E43EC44B9E'
UNION ALL
SELECT '11 Mohawk Rd., Burlington, MA 01803', 'C561918F-C2E9-4507-BD7C-00FB688D2D6E'
UNION ALL
SELECT 'Unknown, UN 00000', 'C561918F-C2E9-4507-BD7C-00FB688D2D6E'
UNION ALL
SELECT 'Memphis, TN 38133', '8ED8C601-5D35-4EB7-9217-012905D6E9F1'
UNION ALL
SELECT '44 Maverick St., Fitchburg, MA', '8ED8C601-5D35-4EB7-9217-012905D6E9F1';
SELECT a.people_id,
       -- The correlated subquery builds ';addr1;addr2...' for the person;
       -- TYPE + .value() returns it as text without XML entity escaping.
       -- STUFF removes only the leading ';' and, unlike SUBSTRING(..., 2, 4000),
       -- does not cut the list off after 4000 characters.
       STUFF(
           (SELECT ';' + b.address_desc
            FROM @TestData b
            WHERE a.people_id = b.people_id
            FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)')
          ,1
          ,1
          ,'') AS GROUP_CONCATENATE
FROM @TestData a
GROUP BY a.people_id
Results:
people_id GROUP_CONCATENATE
------------------------------------ ------------------------------------------------------
F15D1135-9947-4F66-B778-00E43EC44B9E Murfreesboro, TN 37130
C561918F-C2E9-4507-BD7C-00FB688D2D6E 11 Mohawk Rd., Burlington, MA 01803;Unknown, UN 00000
8ED8C601-5D35-4EB7-9217-012905D6E9F1 Memphis, TN 38133;44 Maverick St., Fitchburg, MA