Create additional columns based on other column values in PostgreSQL - postgresql

I have following data in a PostgreSQL table:
trial start_date end_date
1 20_12_2001 20_01_2005
The expected output is below:
trial start_date end_date Date[(start_end_date)] marker_start_end
1 20_12_2001 20_01_2005 20_12_2001 start
1 20_12_2001 20_01_2005 20_01_2005 end
Is there a way to calculate the two additional columns (Date[(start_end_date)], marker_start_end) without a join, using a CASE expression instead?

You can use a lateral join to turn two columns into two rows:
-- Turn every row of the_table into two rows: one carrying start_date tagged
-- 'start' and one carrying end_date tagged 'end'. LATERAL is what allows the
-- VALUES list to reference columns of the current row of t.
SELECT *
FROM the_table AS t
CROSS JOIN LATERAL (
    VALUES
        (t.start_date, 'start'),
        (t.end_date,   'end')
) AS unpivoted (start_end_date, marker);
The UNION ALL solution might be faster though.

UNION ALL
-- Branch 1: one output row per input row, exposing start_date as "date".
SELECT
    trial,
    start_date,
    end_date,
    start_date AS date,
    'start'    AS marker_start_end
FROM table1
UNION ALL
-- Branch 2: the same input rows again, this time exposing end_date as "date".
SELECT
    trial,
    start_date,
    end_date,
    end_date AS date,
    'end'    AS marker_start_end
FROM table1
UNNEST with CASE
-- Step 1: emit every row of table1 twice, numbered 1 and 2 via unnest().
WITH doubled AS (
    SELECT
        trial,
        start_date,
        end_date,
        unnest(ARRAY[1, 2]) AS num
    FROM table1
)
-- Step 2: copy 1 becomes the 'start' row, copy 2 becomes the 'end' row.
SELECT
    trial,
    start_date,
    end_date,
    CASE WHEN doubled.num = 1 THEN start_date ELSE end_date END AS date,
    CASE WHEN doubled.num = 1 THEN 'start' ELSE 'end' END       AS marker_start_end
FROM doubled
Hidden JOIN (but still join)
-- Pair every row of table1 with a two-row inline lookup (1 -> 'start',
-- 2 -> 'end'); CASE then picks the date column that matches the marker.
SELECT
    trial,
    start_date,
    end_date,
    CASE WHEN marker.num = 1 THEN start_date ELSE end_date END AS date,
    marker_start_end
FROM table1
CROSS JOIN (
    VALUES
        (1, 'start'),
        (2, 'end')
) AS marker (num, marker_start_end)
Db fiddle

Related

Use min function without grouping

How can I retrieve the min of a date without group by?
-- Sample data: a table of sale dates plus a date dimension that maps calendar
-- dates to a week number.
-- NOTE(review): the leading '#' on #table / #dimdate looks like a garbled '@'
-- (T-SQL table variables are declared as @name) - confirm against the original post.
declare #table table
(
SaleDate date
)
insert into #table
select '7/8/2021' union
select '7/21/2021'
declare #dimdate table
(
fulldate date,
WeekNumberOfYear int
)
insert into #dimdate
select '7/4/2021', 28 union
select '7/5/2021', 28 union
select '7/6/2021', 28 union
select '7/7/2021', 28 union
select '7/8/2021', 28 union
select '7/9/2021', 28 union
select '7/10/2021', 28 union
select '7/11/2021', 29 union
select '7/18/2021', 30 union
select '7/19/2021', 30 union
select '7/20/2021', 30 union
select '7/21/2021', 30 union
select '7/22/2021', 30 union
select '7/23/2021', 30 union
select '7/24/2021', 30
-- Join each sale date to the dimension rows whose week number equals the sale
-- date's DATEPART(week, ...) and return the earliest dimension date per week.
select datepart(week, saledate) 'wk',
min(fulldate) as 'Beginning_Week'
from #table t inner join #dimdate d on
datepart(week, saledate) = WeekNumberOfYear
group by datepart(week, saledate), WeekNumberOfYear
How can I retrieve the same result as above without a group by?
Do you mean something like this?
-- Attach the overall minimum week number (computed by an uncorrelated scalar
-- subquery over DimDate) to every row, so no GROUP BY is needed.
SELECT
    dt,
    (
        SELECT MIN(WeekNumberOfYear)
        FROM DimDate
    ) AS minWeekNumberOfYear
FROM #table
Just use a windowed aggregate:
-- An empty OVER () makes the whole result set a single window, so every row
-- carries the global minimum of dt alongside its own dt.
SELECT
    src.dt,
    MIN(src.dt) OVER () AS MinDt
FROM #table AS src;
You can use MIN() window function if you partition by WeekNumberOfYear and use DISTINCT in the SELECT statement so that there are no duplicates:
-- The window MIN repeats each week's earliest date on every joined row;
-- DISTINCT then collapses those repeats to one row per week.
SELECT DISTINCT
    d.WeekNumberOfYear AS wk,
    MIN(d.fulldate) OVER (PARTITION BY d.WeekNumberOfYear) AS Beginning_Week
FROM #table AS t
INNER JOIN #dimdate AS d
    ON d.WeekNumberOfYear = DATEPART(week, t.saledate);
See the demo.

Create New Rows based on valid to and valid from dates

I have a table that has account number, end of month valid from and end of month valid to columns.
What I need is a table that has account number and a column that has all the end of month dates of when the account was live, inclusive of end of month valid to. The Current Table looks like this
New table will need to be like this
I have tried using a calendar table and a CTE-based query but have had no success.
Any help would be great.
This can be achieved by using multiple comma-separated CTEs in a single statement.
Query
-- t0: six placeholder rows (all zero).
WITH t0 (i) AS (
    SELECT seed.z
    FROM (VALUES (0), (0), (0), (0), (0), (0)) AS seed (z)
),
-- t1: every t0 row paired with every t0 row, i.e. 6 x 6 = 36 rows.
t1 (i) AS (
    SELECT 0
    FROM t0 AS a
    CROSS JOIN t0 AS b
),
-- n: the integers 1..36, used as month offsets below.
n (i) AS (
    SELECT ROW_NUMBER() OVER (ORDER BY i)
    FROM t1
),
Account_details (Account_number, valid_from, valid_to, mth, Live_date) AS (
    -- The first live date is valid_from itself.
    SELECT
        Account_number,
        valid_from,
        valid_to,
        DATEDIFF(month, valid_from, valid_to),
        valid_from
    FROM tbl1
    UNION ALL
    -- One further row per month offset up to the valid_from..valid_to span,
    -- each snapped to the end of its month.
    SELECT
        Account_number,
        valid_from,
        valid_to,
        DATEDIFF(month, valid_from, valid_to),
        EOMONTH(DATEADD(month, n.i, valid_from))
    FROM tbl1
    INNER JOIN n
        ON n.i BETWEEN 1 AND DATEDIFF(month, valid_from, valid_to)
)
SELECT *
FROM Account_details AS ad
WHERE ad.Account_number = 1
ORDER BY ad.Account_number
Output
The CTEs t0, t1 and n generate a sequence of numbers. This is a convenient way to generate rows without needing any source data.
Then the CTE table Account_details is used to pull data from the table.
Based on sql on the msdn thread how to get month end date between two dates.
-- Expand each account's ValidFrom..ValidTo range into one row per month-end
-- date, processing one account at a time with a cursor.
-- Variables, table variables and @@FETCH_STATUS use the '@' sigil that T-SQL
-- requires.
DECLARE @Old AS TABLE (AccountNumber INT, ValidFrom DATE, ValidTo DATE);
DECLARE @New AS TABLE (AccountNumber INT, LiveDate DATE);

INSERT INTO @Old (AccountNumber, ValidFrom, ValidTo)
SELECT 1, '20130630', '20131130' UNION ALL
SELECT 2, '20130630', '20131231' UNION ALL
SELECT 3, '20120430', '20120531' UNION ALL
SELECT 4, '20170331', '20171130';

-- Show the input for reference.
SELECT TOP 100 * FROM @Old;

DECLARE @AccountNumber INT, @ValidFrom DATE, @ValidTo DATE;
DECLARE @Cursor CURSOR;

SET @Cursor = CURSOR FOR
    SELECT AccountNumber, ValidFrom, ValidTo
    FROM @Old;

OPEN @Cursor;

-- Priming fetch. The syntax is FETCH NEXT FROM <cursor> INTO <variables>;
-- it matches the fetch at the bottom of the loop.
FETCH NEXT FROM @Cursor INTO @AccountNumber, @ValidFrom, @ValidTo;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Start at the month end of ValidFrom; each recursion step moves one day
    -- past the current month end and snaps to the end of that next month,
    -- stopping once the month end of ValidTo has been produced.
    ;WITH cteEndMonthDates (MonthEndDate) AS (
        SELECT EOMONTH(@ValidFrom) AS MonthEndDate
        UNION ALL
        SELECT EOMONTH(DATEADD(day, 1, MonthEndDate)) AS MonthEndDate
        FROM cteEndMonthDates
        WHERE MonthEndDate < EOMONTH(@ValidTo)
    )
    INSERT INTO @New (AccountNumber, LiveDate)
    SELECT @AccountNumber, MonthEndDate
    FROM cteEndMonthDates;

    FETCH NEXT FROM @Cursor INTO @AccountNumber, @ValidFrom, @ValidTo;
END;

CLOSE @Cursor;
DEALLOCATE @Cursor;

SELECT * FROM @New;
Edit: Or without the cursor
-- Set-based variant: expand every account's ValidFrom..ValidTo range into one
-- row per month-end date with a single recursive CTE (no cursor).
-- The table variable uses the '@' sigil that T-SQL requires. The @New table
-- of the cursor version is not declared here because this script never used it.
DECLARE @Old AS TABLE (AccountNumber INT, ValidFrom DATE, ValidTo DATE);

INSERT INTO @Old (AccountNumber, ValidFrom, ValidTo)
SELECT 1, '20130630', '20131130' UNION ALL
SELECT 2, '20130630', '20131231' UNION ALL
SELECT 3, '20120430', '20120531' UNION ALL
SELECT 4, '20170331', '20171130' UNION ALL
SELECT 5, '20180430', '20190131' UNION ALL
SELECT 6, '20160430', '20180531';

-- Show the input for reference.
SELECT TOP 100 * FROM @Old;

;WITH cteEndMonthDates (AccountNumber, MonthEndDate) AS (
    -- Anchor: the month end of each account's ValidFrom.
    SELECT o.AccountNumber, EOMONTH(o.ValidFrom) AS MonthEndDate
    FROM @Old AS o
    UNION ALL
    -- Step: one day past the current month end, snapped to the end of that
    -- next month, until the month end of the account's ValidTo is reached.
    SELECT x.AccountNumber, EOMONTH(DATEADD(day, 1, x.MonthEndDate)) AS MonthEndDate
    FROM cteEndMonthDates AS x
    INNER JOIN @Old AS o
        ON o.AccountNumber = x.AccountNumber
    WHERE x.MonthEndDate < EOMONTH(o.ValidTo)
)
SELECT AccountNumber, MonthEndDate
FROM cteEndMonthDates
ORDER BY AccountNumber, MonthEndDate;
This should work.
-- Produce one row per account per month-end date between ValidFrom and
-- ValidTo (inclusive). The recursion advances one month at a time and snaps
-- to the month end; stepping by a single day would return every calendar day
-- of the range rather than only the month-end dates the question asks for.
;WITH Span AS (
    -- Anchor: the account's first valid date. The CAST keeps the anchor's
    -- type identical to the DATE returned by EOMONTH in the recursive member.
    SELECT
        i.AccountNumber,
        CAST(i.ValidFrom AS DATE) AS Valid
    FROM dbo.Input AS i
    UNION ALL
    -- Step: end of the following month, as long as it does not pass ValidTo.
    SELECT
        s.AccountNumber,
        EOMONTH(DATEADD(MONTH, 1, s.Valid)) AS Valid
    FROM Span AS s
    INNER JOIN dbo.Input AS i
        ON i.AccountNumber = s.AccountNumber
    WHERE EOMONTH(DATEADD(MONTH, 1, s.Valid)) <= i.ValidTo
)
SELECT *
FROM Span
ORDER BY Span.AccountNumber, Span.Valid
-- MAXRECURSION 0 lifts the default limit of 100 recursion levels.
OPTION (MAXRECURSION 0);

TSQL -- quick way to get a count across all tables

If database d1 has tables T1,T2,T3,T4 all with the field "Date1".
What is the best way to get a count of all records across all tables with a date older than 3 days ago?
I know one could do unions, I assume there is no nifty syntax that would omit all tables [like a 'parent' object in C++].
Here best may mean more efficient, or just a pleasing syntax in T-SQL.
This is for SSMS 17.7. Microsoft SQL Server 2014 (SP2)
If you know the table names in advance, a simple query on union all will probably be the simplest way:
-- Count the rows across T1..T4 whose Date1 is at least three days old.
-- Every branch must be chained with UNION ALL: without it the derived table
-- is not valid SQL. UNION ALL (rather than UNION) keeps duplicate dates so
-- that each row is counted.
SELECT COUNT(*)
FROM
(
    SELECT Date1
    FROM T1
    UNION ALL
    SELECT Date1
    FROM T2
    UNION ALL
    SELECT Date1
    FROM T3
    UNION ALL
    SELECT Date1
    FROM T4
) AS t
WHERE Date1 <= DATEADD(DAY, -3, GETDATE())
If you don't know the table names in advance, you can use information_schema.columns to build the union query dynamically.
Well, you're interested in a parent object, that would be a view, then. You can reuse it for a variety of queries. Alternatively, add more columns if you need them:
-- "parent" stacks the Date1 column of t1..t4 into a single reusable relation;
-- UNION ALL keeps every row, duplicates included.
CREATE VIEW parent
AS
    SELECT Date1
    FROM t1
    UNION ALL
    SELECT Date1
    FROM t2
    UNION ALL
    SELECT Date1
    FROM t3
    UNION ALL
    SELECT Date1
    FROM t4;
And now, that can be queried in the way you want
-- Count the rows exposed by the view whose Date1 is at least three days old.
SELECT COUNT(*)
FROM parent
WHERE Date1 <= DATEADD(DAY, -3, GETDATE())
Without UNION?
Since a COUNT without a GROUP BY returns 1 value, why not use CROSS JOIN for once?
-- Each derived table is a single-row aggregate, so cross-joining the four of
-- them yields exactly one row holding the per-table counts and their sum.
SELECT
    c1.Cnt AS [T1],
    c2.Cnt AS [T2],
    c3.Cnt AS [T3],
    c4.Cnt AS [T4],
    (c1.Cnt + c2.Cnt + c3.Cnt + c4.Cnt) AS [T1234]
FROM (
    SELECT COUNT(*) AS Cnt
    FROM T1
    WHERE [Date1] < CAST(GetDate()-3 AS DATE)
) AS c1
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T2
    WHERE [Date1] < CAST(GetDate()-3 AS DATE)
) AS c2
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T3
    WHERE [Date1] < CAST(GetDate()-3 AS DATE)
) AS c3
CROSS JOIN (
    SELECT COUNT(*) AS Cnt
    FROM T4
    WHERE [Date1] < CAST(GetDate()-3 AS DATE)
) AS c4
Or a CROSS APPLY
-- The cutoff date is computed once in a one-row derived table; each CROSS
-- APPLY then counts one table's rows dated before that cutoff.
SELECT
    c1.Cnt AS [T1],
    c2.Cnt AS [T2],
    c3.Cnt AS [T3],
    c4.Cnt AS [T4],
    (c1.Cnt + c2.Cnt + c3.Cnt + c4.Cnt) AS [T1234]
FROM (
    SELECT CAST(GetDate()-3 AS DATE) AS Dt
) AS cutoff
CROSS APPLY (
    SELECT COUNT(*) AS Cnt FROM T1 WHERE [Date1] < cutoff.Dt
) AS c1
CROSS APPLY (
    SELECT COUNT(*) AS Cnt FROM T2 WHERE [Date1] < cutoff.Dt
) AS c2
CROSS APPLY (
    SELECT COUNT(*) AS Cnt FROM T3 WHERE [Date1] < cutoff.Dt
) AS c3
CROSS APPLY (
    SELECT COUNT(*) AS Cnt FROM T4 WHERE [Date1] < cutoff.Dt
) AS c4
Example snippet for Sql Server:
-- Self-contained demo of the CROSS JOIN count query using table variables.
-- Table variables use the '@' sigil that T-SQL requires.
DECLARE @T1 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T2 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T3 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);
DECLARE @T4 TABLE (id INT PRIMARY KEY IDENTITY(1,1), [Date1] DATE);

-- @T1 holds today and the six preceding days; @T2..@T4 hold the 6, 5 and 4
-- most recent of those dates respectively.
INSERT INTO @T1 ([Date1])
VALUES (getdate()-6), (getdate()-5), (getdate()-4), (getdate()-3),
       (getdate()-2), (getdate()-1), (getdate()-0);
INSERT INTO @T2 ([Date1]) SELECT TOP 6 [Date1] FROM @T1 ORDER BY [Date1] DESC;
INSERT INTO @T3 ([Date1]) SELECT TOP 5 [Date1] FROM @T1 ORDER BY [Date1] DESC;
INSERT INTO @T4 ([Date1]) SELECT TOP 4 [Date1] FROM @T1 ORDER BY [Date1] DESC;

-- One single-row count per table, combined into one result row.
SELECT
    t1.Cnt AS [T1],
    t2.Cnt AS [T2],
    t3.Cnt AS [T3],
    t4.Cnt AS [T4],
    (t1.Cnt + t2.Cnt + t3.Cnt + t4.Cnt) AS [T1234]
FROM
    (SELECT COUNT(*) AS Cnt FROM @T1 WHERE [Date1] < CAST(GetDate()-3 AS DATE)) AS t1
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T2 WHERE [Date1] < CAST(GetDate()-3 AS DATE)) AS t2
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T3 WHERE [Date1] < CAST(GetDate()-3 AS DATE)) AS t3
CROSS JOIN
    (SELECT COUNT(*) AS Cnt FROM @T4 WHERE [Date1] < CAST(GetDate()-3 AS DATE)) AS t4
Returns:
T1 T2 T3 T4 T1234
3 2 1 0 6
Instead of creating a view, you can use a CTE (Common Table Expression). It works like a view, but it is not persisted in the database. Please try it:
-- CteDate stacks the Date1 column of t1..t4 for the duration of this one
-- statement; the outer query counts the rows at least three days old.
WITH CteDate (Date1) AS (
    SELECT Date1
    FROM t1
    UNION ALL
    SELECT Date1
    FROM t2
    UNION ALL
    SELECT Date1
    FROM t3
    UNION ALL
    SELECT Date1
    FROM t4
)
SELECT COUNT(*)
FROM CteDate
WHERE Date1 <= DATEADD(DAY, -3, GETDATE())
It works on all SQL Server versions from 2005 onward.

Merge consecutive duplicate records including time range

I have a very similar problem to the question asked here: Merge duplicate temporal records in database
The difference here is, that I need the end date to be an actual date instead of NULL.
So given the following data:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2010/04/30 X Y
1000 2010/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/04/30 X X
1000 2014/05/01 2014/06/01 X X
The desired result is:
EmployeeId StartDate EndDate Column1 Column2
1000 2009/05/01 2011/04/30 X Y
1000 2011/05/01 2012/04/30 X X
1000 2012/05/01 2013/04/30 X Y
1000 2013/05/01 2014/06/01 X X
The proposed solution in the linked thread is this:
-- CTE t1: self-join every row to the row of the same employee whose EndDate
-- is the day before this row's StartDate. tag = 0 when such a row exists and
-- has the same Column1/Column2 values; otherwise (no match or different
-- values) tag = 1, marking the first row of a run.
with t1 as --tag first row with 1 in a continuous time series
(
select t1.*, case when t1.column1=t2.column1 and t1.column2=t2.column2
then 0 else 1 end as tag
from test_table t1
left join test_table t2
on t1.EmployeeId= t2.EmployeeId and dateadd(day,-1,t1.StartDate)= t2.EndDate
)
-- Outer query: for each run start, find the earliest later run start of the
-- same employee and use the day before it as EndDate. When there is no later
-- run start (the last run), EndDate comes out as NULL.
select t1.EmployeeId, t1.StartDate,
case when min(T2.StartDate) is null then null
else dateadd(day,-1,min(T2.StartDate)) end as EndDate,
t1.Column1, t1.Column2
from (select t1.* from t1 where tag=1 ) as t1 -- to get StartDate
left join (select t1.* from t1 where tag=1 ) as t2 -- to get a new EndDate
on t1.EmployeeId= t2.EmployeeId and t1.StartDate < t2.StartDate
group by t1.EmployeeId, t1.StartDate, t1.Column1, t1.Column2;
However, this does not seem to work when you need the end date instead of just NULL.
Could someone help me with this issue?
How about this?
-- Sample table and data from the question.
CREATE TABLE test_table (
    EmployeeId INT,
    StartDate  DATE,
    EndDate    DATE,
    Column1    CHAR(1),
    Column2    CHAR(1)
)
;
-- The explicit column list keeps the INSERT independent of column order.
INSERT INTO test_table (EmployeeId, StartDate, EndDate, Column1, Column2) VALUES
 (1000 , '2009-05-01','2010-04-30','X','Y')
,(1000 , '2010-05-01','2011-04-30','X','Y')
,(1000 , '2011-05-01','2012-04-30','X','X')
,(1000 , '2012-05-01','2013-04-30','X','Y')
,(1000 , '2013-05-01','2014-04-30','X','X')
,(1000 , '2014-05-01','2014-06-01','X','X')
;
-- Merge consecutive rows (ordered by StartDate per employee) that share the
-- same Column1/Column2 values:
--   flagged: DIFF = 1 on a row whose values differ from the previous row's
--            (also on the first row, where LAG returns NULL), else 0.
--   grouped: the running sum of DIFF numbers each run of equal rows (RN).
--   merged:  every row receives the latest EndDate of its run.
-- Keeping only the DIFF = 1 rows leaves each run's first StartDate next to the
-- run's last EndDate.
-- Every derived table has an alias, because SQL Server (and PostgreSQL before
-- version 16) rejects an unaliased subquery in FROM.
SELECT EmployeeId, StartDate, EndDate, Column1, Column2
FROM
(
    SELECT EmployeeId, StartDate
         , MAX(EndDate) OVER (PARTITION BY EmployeeId, RN) AS EndDate
         , Column1
         , Column2
         , DIFF
    FROM
    (
        SELECT flagged.*
             , SUM(DIFF) OVER (PARTITION BY EmployeeId ORDER BY StartDate) AS RN
        FROM
        (
            SELECT t.*
                 , CASE WHEN
                        Column1 = LAG(Column1, 1) OVER (PARTITION BY EmployeeId ORDER BY StartDate)
                    AND Column2 = LAG(Column2, 1) OVER (PARTITION BY EmployeeId ORDER BY StartDate)
                   THEN 0 ELSE 1 END AS DIFF
            FROM test_table t
        ) flagged
    ) grouped
) merged
WHERE DIFF = 1
;
This is another solution (taken from How do I group on continuous ranges). It is simpler to code and also caters for NULL values (i.e. treats NULL = NULL unlike the simple LAG() comparison). However it might not be quite as efficient on large volumes of data due to the GROUP BY
-- Gaps-and-islands by row-number difference: seq_overall numbers an
-- employee's rows by StartDate, seq_in_group numbers them within each
-- (Column1, Column2) combination. While consecutive rows keep the same
-- values both counters advance together, so their difference is constant
-- for a run and can serve as a grouping key.
WITH numbered AS (
    SELECT t.*
         , ROW_NUMBER() OVER (PARTITION BY EmployeeId, Column1, Column2 ORDER BY StartDate) AS seq_in_group
         , ROW_NUMBER() OVER (PARTITION BY EmployeeId ORDER BY StartDate) AS seq_overall
    FROM test_table t
)
SELECT
    EmployeeId,
    MIN(StartDate) AS StartDate,
    MAX(EndDate)   AS EndDate,
    Column1,
    Column2
FROM numbered
GROUP BY
    EmployeeId,
    Column1,
    Column2,
    seq_overall - seq_in_group

Postgresql select date range between two tables

I have two tables that have date fields in them. I want to select data from table 1 where the date is +/- 1 day from any date in table 2.
try something like this :
-- Pair every table1 row with every table2 row and keep the pairs where the
-- table1 date lies within one day either side of the table2 date.
-- The trailing "..." stands for any further conditions.
select *
from table1
cross join table2
where table1.date >= (table2.date - '1 day'::interval)
  and table1.date <= (table2.date + '1 day'::interval)
  and ...
If only +/- 1 day, you could use a workaround like this:
-- Build the list of candidate dates once (every table2 date, plus the day
-- before and the day after each) and keep the table1 rows whose date_col
-- appears in that list. "col1, col2, ..." stands for the columns wanted.
SELECT col1, col2, ...
FROM table1
WHERE date_col IN (
    SELECT DISTINCT date_col
    FROM table2
    UNION ALL
    SELECT DISTINCT (date_col - '1 day'::interval)
    FROM table2
    UNION ALL
    SELECT DISTINCT (date_col + '1 day'::interval)
    FROM table2
);
This performs quite well because the subquery is calculated only once and its result is cached for the comparison.