Recursive queries with two aggregate columns - postgresql

I want to compute certain statistics montly in a postgres database
-- Monthly NULL / NOT NULL flag counts for rows of tbl whose deleted_at lies in the past,
-- walking backwards one month per recursion step for about a year.
-- This form runs two scalar subqueries per month, i.e. it scans tbl twice.
WITH RECURSIVE totals(start, t_end, null_count, not_null_count) AS (
    -- Seed row: the month after the current one, with zero counts.
    -- It must supply one value for each of the four declared columns.
    VALUES (
        date_trunc('month', current_date + interval '1 month'),
        date_trunc('month', current_date + interval '2 months'),
        0::bigint,
        0::bigint
    )
    UNION ALL
    SELECT
        totals.start - interval '1 month' AS start,
        totals.start AS t_end,
        -- Count over the month this output row describes, [start - 1 month, start),
        -- so that the counts match the start / t_end emitted alongside them.
        (
            SELECT count(*) FILTER (WHERE flag IS NULL)
            FROM tbl
            WHERE created_at >= totals.start - interval '1 month'
              AND created_at < totals.start
              AND deleted_at < current_timestamp
        ) AS null_count,
        (
            SELECT count(*) FILTER (WHERE flag IS NOT NULL)
            FROM tbl
            WHERE created_at >= totals.start - interval '1 month'
              AND created_at < totals.start
              AND deleted_at < current_timestamp
        ) AS not_null_count
    FROM totals
    -- Stop once the previous row already starts a year or more before today.
    WHERE totals.start > current_date - interval '1 year'
)
SELECT start, t_end, null_count, not_null_count
FROM totals;
This would give me what I want, but would scan tbl twice.
Is there a way to to this scanning only once, like one would do in a plain query
-- Both counts in a single pass over tbl; start and t_end stand for the bounds of one month.
-- (No comma after the last select-list item: a trailing comma before FROM is a syntax error.)
SELECT
    count(*) FILTER (WHERE flag IS NULL) AS null_count,
    count(*) FILTER (WHERE flag IS NOT NULL) AS not_null_count
FROM tbl
WHERE created_at >= start
  AND created_at < t_end
  AND deleted_at < current_timestamp;
I know I could group by date_trunk('month', created_at) but doing that causes the query to sort the rows, and this is very costly in this case.

Yes. You can update your query to -
-- Monthly NULL / NOT NULL flag counts, scanning tbl once per month.
-- PostgreSQL does not allow aggregate functions directly in the recursive term of a
-- recursive query, so both counts are computed inside a LATERAL subquery that is
-- evaluated once for each row coming from totals.
WITH RECURSIVE totals(start, t_end, null_count, not_null_count) AS (
    -- Seed row: the month after the current one, with zero counts
    -- (one value for each of the four declared columns).
    VALUES (
        date_trunc('month', current_date + interval '1 month'),
        date_trunc('month', current_date + interval '2 months'),
        0::bigint,
        0::bigint
    )
    UNION ALL
    SELECT
        totals.start - interval '1 month' AS start,
        totals.start AS t_end,
        counts.null_count,
        counts.not_null_count
    FROM totals
    CROSS JOIN LATERAL (
        -- An aggregate without GROUP BY always yields exactly one row, so a month
        -- with no matching rows still produces (0, 0) and the recursion continues.
        SELECT
            count(*) FILTER (WHERE flag IS NULL) AS null_count,
            count(*) FILTER (WHERE flag IS NOT NULL) AS not_null_count
        FROM tbl
        -- Count over the month this output row describes: [start - 1 month, start).
        WHERE created_at >= totals.start - interval '1 month'
          AND created_at < totals.start
          AND deleted_at < current_timestamp
    ) AS counts
    WHERE totals.start > current_date - interval '1 year'
)
SELECT start, t_end, null_count, not_null_count
FROM totals;

Related

Postgres - Pass dynamically generated date to where clause

I need to generate series of date till current_date based on job's last run date
last run date ='2022-10-01'
current date = '2022-10-05'
generate date like
varchar dynamic_date = '2022-10-01','2022-10-02','2022-10-03','2022-10-04','2022-10-05'
and pass to where to clause
-- Pseudo-query: dynamic_date stands for the generated list of dates.
SELECT *
FROM t1
WHERE created_date IN (dynamic_date)
this is not allowed as dynamic_date is varchar and created_date is date column
trying to find efficient way to do this
You can use generate_series()
-- Keep the rows of t1 whose created_date is one of the days generated
-- from 2022-10-01 up to today, one day per step.
SELECT *
FROM t1
WHERE created_date IN (
    SELECT CAST(g.dt AS date)
    FROM generate_series(
        DATE '2022-10-01',
        current_date,
        INTERVAL '1 day'
    ) AS g(dt)
)
Or even simpler:
-- Plain range filter: every created_date from 2022-10-01 through today, both ends included.
SELECT *
FROM t1
WHERE created_date BETWEEN DATE '2022-10-01' AND current_date

PostgreSQL Time Dimension (By Hours and Days) Error

I am am building a Time Dimension table in PostgreSQL with DATE_ID and DATE_DESC.
My T-SQL (works perfectly) script is:
-- T-SQL: one row per hour, starting at 2019-01-01 00:00 and stepping while the next
-- hour is still <= 2019-12-31 00:00. DATE_ID is the yyyymmdd day number (style 112),
-- DATE_DESC the hourly datetime itself.
SET DATEFIRST 1;

WITH DATES AS (
    -- Anchor: first hour of the range.
    SELECT CAST('2019-01-01 00:00:00.000' AS datetime) AS [DATE]
    UNION ALL
    -- Step: previous hour plus one.
    SELECT DATEADD(HH, 1, [DATE])
    FROM DATES
    WHERE DATEADD(HH, 1, [DATE]) <= CAST('2019-12-31' AS datetime)
)
SELECT
    a.DATE_ID,
    a.DATE_DESC
FROM (
    SELECT
        CONVERT(int, CONVERT(char(8), [DATE], 112)) AS DATE_ID,
        [DATE] AS DATE_DESC
    FROM DATES
) AS a
ORDER BY a.DATE_ID
-- Lift the recursion limit for this statement.
OPTION (MAXRECURSION 0);
At the moment Im trying to convert this code to PostgreSQL readable one and it does not work..
Here is mine at the moment:
-- Question's attempted PostgreSQL port of the T-SQL script (reported as not working).
-- NOTE(review): this line stands in for T-SQL's SET DATEFIRST; the working version
-- given in the answer has no equivalent statement.
set EXTRACT(DOW FROM TIMESTAMP '2019-01-01 00:00:00.000')+1
-- NOTE(review): the answer's version opens with WITH RECURSIVE rather than plain WITH.
;WITH DATES AS (
SELECT CAST('2019-01-01 00:00:00.000' AS timestamp) AS DATE
UNION ALL
-- The step below adds one hour to CURRENT_DATE, not to the DATE value of the previous
-- row, so it never advances through the series.
SELECT CURRENT_DATE + INTERVAL '1 hour'
FROM DATES
WHERE CURRENT_DATE + INTERVAL '1 hour' <= CAST('2019-12-31' AS timestamp)
)
SELECT DATE_ID, DATE_DESC from
(SELECT cast(to_char((DATE)::TIMESTAMP,'yyyymmddhhmiss') as BIGInt) AS DATE_ID,
DATE AS DATE_DESC
FROM
DATES)a
order by 1
-- OPTION (MAXRECURSION 0) is carried over unchanged from the T-SQL script;
-- the answer's version omits it.
OPTION (MAXRECURSION 0)
I need all the hours (24h) between 2019-01-01 and 2019-12-31 . At the moment I think OPTION (MAXRECURSION 0) and set EXTRACT(DOW FROM TIMESTAMP '2019-01-01 00:00:00.000')+1 is not working properly.
Its a problem of Recursive CTE, In Postgresql, your desired query will be like below
-- One row per hour, starting at 2019-01-01 00:00.
-- date_id is the timestamp rendered as a yyyymmddhh24miss number, date_desc the timestamp.
WITH RECURSIVE dates AS (
    -- Anchor: first hour of the range.
    SELECT CAST('2019-01-01 00:00:00.000' AS timestamp) AS date_
    UNION ALL
    -- Step: previous hour plus one. The last generated row is 2019-12-31 00:00;
    -- raise the bound if the remaining hours of that day are needed as well.
    SELECT date_ + INTERVAL '1 hour'
    FROM dates
    WHERE date_ + INTERVAL '1 hour' <= CAST('2019-12-31' AS timestamp)
)
SELECT
    -- HH24, not HH: HH is the 12-hour field, so 01:00 and 13:00 would otherwise
    -- produce the same date_id.
    CAST(to_char(date_, 'YYYYMMDDHH24MISS') AS bigint) AS date_id,
    date_ AS date_desc
FROM dates
ORDER BY date_id;
DEMO

how to divide actual months to 5 days periods using plpgsql function

I need to get first day of each five days of month using posgrees. For example if no is 2.03 then I would get 1.03. If today would be 19.03 then I would get 15.03. I tried to use following if statements but for today;s day I got null and I'm not sure why. Also there surely is some less complicated way to do this operation. Any ideas?
-- Question's attempt at returning the first day of the current five-day period
-- (reported to return NULL for today's date).
create or replace function getFirstDayOfFive()
returns timestamp with time zone as $$
declare
firstDay timestamp;
begin
-- Every branch uses strict > and < against month start + N days, so a date that
-- falls exactly on a boundary (month start, +5 days, +10 days, ...) matches no branch.
-- NOTE(review): each `return firstDay = <expr>` returns the result of comparing the
-- never-assigned firstDay with <expr>, not <expr> itself -- presumably the source of
-- the NULL result; confirm.
if (now()::date>date_trunc('month', now()::date) and now()::date < date_trunc('month', now()::date) + interval '5 day') then
return firstDay = date_trunc('month', now()::date);
elsif (now()::date>date_trunc('month', now()::date) + interval '5 day' and now()::date < date_trunc('month', now()::date) + interval '10 day') then
return firstDay = (date_trunc('month', now()::date) + interval '5 day')::date;
elsif (now()::date>date_trunc('month', now()::date) + interval '10 day' and now()::date < date_trunc('month', now()::date) + interval '15 day') then
return firstDay = (date_trunc('month', now()::date) + interval '10 day')::date;
elsif (now()::date>date_trunc('month', now()::date) + interval '15 day' and now()::date < date_trunc('month', now()::date) + interval '20 day') then
return firstDay = (date_trunc('month', now()::date) + interval '15 day')::date;
elsif (now()::date>date_trunc('month', now()::date) + interval '20 day' and now()::date < date_trunc('month', now()::date) + interval '25 day') then
return firstDay = (date_trunc('month', now()::date) + interval '20 day')::date;
-- No branch handles dates at or beyond month start + 25 days, and there is no
-- RETURN after END IF for the case where nothing matched.
end if;
end;
$$
language plpgsql;
demo:db<>fiddle
-- First day of the five-day bucket containing my_date.
-- The numbered markers correspond to the steps explained below the query.
SELECT
    make_date(                                            -- 4
        CAST(date_part('year', my_date) AS int),          -- 3
        CAST(date_part('month', my_date) AS int),
        CAST(
            greatest(                                     -- 2
                floor(date_part('day', my_date) / 5) * 5, -- 1
                1
            ) AS int
        )
    )
Get day of the current date using date_part(). After it round it to full multiple 5 with floor(day / 5) * 5
The provided algorithm always gives the previous multiple of 5 (for 19 it gives 15, for 6 it gives 5, ...). However, for days 1 to 4 it gives 0. So this is an exception which needs to be handled. This is done here
Get year and month part of the current date
Create the expected date.
This query can be embedded into a function, of course:
-- Returns the first day of the five-day bucket containing the current date:
-- the day of month rounded down to a multiple of 5, with 1 used for days 1 to 4.
create or replace function getFirstDayOfFive()
returns timestamp with time zone as $$
declare
    firstDay timestamp;
begin
    -- Build the date from the current year, the current month and the bucket's first day.
    firstDay := make_date(
        date_part('year', now())::int,
        date_part('month', now())::int,
        greatest(floor(date_part('day', now()) / 5) * 5, 1)::int
    );
    return firstDay;
end;
$$
language plpgsql;
Edit: From the comments: Same for Last Day:
You have to change:
floor() to ceil()
the greatest(..., 1) into least(..., last day of current month)
To get the last date of the current month, you have to find the first day using date_trunc('month', ...), add one month to get the first day of the next month and subtract one day from it:
demo:db<>fiddle
-- Day-of-month number of the last day of the current five-day bucket:
-- the current day rounded up to a multiple of 5, capped at the last day of the month.
SELECT
    least(
        ceil(date_part('day', now()) / 5) * 5,
        -- First day of this month + 1 month - 1 day = last day of this month.
        date_part(
            'day',
            date_trunc('month', now()) + interval '1 month' - interval '1 day'
        )
    )

return query from psql function based on function arguments

I have a function in postgresql which returns query based on the input supplied:
-- Question's attempt: return one of two result sets depending on arg1.
-- Reported to fail with: syntax error at or near "if".
create or replace function func(last_month date, arg1 varchar default 'any')
returns table(id bigint, start_date date, end_date date) as $$
begin
-- 'multiple': ids seen in the 12 months ending at last_month (and not before
-- 2017-09-01) with activity >= 5.
if arg1 = 'multiple' then
SELECT DISTINCT id, (last_month - interval '1 year' + interval '1 month')::date as start_date,
last_month as end_date
FROM table
WHERE month BETWEEN (last_month - interval '1 year' + interval '1 month')::date
AND last_month
and month >= '2017-09-01'
AND activity >= 5;
else
-- Any other value: the same query without the activity filter.
-- Unlike the first branch, this statement has no terminating semicolon.
SELECT DISTINCT id, (last_month - interval '1 year' + interval '1 month')::date as start_date,
last_month as end_date
FROM table
WHERE month BETWEEN (last_month - interval '1 year' + interval '1 month')::date
AND last_month
and month >= '2017-09-01'
end if;
-- NOTE(review): the body uses begin / if / end if control flow but the function is
-- declared `language sql`; the working version later in the text declares
-- `language plpgsql` and prefixes each SELECT with `return query`.
end; $$ language sql;
However when I run it, it gives me the following error :
syntax error at or near "if"
I have checked returning only integer for testing purposes and it works and syntax appears to be fine. How can I return different query results from a function based on the given input.
I was able to resolve in he following way:
-- Returns the distinct ids seen in the 12 months ending at last_month (and not before
-- 2017-09-01), together with that window's start and end dates.
--   last_month : last month of the window
--   arg1       : 'multiple' additionally requires activity >= 5; any other value
--                (default 'any') applies no activity filter
create or replace function func(last_month date, arg1 varchar default 'any')
returns table(id bigint, start_date date, end_date date) as $BODY$
begin
    -- A plain IF is used instead of `case arg1 when arg1 = 'multiple'`: that simple-CASE
    -- form compares arg1 (varchar) with the boolean result of arg1 = 'multiple'.
    if arg1 = 'multiple' then
        return query
        -- Columns are qualified with the alias t because the output column `id` from
        -- RETURNS TABLE is also visible as a variable inside the body, which makes an
        -- unqualified `id` ambiguous.
        -- "table" is the question's placeholder name, quoted because TABLE is a reserved word.
        SELECT DISTINCT
            t.id,
            (last_month - interval '1 year' + interval '1 month')::date AS start_date,
            last_month AS end_date
        FROM "table" AS t
        WHERE t.month BETWEEN (last_month - interval '1 year' + interval '1 month')::date
                          AND last_month
          AND t.month >= '2017-09-01'
          AND t.activity >= 5;
    else
        return query
        SELECT DISTINCT
            t.id,
            (last_month - interval '1 year' + interval '1 month')::date AS start_date,
            last_month AS end_date
        FROM "table" AS t
        WHERE t.month BETWEEN (last_month - interval '1 year' + interval '1 month')::date
                          AND last_month
          AND t.month >= '2017-09-01';
    end if;
end;
$BODY$ language plpgsql;

TSQL get unique (not overlapping) datetime ranges

This is a question like this: TSQL get overlapping periods from datetime ranges but with a different result request.
This is the table:
-- Sample schema and data for the first example (T-SQL datetime columns).
CREATE TABLE period (
    id        int,
    starttime datetime,
    endtime   datetime,
    type      varchar(64)
);

INSERT INTO period (id, starttime, endtime, type)
VALUES
    (1, '2013-04-07 8:00',  '2013-04-07 13:00', 'Work'),
    (2, '2013-04-07 14:00', '2013-04-07 17:00', 'Work'),
    (3, '2013-04-08 8:00',  '2013-04-08 13:00', 'Work'),
    (4, '2013-04-08 14:00', '2013-04-08 17:00', 'Work'),
    (5, '2013-04-07 10:00', '2013-04-07 11:00', 'Holyday'),  /* 1h overlapping with 1 */
    (6, '2013-04-08 10:00', '2013-04-08 20:00', 'Transfer'), /* 6h overlapping with 3 and 4 */
    (7, '2013-04-08 11:00', '2013-04-08 12:00', 'Test');     /* 1h overlapping with 3 and 6 */
I need the unique not overlapping datetime ranges table.
In the before example the result would be:
'2013-04-07 08:00','2013-04-07 13:00'
'2013-04-07 14:00','2013-04-07 17:00'
'2013-04-08 08:00','2013-04-08 20:00'
It is not very important if could be time fragmentation such as:
'2013-04-08 08:00','2013-04-08 13:00'
'2013-04-08 12:00','2013-04-08 20:00'
--EDIT--
Another example:
-- Sample schema and data for the second example (T-SQL datetime columns).
CREATE TABLE period (
    id        int,
    starttime datetime,
    endtime   datetime,
    type      varchar(64)
);

INSERT INTO period (id, starttime, endtime, type)
VALUES
    (1, '2013-06-13 8:30',  '2013-06-13 12:30', ''),
    (2, '2013-06-13 8:38',  '2013-06-13 12:38', ''),
    (3, '2013-06-13 13:18', '2013-06-13 17:45', ''),
    (4, '2013-06-13 13:30', '2013-06-13 17:30', ''),
    (5, '2013-06-13 20:00', '2013-06-13 23:59', '');
this should return:
2013-06-13 08:30 - 2013-06-13 12:38
2013-06-13 13:18 - 2013-06-13 17:45
2013-06-13 20:00 - 2013-06-13 23:59
But you have only one non-overlapping period, or did I understand the question wrong?
-- Periods that overlap no other period: a row qualifies when every other row
-- ends at or before its start, or starts at or after its end.
SELECT p.*
FROM period AS p
WHERE p.id IN (
    SELECT cur.id
    FROM period AS cur
    INNER JOIN period AS other
        ON cur.id <> other.id
    WHERE other.endtime <= cur.starttime
       OR other.starttime >= cur.endtime
    GROUP BY cur.id
    -- Number of non-overlapping partners + the row itself = all rows.
    HAVING count(*) + 1 = (SELECT count(*) FROM period)
)
Result:
'2013-04-07 14:00','2013-04-07 17:00'
Update: Ok, so you want to merge overlapping ranges. Try this:
-- Part 1: periods that overlap no other period, returned unchanged.
SELECT starttime, endtime
FROM period
WHERE id IN (
    SELECT cur.id
    FROM period AS cur
    INNER JOIN period AS other
        ON cur.id <> other.id
    WHERE other.endtime < cur.starttime
       OR other.starttime > cur.endtime
    GROUP BY cur.id
    -- Number of non-overlapping partners + the row itself = all rows.
    HAVING count(*) + 1 = (SELECT count(*) FROM period)
)
UNION ALL
-- Part 2: for every overlapping pair take the earlier start and the later end,
-- then collapse the pairs grouped by the day-of-month of those two values.
SELECT min(start), max(fin)
FROM (
    SELECT
        CASE WHEN b.starttime < a.starttime THEN b.starttime ELSE a.starttime END AS start,
        CASE WHEN b.endtime < a.endtime THEN a.endtime ELSE b.endtime END AS fin
    FROM period AS a
    INNER JOIN period AS b
        ON a.id < b.id
    WHERE b.endtime >= a.starttime
      AND b.starttime <= a.endtime
) AS overlaps
GROUP BY datepart(dd, start), datepart(dd, fin)
I found this solution... I think this is not the best way, but seems to work.
-- Merges overlapping periods one id at a time (T-SQL).
-- @union_unique accumulates the merged ranges; @idset holds the ids of the accumulated
-- ranges that the period currently being processed touches on the same calendar day.
-- Variables use the @ prefix: DECLARE / SET require @-names, a # prefix names a temp table.
DECLARE @union_unique TABLE (
    id INT IDENTITY(1, 1) PRIMARY KEY,
    starttime datetime,
    endtime datetime
)
DECLARE @idset TABLE (id int)
DECLARE @i int
SET @i = 1

IF (SELECT COUNT(*) FROM period) > 0
    -- Walk ids 1 .. MAX(id); an id with no row in period matches nothing and adds nothing.
    WHILE (@i <= (SELECT MAX(id) FROM period))
    BEGIN
        DELETE FROM @idset

        -- Accumulated ranges that start on the same date as period @i and overlap it:
        -- its start falls inside them, its end falls inside them, or it covers them.
        INSERT INTO @idset
        SELECT DISTINCT t2.id
        FROM period t1
        JOIN @union_unique t2
            ON CONVERT(date, t1.starttime) = CONVERT(date, t2.starttime)
        WHERE t1.id = @i
          AND (
                  (t1.starttime >= t2.starttime AND t1.starttime <= t2.endtime)
               OR (t1.endtime >= t2.starttime AND t1.endtime <= t2.endtime)
               OR (t1.starttime <= t2.starttime AND t1.endtime >= t2.endtime)
              )

        IF (SELECT COUNT(*) FROM @idset) = 0
            -- No overlap: keep the period as its own range.
            INSERT INTO @union_unique (starttime, endtime)
            SELECT starttime, endtime FROM period WHERE id = @i
        ELSE
        BEGIN
            -- Overlap: insert one range spanning the period and every range it touches...
            INSERT INTO @union_unique (starttime, endtime)
            SELECT
                MIN(starttime),
                MAX(endtime)
            FROM (
                SELECT starttime, endtime FROM @union_unique WHERE id IN (SELECT id FROM @idset)
                UNION
                SELECT starttime, endtime FROM period WHERE id = @i
            ) alll

            -- ...then remove the ranges it replaced (the new row has a fresh identity id).
            DELETE FROM @union_unique WHERE id IN (SELECT id FROM @idset)
        END

        SET @i = @i + 1
    END

SELECT * FROM @union_unique ORDER BY starttime