Recursive PostgreSQL dynamic average - postgresql

I have a PostgreSQL dynamic averaging problem that I cannot solve:
I have data for individuals, with start and finish dates for their employment, as follows:
"parentid" "Name" "startdate" "enddate"
"01e7de72-843d-4aa5-b3ae-2e2887d1b342" "Isabelle Smith" "2011-05-23" "2016-04-16"
"027ee658-8c4d-4910-b93e-62c0900f2147" "Emelie Blogs" "2012-09-17" "2016-03-16"
"02cbb478-adf3-4a8b-a5aa-ae9f03943ce4" "Joshauh Jow" "2015-04-04" NULL
"0328f382-2845-4623-a940-ab68af5d11cc" "VICTORIA Fred" "2015-05-11" NULL
"03823a20-51bc-4ae5-ab73-79056355ea36" "Elin Tree" "2014-03-24" NULL
"03878ef8-1c3a-4310-b3d5-7b8d18634707" "Michaela Apple" "2011-07-08" NULL
"03c36926-395b-4e3c-9f77-c6214ce763a2" "Immad Cheese" "2012-05-15" NULL
"0436824c-29a6-4140-ba4a-d0f56facd8fc" "Burak Teal" "2009-06-22" NULL
"04d7a07a-0ad4-4091-98d2-a7ff35798b6f" "Roberto Purple" "2015-03-30" "2016-03-01"
"04f32c2f-887f-4e03-be67-bc023aa3a7c2" "Iftikar Orange" "2012-06-27" NULL
"055b690a-153a-49c8-8ac0-112681f79551" "Josef Red" "2014-02-21" "2016-04-13"
"055be2f6-baec-4626-b876-7ff16dc95464" "Harry Green" "2016-03-27" NULL
"05a570b0-ec76-49d9-a742-5bf08f215fec" "Sofie Blue" "2010-06-15" "2016-05-16"
"05c92e7a-efde-44f0-a57c-298cbe129259" "BANARAS Yellow" "2015-06-22" NULL
"05fe0113-9bda-407b-bd72-5bf2a9deae15" "Bengt Drury" "2015-03-30" "2016-06-16"
"063c454f-2e97-48a8-96fc-9e84d29f5d96" "Son That" "2016-03-27" NULL
"07b76b47-8086-4df6-a3da-50dcfcd2de89" "Sam This" "2015-03-21" "2016-05-24"
"082771ee-2f02-4623-abc2-696447f9f791" "Felix This" "2014-11-24" "2016-05-31"
"08e39639-176b-4f44-ae75-1025219730c6" "ROBIN That" "2015-10-26" NULL
"09ab8491-9d9a-4091-b448-8315e3b5d3f0" "Kaziah This" "2016-05-14" NULL
"0a74dd0c-e1ee-4b32-a893-c486f7402363" "Luke Him" "2015-12-16" NULL
"0b098799-7d92-47df-9778-b48edf948af9" "MARIA Her" "2015-05-11" NULL
"0b480b25-8d2b-441b-8039-48b4e9188769" "That Adebayor" "2015-04-09" NULL
"0b86b44e-f3e0-4ddf-8e72-e0d7f9470279" "This Ålund" "2012-02-07" "2016-06-05"
"0c3e13d0-f602-41da-b10c-f70072605e63" "First Ekmark" "2013-02-08" NULL
"0d2367f4-a6b4-4381-b7dc-3e0c9063285f" "Anna Check" "2015-03-13" NULL
"0e31731b-0384-43ef-adeb-503ad5a137f9" "Assign Test1" "2015-05-22" NULL
"0e3f8b57-cba2-4240-abd4-d157832ef421" "Ramises Person" "2016-10-11" NULL
"0f6af1c8-7672-4f0b-912c-91675cf52845" "Lars Surname" "2016-03-28" NULL
For this report a user would input two dates startOfPeriod and endOfPeriod
I need an SQL statement that for those dynamic dates would give me a week by week output on the number of people who were employed for each week during that period.
(A week would constitute each 7 days from the startOfPeriod date)
Is this possible in PostgreSQL and how would I do it?

Use the type daterange and the overlap operator &&.
The first query in WITH defines the period, the second generates series of weeks:
-- Weekly headcount: one row per 7-day window starting at start_of_period,
-- with the number of a_table rows whose employment range overlaps that window.
with period(start_of_period, end_of_period) as (
    -- the two report dates supplied by the user
    values ('2012-01-20'::date, '2012-02-15'::date)
),
weeks as (
    -- daterange() is [lower, upper) by default, so consecutive weeks do not overlap
    select daterange(d::date, d::date + 7) as a_week
    from period
    cross join lateral generate_series(start_of_period, end_of_period, '7d'::interval) as d
)
select
    lower(weeks.a_week) as start_of_week,
    -- Count a column of a_table, not *: the left join still emits one all-NULL
    -- row for a week nobody worked in, and count(*) would report that as 1.
    -- Assumes startdate is never NULL in a_table -- TODO confirm.
    count(a_table.startdate)
from weeks
left join a_table
    -- a NULL enddate yields a range with no upper bound (still employed)
    on daterange(a_table.startdate, a_table.enddate) && weeks.a_week
group by lower(weeks.a_week)
order by start_of_week;
start_of_week | count
---------------+-------
2012-01-20 | 4
2012-01-27 | 4
2012-02-03 | 5
2012-02-10 | 5
(4 rows)

The idea is to generate a series of weeks between the start and end dates, compute the starting and ending week of each employment, and then count the employees for each week.
I've not tested it for boundary cases, but it is something the OP could start with:
-- Weekly headcount between startDate and endDate, keyed by 'YYYY-WW' labels.
-- The labels are zero-padded, so comparing them as text follows calendar order.
WITH startDate(d) AS (VALUES ('2010-01-01'::DATE))   -- report start
, endDate(d) AS (VALUES ('2016-06-06'::DATE))        -- report end
, weeks AS (
    -- one label per 7-day step counted from the report start
    SELECT to_char(startDate.d + s.a, 'YYYY-WW') AS w
    FROM startDate
    CROSS JOIN endDate
    CROSS JOIN generate_series(0, (endDate.d - startDate.d), 7) AS s(a)
)
, emp AS (
    -- first and last employment week per person; an open-ended employment
    -- (ed IS NULL) is capped at the report end
    SELECT name
         , to_char(sd, 'YYYY-WW') AS sw
         , to_char(coalesce(ed, endDate.d), 'YYYY-WW') AS ew
    FROM startDate
    CROSS JOIN endDate
    CROSS JOIN public.so
    -- Keep everyone whose employment overlaps the report period. Filtering on
    -- sd > startDate.d alone would drop people hired before the period began,
    -- although they are employed during it.
    WHERE sd <= endDate.d
      AND coalesce(ed, endDate.d) >= startDate.d
)
SELECT
    w.w
  , (SELECT ARRAY_AGG(name) FROM emp WHERE w.w BETWEEN sw AND ew) AS emps
  , (SELECT count(name) FROM emp WHERE w.w BETWEEN sw AND ew) AS empCount
FROM weeks w
ORDER BY w.w;
Test setup
-- Fixture for the weekly headcount query: a name plus the start (sd) and
-- end (ed) date of an employment; ed is NULL on some rows.
CREATE TABLE public.so (
    name TEXT,
    sd   DATE,
    ed   DATE
);

INSERT INTO public.so (name, sd, ed)
VALUES
    ('a', '2011-05-23', '2016-04-16'),
    ('b', '2012-09-17', '2016-03-16'),
    ('c', '2009-12-12', NULL),
    ('d', '2015-03-30', '2016-03-01'),
    ('e', '2012-06-27', NULL),
    ('f', '2014-02-21', '2016-04-13'),
    ('g', '2016-03-27', NULL),
    ('h', '2010-06-15', '2016-05-16');

Related

Postgres query how to club date and time if time is not null

-- Activated campaigns with their effective start date, time-slot type and start time.
WITH data AS (
    SELECT
        c."id",
        c."accountId",
        c."name",
        c."campaignType",
        c."status",
        -- a batch's initiatedAt, when present, replaces the scheduled start date
        (CASE
            WHEN cb."executionDetails"->>'initiatedAt' IS NULL THEN csr."startDate"
            ELSE cast(cb."executionDetails"->>'initiatedAt' AS TIMESTAMP)
        END) AS "startDate",
        -- slot information is only reported while no batch has been initiated
        CASE
            WHEN cb."executionDetails"->>'initiatedAt' IS NOT NULL THEN NULL
            ELSE csr."timeSlot"->>'type'
        END AS "timeSlotType",
        (CASE
            WHEN cb."executionDetails"->>'initiatedAt' IS NOT NULL THEN NULL
            ELSE
                CASE
                    WHEN csr."timeSlot"->>'type' = 'MORNING' THEN '07:00'
                    WHEN csr."timeSlot"->>'type' = 'AFTERNOON' THEN '12:00'
                    WHEN csr."timeSlot"->>'type' = 'EVENING' THEN '17:00'
                    WHEN csr."timeSlot"->>'type' = 'CUSTOM'
                        THEN (csr."timeSlot"->>'startTime')::json->>'hour'
                             || ':'
                             || ((csr."timeSlot"->>'startTime')::json->>'minute')
                    ELSE csr."timeSlot"->>'startTime'
                END
        END)::TIME AS "startTime",
        split_part(cb."batchRunId", '-', 6)::decimal AS batchNumber,
        'CAMPAIGN' AS type
    FROM "Campaigns" c
    LEFT JOIN "CampaignScheduleRequests" csr
        ON c."id" = csr."campaignId"
    LEFT JOIN "CampaignBatches" cb
        ON csr."id" = cb."requestId"
)
SELECT *
FROM data AS d
WHERE d."status" IN ('ACTIVATED')
OUTPUT of the above query
Required o/p
Start time column should be concatenation of start date and startTime
-- Activated campaigns with their effective start date and start time, plus a
-- combined "newStartTimestamp" (start date + start time).
WITH data AS (
    SELECT
        c."id",
        c."accountId",
        c."name",
        c."campaignType",
        c."status",
        -- initiatedAt wins when present; otherwise fall back to the scheduled date
        coalesce(
            (cb."executionDetails"->>'initiatedAt')::timestamp,
            csr."startDate"
        ) AS "startDate",
        -- NOTE(review): the ? operator exists for jsonb only, and it is true even
        -- when the key holds a JSON null -- confirm the column type and data.
        CASE
            WHEN cb."executionDetails" ? 'initiatedAt' THEN NULL
            ELSE csr."timeSlot"->>'type'
        END AS "timeSlotType",
        (CASE
            WHEN cb."executionDetails" ? 'initiatedAt' THEN NULL
            ELSE
                CASE csr."timeSlot"->>'type'
                    WHEN 'MORNING' THEN '07:00'
                    WHEN 'AFTERNOON' THEN '12:00'
                    WHEN 'EVENING' THEN '17:00'
                    -- ->> and || share one precedence level and associate left,
                    -- so each ->> lookup must be parenthesised before concatenating
                    WHEN 'CUSTOM' THEN ((csr."timeSlot"->'startTime')->>'hour')
                                       || ':'
                                       || ((csr."timeSlot"->'startTime')->>'minute')
                    ELSE csr."timeSlot"->>'startTime' -- invalid format could cause problems with ::time
                END
        END)::TIME AS "startTime",
        split_part(cb."batchRunId", '-', 6)::decimal AS batchNumber,
        'CAMPAIGN' AS type
    FROM "Campaigns" c
    LEFT JOIN "CampaignScheduleRequests" csr ON c."id" = csr."campaignId"
    LEFT JOIN "CampaignBatches" cb ON csr."id" = cb."requestId"
    WHERE c."status" IN ('ACTIVATED')
)
SELECT *,
    -- a NULL start time must not nullify the start date, hence the 00:00 default
    "startDate" + coalesce("startTime", '00:00'::time) AS "newStartTimestamp"
FROM data;
Use coalesce() to shorten the null replacements:
CASE WHEN cb."executionDetails"->>'initiatedAt' IS NULL
THEN csr."startDate"
ELSE cast(cb."executionDetails"->>'initiatedAt' as TIMESTAMP)
END
is the same as
coalesce((cb."executionDetails"->>'initiatedAt')::timestamp, csr."startDate")
In CASE you can do a single expression evaluation:
CASE expression
WHEN value1 THEN...
WHEN value2 THEN...
instead of a series of checks
CASE
WHEN expression=value1 THEN...
WHEN expression=value2 THEN...
Instead of casting back to json after using the ->> operator that gives you text: (jsonb->>'key1')::json->>'key2', you can just use -> to keep json output the first time.
? operator lets you check the presence of a key json?'key1' without having to check for null in an attempted read json->>'key1' is null.
You can add a time to a date or timestamp directly, the same way you'd add an interval. And to avoid nullifying your initiatedAt-based startDate when adding a null-valued startTime, you can use coalesce() again - which I think was your main question.

find the first date after a given date in SQL

I have two tables related to vital signs like below:
enter image description here
and have the second table
enter image description here
I want to show the table below:
enter image description here
My problem is when I used with statement in SQL to join the tables, the date sometimes didn't show the first record after the date of admission, but it showed the second or third one like this:
enter image description here
I want to show the first and last record after the admission date.
Can anyone help me with this?
-- First and last non-NULL BMI assessment on or after each patient's admission date.
WITH admissionCTE AS (
    -- NOTE(review): this CTE has no FROM clause in the post, so the admission
    -- table cannot be named here; it has to be added before the query can run.
    -- DISTINCT ON (id) without an ORDER BY keeps an arbitrary admission per id.
    SELECT DISTINCT ON (id)
        id,
        dateofadmission::date AS Date_of_admission
    WHERE dateofadmission::date IS NOT NULL
      AND dateofadmission::date < now()
),
first_BMI_CTE AS (
    -- every non-NULL BMI reading; the earliest one after admission is chosen
    -- by the DISTINCT ON / ORDER BY of the final SELECT
    SELECT
        id,
        bmi,
        date::date AS first_assess
    FROM vital_signs
    WHERE vital_signs.bmi IS NOT NULL
),
last_BMI_CTE AS (
    -- readings taken on each id's most recent day that has a BMI value; the
    -- inner query applies the same bmi filter so that day always has a match
    SELECT
        id,
        bmi,
        date
    FROM vital_signs
    WHERE bmi IS NOT NULL
      AND (id, date::date) IN (
          SELECT
              id,
              max(date::date) AS last_date_assessment
          FROM vital_signs
          WHERE bmi IS NOT NULL
          GROUP BY id
      )
)
SELECT DISTINCT ON (first_BMI_CTE.id)
    first_BMI_CTE.id AS "ID",
    admissionCTE.Date_of_admission::date,
    first_BMI_CTE.first_assess AS "First_assessment_date",
    first_BMI_CTE.bmi AS "First BMI",
    -- the last assessment is blanked when it is the same day as the first one
    CASE
        WHEN last_BMI_CTE.date::date = first_BMI_CTE.first_assess::date THEN NULL
        ELSE last_BMI_CTE.date
    END AS "Last_assessment_date",
    CASE
        WHEN last_BMI_CTE.date::date = first_BMI_CTE.first_assess::date THEN NULL
        ELSE last_BMI_CTE.bmi
    END AS "Last BMI"
FROM first_BMI_CTE
INNER JOIN admissionCTE
    ON admissionCTE.id = first_BMI_CTE.id
   AND first_BMI_CTE.first_assess >= admissionCTE.Date_of_admission
LEFT JOIN last_BMI_CTE
    ON last_BMI_CTE.id = first_BMI_CTE.id
-- DISTINCT ON keeps the first row of each id in this order: the earliest
-- assessment on or after admission, paired with the latest reading of the last day
ORDER BY
    first_BMI_CTE.id,
    first_BMI_CTE.first_assess,
    last_BMI_CTE.date DESC;

postgresql/pgAdmin - accepting start_date & end_date arguments as input on query run

This is a postgresql db I'm working with using pgAdmin.
Forgive me if this is somewhat common knowledge, I'm new to postgresql in particular... and I didn't find any direct answers through prior searching.
I'm wondering if there's a simple way to implement start_time/end_time arguments as inputs when the query runs, using pgAdmin and any of its built-in features.
The data type I'm working with here is "timestamp with timezone".
Looking for some direction on the best way to implement this.
I considered declaring start_time and end_time as variables, then using WHERE to filter based on those, but without 3rd party/application level solutions, is there a way to prompt for input when the query runs inside of pgadmin?
I appreciate any suggestions- here's my attempt at getting something working, but it errors out: query has no destination for result data.
-- Attempt to parameterise a seller/menu report with two PL/pgSQL variables.
-- As written it fails with "query has no destination for result data": the
-- SELECT below has no INTO target, and a DO block returns no data to the client.
do $$
DECLARE
-- report bounds; the post says the filtered column is "timestamp with timezone",
-- while these are declared as plain timestamp
start_date timestamp := '2020-10-1';
end_date timestamp := '2020-10-5';
begin
-- DISTINCT ON keeps one row per (account, menu, dish); together with the
-- ORDER BY at the end that is the row with the most recent order_item.created
select distinct on (account.id, menu.name, kitchen_item.name)
account.id as "Account ID",
account.firstname as "Seller First Name",
account.lastname as "Seller Last Name",
account.email as "Seller Email",
account.phone as "Seller Phone",
address.address as "Seller Address (Street)",
address.address_2 as "Seller Address 2",
account.zip_code as "Seller Zip",
address.neighborhood as "Seller Neighborhood",
menu.name as "Name of active menu",
kitchen_item.name as "Dishes",
kitchen_item.price as "Price",
kitchen_item.daily_max_orders as "Quantity",
menu.pickup_start_time as "Start time",
menu.pickup_end_time as "End time",
menu.repeat_mon as "Monday",
menu.repeat_tues as "Tuesday",
menu.repeat_wed as "Wednesday",
menu.repeat_thurs as "Thursday",
menu.repeat_fri as "Friday",
menu.repeat_sat as "Saturday",
menu.repeat_sun as "Sunday",
order_item.created as "Date of last sale"
from account
left join store on account.id = store.account_id
left join menu on store.id = menu.store_id
left join menu_item on menu.id = menu_item.menu_id
left join kitchen_item on (menu_item.kitchen_item_id = kitchen_item.id and store.id = kitchen_item.store_id)
left join orders on (orders.store_id = store.id)
left join order_item on (order_item.order_id = orders.id)
-- the two inner joins below drop accounts whose store has no address row
join store_address on store.id = store_address.store_id
join address on store_address.address_id = address.id
-- NOTE(review): filtering on orders.placed in WHERE removes rows where the
-- left-joined orders is NULL, so accounts without orders in range disappear;
-- BETWEEN is inclusive at both ends
where orders.placed BETWEEN start_date AND end_date
order by account.id asc, menu.name, kitchen_item.name asc, order_item.created desc;
end $$;
DO creates an anonymous function that returns no data.
You can use WITH:
-- Declare the report bounds once in a single-row CTE and join it to the query.
WITH input (start_date, end_date) AS (
    SELECT
        '2020-10-01'::timestamp AS start_date,
        '2020-10-05'::timestamp AS end_date
)
SELECT ...
FROM ...
-- input has exactly one row; CROSS JOIN is used because a plain JOIN needs ON or USING
CROSS JOIN input
WHERE orders.placed BETWEEN input.start_date AND input.end_date

Selecting rows only if meeting criteria

I am new to PostgreSQL and to database queries in general.
I have a list of user_id with university courses taken, date started and finished.
Some users have multiple entries and sometimes the start date or finish date (or both) are missing.
I need to retrieve the longest course taken by a user or, if start date is missing, the latest.
If multiple choices are still available, then pick random among the multiple options.
For example
on user 2 (below) I want to get only "Economics and Politics" because it has the latest date;
on user 6, only "Electrical and Electronics Engineering" because it is the longer course.
The query I did doesn't work (and I think I am off-track):
-- Per user and field of study: earliest start and latest end over the
-- de-duplicated education rows, listed by user.
WITH distinct_rows AS (
    SELECT DISTINCT
        user_id,
        started_at,
        ended_at,
        field_of_study
    FROM educations
)
SELECT
    distinct_rows.user_id,
    min(distinct_rows.started_at) AS Started_on,
    max(distinct_rows.ended_at) AS Completed_on,
    distinct_rows.field_of_study
FROM distinct_rows
GROUP BY
    distinct_rows.user_id,
    distinct_rows.field_of_study
ORDER BY distinct_rows.user_id
as the result is:
User_id Started_on Completed_on Field_of_studies
2 "2001-01-01" "" "International Economics"
2 "" "2002-01-01" "Economics and Politics"
3 "1992-01-01" "1999-01-01" "Economics, Management of ..."
5 "2012-01-01" "2016-01-01" ""
6 "2005-01-01" "2009-01-01" "Electrical and Electronics Engineering"
6 "2011-01-01" "2012-01-01" "Finance, General"
6 "" "" ""
6 "2010-01-01" "2012-01-01" "Financial Mathematics"
I think this query should do what you need, it relies on calculating the difference in days between ended_at and started_at, and uses 0001-01-01 if the started_at is null (making it a really long interval):
-- One education row per user: the course with the greatest length in days.
-- A missing started_at is treated as 0001-01-01, so such a course outranks any
-- fully dated one and, among those, the latest ended_at wins.
select distinct on (user_id)
    user_id,
    started_at,
    ended_at,
    field_of_study
from educations
-- a NULL ended_at gives a NULL length, which can never be the maximum, so
-- those rows (and users who only have such rows) are left out
where ended_at is not null
-- DISTINCT ON keeps the first row per user in this order. When several courses
-- tie on length, one whole row is returned; separate max() calls per column
-- could combine values taken from different courses.
order by
    user_id,
    ended_at::date - coalesce(started_at, '0001-01-01')::date desc
;
Sample SQL Fiddle

How to select first and last records between certain date parameters?

I need a Query to extract the first instance and last instance only between date parameters.
I have a Table recording financial information with financialyearenddate field linked to Company table via companyID. Each company is also linked to programme table and can have multiple programmes. I have a report to pull the financials for each company
on a certain programme, which I have adjusted to pull only the first and last instance (using MIN & MAX); however, I need the first instance
after a certain date parameter and the last instance before a certain date parameter.
Example: Company ABloggs has financials for 1999,2000,2001,2004,2006,2007,2009 but the programme ran from 2001 to 2007 so I only want
the first financial record and last financial record between those years i.e. 2001 & 2007 records. Any help appreciated.
At the moment I am using 2 queries as I needed the data in a hurry but I need it in 1 query and only where financial year end dates are between parameters and only where there are minimum of 2 GVA records for a company.
Query1:
-- GVA figures of each company for its most recent financial year end.
SELECT
    gva.ccx_companyname,
    gva.ccx_depreciation,
    gva.ccx_exportturnover,
    gva.ccx_financialyearenddate,
    gva.ccx_netprofitbeforetax,
    gva.ccx_totalturnover,
    gva.ccx_totalwages,
    gva.ccx_statusname,
    gva.ccx_status,
    gva.ccx_company,
    gva.ccx_totalwages + gva.ccx_netprofitbeforetax + gva.ccx_depreciation AS GVA,
    gva.ccx_nofulltimeequivalentemployees
FROM Filteredccx_gva AS gva
INNER JOIN (
    -- latest financial year end per company, taken over rows of every status
    SELECT
        src.ccx_companyname,
        MAX(src.ccx_financialyearenddate) AS LatestDate
    FROM Filteredccx_gva AS src
    GROUP BY src.ccx_companyname
) AS latest
    ON latest.ccx_companyname = gva.ccx_companyname
   AND latest.LatestDate = gva.ccx_financialyearenddate
-- NOTE(review): ACTUAL is unquoted, so it is read as an identifier and not as
-- a string literal -- confirm what was intended
WHERE gva.ccx_status = ACTUAL
Query2:
-- GVA figures of each company for its earliest financial year end.
SELECT
    gva.ccx_companyname,
    gva.ccx_depreciation,
    gva.ccx_exportturnover,
    gva.ccx_financialyearenddate,
    gva.ccx_netprofitbeforetax,
    gva.ccx_totalturnover,
    gva.ccx_totalwages,
    gva.ccx_statusname,
    gva.ccx_status,
    gva.ccx_company,
    gva.ccx_totalwages + gva.ccx_netprofitbeforetax + gva.ccx_depreciation AS GVA,
    gva.ccx_nofulltimeequivalentemployees
FROM Filteredccx_gva AS gva
INNER JOIN (
    -- earliest financial year end per company, taken over rows of every status
    SELECT
        src.ccx_companyname,
        MIN(src.ccx_financialyearenddate) AS FirstDate
    FROM Filteredccx_gva AS src
    GROUP BY src.ccx_companyname
) AS earliest
    ON earliest.ccx_companyname = gva.ccx_companyname
   AND earliest.FirstDate = gva.ccx_financialyearenddate
-- NOTE(review): ACTUAL is unquoted, so it is read as an identifier and not as
-- a string literal -- confirm what was intended
WHERE gva.ccx_status = ACTUAL
Can't you just add a where clause using the first and last date parameters. Something like this:
-- Template: first and last date per company inside the requested window.
-- @firstDate / @lastDate are the report's date parameters.
SELECT <companyId>, MIN(<date>), MAX(<date>)
FROM <table>
WHERE <date> BETWEEN @firstDate AND @lastDate
GROUP BY <companyId>
-- Demo: first and last financial year per company inside its programme window,
-- restricted to companies with at least two records in that window.
declare @programme table (ccx_companyname varchar(max), start_year int, end_year int);
insert into @programme (ccx_companyname, start_year, end_year) values
('ABloggs', 2001, 2007);
declare @companies table (ccx_companyname varchar(max), ccx_financialyearenddate int);
insert into @companies (ccx_companyname, ccx_financialyearenddate) values
('ABloggs', 1999)
,('ABloggs', 2000)
,('ABloggs', 2001)
,('ABloggs', 2004)
,('ABloggs', 2006)
,('ABloggs', 2007)
,('ABloggs', 2009);
select
    c.ccx_companyname,
    min(c.ccx_financialyearenddate) as first_year,
    max(c.ccx_financialyearenddate) as last_year
from @companies c
inner join @programme p on c.ccx_companyname = p.ccx_companyname
-- keep only the years that fall inside the programme window (both ends inclusive)
where c.ccx_financialyearenddate >= p.start_year and c.ccx_financialyearenddate <= p.end_year
group by c.ccx_companyname
-- a single record cannot supply both a first and a last value
having count(*) > 1;
You can combine your two original queries into a single query by including the MIN and MAX aggregates in the same GROUP BY query of the virtual table. Also including COUNT() and HAVING COUNT() > 1 ensures company must have at least 2 dates. So query should look like:
-- GVA figures of each company for both its earliest and its latest financial
-- year end, for companies with more than one matching record.
-- NOTE(review): no date-range filter is applied in this query.
SELECT
    gva.ccx_companyname,
    gva.ccx_depreciation,
    gva.ccx_exportturnover,
    gva.ccx_financialyearenddate,
    gva.ccx_netprofitbeforetax,
    gva.ccx_totalturnover,
    gva.ccx_totalwages,
    gva.ccx_statusname,
    gva.ccx_status,
    gva.ccx_company,
    gva.ccx_totalwages + gva.ccx_netprofitbeforetax + gva.ccx_depreciation AS GVA,
    gva.ccx_nofulltimeequivalentemployees
FROM Filteredccx_gva AS gva
INNER JOIN (
    -- first and last year end per company and status; HAVING keeps only the
    -- groups that contain at least two records
    SELECT
        src.ccx_companyname,
        src.ccx_status,
        MIN(src.ccx_financialyearenddate) AS FirstDate,
        MAX(src.ccx_financialyearenddate) AS LastDate,
        COUNT(*) AS NumDates
    FROM Filteredccx_gva AS src
    -- NOTE(review): ACTUAL is unquoted, so it is read as an identifier and not
    -- as a string literal -- confirm what was intended
    WHERE src.ccx_status = ACTUAL
    GROUP BY src.ccx_companyname, src.ccx_status
    HAVING COUNT(*) > 1
) AS bounds
    ON bounds.ccx_companyname = gva.ccx_companyname
   AND bounds.ccx_status = gva.ccx_status
   AND gva.ccx_financialyearenddate IN (bounds.FirstDate, bounds.LastDate)
ORDER BY
    gva.ccx_companyname,
    gva.ccx_financialyearenddate