How to join on condition count - postgresql

I have two tables. First one has id with versions:
Fid start_dt end_dt text
1 2021-09-01 00:00:00 2021-10-09 23:59:59 first_row
1 2021-10-10 00:00:00 2999-12-12 23:59:59 second_row
2 2021-10-02 00:00:00 2999-12-12 23:59:59 third_row
3 2021-09-05 00:00:00 2021-09-06 23:59:59 fourth_row
3 2021-09-07 00:00:00 2999-12-12 23:59:59 fifth_row
And second table with calls:
id dt Fid
1 2021-09-01 05:00:00 1
2 2021-10-01 18:00:00 2
3 2021-10-11 05:00:00 1
4 2021-10-01 16:50:00 2
The desired result is
id text
1 first_row
2 third_row
3 second_row
4 third_row
I want this script for rows in t1 which have several versions
select id,text
from t2
left join t1 on t1.Fid = t2.Fid and t2.dt between t1.start_dt and t1.end_dt
And this script for rows in t1 which have one version
select id,text
from t2
left join t1 on t1.Fid = t2.Fid and t2.dt between t1.start_dt and t1.end_dt
How to do this? I thought about setting to the row flag depending on the number of versions, but maybe there is an easier way?

You can do (Fiddle)
with x as (select fid,count(*) as qty from tb1 group by fid)
select tb2.id, tb1.text
from tb2 join tb1 on tb2.fid = tb1.fid and tb2.dt between tb1.start_dt and end_dt
join x on tb2.fid = x.fid and x.qty > 1
union
select tb2.id, tb1.text
from tb2 join tb1 on tb2.fid = tb1.fid
join x on tb2.fid = x.fid and x.qty = 1
order by id;

Related

Previous quarter max month value in big Query

I want to see the previous quarter's max month(as a new column) value in the current quarter using a big query.
When in Q1 2022 it should display Q4 December 2021 as a new column
When in Q2 2022 it should display Q1 March 2022 (in this case 60000)
When in Q3 2022 it should display Q2 June 2022 (in this case 40000)
My data is like below
date Sales
2022-09-01 10000
2022-08-02 20000
2022-07-01 30000
2022-06-01 40000
2022-05-01 30000
2022-04-01 50000
2022-03-01 60000
2022-02-01 10000
2022-01-01 89090
Output
your given result table fits not the task: previous quarter's max month.
Here several outputs. Do you want the maximum of the last months, or the values from three month ago? Both columns are included here.
The formula of the 1st month of quarter can be edit to the last month by changing the 1 to -1. As You want zero values for all other months, you need to multiply this with the other column.
Window function do the job. But for each month there must be one row. This is filled up with the all_months table.
with tbl as
(
Select date("2022-09-01") as dates, 10000 money
union all select date("2022-08-02"), 20000
union all select date("2022-07-01"), 30000
union all select date("2022-06-01"), 40000
union all select date("2022-05-01"), 30000
union all select date("2022-04-01"), 50000
union all select date("2022-03-01"), 60000
union all select date("2022-02-01"), 10000
union all select date("2022-01-01"), 89090
),
all_months as
(select dates,0 from (Select max(dates) A, min(dates) B from tbl), unnest(generate_date_array(A,B,interval 1 month)) dates)
select *,
if( date_trunc(dates,quarter)= date_trunc(date_sub(dates,interval 1 month),quarter),0,1) as first_month_of_quarter,
lag(money_max_this_quarter) over (order by dates) as money_max_last_quarter,
lag(money,3) over (order by dates) as money_three_months_ago,
from
(
select * ,
max(money) over (partition by date_trunc(dates,quarter ) ) as money_max_this_quarter
from
(
Select dates,sum(money) as money from tbl group by 1
union all select * from all_months
)
)
order by 1 desc

Query to assign max date among child items to parent item

I've data in two Postgres tables as below
table1
wid w.name owner
1 abc own1
2 def own2
3 ghi own3
table2
vid wid vname date
9 1 vnam1 10-7-2020
10 1 vnam1 10-8-2018
11 1 vnam2 10-9-2019
12 1 vnam2 10-8-2020
13 2 vnam3 10-10-2017
14 2 vnam3 10-08-2020
15 2 vnam4 10-10-2018
16 2 vnam4 10-10-2019
17 3 vnam5 10-06-2016
18 3 vnam5 10-07-2020
19 3 vnam6 10-08-2020
I was able to get max date for each of the table2 vname related to w.name in table2 but I'm looking for something like this in the result so that I can decide each w.name max date.
wid w.name owner vname maxdate
1 abc own1 vnam2 10-08-2020 (Max date out of 4 values of vnames) <br>
2 def own2 vnam3 10-08-2020
3 ghi own3 vnam6 10-08-2020
Use DISTINCT ON to achieve this.
select distinct on (t1.wid)
t1.wid, t1."w.name", t1.owner, t2.vname, t2.date
from table1 t1
join table2 t2 on t2.wid = t1.wid
order by t1.wid, t2.date desc;
Working fiddle

Select dates missing data in a range

I have a postgres table test_table that looks like this:
date | test_hour
------------+-----------
2000-01-01 | 1
2000-01-01 | 2
2000-01-01 | 3
2000-01-02 | 1
2000-01-02 | 2
2000-01-02 | 3
2000-01-02 | 4
2000-01-03 | 1
2000-01-03 | 2
I need to select all the dates which don't have test_hour = 1, 2, and 3, so it should return
date
------------
2000-01-03
Here is what I have tried:
SELECT date FROM test_table WHERE test_hour NOT IN (SELECT generate_series(1,3));
But that only returns dates that have extra hours beyond 1, 2, 3
You can use aggregation and conditional HAVING clauses, like so:
SELECT mydate
FROM mytable
GROUP BY mydate
HAVING
MAX(CASE WHEN test_hour = 1 THEN 1 END) != 1
OR MAX(CASE WHEN test_hour = 2 THEN 1 END) != 1
OR MAX(CASE WHEN test_hour = 3 THEN 1 END) != 1
Another possibility would be to join it against the series (or another subquery containing the hours) and do a [distinct] count on the hours aggregatet per date:
select date from tst
inner join (select generate_series(1,3) "hour") hours on hours.hour = tst.hour
group by tst.date
having count(distinct tst.hour) < 3;
or
select date from tst
where hour in (select generate_series(1,3))
group by date
having count(distinct tst.hour) < 3;
[You don't need the distinct if date/hour combinations in Your table are unique]
A solution using set difference, giving you exactly the rows that are missing:
(SELECT DISTINCT
date, all_hour
FROM test_table
CROSS JOIN generate_series(1,3) all_hour)
EXCEPT
(TABLE test_table)
And a solution using an array aggregate and the array contains operator:
SELECT date
FROM test_table
GROUP BY date
HAVING NOT array_agg(test_hour) #> ARRAY(SELECT generate_series(1,3))
(online demos)

One SQL Stored Procedure to get cut off date of two different cut off date format

I have one system that read from two client databases. For the two clients, both of them have different format of cut off date:
1) Client A: Every month at 15th. Example: 15-12-2016.
2) Client B: Every first day of the month. Example: 1-1-2017.
The cut off date are stored in the table as below:
Now I need a single query to retrieve the current month's cut off date of the client. For instance, today is 15-2-2017, so the expected cut off date for both clients should be as below:
1) Client A: 15-1-2017
2) Client B: 1-2-2017
How can I accomplish this in a single Stored Procedure? For client B, I can always get the first day of the month. But this can't apply to client A since their cut off is last month's date.
Might be something like this you are looking for:
DECLARE #DummyClient TABLE(ID INT IDENTITY,ClientName VARCHAR(100));
DECLARE #DummyDates TABLE(ClientID INT,YourDate DATE);
INSERT INTO #DummyClient VALUES
('A'),('B');
INSERT INTO #DummyDates VALUES
(1,{d'2016-12-15'}),(2,{d'2017-01-01'});
WITH Numbers AS
( SELECT 0 AS Nr
UNION ALL SELECT 1
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5
UNION ALL SELECT 6
UNION ALL SELECT 7
UNION ALL SELECT 9
UNION ALL SELECT 10
UNION ALL SELECT 11
UNION ALL SELECT 12
UNION ALL SELECT 13
UNION ALL SELECT 14
UNION ALL SELECT 15
UNION ALL SELECT 16
UNION ALL SELECT 17
UNION ALL SELECT 18
UNION ALL SELECT 19
UNION ALL SELECT 20
UNION ALL SELECT 21
UNION ALL SELECT 22
UNION ALL SELECT 23
UNION ALL SELECT 24
)
,ClientExt AS
(
SELECT c.*
,MIN(d.YourDate) AS MinDate
FROM #DummyClient AS c
INNER JOIN #DummyDates AS d ON c.ID=d.ClientID
GROUP BY c.ID,c.ClientName
)
SELECT ID,ClientName,D
FROM ClientExt
CROSS APPLY(SELECT DATEADD(MONTH,Numbers.Nr,MinDate)
FROM Numbers) AS RunningDate(D);
The result
ID Cl Date
1 A 2016-12-15
1 A 2017-01-15
1 A 2017-02-15
1 A 2017-03-15
1 A 2017-04-15
1 A 2017-05-15
1 A 2017-06-15
1 A 2017-07-15
1 A 2017-09-15
1 A 2017-10-15
1 A 2017-11-15
1 A 2017-12-15
1 A 2018-01-15
1 A 2018-02-15
1 A 2018-03-15
1 A 2018-04-15
1 A 2018-05-15
1 A 2018-06-15
1 A 2018-07-15
1 A 2018-08-15
1 A 2018-09-15
1 A 2018-10-15
1 A 2018-11-15
1 A 2018-12-15
2 B 2017-01-01
2 B 2017-02-01
2 B 2017-03-01
2 B 2017-04-01
2 B 2017-05-01
2 B 2017-06-01
2 B 2017-07-01
2 B 2017-08-01
2 B 2017-10-01
2 B 2017-11-01
2 B 2017-12-01
2 B 2018-01-01
2 B 2018-02-01
2 B 2018-03-01
2 B 2018-04-01
2 B 2018-05-01
2 B 2018-06-01
2 B 2018-07-01
2 B 2018-08-01
2 B 2018-09-01
2 B 2018-10-01
2 B 2018-11-01
2 B 2018-12-01
2 B 2019-01-01

T-SQL - Data Islands and Gaps - How do I summarise transactional data by month?

I'm trying to query some transactional data to establish the CurrentProductionHours value for each Report at the end of each month.
Providing there has been a transaction for each report in each month, that's pretty straight-forward... I can use something along the lines of the code below to partition transactions by month and then pick out the rows where TransactionByMonth = 1 (effectively, the last transaction for each report each month).
SELECT
ReportId,
TransactionId,
CurrentProductionHours,
ROW_NUMBER() OVER (PARTITION BY [ReportId], [CalendarYear], [MonthOfYear]
ORDER BY TransactionTimestamp desc
) AS TransactionByMonth
FROM
tblSource
The problem that I have is that there will not necessarily be a transaction for every report every month... When that's the case, I need to carry forward the last known CurrentProductionHours value to the month which has no transaction as this indicates that there has been no change. Potentially, this value may need to be carried forward multiple times.
Source Data:
ReportId TransactionTimestamp CurrentProductionHours
1 2014-01-05 13:37:00 14.50
1 2014-01-20 09:15:00 15.00
1 2014-01-21 10:20:00 10.00
2 2014-01-22 09:43:00 22.00
1 2014-02-02 08:50:00 12.00
Target Results:
ReportId Month Year ProductionHours
1 1 2014 10.00
2 1 2014 22.00
1 2 2014 12.00
2 2 2014 22.00
I should also mention that I have a date table available, which can be referenced if required.
** UPDATE 05/03/2014 **
I now have query which is genertating results as shown in the example below but I'm left with islands of data (where a transaction existed in that month) and gaps in between... My question is still similar but in some ways a little more generic - What is the best way to fill gaps between data islands if you have the dataset below as a starting point?
ReportId Month Year ProductionHours
1 1 2014 10.00
1 2 2014 12.00
1 3 2014 NULL
2 1 2014 22.00
2 2 2014 NULL
2 3 2014 NULL
Any advice about how to tackle this would be greatly appreciated!
Try this:
;with a as
(
select dateadd(m, datediff(m, 0, min(TransactionTimestamp))+1,0) minTransactionTimestamp,
max(TransactionTimestamp) maxTransactionTimestamp from tblSource
), b as
(
select minTransactionTimestamp TT, maxTransactionTimestamp
from a
union all
select dateadd(m, 1, TT), maxTransactionTimestamp
from b
where tt < maxTransactionTimestamp
), c as
(
select distinct t.ReportId, b.TT from tblSource t
cross apply b
)
select c.ReportId,
month(dateadd(m, -1, c.TT)) Month,
year(dateadd(m, -1, c.TT)) Year,
x.CurrentProductionHours
from c
cross apply
(select top 1 CurrentProductionHours from tblSource
where TransactionTimestamp < c.TT
and ReportId = c.ReportId
order by TransactionTimestamp desc) x
A similar approach but using a cartesian to obtain all the combinations of report ids/months.
in the first step.
A second step adds to that cartesian the maximum timestamp from the source table where the month is less or equal to the month in the current row.
Finally it joins the source table to the temp table by report id/timestamp to obtain the latest source table row for every report id/month.
;
WITH allcombinations -- Cartesian (reportid X yearmonth)
AS ( SELECT reportid ,
yearmonth
FROM ( SELECT DISTINCT
reportid
FROM tblSource
) a
JOIN ( SELECT DISTINCT
DATEPART(yy, transactionTimestamp)
* 100 + DATEPART(MM,
transactionTimestamp) yearmonth
FROM tblSource
) b ON 1 = 1
),
maxdates --add correlated max timestamp where the month is less or equal to the month in current record
AS ( SELECT a.* ,
( SELECT MAX(transactionTimestamp)
FROM tblSource t
WHERE t.reportid = a.reportid
AND DATEPART(yy, t.transactionTimestamp)
* 100 + DATEPART(MM,
t.transactionTimestamp) <= a.yearmonth
) maxtstamp
FROM allcombinations a
)
-- join previous data to the source table by reportid and timestamp
SELECT distinct m.reportid ,
m.yearmonth ,
t.CurrentProductionHours
FROM maxdates m
JOIN tblSource t ON t.transactionTimestamp = m.maxtstamp and t.reportid=m.reportid
ORDER BY m.reportid ,
m.yearmonth