Count of rows since the last occurrence of a value in a specific column - T-SQL

Take a look at this SQL fiddle:
http://sqlfiddle.com/#!18/cefe0b/2/0
Per request, here is the DDL:
-- Sample job-execution history: one row per run of a job.
CREATE TABLE #JobExecutions
(
    JobExecutionID INT IDENTITY(1, 1) PRIMARY KEY NOT NULL,
    JobID INT NOT NULL,
    StartTime DATETIME NOT NULL,
    EndTime DATETIME NOT NULL,
    RunStatus VARCHAR(50) NOT NULL
);

-- Timestamps are written in the ISO 8601 form 'YYYY-MM-DDThh:mm:ss' so they are
-- parsed identically under every SET LANGUAGE / SET DATEFORMAT setting.
-- The previous 'MM/DD/YYYY' literals were read as month/day/year here
-- (1-7 October 2020, one run per job per day).
-- One INSERT per row keeps the IDENTITY values in the listed order.
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-01T14:37:28', '2020-10-01T14:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-01T15:37:28', '2020-10-01T15:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (3, '2020-10-01T16:37:28', '2020-10-01T16:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-02T14:37:28', '2020-10-02T14:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-02T15:37:28', '2020-10-02T15:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (3, '2020-10-02T16:37:28', '2020-10-02T16:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-03T14:37:28', '2020-10-03T14:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-03T15:37:28', '2020-10-03T15:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (3, '2020-10-03T16:37:28', '2020-10-03T16:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-04T14:37:28', '2020-10-04T14:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-04T15:37:28', '2020-10-04T15:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (3, '2020-10-04T16:37:28', '2020-10-04T16:41:03', 'Succeeded');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-05T14:37:28', '2020-10-05T14:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-05T15:37:28', '2020-10-05T15:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-06T14:37:28', '2020-10-06T14:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (2, '2020-10-06T15:37:28', '2020-10-06T15:41:03', 'Failed');
INSERT INTO #JobExecutions (JobID, StartTime, EndTime, RunStatus) VALUES (1, '2020-10-07T14:37:28', '2020-10-07T14:41:03', 'Failed');
I have a set of jobs for which I would like to count the number of times they have failed in a row since their last successful run.
My final resultset should look like this:
+-------+-----------------------------+
| JobID | TimesFailedSinceLastSuccess |
+-------+-----------------------------+
| 1 | 3 |
| 2 | 2 |
| 3 | 0 |
+-------+-----------------------------+
Any help would be much appreciated

Ok, turns out my brain was not as blocked as I thought. Here is my approach. However, if there is a more elegant way, please post.
-- Number of 'Failed' runs per job since that job's most recent 'Succeeded' run.
-- The query is driven from every job that has any execution, not only from jobs
-- that have succeeded at least once, so a job that has never succeeded is still
-- reported (all of its failures are counted).
WITH LastSuccess AS (
    -- Latest successful EndTime per job; jobs without a success have no row here.
    SELECT
        JobID,
        MAX(EndTime) AS LastSuccessEndTime
    FROM #JobExecutions
    WHERE RunStatus = 'Succeeded'
    GROUP BY JobID
)
SELECT
    je.JobID,
    -- COUNT ignores the NULL produced when the CASE does not match, so jobs
    -- with no qualifying failure get 0 rather than being dropped.
    COUNT(CASE
              WHEN je.RunStatus = 'Failed'
                   AND (ls.LastSuccessEndTime IS NULL
                        OR je.EndTime > ls.LastSuccessEndTime)
              THEN 1
          END) AS TimesFailedSinceLastSuccess
FROM #JobExecutions AS je
LEFT JOIN LastSuccess AS ls
    ON ls.JobID = je.JobID
GROUP BY je.JobID
ORDER BY je.JobID;

Related

PostgreSQL: Forward fill NULL values with previous NOT NULL value in group

I'm trying to fill NULL values in multiple columns (of different types: INT, VARCHAR) with the previous NOT NULL value in a group ordered by date. Consider the following table:
I want to get here:
-- Sample data: per-id daily readings with gaps (NULLs) to be forward-filled.
CREATE TABLE IF NOT EXISTS test (
    id VARCHAR,
    date DATE,
    value_1 INT,
    value_2 VARCHAR
);

-- Explicit column list so the statement does not depend on column order.
-- id is a VARCHAR column, so its values are written as string literals
-- instead of relying on an implicit integer-to-text assignment cast.
INSERT INTO test (id, date, value_1, value_2) VALUES
    ('1', '2022-01-01', 5,    'asdf'),
    ('1', '2022-01-02', NULL, NULL),
    ('1', '2022-01-03', NULL, 'def'),
    ('1', '2022-01-04', 4,    NULL),
    ('2', '2022-01-01', 1,    'a'),
    ('2', '2022-01-02', NULL, NULL),
    ('2', '2022-01-03', 2,    'b'),
    ('2', '2022-01-04', NULL, NULL);
One day, PostgreSQL may support the IGNORE NULLS option for LEAD and LAG functions.
In the meantime, you must use window functions to build groups, then select the maximum in each group.
-- Forward-fill NULLs in value_1 / value_2 with the previous non-NULL value
-- of the same id, ordered by date.
--
-- Inner query: a running COUNT(col) in ascending date order only increases on
-- a non-NULL row, so each non-NULL row and the NULL rows that follow it share
-- one group number. The ordering must be ascending: descending order would
-- group each NULL with the NEXT non-NULL value, i.e. back-fill.
-- Outer query: each group holds at most one non-NULL value, so MAX() returns
-- it. Rows before the first non-NULL value fall in group 0 and stay NULL.
SELECT
    id,
    date,
    MAX(value_1) OVER (PARTITION BY id, grp_1) AS value_1,
    MAX(value_2) OVER (PARTITION BY id, grp_2) AS value_2
FROM (
    SELECT
        id,
        date,
        value_1,
        value_2,
        COUNT(value_1) OVER (PARTITION BY id ORDER BY date) AS grp_1,
        COUNT(value_2) OVER (PARTITION BY id ORDER BY date) AS grp_2
    FROM test
) AS t
ORDER BY id, date;

A CTE with DENSE_RANK, should i use a Subselect within OR a JOIN Condition

In the original table there are a lot of records. Is it better to filter within the CTE (example B) or should this be done in the JOIN condition (example A). Or is it possibly all the same, both are equally good/fast?
I guess in the CTE the whole table would have to be prefiltered first and in the JOIN only the corresponding records would be affected.
-- Sample orders used by examples A and B.
-- Created as a local temporary table: "DECLARE ... TABLE" only accepts
-- @-prefixed table variables, while both examples read from #Orders.
CREATE TABLE #Orders (
    orderid int NOT NULL,
    orderdate datetime NOT NULL,
    empid int NOT NULL,
    custid varchar(5) NOT NULL,
    qty int NOT NULL
);
-- in original lots of data
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(30001, '20020802', 3, 'A', 10);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(10001, '20021224', 1, 'A', 12);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(10005, '20021224', 1, 'B', 20);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(40001, '20030109', 4, 'A', 40);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(10006, '20030118', 1, 'C', 14);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(20001, '20030212', 2, 'B', 12);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(40005, '20040212', 4, 'A', 10);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(20002, '20040216', 2, 'C', 20);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(30003, '20040418', 3, 'B', 15);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(30004, '20020418', 3, 'C', 22);
INSERT INTO #Orders(orderid, orderdate, empid, custid, qty) VALUES(30007, '20020907', 3, 'D', 30);
-- example A
-- The CTE ranks every order within its customer by orderid; the outer query
-- then keeps only the first-ranked order of each customer.
WITH ranked_orders AS (
    SELECT
        o.orderid,
        o.orderdate,
        o.empid,
        o.custid,
        o.qty,
        DENSE_RANK() OVER (PARTITION BY o.custid ORDER BY o.orderid) AS [Ranking]
    FROM #Orders AS o
)
SELECT
    r.orderid,
    r.orderdate,
    r.empid,
    r.custid,
    r.qty,
    r.[Ranking]
FROM ranked_orders AS r
WHERE r.[Ranking] = 1;
-------------------------------------
-- example B
-- The ranking and the "first order per customer" filter both live inside the
-- CTE; the outer query only reads the already-filtered rows.
WITH first_orders AS (
    SELECT
        ranked.orderid,
        ranked.orderdate,
        ranked.empid,
        ranked.custid,
        ranked.qty
    FROM (
        SELECT
            o.orderid,
            o.orderdate,
            o.empid,
            o.custid,
            o.qty,
            DENSE_RANK() OVER (PARTITION BY o.custid ORDER BY o.orderid) AS [Ranking]
        FROM #Orders AS o
    ) AS ranked
    WHERE ranked.[Ranking] = 1
)
SELECT
    f.orderid,
    f.orderdate,
    f.empid,
    f.custid,
    f.qty
FROM first_orders AS f
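Generally, it is better to filter inside the CTE (example B), especially if your tables have indexes on the columns used in the filter conditions, because those indexes can then be used to pre-filter the data. A CTE is only a temporary view and cannot have indexes of its own, so in example A the main query cannot use indexes (if there are any) for the filter condition in its WHERE clause. Note, however, that in both examples the filter is on the computed [Ranking] value, which is only known after DENSE_RANK() has been evaluated over the table, so compare the execution plans of the two examples before assuming one is faster.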

PostgreSQL WITH RECURSIVE order by in non recursive term

I am trying to create a recursive CTE, and I want to fetch the row for the non-recursive term from the table using ORDER BY, but it seems impossible to do. Is there any workaround for this?
Example:
-- Self-referencing sample table: previous_id points at the row's predecessor.
CREATE TABLE mytable (
    id BIGSERIAL PRIMARY KEY,
    ref_id BIGINT NOT NULL,
    previous_id BIGINT REFERENCES mytable(id),
    some_name TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Rows are inserted with explicit ids so previous_id can refer to them.
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (1, NULL, 1, 'Barry');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (2, NULL, 1, 'Nick');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (3, 1, 2, 'Janet');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (4, 1, 1, 'John');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (5, 2, 7, 'Ron');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (6, 1, 1, 'Aaron');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (7, 4, 1, 'Anna');

-- Explicit ids bypass the BIGSERIAL default, so the backing sequence has not
-- advanced. Move it to the highest id in use; otherwise the next insert that
-- omits id would be assigned 1 and violate the primary key.
SELECT setval(
    pg_get_serial_sequence('mytable', 'id'),
    (SELECT MAX(id) FROM mytable)
);
The query I am trying to construct
-- Attempted recursive CTE (the non-working form the question is about).
-- Intent: start from the newest 'Anna' row with ref_id = 1, then repeatedly
-- follow previous_id back to each predecessor row.
-- NOTE(review): the anchor's ORDER BY ... LIMIT 1 sits directly in front of
-- UNION ALL without parentheses. PostgreSQL only accepts ORDER BY / LIMIT on
-- an individual UNION branch when that branch is parenthesized, which is
-- presumably why this form is rejected -- confirm against the SELECT docs.
WITH RECURSIVE my_path AS (
-- Non-recursive (anchor) term: the single most recently created matching row.
SELECT * FROM mytable
WHERE ref_id = 1 AND some_name = 'Anna'
ORDER BY created_at DESC
LIMIT 1
UNION ALL
-- Recursive term: hp is the row already on the path, ph is its predecessor.
SELECT ph.* FROM my_path hp
INNER JOIN mytable ph ON hp.previous_id = ph.id
)
SELECT * FROM my_path;
SQLFIDDLE
Just move it into a starter CTE:
updated fiddle
-- The starting row is selected in its own CTE, where ORDER BY / LIMIT are
-- free-standing; the recursive CTE then uses that single row as its anchor
-- and walks previous_id back through the predecessor rows.
WITH RECURSIVE base_record AS (
    SELECT *
    FROM mytable
    WHERE some_name = 'Anna'
      AND ref_id = 1
    ORDER BY created_at DESC
    LIMIT 1
),
my_path AS (
    SELECT *
    FROM base_record
    UNION ALL
    SELECT predecessor.*
    FROM my_path AS current_step
    INNER JOIN mytable AS predecessor
        ON predecessor.id = current_step.previous_id
)
SELECT *
FROM my_path;

Postgresql find by count, joined table

Given 3 tables, I need to build a SQL query that finds the two actors who were cast together the most and lists the titles of those movies, sorted alphabetically.
https://www.db-fiddle.com/f/r2Y9CpH8n7MHTeBaqEHe9S/0
The data for reproducing below:
-- Link table: which actor appears in which film.
create table film_actor (
    actor_id integer,
    film_id  integer
);

-- Films by id and title.
create table film (
    film_id integer,
    title   varchar
);

-- Actors by id and name.
create table actor (
    actor_id   integer,
    first_name varchar,
    last_name  varchar
);

-- Actors 1 and 2 share films 1-3; actor 3 appears only in film 1.
insert into public.film_actor (actor_id, film_id) values
    (1, 1),
    (1, 2),
    (1, 3),
    (2, 1),
    (2, 2),
    (2, 3),
    (3, 1);

insert into public.film (film_id, title) values
    (1, 'First'),
    (2, 'Second'),
    (3, 'Third'),
    (4, 'Fourth');

insert into public.actor (actor_id, first_name, last_name) values
    (1, 'John', 'Snow'),
    (2, 'Spider', 'Man'),
    (3, 'Mike', 'Kameron');
Is this what you are looking for?
-- Finds the pair of actors who appear together in the most films and lists
-- the titles of their shared films in alphabetical order (one row per film).
with acting_pairs as (
    -- One row per (actor pair, shared film). The "<" keeps each unordered pair
    -- once and excludes an actor being paired with themselves.
    select
        fa1.actor_id as a1_id,
        fa2.actor_id as a2_id,
        fa1.film_id
    from film_actor as fa1
    inner join film_actor as fa2
        on fa1.film_id = fa2.film_id
       and fa1.actor_id < fa2.actor_id
),
top_pair as (
    -- The pair with the most shared films; the actor ids break ties so the
    -- chosen pair is deterministic.
    select
        a1_id,
        a2_id,
        count(*) as total
    from acting_pairs
    group by a1_id, a2_id
    order by total desc, a1_id, a2_id
    limit 1
)
select
    tp.a1_id,
    tp.a2_id,
    tp.total,
    f.title
from top_pair as tp
inner join acting_pairs as ap
    on ap.a1_id = tp.a1_id
   and ap.a2_id = tp.a2_id
-- left join: a shared film with no row in film still appears, with a NULL title
left join film as f
    on f.film_id = ap.film_id
order by f.title;
Giving us expected output for the example input would be nice.

HAWQ. join in/out rows by in/out time

HAWQ. How to join in/out rows by in/out time?
simple
thanks
I believe this is what you are trying to achieve. The trick is to use the window function "row_number()".
-- Pairs each 'in' crossing with the 'out' crossing that immediately follows it
-- for the same car.
-- row_number() is computed over ALL crossings of a car, in both directions, so
-- consecutive numbers mean consecutive events. An 'in' at position n is matched
-- only when the event at position n + 1 is an 'out'. Numbering the two
-- directions separately and joining on equal positions would instead pair the
-- k-th 'in' with the k-th 'out' even when that 'out' happened earlier or
-- belongs to a different visit.
-- direction and id are tie-breakers so both derived tables number rows alike.
select
    ev_in.car_id,
    ev_in.id as id_in,
    ev_in.cross_date_time as cross_date_time_in,
    ev_out.id as id_out,
    ev_out.cross_date_time as cross_date_time_out
from (
    select
        car_id,
        id,
        direction,
        cross_date_time,
        row_number() over (partition by car_id
                           order by cross_date_time, direction, id) as row_num
    from source_table
) as ev_in
join (
    select
        car_id,
        id,
        direction,
        cross_date_time,
        row_number() over (partition by car_id
                           order by cross_date_time, direction, id) as row_num
    from source_table
) as ev_out
    on ev_out.car_id = ev_in.car_id
   and ev_out.row_num = ev_in.row_num + 1
where ev_in.direction = 'in'
  and ev_out.direction = 'out';
You can write this with common table expressions too if you prefer that format.
-- Pairs each 'in' crossing with the 'out' crossing that immediately follows it
-- for the same car (common-table-expression form).
with ordered_events as (
    -- All crossings of a car numbered in time order, both directions together,
    -- so consecutive numbers mean consecutive events. direction and id are
    -- tie-breakers that keep the numbering stable.
    select
        car_id,
        id,
        direction,
        cross_date_time,
        row_number() over (partition by car_id
                           order by cross_date_time, direction, id) as row_num
    from source_table
)
select
    ev_in.car_id,
    ev_in.id as id_in,
    ev_in.cross_date_time as cross_date_time_in,
    ev_out.id as id_out,
    ev_out.cross_date_time as cross_date_time_out
from ordered_events as ev_in
join ordered_events as ev_out
    on ev_out.car_id = ev_in.car_id
   -- the very next event of the same car; it must be an 'out' to count as a pair
   and ev_out.row_num = ev_in.row_num + 1
where ev_in.direction = 'in'
  and ev_out.direction = 'out';
-- Gate crossings: one row per time a car passes 'in' or 'out'.
create table source_table
(
    id INT
    ,car_id INT
    ,direction text
    ,cross_date_time TIMESTAMP
);

-- Timestamps are plain TIMESTAMP literals: no format string to mismatch the
-- input, and no 12-hour 'hh' field.
-- NOTE(review): the last 'out' of car 2 was written as 2017-02-29, a day that
-- does not exist (2017 is not a leap year). It is stored here as 2017-03-01,
-- the value a lenient to_timestamp() rolls it over to; confirm the intended date.
insert into source_table (id, car_id, direction, cross_date_time)
values
    (1, 1, 'in',  timestamp '2017-02-02 10:20:15'),
    (1, 1, 'in',  timestamp '2017-02-12 10:20:15'),
    (1, 1, 'in',  timestamp '2017-02-18 10:20:15'),
    (1, 1, 'in',  timestamp '2017-02-25 10:20:15'),
    (1, 1, 'out', timestamp '2017-02-08 10:20:15'),
    (1, 1, 'out', timestamp '2017-02-09 10:20:15'),
    (1, 1, 'out', timestamp '2017-02-27 10:20:15'),
    (1, 2, 'in',  timestamp '2017-02-02 10:20:15'),
    (1, 2, 'in',  timestamp '2017-02-12 10:20:15'),
    (1, 2, 'in',  timestamp '2017-02-18 10:20:15'),
    (1, 2, 'out', timestamp '2017-02-08 10:20:15'),
    (1, 2, 'out', timestamp '2017-02-14 10:20:15'),
    (1, 2, 'out', timestamp '2017-02-27 10:20:15'),
    (1, 2, 'out', timestamp '2017-03-01 10:20:15'),
    (1, 3, 'in',  timestamp '2017-02-02 10:20:15'),
    (1, 3, 'in',  timestamp '2017-02-12 10:20:15'),
    (1, 3, 'out', timestamp '2017-02-08 10:20:15'),
    (1, 3, 'out', timestamp '2017-02-14 10:20:15');
-- Reproduction of the positional pairing query (inner join form).
-- sub1 numbers each car's 'in' rows by time and sub2 numbers its 'out' rows
-- by time, independently of each other. The join on equal row_num therefore
-- matches the k-th 'in' with the k-th 'out' of a car without comparing their
-- timestamps, so an 'in' can be paired with an 'out' that happened before it.
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
-- inner join: an 'in' with no 'out' at the same position is dropped
join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
Wrong result.
1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000
if use a left join variant:
-- Reproduction of the positional pairing query (left join form).
-- As in the inner-join form, 'in' rows and 'out' rows are numbered separately
-- per car and matched on equal row_num, without comparing their timestamps.
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
-- left join: every 'in' row is kept; one with no 'out' at the same position
-- gets NULL id_out / cross_date_time_out
left join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
wrong results:
1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000
1 1 2017-02-18 10:20:15.000000 1 2017-02-27 10:20:15.000000
1 1 2017-02-25 10:20:15.000000