DateDiff with multiple events Same Column SQL Server - tsql

Thank you in advance for any assistance you can provide. I am looking to get the date difference between events that are stored in the same column. Referring to the sample data, I am looking for the difference between each "Partial Submission" and its respective subsequent "Junior Reviewed" event.
Referring to the same data again, I need the DATEDIFF from:
1st "Partial Submission" to 1st "Junior Reviewed"
2nd "Partial Submission" to 2nd "Junior Reviewed"
6th "Partial Submission" to 3rd "Junior Reviewed"
I am not sure where to start; all I have done is add the row numbers, which are partitioned by "Descrip" and ordered by "Date" ascending. Any guidance on a method of accomplishing this (a recursive CTE?) would be greatly appreciated.
Start End - 2 records

-- Sample data plus a gaps-and-islands query (T-SQL, SQL Server 2012+ for LEAD).
-- For every RecordNumber it finds the first row of each consecutive run of rows
-- sharing the same DESCRIP, pairs that row with the first row of the next run,
-- and reports the gap between the two in hours. Only runs of event 3078
-- (Partial Submission) are returned.

-- The literals below are month/day/year; pin the format so parsing does not
-- depend on the session language.
SET DATEFORMAT mdy;

-- A '#'-prefixed name is a temporary table and has to be created with
-- CREATE TABLE; DECLARE ... TABLE only accepts '@'-prefixed table variables.
IF OBJECT_ID('tempdb..#Tbl') IS NOT NULL
    DROP TABLE #Tbl;

CREATE TABLE #Tbl
(
    RowNumber    INT,
    RecordNumber INT,
    IDX          INT,
    DESCRIP      NVARCHAR(50),
    DATES        DATETIME,
    EVENTNUM     INT
);

INSERT INTO #Tbl (RowNumber, RecordNumber, IDX, DESCRIP, DATES, EVENTNUM)
VALUES
(1, 11515, 13, 'Partial Submission', '8/12/16 00:21', 3078),
(1, 11515, 14, 'Junior Reviewed', '8/12/16 15:52', 3089),
(2, 11515, 26, 'Partial Submission', '8/18/16 15:24', 3078),
(3, 11515, 33, 'Partial Submission', '9/6/16 9:47', 3078),
(4, 11515, 34, 'Partial Submission', '9/6/16 9:47', 3078),
(5, 11515, 39, 'Partial Submission', '9/9/16 13:19', 3078),
(2, 11515, 40, 'Junior Reviewed', '9/11/16 8:30', 3089),
(6, 11515, 46, 'Partial Submission', '9/15/16 12:30', 3078),
(3, 11515, 54, 'Junior Reviewed', '9/17/16 10:01', 3089),
(7, 11515, 57, 'Full! Submission', '9/19/16 9:16', 3079),
(1, 11520, 19, 'Partial Submission', '8/20/16 00:42', 3078),
(1, 11520, 22, 'Junior Reviewed', '8/22/16 9:06', 3089),
(2, 11520, 28, 'Partial Submission', '8/29/16 20:12', 3078),
(2, 11520, 34, 'Junior Reviewed', '9/1/16 8:20', 3089),
(3, 11520, 38, 'Partial Submission', '9/8/16 15:03', 3078),
(4, 11520, 39, 'Partial Submission', '9/8/16 15:03', 3078),
(3, 11520, 47, 'Junior Reviewed', '9/14/16 13:53', 3089),
(5, 11520, 48, 'Full! Submission', '9/16/16 13:19', 3079),
(4, 11520, 52, 'Junior Reviewed', '9/17/16 10:51', 3089),
(6, 11520, 53, 'Full! Submission', '9/19/16 16:21', 3079);

WITH CTE AS
(
    -- Two row numberings per record: one over all events, one per DESCRIP.
    -- Their difference is constant inside a run of identical DESCRIP values
    -- and changes from one run to the next, which identifies each "island".
    SELECT
        T.RecordNumber,
        T.IDX,
        T.DESCRIP,
        RowId          = ROW_NUMBER() OVER (PARTITION BY T.RecordNumber ORDER BY T.IDX),
        RowIdByDescrip = ROW_NUMBER() OVER (PARTITION BY T.RecordNumber, T.DESCRIP ORDER BY T.IDX)
    FROM #Tbl AS T
),
Test AS
(
    -- Keep only the first row (lowest IDX) of every island, then look ahead to
    -- the first row of the following island within the same record.
    -- NOTE(review): the following island can be any event type, not only
    -- 'Junior Reviewed'; in the sample data every Partial Submission run is
    -- followed by a Junior Reviewed row, so the pairing matches the request.
    SELECT
        A.RecordNumber,
        A.DESCRIP,
        A.EVENTNUM,
        A.IDX,
        A.DATES AS StartDate,
        LEAD(A.DATES) OVER (PARTITION BY A.RecordNumber ORDER BY A.IDX) AS EndDate,
        DATEDIFF(HOUR, A.DATES,
                 LEAD(A.DATES) OVER (PARTITION BY A.RecordNumber ORDER BY A.IDX)) AS DateDifff
    FROM #Tbl AS A
    INNER JOIN
    (
        SELECT
            C.RecordNumber,
            MIN(C.IDX) AS IDX
        FROM CTE AS C
        GROUP BY
            C.RowId - C.RowIdByDescrip,
            C.DESCRIP,
            C.RecordNumber
    ) AS B
        ON  A.IDX = B.IDX
        AND A.RecordNumber = B.RecordNumber
)
SELECT
    RecordNumber,
    DESCRIP,
    EVENTNUM,
    IDX,
    StartDate,
    EndDate,
    DateDifff
FROM Test
WHERE EVENTNUM = 3078   -- Partial Submission; EVENTNUM is INT, so compare to a number
ORDER BY RecordNumber, IDX;

Try as the below:
-- Pairs the first 'Partial Submission' of each consecutive run with the next
-- 'Junior Reviewed' event of the SAME RecordNumber and reports the gap in hours
-- (T-SQL, SQL Server 2012+ for LEAD).

-- The literals below are month/day/year; pin the format so parsing does not
-- depend on the session language.
SET DATEFORMAT mdy;

-- A '#'-prefixed name is a temporary table and has to be created with
-- CREATE TABLE; DECLARE ... TABLE only accepts '@'-prefixed table variables.
IF OBJECT_ID('tempdb..#Tbl') IS NOT NULL
    DROP TABLE #Tbl;

CREATE TABLE #Tbl
(
    RowNumber    INT,
    RecordNumber INT,
    IDX          INT,
    DESCRIP      NVARCHAR(50),
    DATES        DATETIME,
    EVENTNUM     INT
);

INSERT INTO #Tbl (RowNumber, RecordNumber, IDX, DESCRIP, DATES, EVENTNUM)
VALUES
(1, 11515, 13, 'Partial Submission', '8/12/16 00:21', 3078),
(1, 11515, 14, 'Junior Reviewed', '8/12/16 15:52', 3089),
(2, 11515, 26, 'Partial Submission', '8/18/16 15:24', 3078),
(3, 11515, 33, 'Partial Submission', '9/6/16 9:47', 3078),
(4, 11515, 34, 'Partial Submission', '9/6/16 9:47', 3078),
(5, 11515, 39, 'Partial Submission', '9/9/16 13:19', 3078),
(2, 11515, 40, 'Junior Reviewed', '9/11/16 8:30', 3089),
(6, 11515, 46, 'Partial Submission', '9/15/16 12:30', 3078),
(3, 11515, 54, 'Junior Reviewed', '9/17/16 10:01', 3089),
(7, 11515, 57, 'Full! Submission', '9/19/16 9:16', 3079),
(1, 11520, 19, 'Partial Submission', '8/20/16 00:42', 3078),
(1, 11520, 22, 'Junior Reviewed', '8/22/16 9:06', 3089),
(2, 11520, 28, 'Partial Submission', '8/29/16 20:12', 3078),
(2, 11520, 34, 'Junior Reviewed', '9/1/16 8:20', 3089),
(3, 11520, 38, 'Partial Submission', '9/8/16 15:03', 3078),
(4, 11520, 39, 'Partial Submission', '9/8/16 15:03', 3078),
(3, 11520, 47, 'Junior Reviewed', '9/14/16 13:53', 3089),
(5, 11520, 48, 'Full! Submission', '9/16/16 13:19', 3079),
(4, 11520, 52, 'Junior Reviewed', '9/17/16 10:51', 3089),
(6, 11520, 53, 'Full! Submission', '9/19/16 16:21', 3079);

WITH CTE AS
(
    -- Only the two event types of interest take part, so within a record the
    -- runs ("islands") strictly alternate between the two descriptions.
    -- Everything is numbered per RecordNumber: IDX values repeat across records
    -- (34 and 39 occur under both 11515 and 11520), so a global numbering would
    -- interleave the two records.
    SELECT
        T.RecordNumber,
        T.IDX,
        T.DESCRIP,
        ROW_NUMBER() OVER (PARTITION BY T.RecordNumber ORDER BY T.IDX) AS RowId,
        ROW_NUMBER() OVER (PARTITION BY T.RecordNumber, T.DESCRIP ORDER BY T.IDX) AS RowIdByDescrip
    FROM #Tbl AS T
    WHERE
        T.EVENTNUM IN
        (
            3078, --Partial Submission
            3089  -- Junior Reviewed
        )
),
CTE2 AS
(
    -- First row (lowest IDX) of every island, per record. RowId - RowIdByDescrip
    -- is constant inside an island and differs between islands of one DESCRIP.
    SELECT
        C.RecordNumber,
        MIN(C.IDX) AS IDX
    FROM CTE AS C
    GROUP BY
        C.RecordNumber,
        C.RowId - C.RowIdByDescrip,
        C.DESCRIP
)
SELECT
    R.RecordNumber,
    R.IDX,
    R.StartDate,
    R.EndDate,
    DATEDIFF(HOUR, R.StartDate, R.EndDate) AS DateDifff
FROM
(
    -- The join must include RecordNumber as well as IDX, otherwise a shared IDX
    -- would match rows of the other record. LEAD is partitioned the same way so
    -- an island only ever looks ahead within its own record; EndDate is NULL
    -- when no later island exists.
    SELECT
        A.EVENTNUM,
        A.RecordNumber,
        A.IDX,
        A.DATES AS StartDate,
        LEAD(A.DATES) OVER (PARTITION BY A.RecordNumber ORDER BY A.IDX) AS EndDate
    FROM #Tbl AS A
    INNER JOIN CTE2 AS B
        ON  A.RecordNumber = B.RecordNumber
        AND A.IDX = B.IDX
) AS R
WHERE
    R.EVENTNUM = 3078 --Partial Submission
ORDER BY
    R.RecordNumber,
    R.IDX;

/*
Result:
RecordNumber IDX StartDate EndDate DateDifff
------------ ----------- ---------------- ---------------- -----------
11515 13 2016-08-12 00:21 2016-08-12 15:52 15
11515 26 2016-08-18 15:24 2016-09-11 08:30 569
11515 46 2016-09-15 12:30 2016-09-17 10:01 46
11520 19 2016-08-20 00:42 2016-08-22 09:06 57
11520 28 2016-08-29 20:12 2016-09-01 08:20 60
11520 38 2016-09-08 15:03 2016-09-14 13:53 142
*/

This is not an answer; it is just too long for a comment.
I am not sure I understood the question exactly, so let me explain what I did.
First, I sort the rows by IDX.
I only work with the two events, Partial Submission and Junior Reviewed.
Result table: Partial Submission Start - Junior Reviewed END
RowNumber RecordNumber IDX DESCRIP DATES EVENTNUM RowId RowIdByDescrip
----------- ------------ ----------- -------------------------------------------------- ----------------------- ----------- -------------------- --------------------
1 11515 13 Partial Submission Start 2016-08-12 00:21:00.000 3078 1 1
1 11515 14 Junior Reviewed END 2016-08-12 15:52:00.000 3089 2 1
1 11520 19 Partial Submission Start 2016-08-20 00:42:00.000 3078 3 2
1 11520 22 Junior Reviewed END 2016-08-22 09:06:00.000 3089 4 2
2 11515 26 Partial Submission Start 2016-08-18 15:24:00.000 3078 5 3
2 11520 28 Partial Submission 2016-08-29 20:12:00.000 3078 6 4
3 11515 33 Partial Submission 2016-09-06 09:47:00.000 3078 7 5
4 11515 34 Partial Submission 2016-09-06 09:47:00.000 3078 8 6
2 11520 34 Junior Reviewed End 2016-09-01 08:20:00.000 3089 9 3
3 11520 38 Partial Submission Start 2016-09-08 15:03:00.000 3078 10 7
4 11520 39 Partial Submission 2016-09-08 15:03:00.000 3078 11 8
5 11515 39 Partial Submission 2016-09-09 13:19:00.000 3078 12 9
2 11515 40 Junior Reviewed End 2016-09-11 08:30:00.000 3089 13 4
6 11515 46 Partial Submission Start 2016-09-15 12:30:00.000 3078 14 10
3 11520 47 Junior Reviewed End 2016-09-14 13:53:00.000 3089 15 5
4 11520 52 Junior Reviewed 2016-09-17 10:51:00.000 3089 16 6
3 11515 54 Junior Reviewed 2016-09-17 10:01:00.000 3089 17 7
Result:
RecordNumber IDX StartDate EndDate DateDifff
------------ ----------- ---------------- ---------------- -----------
11515 13 2016-08-12 00:21 2016-08-12 15:52 15
11515 26 2016-08-18 15:24 2016-09-06 09:47 450
11515 34 2016-09-06 09:47 2016-09-01 08:20 -121
11515 46 2016-09-15 12:30 2016-09-14 13:53 -23
11520 38 2016-09-08 15:03 2016-09-11 08:30 65
11520 19 2016-08-20 00:42 2016-08-22 09:06 57

Related

Spark : How do I find the passengers who have been on more than 3 flights together

I have a dataset as the following
passengerId, flightId, from, to, date
56, 0, cg, ir, 2017-01-01
78, 0, cg, ir, 2017-01-01
12, 0, cg, ir, 2017-02-01
34, 0, cg, ir, 2017-02-01
51, 0, cg, ir, 2017-02-01
56, 1, ir, uk, 2017-01-02
78, 1, ir, uk, 2017-01-02
12, 1, ir, uk, 2017-02-02
34, 1, ir, uk, 2017-02-02
51, 1, ir, uk, 2017-02-02
56, 2, uk, in, 2017-01-05
78, 2, uk, in, 2017-01-05
12, 2, uk, in, 2017-02-05
34, 2, uk, in, 2017-02-05
51, 3, uk, in, 2017-02-05
I need to present a report in the following formats.
Passenger 1 ID Passenger 2 ID No_flights_together
56 78 6
12 34 8
… … …
Find the passengers who have been on more than N flights together within the range
Passenger 1 ID Passenger 2 ID No_Flights_Together From To
56 78 6 2017-01-01 2017-03-01
12 34 8 2017-04-05 2017-12-01
… … … … …
I'm not sure how to go about it. Help would be appreciated.
You can self-join on df1.passengerId < df2.passengerId along with same flightId and date, followed by performing the necessary count(*), min(date) and max(date) using groupBy/agg:
// Sample bookings: one row per (passengerId, flightId, date).
val df = Seq(
  (56, 0, "2017-01-01"),
  (78, 0, "2017-01-01"),
  (12, 0, "2017-02-01"),
  (34, 0, "2017-02-01"),
  (51, 0, "2017-02-01"),
  (56, 1, "2017-01-02"),
  (78, 1, "2017-01-02"),
  (12, 1, "2017-02-02"),
  (34, 1, "2017-02-02"),
  (51, 1, "2017-02-02"),
  (56, 2, "2017-01-05"),
  (78, 2, "2017-01-05"),
  (12, 2, "2017-02-01"),
  (34, 2, "2017-02-01"),
  (51, 3, "2017-02-01")
).toDF("passengerId", "flightId", "date")

// Self-join the bookings. The strict '<' on passengerId keeps each unordered
// pair once and never pairs a passenger with themselves.
val orderedPair = $"df1.passengerId" < $"df2.passengerId"
val sameFlight  = $"df1.flightId" === $"df2.flightId"
val sameDate    = $"df1.date" === $"df2.date"

val sharedFlights = df.as("df1").join(df.as("df2"), orderedPair && sameFlight && sameDate, "inner")

// Per pair: number of shared flights plus the first and last shared date;
// only pairs with at least 3 shared flights are printed.
sharedFlights
  .groupBy($"df1.passengerId", $"df2.passengerId")
  .agg(
    count("*").as("flightsTogether"),
    min($"df1.date").as("from"),
    max($"df1.date").as("to")
  )
  .filter($"flightsTogether" >= 3)
  .show()
// +-----------+-----------+---------------+----------+----------+
// |passengerId|passengerId|flightsTogether| from| to|
// +-----------+-----------+---------------+----------+----------+
// | 12| 34| 3|2017-02-01|2017-02-02|
// | 56| 78| 3|2017-01-01|2017-01-05|
// +-----------+-----------+---------------+----------+----------+

How to set values from recursive query in PostgreSQL?

I have a query which gives a result:
id | manager_id | level | star_level
----+------------+-------+------------
1 | NULL | 1 | 0
2 | 1 | 2 | 1
3 | 2 | 3 | 1
4 | 3 | 4 | 2
5 | 4 | 5 | 2
6 | 5 | 6 | 2
7 | 6 | 7 | 3
8 | 7 | 8 | 3
9 | 8 | 9 | 4
(9 rows)
Here is the query:
-- Walks the management tree from the top (rows with no manager) downwards.
-- level      : depth in the tree, starting at 1 for the root rows.
-- star_level : running sum of is_star (cast to 0/1) along the path from the root.
with recursive parents as (
    -- Anchor: employees without a manager.
    select
        emp.id,
        emp.manager_id,
        1 as level,
        cast(sk.is_star as integer) as star_level
    from employees as emp
    inner join skills as sk
        on sk.id = emp.skill_id
    where emp.manager_id is null

    union all

    -- Step: direct reports of the rows found so far.
    select
        emp.id,
        emp.manager_id,
        boss.level + 1 as level,
        boss.star_level + cast(sk.is_star as integer) as star_level
    from parents as boss
    inner join employees as emp
        on emp.manager_id = boss.id
    inner join skills as sk
        on sk.id = emp.skill_id
)
select
    id,
    manager_id,
    level,
    star_level
from parents
;
Can you please tell me how to change the query so that, in the same query, the level and star_level values are written to the corresponding columns?
Demo data:
-- Demo schema and seed data (PostgreSQL).

-- Skill catalogue; is_star marks the skills that count towards star_level.
create table skills (
    id      int,
    name    varchar,
    is_star boolean
);

-- Employees form a single management chain via manager_id.
-- level and star_level are not populated by the seed data below.
create table employees (
    id         int,
    name       varchar,
    manager_id int,
    skill_id   int,
    level      int,
    star_level int
);

insert into skills (id, name, is_star)
values
    (1, 'Skill 1', false),
    (2, 'Skill 2', true),
    (3, 'Skill 3', false),
    (4, 'Skill 4', true),
    (5, 'Skill 5', false);

insert into employees (id, name, manager_id, skill_id)
values
    (1, 'Employee 1', null, 1),
    (2, 'Employee 2', 1, 2),
    (3, 'Employee 3', 2, 3),
    (4, 'Employee 4', 3, 4),
    (5, 'Employee 5', 4, 5),
    (6, 'Employee 6', 5, 1),
    (7, 'Employee 7', 6, 2),
    (8, 'Employee 8', 7, 3),
    (9, 'Employee 9', 8, 4);
As a result, I need a query which will count level and star_level columns for Employees table and write their values (in Employees table) in one query.
You can use an UPDATE statement together with your CTE:
-- Computes level and star_level for every employee reachable from a root
-- (manager_id IS NULL) and writes both values back to employees in a single
-- statement. Employees whose skill_id has no match in skills are not reached
-- by the inner joins and are left unchanged.
with recursive parents as (
    -- Anchor: employees without a manager start at level 1.
    select e.id
         , e.manager_id
         , 1 as level
         , cast(s.is_star as integer) as star_level
    from employees as e
    inner join skills as s
        on e.skill_id = s.id
    where e.manager_id is null

    union all

    -- Step: each direct report is one level deeper and adds its own star flag.
    select e.id
         , e.manager_id
         , p.level + 1 as level
         , p.star_level + cast(s.is_star as integer) as star_level
    from employees as e
    inner join skills as s
        on e.skill_id = s.id
    inner join parents as p
        on e.manager_id = p.id
)
update employees
set level = p.level,
    star_level = p.star_level
from parents p
where employees.id = p.id;

Cannot UPDATE a CTE ( but SELECT works)?

According to the docs and previous answers on this forum, it should be trivial to UPDATE a CTE in a WITH query... but I cannot get it to work!
What am I doing wrong in this simple example?
-- Minimal reproduction of the problem: `val` is only a CTE name here, yet it is
-- used as the target of the UPDATE. Running the statement produces the error
-- quoted below (relation "val" does not exist).
WITH val AS (VALUES (1, 11), (2, 22), (3, 33))
UPDATE val SET column1=column1*2 ;
this throws me an error
ERROR: relation "val" does not exist
LINE 3: UPDATE val SET column1=column1*2 ;
^
whereas this example with SELECT works:
-- Reading from the same CTE works: the VALUES rows expose the default column
-- names column1 and column2, and the first one is doubled in the output.
WITH val AS (
    VALUES (1, 11), (2, 22), (3, 33)
)
SELECT
    column1 * 2 AS new,
    column2
FROM val;
new | column2
-----+---------
2 | 11
4 | 22
6 | 33
(3 rows)

hive ql - posexplode with more than 2 columns

Have 3 arrays:
[21,31,41], [121,131,141], [1021,1031,1041]
Wanted to explode as:
21, 121, 1021
31, 131, 1031
41, 141, 1041
I have written like this:
-- Explodes three arrays with posexplode and keeps only the combinations whose
-- positions agree (q1 = q2 = q3), i.e. the element-wise "zip" of the arrays.
-- NOTE(review): as written, the FROM clause names no table before the first
-- LATERAL VIEW; the source table appears to have been lost from the snippet --
-- confirm against the original query.
-- NOTE(review): each lateral view reuses one name (key1/key2/key3) for both the
-- view alias and the value column; q1/q2/q3 are the position columns.
select key1, key2, key3 from
lateral view posexplode(col_name_1) key1 as q1, key1
lateral view posexplode(col_name_2) key2 as q2, key2
lateral view posexplode(col_name_3) key3 as q3, key3
where q1=q2 and q1=q3;
Gets an exception as:
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask

Use psycopg2 to do loop in postgresql

I use postgresql 8.4 to route a river network, and I want to use psycopg2 to loop through all data points in my river network.
#set up python and postgresql connection
import psycopg2
# SQL template: calls driving_distance() with a dollar-quoted edge query over the
# `network` table, followed by four %s placeholders that are bound at execute time.
query = """
select *
from driving_distance ($$
select
gid as id,
start_id::int4 as source,
end_id::int4 as target,
shape_leng::double precision as cost
from network
$$, %s, %s, %s, %s
)
;"""
conn = psycopg2.connect("dbname = 'routing_template' user = 'postgres' host = 'localhost' password = '****'")
cur = conn.cursor()
# NOTE(review): `i = 1` is the first statement of the loop body, so the counter is
# reset on every pass; `i <= 2` therefore always holds and the `break` branch is
# never reached.
while True:
i = 1
if i <= 2:
# Bound parameters: (i, 1000000, False, False) fill the four %s placeholders.
cur.execute(query, (i, 1000000, False, False))
i = i + 1
else:
break
# NOTE(review): fetchall() appears to sit after the loop (indentation was lost in
# this listing), so it would read the rows of the last execute() only.
rs = cur.fetchall()
conn.close()
print rs
The code above takes a very long time to run even though I have set the maximum value of the iterator i to 2, and the output is an error message containing garbage.
I suspect that PostgreSQL can return only one result at a time, so I tried to put this line in my loop:
rs(i) = cur.fetchall()
but the error message said that this line is invalid.
I know that I can't write code like rs(i), but I don't know what to use instead to test my assumption.
So should I save each result to a file first, then run the next iteration of the loop, and so on?
I am working with postgresql 8.4, python 2.7.6 under Windows 8.1 x64.
Update#1
I can do loop using Clodoaldo Neto's code(thanks), and the result is like this,
[(1, 2, 0.0), (2, 2, 4729.33082850235), (3, 19, 4874.27571718902), (4, 3, 7397.215962901), (5, 4,
6640.31749097187), (6, 7, 10285.3869655786), (7, 7, 14376.1087618696), (8, 5, 15053.164236979), (9, 10, 16243.5973710466), (10, 8, 19307.3024368889), (11, 9, 21654.8669532788), (12, 11, 23522.6224229233), (13, 18, 29706.6964721152), (14, 21, 24034.6792693279), (15, 18, 25408.306370489), (16, 20, 34204.1769580924), (17, 11, 26465.8348728118), (18, 20, 38596.7313209197), (19, 13, 35184.9925532175), (20, 16, 36530.059646027), (21, 15, 35789.4069722436), (22, 15, 38168.1750567026)]
[(1, 2, 4729.33082850235), (2, 2, 0.0), (3, 19, 144.944888686669), (4, 3, 2667.88513439865), (5, 4, 1910.98666246952), (6, 7, 5556.05613707624), (7, 7, 9646.77793336723), (8, 5, 10323.8334084767), (9, 10, 11514.2665425442), (10, 8, 14577.9716083866), (11, 9, 16925.5361247765), (12, 11, 18793.2915944209), (13, 18, 24977.3656436129), (14, 21, 19305.3484408255), (15, 18, 20678.9755419867), (16, 20, 29474.8461295901), (17, 11, 21736.5040443094), (18, 20, 33867.4004924174), (19, 13, 30455.6617247151), (20, 16, 31800.7288175247), (21, 15, 31060.0761437413), (22, 15, 33438.8442282003)]
but if I want to get this look of output,
(1, 2, 7397.215962901)
(2, 2, 2667.88513439865)
(3, 19, 2522.94024571198)
(4, 3, 0.0)
(5, 4, 4288.98201949483)
(6, 7, 7934.05149410155)
(7, 7, 12024.7732903925)
(8, 5, 12701.828765502)
(9, 10, 13892.2618995696)
(10, 8, 16955.9669654119)
(11, 9, 19303.5314818018)
(12, 11, 21171.2869514462)
(13, 18, 27355.3610006382)
(14, 21, 21683.3437978508)
(15, 18, 23056.970899012)
(16, 20, 31852.8414866154)
(17, 11, 24114.4994013347)
(18, 20, 36245.3958494427)
(19, 13, 32833.6570817404)
(20, 16, 34178.72417455)
(21, 15, 33438.0715007666)
(22, 15, 35816.8395852256)
What small change should I make to the code?
# Run the routing query for start vertices 1 and 2 and collect all result rows
# in one flat list. Relies on `cur`, `conn` and `query` set up earlier.
rs = []
# Initialise the counter once, before the loop: resetting it inside the loop body
# would keep `i <= 2` true forever and the loop would never terminate.
i = 1
while True:
    if i <= 2:
        cur.execute(query, (i, 1000000, False, False))
        # extend() adds each fetched row individually, so rs stays a flat list of
        # tuples rather than a list of per-query lists.
        rs.extend(cur.fetchall())
        i = i + 1
    else:
        break
conn.close()
# Parenthesised form prints the same text on Python 2 and is also valid on Python 3.
print(rs)
If it is just a counter that breaks that loop then
# Same collection loop driven directly by the counter: one query per start
# vertex (1 and 2), with all rows pooled into a single flat list.
# Relies on `cur`, `conn` and `query` set up earlier.
rs = []
for i in range(1, 3):
    cur.execute(query, (i, 1000000, False, False))
    rs += cur.fetchall()
conn.close()
# Parenthesised form prints the same text on Python 2 and is also valid on Python 3.
print(rs)