Is it possible to produce per-day statistics with a query, starting from data structured as follows?
Table a: registry
id (key)
name
Table b: holidays
id (key)
id_anagrafica (foreign key)
data_start
data_end
Query:
-- List every holiday that has not started yet, together with the person's name.
-- The holidays table stores its dates as data_start / data_end; they are
-- aliased so the result keeps the start_date / end_date headings shown below.
SELECT
    b.id,
    a.name,
    b.data_start AS start_date,
    b.data_end AS end_date
FROM registry AS a
INNER JOIN holidays AS b
    ON a.id = b.id_anagrafica
WHERE b.data_start >= GETDATE()
So doing I get:
id, name, start_date, end_date
1, Mario, 01/06/2018, 30/06/2018
2, Marino, 08/06/2018, 25/06/2018
3, Maria, 01/07/2018, 05/07/2018
-
-
-
With only a start_date and an end_date per row, I cannot tell how many people are on holiday on any given day.
What I need is:
data, num_pers_in_ferie
01/06/2018, 1
02/06/2018, 1
03/06/2018, 1
-
-
08/06/2018, 2
Can you help me?
Thanks in advance
Check the approach below
-- Demo script: for every calendar day, count how many people are on holiday.
-- It builds two temp tables with sample data, shows the query from the
-- question, then expands each holiday range into one row per day.

CREATE TABLE #registry (id int, name nvarchar(50));

INSERT INTO #registry (id, name)
VALUES
    (1, 'Mario'),
    (2, 'Marino'),
    (3, 'Maria');

CREATE TABLE #holidays (id int, id_anagrafica int, data_start date, data_end date);

-- Every person starts with the same range; the updates below shift it.
INSERT INTO #holidays (id, id_anagrafica, data_start, data_end)
SELECT id, id, '2018-06-01', '2018-06-30'
FROM #registry;

-- Holiday 2: starts 20 days later and ends 5 days earlier.
UPDATE #holidays
SET data_start = DATEADD(DAY, 20, data_start),
    data_end = DATEADD(DAY, -5, data_end)
WHERE id = 2;

-- Holiday 3: starts 14 days later, end date unchanged.
UPDATE #holidays
SET data_start = DATEADD(DAY, 14, data_start)
WHERE id = 3;

-- The query from the question: one row per holiday that has not started yet.
SELECT
    b.id,
    a.name,
    b.data_start,
    b.data_end
FROM #registry AS a
INNER JOIN #holidays AS b
    ON a.id = b.id_anagrafica
WHERE b.data_start >= GETDATE();

-- First day of the current month. It is computed with date arithmetic rather
-- than by parsing an 'mm/01/yyyy' string, so the result does not depend on the
-- session's DATEFORMAT / language setting.
DECLARE @startDate datetime = DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0);

-- master..spt_values (type = 'P') is used here as a ready-made numbers table:
-- c.number is the day offset added to @startDate. Each generated day is joined
-- to every holiday whose range contains it, so COUNT(*) per day is the number
-- of people on holiday that day. Days with nobody on holiday produce no row.
SELECT
    [DATA] = CONVERT(date, DATEADD(DAY, c.number, @startDate)),
    -- For the Italian dd/mm/yyyy format, use the line below instead:
    -- [DATA] = CONVERT(varchar, DATEADD(DAY, c.number, @startDate), 103),
    Pers_in_Ferie = COUNT(*)
FROM master..spt_values AS c
INNER JOIN #holidays AS b
    ON DATEADD(DAY, c.number, @startDate) >= b.data_start
    AND DATEADD(DAY, c.number, @startDate) <= b.data_end
INNER JOIN #registry AS a
    ON a.id = b.id_anagrafica
WHERE c.type = 'P'
GROUP BY DATEADD(DAY, c.number, @startDate)
ORDER BY [DATA];

DROP TABLE #holidays;
DROP TABLE #registry;
Output:
DATA Pers_in_Ferie
---------- -------------
2018-06-01 1
2018-06-02 1
2018-06-03 1
2018-06-04 1
2018-06-05 1
2018-06-06 1
2018-06-07 1
2018-06-08 1
2018-06-09 1
2018-06-10 1
2018-06-11 1
2018-06-12 1
2018-06-13 1
2018-06-14 1
2018-06-15 2
2018-06-16 2
2018-06-17 2
2018-06-18 2
2018-06-19 2
2018-06-20 2
2018-06-21 3
2018-06-22 3
2018-06-23 3
2018-06-24 3
2018-06-25 3
2018-06-26 2
2018-06-27 2
2018-06-28 2
2018-06-29 2
2018-06-30 2
(30 rows affected)
Related
I have a table of datestamped events that I need to bundle into 7-day groups, starting with the earliest occurrence of each event_id.
The final output should return each bundle's start and end date and 'value' column of the most recent event from each bundle.
There is no predetermined start date, and the '7-day' windows are arbitrary, not 'week of the year'.
I've tried a ton of examples from other posts but none quite fit my needs or use things I'm not sure how to refactor for BigQuery
Sample Data;
Event_Id
Event_Date
Value
1
2022-01-01
010203
1
2022-01-02
040506
1
2022-01-03
070809
1
2022-01-20
101112
1
2022-01-23
131415
2
2022-01-02
161718
2
2022-01-08
192021
3
2022-02-12
212223
Expected output;
Event_Id
Start_Date
End_Date
Value
1
2022-01-01
2022-01-03
070809
1
2022-01-20
2022-01-23
131415
2
2022-01-02
2022-01-08
192021
3
2022-02-12
2022-02-12
212223
You might consider below.
-- cumsumbin: JavaScript UDF that turns a list of numbers into a bin counter.
-- It walks the array left to right, keeping a running sum of Number(v) that
-- starts at 0. Whenever adding the next element would push the running sum
-- above 6, the bin counter is incremented and the running sum restarts at 0
-- (the element that caused the overflow is not added). Otherwise the element
-- is added to the running sum. The final bin counter is returned.
-- Each element goes through Number(), so a null element counts as 0.
CREATE TEMP FUNCTION cumsumbin(a ARRAY<INT64>) RETURNS INT64
LANGUAGE js AS """
bin = 0;
a.reduce((c, v) => {
if (c + Number(v) > 6) { bin += 1; return 0; }
else return c += Number(v);
}, 0);
return bin;
""";
-- Bundle events into bins per event_id and report, for each bin, its first
-- date plus the date and value of its most recent event.
WITH sample_data AS (
  SELECT 1 AS event_id, DATE '2022-01-01' AS event_date, '010203' AS value UNION ALL
  SELECT 1 AS event_id, '2022-01-02' AS event_date, '040506' AS value UNION ALL
  SELECT 1 AS event_id, '2022-01-03' AS event_date, '070809' AS value UNION ALL
  SELECT 1 AS event_id, '2022-01-20' AS event_date, '101112' AS value UNION ALL
  SELECT 1 AS event_id, '2022-01-23' AS event_date, '131415' AS value UNION ALL
  SELECT 2 AS event_id, '2022-01-02' AS event_date, '161718' AS value UNION ALL
  SELECT 2 AS event_id, '2022-01-08' AS event_date, '192021' AS value UNION ALL
  SELECT 3 AS event_id, '2022-02-12' AS event_date, '212223' AS value
),
-- Days elapsed since the previous event of the same event_id (NULL for the first).
day_gaps AS (
  SELECT
    *,
    DATE_DIFF(
      event_date,
      LAG(event_date) OVER (PARTITION BY event_id ORDER BY event_date),
      DAY
    ) AS diff
  FROM sample_data
),
-- Bin number of each event: cumsumbin applied to the gaps seen so far.
binning AS (
  SELECT
    *,
    cumsumbin(
      ARRAY_AGG(diff) OVER (PARTITION BY event_id ORDER BY event_date)
    ) AS bin
  FROM day_gaps
)
SELECT
  event_id,
  MIN(event_date) AS start_date,
  -- Latest event of the bin, expanded into the end_date and value columns.
  ARRAY_AGG(
    STRUCT(event_date AS end_date, value) ORDER BY event_date DESC LIMIT 1
  )[OFFSET(0)].*
FROM binning
GROUP BY event_id, bin;
I have a postgres table test_table that looks like this:
date | test_hour
------------+-----------
2000-01-01 | 1
2000-01-01 | 2
2000-01-01 | 3
2000-01-02 | 1
2000-01-02 | 2
2000-01-02 | 3
2000-01-02 | 4
2000-01-03 | 1
2000-01-03 | 2
I need to select all the dates which don't have test_hour = 1, 2, and 3, so it should return
date
------------
2000-01-03
Here is what I have tried:
-- Attempt from the question: this filters individual rows, keeping every row
-- whose test_hour is not one of 1, 2, 3. It does not test whether a date is
-- missing one of those hours.
SELECT date FROM test_table WHERE test_hour NOT IN (SELECT generate_series(1,3));
But that only returns dates that have extra hours beyond 1, 2, 3
You can use aggregation and conditional HAVING clauses, like so:
-- Return every date that is missing at least one of the hours 1, 2 or 3.
-- Each COUNT counts the rows of the group that carry the given hour; a count
-- of 0 means that hour is absent for the date.
-- (Comparing MAX(CASE ...) with 1 does not work here: when the hour is absent
-- the CASE yields only NULLs, MAX returns NULL, and NULL != 1 is UNKNOWN,
-- which HAVING treats as false, so no date would ever be returned.)
SELECT mydate
FROM mytable
GROUP BY mydate
HAVING
    COUNT(CASE WHEN test_hour = 1 THEN 1 END) = 0
    OR COUNT(CASE WHEN test_hour = 2 THEN 1 END) = 0
    OR COUNT(CASE WHEN test_hour = 3 THEN 1 END) = 0
Another possibility would be to join it against the series (or another subquery containing the hours) and do a [distinct] count on the hours aggregated per date:
-- Dates that have fewer than three of the required hours 1..3.
-- A LEFT JOIN is used so that a date with none of the required hours still
-- forms a group (with a count of 0); an INNER JOIN would drop such dates.
select tst.date from tst
left join generate_series(1, 3) as hours("hour") on hours.hour = tst.hour
group by tst.date
having count(distinct hours.hour) < 3;
or
-- Same idea without a join: count, per date, the distinct hours that fall in
-- the required set 1..3. The condition lives inside the aggregate rather than
-- in WHERE, so a date with none of the required hours is still returned
-- (its count is 0) instead of being filtered away before grouping.
select date from tst
group by date
having count(distinct case when hour between 1 and 3 then hour end) < 3;
[You don't need the distinct if date/hour combinations in your table are unique]
A solution using set difference, giving you exactly the rows that are missing:
-- Build every (date, hour) pair that should exist for hours 1..3, then remove
-- the pairs that are actually present; what remains are the missing rows.
-- test_table is read with its two columns (date, test_hour) spelled out.
SELECT DISTINCT
    t.date,
    h.all_hour
FROM test_table AS t
CROSS JOIN generate_series(1,3) AS h(all_hour)
EXCEPT
SELECT
    date,
    test_hour
FROM test_table
And a solution using an array aggregate and the array contains operator:
-- Dates whose collected hours do NOT contain all of 1, 2 and 3.
-- @> is the array "contains" operator: left @> right is true when every
-- element of the right-hand array appears in the left-hand array.
SELECT date
FROM test_table
GROUP BY date
HAVING NOT array_agg(test_hour) @> ARRAY(SELECT generate_series(1,3))
(online demos)
I have a table customer_history which logs customer_id and modification_date.
When a customer_id is not modified, there is no entry in the table.
I can find the date on which a customer_id was last not modified (= last_date_with_no_modification) by looking for missing dates (a gaps-and-islands problem).
But in the same query, if no date is missing, last_date_with_no_modification should
be DATEADD(DAY,-1,min(modification_date)) for that customer_id.
How can I add this last condition to my SQL query?
I use following tables:
"Customer_history" table:
customer_id modification_date
1 2017-12-20
1 2017-12-19
1 2017-12-17
2 2017-12-20
2 2017-12-18
2 2017-12-17
2 2017-12-15
3 2017-12-20
3 2017-12-19
"#tmp_calendar" table:
date
2017-12-15
2017-12-16
2017-12-17
2017-12-18
2017-12-19
2017-12-20
Query used to get the gap date:
-- Query from the question: latest gap day per customer.
-- CTE_GAP pairs every history row with the previous modification_date of the
-- same customer (GapStart, NULL for the customer's first row) and computes
-- GapDays, the number of whole days strictly between the two dates.
WITH CTE_GAP AS
(SELECT ch.customer_id,
LAG(ch.modification_date) OVER(PARTITION BY ch.customer_id ORDER BY ch.modification_date) as GapStart,
ch.modification_date as GapEnd,
(DATEDIFF(DAY,LAG(ch.modification_date) OVER(PARTITION BY ch.customer_id ORDER BY ch.modification_date), ch.modification_date)-1) GapDays
FROM customer_history ch )
-- Only rows with GapDays > 0 survive the WHERE clause, so a customer whose
-- history has no gap produces no output row at all.
SELECT cg.customer_id,
DATEADD(DAY,1,MAX(cg.GapStart)) as last_date_with_no_modification
FROM CTE_GAP cg
CROSS JOIN #tmp_calendar c
WHERE cg.GapDays >0
-- keep calendar days that fall strictly inside the gap
AND c.date BETWEEN DATEADD(DAY,1,cg.GapStart) AND DATEADD(DAY,-1,cg.GapEnd)
GROUP BY cg.customer_id
Result:
customer_id last_date_with_no_modification
1 2017-12-18
2 2017-12-19
3 2017-12-19 (Row missing)
How to get customer_id 3?
Something like this should work:
-- One row per customer with last_date_with_no_modification:
--   * if the customer's history contains a gap, the day after the latest
--     GapStart (same rule as the original gap query);
--   * otherwise, the day before the customer's earliest modification_date.
-- All three reads use the same customer_history table.
WITH CTE_GAP AS (
    -- Pair every row with the customer's previous modification_date and
    -- count the whole days strictly between the two dates.
    SELECT
        ch.customer_id,
        LAG(ch.modification_date) OVER (
            PARTITION BY ch.customer_id
            ORDER BY ch.modification_date
        ) AS GapStart,
        ch.modification_date AS GapEnd,
        DATEDIFF(
            DAY,
            LAG(ch.modification_date) OVER (
                PARTITION BY ch.customer_id
                ORDER BY ch.modification_date
            ),
            ch.modification_date
        ) - 1 AS GapDays
    FROM customer_history AS ch
)
SELECT DISTINCT
    C.customer_id,
    -- Prefer the gap-based date; fall back to the no-gap rule when the
    -- customer has no gap (LD has no row, so its column is NULL).
    COALESCE(
        LD.last_date_with_no_modification,
        LD_NO_GAP.last_date_with_no_modification
    ) AS last_date_with_no_modification
FROM customer_history AS C
LEFT JOIN (
    -- Customers that have at least one gap.
    SELECT
        cg.customer_id,
        DATEADD(DAY, 1, MAX(cg.GapStart)) AS last_date_with_no_modification
    FROM CTE_GAP AS cg
    CROSS JOIN #tmp_calendar AS c
    WHERE cg.GapDays > 0
        AND c.date BETWEEN DATEADD(DAY, 1, cg.GapStart) AND DATEADD(DAY, -1, cg.GapEnd)
    GROUP BY cg.customer_id
) AS LD
    ON C.customer_id = LD.customer_id
LEFT JOIN (
    -- Fallback for every customer: the day before the first modification.
    SELECT
        customer_id,
        DATEADD(DAY, -1, MIN(modification_date)) AS last_date_with_no_modification
    FROM customer_history
    GROUP BY customer_id
) AS LD_NO_GAP
    ON C.customer_id = LD_NO_GAP.customer_id
I have a very strange request. I'm trying to create an SQL statement to do this. I know I can create a cursor, but I'm trying to see if it can be done in SQL.
Here is my source data.
1 - 1:00 PM
2 - 1:02 PM
3 - 1:03 PM
4 - 1:05 PM
5 - 1:06 PM
6 - 1:09 PM
7 - 1:10 PM
8 - 1:12 PM
9 - 1:13 PM
10 - 1:15 PM
I'm trying to create a function that if I pass an interval it will return the resulting data set.
For example I pass in 5 minutes, then the records I would want back are records 1, 4, 7, & 10.
Is there a way to do this in SQL? Note: if record 4 (1:05 PM) wasn't in the data set I would expect to see 1, 5, & 8. I would see 5 because it is the next record with a time at least 5 minutes after record 1, and record 8 because it is the next record with a time at least 5 minutes after record 5.
Here is a create script that you should have provided:
-- Sample data for the interval question, held in a table variable.
-- Table variables are declared and referenced with a leading @.
DECLARE @Table1 TABLE ([id] int, [time] time);

INSERT INTO @Table1 ([id], [time])
VALUES
    (1, '1:00 PM'),
    (2, '1:02 PM'),
    (3, '1:03 PM'),
    (4, '1:05 PM'),
    (5, '1:06 PM'),
    (6, '1:09 PM'),
    (7, '1:10 PM'),
    (8, '1:12 PM'),
    (9, '1:13 PM'),
    (10, '1:15 PM')
;
I would do this with this query:
-- Walk the records in steps of at least @interval minutes, starting at id 1.
DECLARE @interval int;
SET @interval = 5;

WITH next_times AS (
    -- For every record, the earliest recorded time that is at least
    -- @interval minutes later (NULL when there is none).
    SELECT
        t1.id,
        t1.[time],
        (
            SELECT MIN(t2.[time])
            FROM @Table1 AS t2
            WHERE t2.[time] >= DATEADD(MINUTE, @interval, t1.[time])
        ) AS next_time
    FROM @Table1 AS t1
),
t AS (
    -- Anchor: the record with id 1.
    SELECT nt.id, nt.[time], nt.next_time
    FROM next_times AS nt
    WHERE nt.id = 1
    UNION ALL
    -- Step: jump to the record whose time equals the previous next_time.
    -- The recursion ends when next_time is NULL (no row can match).
    SELECT t3.id, t3.[time], t3.next_time
    FROM t
    INNER JOIN next_times AS t3
        ON t.next_time = t3.[time]
)
SELECT id, [time]
FROM t
ORDER BY id
-- results:
id time
----------- ----------------
1 13:00:00.0000000
4 13:05:00.0000000
7 13:10:00.0000000
10 13:15:00.0000000
(4 row(s) affected)
It works even for the situations with a missing interval:
-- delete the 1:05 PM record
DELETE FROM @Table1 WHERE id = 4;

-- Same walk as before, re-run against the reduced data set.
-- @interval and @Table1 are the variables declared earlier in this batch.
WITH next_times AS (
    -- For every record, the earliest recorded time that is at least
    -- @interval minutes later (NULL when there is none).
    SELECT
        t1.id,
        t1.[time],
        (
            SELECT MIN(t2.[time])
            FROM @Table1 AS t2
            WHERE t2.[time] >= DATEADD(MINUTE, @interval, t1.[time])
        ) AS next_time
    FROM @Table1 AS t1
),
t AS (
    -- Anchor: the record with id 1.
    SELECT nt.id, nt.[time], nt.next_time
    FROM next_times AS nt
    WHERE nt.id = 1
    UNION ALL
    -- Step: jump to the record whose time equals the previous next_time.
    SELECT t3.id, t3.[time], t3.next_time
    FROM t
    INNER JOIN next_times AS t3
        ON t.next_time = t3.[time]
)
SELECT id, [time]
FROM t
ORDER BY id;
-- results:
id time
----------- ----------------
1 13:00:00.0000000
5 13:06:00.0000000
8 13:12:00.0000000
(3 row(s) affected)
Table1
sub-id ref-id Name
1 1 Project 1
2 1 Project 2
3 2 Project 3
4 2 Project 4
Table2
sub-id ref-id log_stamp Recepient log_type
----------------------------------------------------
1 1 06/06/2011 person A 1
1 1 06/14/2011 person B 2
1 1 06/16/2011 person C 2
1 1 06/17/2011 person D 3
2 1 06/18/2011 person E 2
2 1 06/19/2011 person F 2
3 2 06/20/2011 person G 1
4 2 06/23/2011 person H 3
Result
Name ref-id start_date Recepient latest_comment Recepient completion_date Receipient
Project1 1 06/06/2011 person A 06/19/2011 person F 06/17/2011 person D
Project3 2 06/20/2011 person G NULL NULL 06/23/2011 person H
log_type of 1 stands for start_date
log_type of 2 stands for latest_comment
log_type of 3 stands for completion_date
The Name of the project is just the name of the top-most name in the same group of ref-id
I have tried this so far:
-- Attempt from the question, reproduced as posted.
-- NOTE(review): not runnable as written. The when/then list has no CASE
-- keyword, ref-id is not bracketed, the join aliases (sb, log) do not match
-- the Table1/Table2 names used in the column list and ON clause, and the
-- final "pivot (" is never closed.
;with T as (select
Table2.ref-id,
Table2.log_stamp,
Table2 log.log_type
when 1 then '1'
when 2 then '2'
when 3 then '3'
end as title
from
Submission sb inner join submission_log log on Table1.[sub-id] = Table2.[sub-id]
)
select * from T
-- intended to spread max(log_stamp) into one column per title value
pivot (
max(log_stamp)
for title IN ([1],[2],[3],[5],[6],[9],[11])
I was unable to do it as a pivot; I don't think it is possible as described.
-- Sample data in table variables (declared and referenced with a leading @).
DECLARE @table1 TABLE (sub_id INT, ref_id INT, name VARCHAR(50));
INSERT @table1 (sub_id, ref_id, name) VALUES (1, 1, 'Project 1');
INSERT @table1 (sub_id, ref_id, name) VALUES (2, 1, 'Project 2');
INSERT @table1 (sub_id, ref_id, name) VALUES (3, 2, 'Project 3' );
INSERT @table1 (sub_id, ref_id, name) VALUES (4, 2, 'Project 4');

DECLARE @table2 TABLE (sub_id INT, ref_id INT, log_stamp DATETIME, recepient VARCHAR(10), logtype INT);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1,1,'06/06/2011','person A',1);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1,1,'06/14/2011','person B',2);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1,1,'06/16/2011','person C',2);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1,1,'06/17/2011','person D',3);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (2,1,'06/18/2011','person E',2);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (2,1,'06/19/2011','person F',2);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (3,2,'06/20/2011','person G',1);
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (3,2,'06/23/2011','person H',3);

-- a: every log row joined to its project, numbered per (project, logtype)
--    from newest to oldest log_stamp.
-- b: only the newest log row of each (project, logtype).
WITH a AS (
    SELECT
        RN = ROW_NUMBER() OVER (
            PARTITION BY t1.sub_id, t1.ref_id, t1.name, t2.logtype
            ORDER BY t2.log_stamp DESC
        ),
        t1.sub_id,
        t1.ref_id,
        t1.name,
        t2.Recepient,
        t2.logtype,
        t2.log_stamp
    FROM @table1 AS t1
    INNER JOIN @table2 AS t2
        ON t1.ref_id = t2.ref_id
        AND t1.sub_id = t2.sub_id
),
b AS (
    SELECT RN, sub_id, ref_id, name, Recepient, logtype, log_stamp
    FROM a
    WHERE RN = 1
)
-- One output row per project that has a start entry (logtype 1); the latest
-- comment (logtype 2) and completion (logtype 3) are attached when present.
SELECT
    b1.name,
    b1.ref_id,
    b1.log_stamp AS start_date,
    b1.Recepient,
    b2.log_stamp AS latest_comment,
    b2.Recepient,
    b3.log_stamp AS completion_date,
    b3.Recepient
FROM b AS b1
LEFT JOIN b AS b2
    ON b1.sub_id = b2.sub_id
    AND b1.ref_id = b2.ref_id
    AND b2.logtype = 2
LEFT JOIN b AS b3
    ON b1.sub_id = b3.sub_id
    AND b1.ref_id = b3.ref_id
    AND b3.logtype = 3
WHERE b1.logtype = 1
Result:
name ref_id start_date Recepient latest_comment Recepient completion_date Recepient
------------ ----------- ----------------------- ---------- ----------------------- ---------- ----------------------- ----------
Project 1 1 2011-06-06 00:00:00.000 person A 2011-06-16 00:00:00.000 person C 2011-06-17 00:00:00.000 person D
Project 3 2 2011-06-20 00:00:00.000 person G NULL NULL 2011-06-23 00:00:00.000 person H