I am looking to number streaks within my data; the goal is to find where at least 3 consecutive rows are flagged in the np column.
Here is a subset of my data:
drop table if exists bi_adhoc.test;
create table bi_adhoc.test (id varchar(12),rd date,np decimal);
insert into bi_adhoc.test
select 'aaabbbccc', '2016-07-25'::date, 0 union all
select 'aaabbbccc', '2016-08-01'::date, 0 union all
select 'aaabbbccc', '2016-08-08'::date, 0 union all
select 'aaabbbccc', '2016-08-15'::date, 0 union all
select 'aaabbbccc', '2016-08-22'::date, 1 union all
select 'aaabbbccc', '2016-08-29'::date, 0 union all
select 'aaabbbccc', '2016-09-05'::date, 1 union all
select 'aaabbbccc', '2016-09-12'::date, 0 union all
select 'aaabbbccc', '2016-09-19'::date, 1;
I am hoping to use row_number() and count(), but they don't seem to give me the result I want.
select
*
,row_number() over (partition by t.id order by t.rd) all_ctr
,count(t.id) over (partition by t.id) all_count
,row_number() over (partition by t.id,t.np order by t.rd) np_counter
,count(t.id) over (partition by t.id,t.np) np_non_np
from
bi_adhoc.test t
order by
t.rd;
Here are my results, and the desired result:
id rd np all_ctr all_count np_counter np_non_np **Desired**
aaabbbccc 7/25/2016 0 1 9 1 6 **1**
aaabbbccc 8/1/2016 0 2 9 2 6 **2**
aaabbbccc 8/8/2016 0 3 9 3 6 **3**
aaabbbccc 8/15/2016 0 4 9 4 6 **4**
aaabbbccc 8/22/2016 1 5 9 1 3 **1**
aaabbbccc 8/29/2016 0 6 9 5 6 **1**
aaabbbccc 9/5/2016 1 7 9 2 3 **1**
aaabbbccc 9/12/2016 0 8 9 6 6 **1**
aaabbbccc 9/19/2016 1 9 9 3 3 **1**
One way to do this is to calculate the lagged np value in a CTE, and then compare the current np with the lagged np to detect a streak. This may not be the most optimal approach, but it seems to work fine.
with source_cte as
(
select
*
,row_number() over (partition by t.id order by t.rd) row_num
,lag(np,1) over (partition by t.id order by t.rd) as prev_np
from
bi_adhoc.test t
)
, streak_cte as
(
select
*,
case when np=prev_np or prev_np is NULL then 1 else 0 end as is_streak
from
source_cte
)
select
*,
case when is_streak=1 then dense_rank() over (partition by id, is_streak order by rd) else 1 end as desired
from
streak_cte
order by
rd;
First, I added some additional data to help fully illustrate the problem...
drop table if exists bi_adhoc.test;
create table bi_adhoc.test (id varchar(12),period date,hit decimal);
insert into bi_adhoc.test
select 'aaabbbccc', '2016-07-25'::date, 0 union all
select 'aaabbbccc', '2016-08-01'::date, 0 union all
select 'aaabbbccc', '2016-08-08'::date, 0 union all
select 'aaabbbccc', '2016-08-15'::date, 1 union all
select 'aaabbbccc', '2016-08-22'::date, 1 union all
select 'aaabbbccc', '2016-08-29'::date, 0 union all
select 'aaabbbccc', '2016-09-05'::date, 0 union all
select 'aaabbbccc', '2016-09-12'::date, 1 union all
select 'aaabbbccc', '2016-09-19'::date, 0 union all
select 'aaabbbccc', '2016-09-26'::date, 1 union all
select 'aaabbbccc', '2016-10-03'::date, 1 union all
select 'aaabbbccc', '2016-10-10'::date, 1 union all
select 'aaabbbccc', '2016-10-17'::date, 1 union all
select 'aaabbbccc', '2016-10-24'::date, 1 union all
select 'aaabbbccc', '2016-10-31'::date, 0 union all
select 'aaabbbccc', '2016-11-07'::date, 0 union all
select 'aaabbbccc', '2016-11-14'::date, 0 union all
select 'aaabbbccc', '2016-11-21'::date, 0 union all
select 'aaabbbccc', '2016-11-28'::date, 0 union all
select 'aaabbbccc', '2016-12-05'::date, 1 union all
select 'aaabbbccc', '2016-12-12'::date, 1;
Then the key was to figure out what constitutes a streak and how to label each one, so that I had something to partition the data by. The trick is that the overall row number minus the row number within the (id, hit) partition stays constant across a run of consecutive hits (or misses), so that difference identifies each streak.
select
*
,case
when t1.hit = 1 then row_number() over (partition by t1.id,t1.hit_partition order by t1.period)
when t1.hit = 0 then row_number() over (partition by t1.id,t1.miss_partition order by t1.period)
else null
end desired
from
(
select
*
,row_number() over (partition by t.id order by t.id,t.period) as row_number
,case
when t.hit = 1 then row_number() over (partition by t.id, t.hit order by t.period)
else null
end hit_counter
,case
when t.hit = 1 then row_number() over (partition by t.id order by t.id,t.period) - row_number() over (partition by t.id, t.hit order by t.period)
else null
end hit_partition
,case
when t.hit = 0 then row_number() over (partition by t.id, t.hit order by t.period)
else null
end miss_counter
,case
when t.hit = 0 then row_number() over (partition by t.id order by t.id,t.period) - row_number() over (partition by t.id, t.hit order by t.period)
else null
end miss_partition
from
bi_adhoc.test t
) t1
order by
t1.id
,t1.period;
The result of this:
id period hit row_number hit_counter hit_partition miss_counter miss_partition desired
aaabbbccc 2016-07-25 0 1 NULL NULL 1 0 1
aaabbbccc 2016-08-01 0 2 NULL NULL 2 0 2
aaabbbccc 2016-08-08 0 3 NULL NULL 3 0 3
aaabbbccc 2016-08-15 1 4 1 3 NULL NULL 1
aaabbbccc 2016-08-22 1 5 2 3 NULL NULL 2
aaabbbccc 2016-08-29 0 6 NULL NULL 4 2 1
aaabbbccc 2016-09-05 0 7 NULL NULL 5 2 2
aaabbbccc 2016-09-12 1 8 3 5 NULL NULL 1
aaabbbccc 2016-09-19 0 9 NULL NULL 6 3 1
aaabbbccc 2016-09-26 1 10 4 6 NULL NULL 1
aaabbbccc 2016-10-03 1 11 5 6 NULL NULL 2
aaabbbccc 2016-10-10 1 12 6 6 NULL NULL 3
aaabbbccc 2016-10-17 1 13 7 6 NULL NULL 4
aaabbbccc 2016-10-24 1 14 8 6 NULL NULL 5
aaabbbccc 2016-10-31 0 15 NULL NULL 7 8 1
aaabbbccc 2016-11-07 0 16 NULL NULL 8 8 2
aaabbbccc 2016-11-14 0 17 NULL NULL 9 8 3
aaabbbccc 2016-11-21 0 18 NULL NULL 10 8 4
aaabbbccc 2016-11-28 0 19 NULL NULL 11 8 5
aaabbbccc 2016-12-05 1 20 9 11 NULL NULL 1
aaabbbccc 2016-12-12 1 21 10 11 NULL NULL 2
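Since the original goal was to find places where at least 3 consecutive rows are flagged, the same partition value can be taken one step further. Here is a sketch along the same lines (not part of the answer above) that groups the hit rows on that computed difference and keeps only streaks of 3 or more:
select
t1.id
,min(t1.period) as streak_start
,max(t1.period) as streak_end
,count(*) as streak_length
from
(
select
t.*
-- same gaps-and-islands difference as above: constant within a run of consecutive hits
,row_number() over (partition by t.id order by t.period) - row_number() over (partition by t.id, t.hit order by t.period) as hit_grp
from
bi_adhoc.test t
) t1
where
t1.hit = 1
group by
t1.id
,t1.hit_grp
having
count(*) >= 3
order by
streak_start;
With the data above this returns a single row: the run from 2016-09-26 to 2016-10-24, five weeks long.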
I have the following table
id num sub_id
1 3 1
1 5 2
1 1 1
1 4 2
2 1 5
2 2 5
I want to get this result
id num sub_id number
1 3 1 1
1 5 2 2
1 1 1 1
1 4 2 2
2 1 5 1
2 2 5 1
I tried row_number() over (partition by id order by num, sub_id DESC), but the result is obviously different.
I don't fully understand your business logic, since you haven't explained it, but maybe this query helps you?
Result and info: dbfiddle
with recursive
cte_r as (
select id,
num,
sub_id,
row_number() over () as rn
from test),
cte as (
select id,
num,
sub_id,
rn,
rn as grp
from cte_r
where rn = 1
union all
select cr.id,
cr.num,
cr.sub_id,
cr.rn,
case
when cr.id != c.id then 1
when cr.id = c.id and cr.sub_id = c.sub_id then c.grp
when cr.id = c.id and cr.sub_id > c.sub_id then c.grp + 1
when cr.id = c.id and cr.sub_id < c.sub_id then 1
end
from cte c,
cte_r cr
where c.rn = cr.rn - 1)
select id,
num,
sub_id,
grp
from cte
order by id
It looks like you actually want to ignore the num column and then use DENSE_RANK on sub_id:
SELECT *, dense_rank() OVER (PARTITION BY id ORDER BY sub_id) AS number FROM …;
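For reference, a complete runnable version of the same idea against the sample rows from the question (a sketch; the inline CTE just stands in for the real table, whose name is not given):
with t(id, num, sub_id) as (
select 1, 3, 1 union all
select 1, 5, 2 union all
select 1, 1, 1 union all
select 1, 4, 2 union all
select 2, 1, 5 union all
select 2, 2, 5
)
select
id,
num,
sub_id,
dense_rank() over (partition by id order by sub_id) as number
from t;
This yields 1, 2, 1, 2 for id 1 and 1, 1 for id 2, matching the desired number column (although the rows may come back in a different order unless an ORDER BY is added).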
table_1
id customer_id
---------------
1 1
2 2
3 1
4 1
5 3
6 4
table_2
id id_table1 device_mac
-------------------------------------
1 1 aa:bb:cc:dd:ee:ff
2 1 11:22:33:44:55:66
3 2 1a:2a:3a:4a:5a:6a
4 3 2b:3b:4b:5b:6b:7b
5 4 3c:4c:5c:6c:7c:8c
6 2 4d:5d:6d:7d:8d:9d
table_3
id device_mac device_name
---------------------------------------
1 aa:bb:cc:dd:ee:ff loc1
2 11:22:33:44:55:66 loc2
3 1a:2a:3a:4a:5a:6a loc3
4 2b:3b:4b:5b:6b:7b loc4
5 3c:4c:5c:6c:7c:8c loc5
6 4d:5d:6d:7d:8d:9d loc6
I have a requirement where I need to get the details below by customer_id, using Python and a Postgres DB.
e.g. get details for customer_id = 1:
table1_id count(table_2) device_names
1 2 [loc1, loc2]
3 1 [loc4]
4 1 [loc5]
I tried with individual queries using Python (pseudocode):
select id from table_1 where customer_id=1;
for t1_id in the ids returned above:
select * from table_2 where id_table1=t1_id
for t2_row in the rows returned above:
select * from table_3 where device_mac = t2_row.device_mac
# generate expected rows
Can I just do this in a single query?
Join the tables and aggregate:
SELECT t1.id,
COUNT(*) count,
STRING_AGG(t3.device_name, ',' ORDER BY t3.device_name) device_names
FROM table_1 t1
INNER JOIN table_2 t2 ON t2.id_table1 = t1.id
INNER JOIN table_3 t3 ON t3.device_mac = t2.device_mac
WHERE t1.customer_id = 1
GROUP BY t1.id
If you are getting duplicate device_names you may use DISTINCT:
STRING_AGG(DISTINCT t3.device_name, ',' ORDER BY t3.device_name) device_names
See the demo.
Results:
id count device_names
1  2     loc1,loc2
3  1     loc4
4  1     loc5
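Since the desired output in the question shows the device names as a list ([loc1, loc2]), ARRAY_AGG could be swapped in for STRING_AGG (a small variation on the answer above, assuming PostgreSQL):
SELECT t1.id,
       COUNT(*) count,
       ARRAY_AGG(t3.device_name ORDER BY t3.device_name) device_names
FROM table_1 t1
INNER JOIN table_2 t2 ON t2.id_table1 = t1.id
INNER JOIN table_3 t3 ON t3.device_mac = t2.device_mac
WHERE t1.customer_id = 1
GROUP BY t1.id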
I have a table:
id market
1 mkt1
2 mkt2
3 mkt1
4 special
5 mkt2
6 mkt2
7 special
How can I select all columns from the table while also adding a sequential counter column, which starts counting once a condition has been triggered? In this example, the trigger is market = 'special':
id market count
1 mkt1 0
2 mkt2 0
3 mkt1 0
4 special 1
5 mkt2 2
6 mkt2 3
7 special 4
Here's one option using row_number with union all:
with cte as (
select min(id) as id from t where market = 'special'
)
select t.id, t.market, 0 rn
from t join cte on t.id < cte.id
union all
select t.id, t.market, row_number() over (order by t.id) rn
from t join cte on t.id >= cte.id
Online Demo
Edited to use min after your edits...
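An alternative sketch that avoids the UNION (assuming the same table t): first compute a running count of 'special' rows seen so far, then count every row from the first 'special' onwards.
select id, market,
       sum(case when seen_special > 0 then 1 else 0 end)
         over (order by id) as cnt  -- stands in for the desired "count" column
from (
  select id, market,
         sum(case when market = 'special' then 1 else 0 end)
           over (order by id) as seen_special
  from t
) s
order by id
With the sample rows this produces 0, 0, 0, 1, 2, 3, 4, the same as the desired count column.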
I have customer ID and transaction date (yyyy-mm-dd) as shown below:
Cust_id Trans_date
1 2017-01-01
1 2017-01-03
1 2017-01-06
2 2017-01-01
2 2017-01-04
2 2017-01-05
I need to find the difference in number of days between transactions, grouped by Cust_id.
I tried with date_diff and extract using the lag function, but I am getting this error:
function lag(timestamp without time zone) may only be called as a window function
I am looking for a result as below:
Cust_id Trans_date difference
1 2017-01-01 0
1 2017-01-03 2
1 2017-01-06 3
2 2017-01-01 0
2 2017-01-04 3
2 2017-01-05 1
How can I find this difference in PostgreSQL?
Is this what you want?
with t(Cust_id,Trans_date) as(
select 1 ,'2017-01-01'::timestamp union all
select 1 ,'2017-01-03'::timestamp union all
select 1 ,'2017-01-06'::timestamp union all
select 2 ,'2017-01-01'::timestamp union all
select 2 ,'2017-01-04'::timestamp union all
select 2 ,'2017-01-05'::timestamp
)
select
Cust_id,
Trans_date,
coalesce(Trans_date::date - lag(Trans_date::date) over(partition by Cust_id order by Trans_date), 0) as difference
from t;
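As a side note on the error quoted in the question: lag() is a window function, so it must be called with an OVER clause; calling it like a plain function raises exactly that message. A minimal illustration (a sketch using two of the sample rows):
with t(Cust_id, Trans_date) as(
select 1, '2017-01-01'::timestamp union all
select 1, '2017-01-03'::timestamp
)
select
Cust_id,
Trans_date,
-- lag() needs OVER (...); without it PostgreSQL raises
-- "function lag(timestamp without time zone) may only be called as a window function"
lag(Trans_date) over (partition by Cust_id order by Trans_date) as prev_date
from t;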
I have one system that reads from two client databases. The two clients use different cut-off date schemes:
1) Client A: every month on the 15th. Example: 15-12-2016.
2) Client B: the first day of every month. Example: 1-1-2017.
The cut-off dates are stored in the table as below:
Now I need a single query to retrieve the current cut-off date for each client. For instance, if today is 15-2-2017, the expected cut-off dates for the two clients should be as below:
1) Client A: 15-1-2017
2) Client B: 1-2-2017
How can I accomplish this in a single stored procedure? For client B, I can always take the first day of the current month, but that doesn't work for client A, whose cut-off falls on last month's date.
Maybe something like this is what you are looking for:
DECLARE @DummyClient TABLE(ID INT IDENTITY,ClientName VARCHAR(100));
DECLARE @DummyDates TABLE(ClientID INT,YourDate DATE);
INSERT INTO @DummyClient(ClientName) VALUES
('A'),('B');
INSERT INTO @DummyDates VALUES
(1,{d'2016-12-15'}),(2,{d'2017-01-01'});
WITH Numbers AS
( SELECT 0 AS Nr
UNION ALL SELECT 1
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5
UNION ALL SELECT 6
UNION ALL SELECT 7
UNION ALL SELECT 8
UNION ALL SELECT 9
UNION ALL SELECT 10
UNION ALL SELECT 11
UNION ALL SELECT 12
UNION ALL SELECT 13
UNION ALL SELECT 14
UNION ALL SELECT 15
UNION ALL SELECT 16
UNION ALL SELECT 17
UNION ALL SELECT 18
UNION ALL SELECT 19
UNION ALL SELECT 20
UNION ALL SELECT 21
UNION ALL SELECT 22
UNION ALL SELECT 23
UNION ALL SELECT 24
)
,ClientExt AS
(
SELECT c.*
,MIN(d.YourDate) AS MinDate
FROM @DummyClient AS c
INNER JOIN @DummyDates AS d ON c.ID=d.ClientID
GROUP BY c.ID,c.ClientName
)
SELECT ID,ClientName,D
FROM ClientExt
CROSS APPLY(SELECT DATEADD(MONTH,Numbers.Nr,MinDate)
FROM Numbers) AS RunningDate(D);
The result
ID Cl Date
1 A 2016-12-15
1 A 2017-01-15
1 A 2017-02-15
1 A 2017-03-15
1 A 2017-04-15
1 A 2017-05-15
1 A 2017-06-15
1 A 2017-07-15
1 A 2017-08-15
1 A 2017-09-15
1 A 2017-10-15
1 A 2017-11-15
1 A 2017-12-15
1 A 2018-01-15
1 A 2018-02-15
1 A 2018-03-15
1 A 2018-04-15
1 A 2018-05-15
1 A 2018-06-15
1 A 2018-07-15
1 A 2018-08-15
1 A 2018-09-15
1 A 2018-10-15
1 A 2018-11-15
1 A 2018-12-15
2 B 2017-01-01
2 B 2017-02-01
2 B 2017-03-01
2 B 2017-04-01
2 B 2017-05-01
2 B 2017-06-01
2 B 2017-07-01
2 B 2017-08-01
2 B 2017-09-01
2 B 2017-10-01
2 B 2017-11-01
2 B 2017-12-01
2 B 2018-01-01
2 B 2018-02-01
2 B 2018-03-01
2 B 2018-04-01
2 B 2018-05-01
2 B 2018-06-01
2 B 2018-07-01
2 B 2018-08-01
2 B 2018-09-01
2 B 2018-10-01
2 B 2018-11-01
2 B 2018-12-01
2 B 2019-01-01
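The calendar above would still need to be filtered down to the current cut-off. A different sketch (not the approach above) computes it directly from each client's stored anchor date, reusing the @DummyClient/@DummyDates table variables declared earlier (run in the same batch); the strict < comparison follows the question's example, where a cut-off falling on today is treated as not yet reached:
DECLARE @Today DATE = CAST(GETDATE() AS DATE);

SELECT c.ID,
       c.ClientName,
       -- shift the anchor forward by the number of whole month boundaries crossed
       -- up to today; step one month back if that lands on or after today
       CASE
           WHEN DATEADD(MONTH, DATEDIFF(MONTH, d.YourDate, @Today), d.YourDate) < @Today
               THEN DATEADD(MONTH, DATEDIFF(MONTH, d.YourDate, @Today), d.YourDate)
           ELSE DATEADD(MONTH, DATEDIFF(MONTH, d.YourDate, @Today) - 1, d.YourDate)
       END AS CurrentCutOff
FROM @DummyClient AS c
INNER JOIN @DummyDates AS d ON d.ClientID = c.ID;
With today as 15-2-2017 this returns 2017-01-15 for client A and 2017-02-01 for client B, matching the expected values in the question.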