postgres aggregate subset from group by rows

postgres aggregate subset from group by rows - postgresql

I'm trying to compute each user's loyalty-bonus balance, where bonuses burn after half a year of inactivity. For user 1, I want the sum to consist only of ords 4, 5 and 6.
-- Sample loyalty-bonus transactions. Each row records how long after the
-- user's previous transaction it happened (lag).
create table transactions (
    "user" int,      -- quoted because USER is a reserved word in PostgreSQL
    ord    int,      -- transaction date replacement
    amount int,
    lag    interval  -- after previous transaction
);

-- Explicit column list so the insert does not depend on column order.
insert into transactions ("user", ord, amount, lag) values
    (1, 1, 10, '1h'::interval),
    (1, 2, 10, '.5y'::interval),
    (1, 3, 10, '1h'::interval),
    (1, 4, 10, '.5y'::interval),
    (1, 5, 10, '.1h'::interval),
    (1, 6, 10, '.1h'::interval),
    (2, 1, 10, '1h'::interval),
    (2, 2, 10, '.5y'::interval),
    (2, 3, 10, '.1h'::interval),
    (2, 4, 10, '.1h'::interval),
    (3, 1, 10, '1h'::interval)  -- no trailing comma after the last row
;
-- Pseudo-query sketching the wanted result: one row per user with the sum of
-- amount, restricted as described in the inline comment below.
-- NOTE(review): unquoted `user` is presumably meant to be the column; in
-- PostgreSQL it would need to be written "user" - confirm before running.
select user, sum(
amount -- but starting from last '.5y'::interval if any otherwise everything counts
) from transactions group by user
user | sum(amount)
--------------------
1 | 30 -- (4+5+6), not 50, not 60
2 | 30 -- (2+3+4), not 40
3 | 10

try this:
-- Per user, sum the amounts of the last run of rows, where a new run starts
-- at every row whose lag is half a year or more.
with marked as (
    -- flag = 1 on a row whose *next* row comes after a gap of >= half a year
    select *,
           coalesce(((lead(lag) over per_user) >= interval '.5 year')::int, 0) as flag
    from test
    window per_user as (partition by user_ order by ord)
),
edges as (
    -- flag1 = 1 wherever flag differs from the previous row (first row included)
    select *,
           (flag is distinct from (lag(flag, 1) over per_user))::int as flag1
    from marked
    window per_user as (partition by user_ order by ord)
),
runs as (
    -- running count of edges numbers the runs of equal flag values
    select *,
           sum(flag1) over (partition by user_ order by ord) as grp
    from edges
)
-- distinct on + "ord desc" keeps each user's last row, whose running sum
-- covers exactly the final run
select distinct on (user_)
       user_,
       sum(amount) over (partition by user_, grp order by ord)
from runs
order by user_, ord desc
DEMO
It is rather complicated and slow, but it solves your problem.

Is this what you're looking for ?
-- Per-user balance: sum amounts starting from the user's last transaction that
-- followed a gap of half a year or more; users with no such gap keep everything.
with last_5y as (
    -- ord of the most recent "bonuses burned" transaction per user
    select "user", max(ord) as ord
    from transactions
    where lag >= '.5y'::interval   -- any gap of at least half a year burns bonuses
    group by "user"
)
select t."user", sum(t.amount)
from transactions t
-- left join keeps users that never had a half-year gap (user 3 in the sample data)
left join last_5y t2 on t2."user" = t."user"
where t2.ord is null          -- no burn point: every row counts
   or t.ord >= t2.ord         -- otherwise only rows from the burn point on
group by t."user"

Related

multiple named windows in a postgres query

The postgres docs specify a window definition clause thus:
[ WINDOW window_name AS ( window_definition ) [, ...] ]
The [,...] specifies that multiple windows are possible. I find nothing else in the docs to confirm or deny it's possible. How do I make this work?
In this query, I can use either window clause on its own but I can't use both even though the syntax follows the spec:
-- Question's repro: works with the single named window w_id; the commented-out
-- lines show the second window (w_kind) that could not be added this way.
select q.*
, min(value) over w_id as min_id_val
--, min(value) over w_kind as min_kind_val
from (
select 1 as id, 1 as kind, 3.0 as value
union select 1, 2, 1.0
union select 2, 1, 2.0
union select 2, 2, 0.5
) as q
window w_id as (partition by id)
-- ,
-- window w_kind as (partition by kind)
I can get the technical effect by not using window definitions, but that gets tiresome for a complex query where windows are re-used:
-- Same result without named windows: each OVER spells out its own partition.
with q as (
    select 1 as id, 1 as kind, 3.0 as value
    union select 1, 2, 1.0
    union select 2, 1, 2.0
    union select 2, 2, 0.5
)
select
    q.*,
    min(q.value) over (partition by q.id)   as min_id_val,
    min(q.value) over (partition by q.kind) as min_kind_val
from q

Don't repeat the window keyword:
-- Several named windows share ONE window clause, separated by commas.
with q (id, kind, value) as (
    values
        (1, 1, 3.0),
        (1, 2, 1.0),
        (2, 1, 2.0),
        (2, 2, 0.5)
)
select
    q.*,
    min(q.value) over w_id   as min_id_val,
    min(q.value) over w_kind as min_kind_val
from q
window
    w_id   as (partition by q.id),
    w_kind as (partition by q.kind)

How to add a dash between running numbers and comma between non-running numbers

I would like to replace a set of running and non running numbers with commas and hyphens where appropriate.
Using STUFF & XML PATH I was able to accomplish some of what I want by getting something like 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 15, 19, 20, 21, 22, 24.
-- Builds one comma-separated list of line numbers per (ORDERNo, Part).
WITH CTE AS (
    SELECT DISTINCT t1.ORDERNo, t1.Part, t2.LineNum
    FROM [DBName].[DBA].Table1 t1
    JOIN Table2 t2 ON t2.Part = t1.Part
    WHERE t1.ORDERNo = 'AB12345'
)
SELECT c1.ORDERNo,
       c1.Part,
       -- STUFF(..., 1, 2, '') strips the leading ', ' from the concatenated list
       STUFF((SELECT ', ' + CAST(c2.LineNum AS VARCHAR(11))
              FROM CTE c2
              WHERE c2.ORDERNo = c1.ORDERNo
                AND c2.Part = c1.Part      -- correlate on both grouping columns
              ORDER BY c2.LineNum          -- list order is otherwise not guaranteed
              FOR XML PATH('')), 1, 2, '') AS [LineNums]
FROM CTE c1
GROUP BY c1.ORDERNo, c1.Part
Here is some sample output:
ORDERNo Part LineNums
ON5650 PT01-0181 5, 6, 7, 8, 12
ON5652 PT01-0181 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 15, 19, 20, 21, 22, 24
ON5654 PT01-0181 1, 4
ON5656 PT01-0181 1, 2, 4
ON5730 PT01-0181 1, 2
ON5253 PT16-3934 1, 2, 3, 4, 5
ON1723 PT02-0585 1, 2, 3, 6, 8, 9, 10
Would like to have:
OrderNo Part LineNums
ON5650 PT01-0181 5-8, 12
ON5652 PT01-0181 1-10, 13, 15, 19-22, 24
ON5654 PT01-0181 1, 4
ON5656 PT01-0181 1-2, 4
ON5730 PT01-0181 1-2
ON5253 PT16-3934 1-5
ON1723 PT02-0585 1-3, 6, 8-10

This is a classic gaps-and-islands problem.
(a good read on the subject is Itzik Ben-Gan's Gaps and islands from SQL Server MVP Deep Dives)
The idea is that you first need to identify the groups of consecutive numbers. Once you've done that, the rest is easy.
First, create and populate sample table (Please save us this step in your future questions):
-- Sample data: a temporary table of integers containing several runs of
-- consecutive values ("islands") separated by gaps.
-- A #-prefixed name is a temp table and must be created with CREATE TABLE;
-- DECLARE ... AS TABLE only accepts @-prefixed table variables.
CREATE TABLE #T
(
    N int
);
INSERT INTO #T (N) VALUES
(1), (2), (3), (4),
(6),
(8),
(10), (11),
(13), (14), (15),
(17),
(19), (20), (21),
(25);
Then, use a common table expression to identify the groups.
-- Grouped: for every N, Grp = N minus its 1-based position in ascending order
-- of N. The value stays constant while the numbers are consecutive and changes
-- after each gap, so Grp identifies the island a row belongs to.
With Grouped AS
(
SELECT N,
N - ROW_NUMBER() OVER(ORDER BY N) As Grp
FROM #T
)
The result of this cte is this:
N Grp
1 0
2 0
3 0
4 0
6 1
8 2
10 3
11 3
13 4
14 4
15 4
17 5
19 6
20 6
21 6
25 9
As you can see, while the numbers are consecutive, the grp value stays the same.
When a row has a number that isn't consecutive with the previous number, the grp value changes.
Then you select from that cte, using a case expression to either select a single number (if it's the only one in its group) or the start and end of the group, separated by a dash:
-- Collapse each island (rows of Grouped sharing a grp value) into "min-max",
-- or just the number when the island has a single member, and join the pieces
-- with ', '.
SELECT STUFF(
(
    SELECT ', ' +
           CASE WHEN MIN(N) = MAX(N) THEN CAST(MIN(N) as varchar(11))
                ELSE CAST(MIN(N) as varchar(11)) + '-' + CAST(MAX(N) as varchar(11))
           END
    FROM Grouped
    GROUP BY grp
    ORDER BY MIN(N)   -- without this the order of the islands is not guaranteed
    FOR XML PATH('')
), 1, 2, '') As GapsAndIslands   -- STUFF drops the leading ', '
The result:
GapsAndIslands
1-4, 6, 8, 10-11, 13-15, 17, 19-21, 25

For fun I put together another way using Window Aggregates (e.g. SUM() OVER ...). I also use some newer T-SQL functionality such as CONCAT (2012+) and STRING_AGG (2017+). This uses Zohar's sample data.
-- Gaps-and-islands with window aggregates, CONCAT and STRING_AGG.
-- Table variables take an @ prefix; DECLARE #T is not valid T-SQL.
DECLARE @T AS TABLE(N INT PRIMARY KEY CLUSTERED);
INSERT INTO @T VALUES (1),(2),(3),(4),(6),(8),(10),(11),(13),(14),(15),(17),(19),(20),(21),(25);
WITH
a AS (
  -- isNewGroup is 0 when N directly follows the previous value, 1 after a gap.
  -- The LAG default of N-1 makes the first row count as "no gap".
  SELECT t.N, isNewGroup = SIGN(t.N - LAG(t.N, 1, t.N - 1) OVER (ORDER BY t.N) - 1)
  FROM @T AS t),
b AS (
  -- Running total of the gap markers gives every island its own number.
  SELECT a.N, GroupNbr = SUM(a.isNewGroup) OVER (ORDER BY a.N)
  FROM a),
c AS (
  -- REPLICATE(x, 0) yields '' for a single-member island, so the "-max" part
  -- is only appended when max > min.
  SELECT b.GroupNbr,
         txt = CONCAT(MIN(b.N), REPLICATE(CONCAT('-', MAX(b.N)), SIGN(MAX(b.N) - MIN(b.N))))
  FROM b
  GROUP BY b.GroupNbr)
SELECT STRING_AGG(c.txt, ', ') WITHIN GROUP (ORDER BY c.GroupNbr) AS Islands
FROM c;
Returns:
Islands
1-4, 6, 8, 10-11, 13-15, 17, 19-21, 25

And here an approach using a recursive CTE.
-- Gaps-and-islands with a recursive CTE that walks the ordered rows one by one
-- and grows the output string.
-- Table variables take an @ prefix; DECLARE #T is not valid T-SQL.
DECLARE @T AS TABLE(N INT PRIMARY KEY CLUSTERED);
INSERT INTO @T VALUES (1),(2),(3),(4),(6),(8),(10),(11),(13),(14),(15),(17),(19),(20),(21),(25);
WITH Numbered AS
(
    SELECT N, ROW_NUMBER() OVER(ORDER BY N) AS RowIndex FROM @T
)
,recCTE AS
(
    -- Anchor: the first row starts the output string.
    SELECT N
          ,RowIndex
          ,CAST(N AS VARCHAR(MAX)) AS OutputString
          ,(SELECT MAX(n2.RowIndex) FROM Numbered n2) AS MaxRowIndex
    FROM Numbered WHERE RowIndex=1
    UNION ALL
    SELECT n.N
          ,n.RowIndex
          ,CAST(
             CASE
               -- On an island: open it with '-' if not already open, and only
               -- write the number itself when this is the very last row
               -- (a missing CASE branch is NULL, which CONCAT treats as '').
               WHEN A.IsIsland=1
                 THEN CONCAT(r.OutputString
                            ,CASE WHEN A.IsWithin=0 THEN '-' END
                            ,CASE WHEN A.TheEnd=1 THEN CAST(n.N AS VARCHAR(11)) END)
               -- Leaving an open island: close it with the previous number,
               -- then start the next item. This also applies on the last row.
               WHEN A.IsWithin=1 THEN CONCAT(r.OutputString,r.N,',',n.N)
               -- Isolated number after a closed item.
               ELSE CONCAT(r.OutputString,',',n.N)
             END AS VARCHAR(MAX))
          ,r.MaxRowIndex
    FROM Numbered n
    INNER JOIN recCTE r ON n.RowIndex=r.RowIndex+1
    CROSS APPLY(SELECT CASE WHEN n.N-r.N=1 THEN 1 ELSE 0 END AS IsIsland
                      ,CASE WHEN RIGHT(r.OutputString,1)='-' THEN 1 ELSE 0 END AS IsWithin
                      ,CASE WHEN n.RowIndex=r.MaxRowIndex THEN 1 ELSE 0 END AS TheEnd) A
)
SELECT TOP 1 OutputString FROM recCTE ORDER BY RowIndex DESC
OPTION (MAXRECURSION 0);  -- one recursion per row; the default cap of 100 would fail on larger sets
The idea in short:
First we create a numbered set.
The recursive CTE will use the row's index to pick the next row, thus iterating through the set row-by-row
The APPLY determines three BIT values:
Is the distance to the previous value 1, then we are on the island, otherwise not
Is the last character of the growing output string a hyphen, then we are waiting for the end of an island, otherwise not.
...and if we've reached the end
The CASE deals with this four-field-matrix:
First we deal with the end to avoid a trailing hyphen at the end
Reaching an island we add a hyphen
Staying on the island we just continue
Reaching the end of an island we add the last number, a comma and start a new island
any other case will just add a comma and start a new island.
Hint: You can read island as group or section, while the commas mark the gaps.

Combining what I already had and using Zohar Peled's code I was finally able to figure out a solution:
-- Per (OrderNo, Part): list the line numbers, collapsing runs of three or more
-- consecutive numbers into "first-last".
WITH cteLineNums AS (
    SELECT t1.OrderNo, t1.Part, t2.LineNum
         -- LineNum minus its position within the order/part is constant across
         -- a run of consecutive line numbers, so RowSeq identifies the run.
         , (t2.LineNum - ROW_NUMBER() OVER(PARTITION BY t1.OrderNo, t1.Part ORDER BY t2.LineNum)) AS RowSeq
    FROM [DBName].[DBA].Table1 t1
    JOIN Table2 t2 ON t2.Part = t1.Part
    WHERE t1.OrderNo = 'AB12345'
    -- GROUP BY removes duplicate line numbers before ROW_NUMBER is evaluated
    GROUP BY t1.OrderNo, t1.Part, t2.LineNum
)
SELECT c2.OrderNo, c2.Part
     , STUFF((SELECT ', ' +
                CASE WHEN MIN(c1.LineNum) = MAX(c1.LineNum) THEN CAST(MIN(c1.LineNum) AS VARCHAR(11))
                     -- a run of exactly two is shown as "8, 9" rather than "8-9"
                     WHEN MIN(c1.LineNum) = (MAX(c1.LineNum)-1) THEN CAST(MIN(c1.LineNum) AS VARCHAR(11)) + ', ' + CAST(MAX(c1.LineNum) AS VARCHAR(11))
                     ELSE CAST(MIN(c1.LineNum) AS VARCHAR(11)) + '-' + CAST(MAX(c1.LineNum) AS VARCHAR(11))
                END
              FROM cteLineNums c1
              WHERE c1.OrderNo = c2.OrderNo
                AND c1.Part = c2.Part
              GROUP BY c1.RowSeq          -- one output item per run
              ORDER BY MIN(c1.LineNum)    -- keep the runs in ascending order
              FOR XML PATH('')), 1, 2, '') AS [LineNums]
FROM cteLineNums c2
GROUP BY c2.OrderNo, c2.Part
I used ROW_NUMBER() OVER (PARTITION BY ...) since I returned multiple records with different Order Numbers and Part Numbers. All this led to me still having to do the self join in the second part in order to get the correct LineNums to show for each record.
The second WHEN in the CASE statement is due to the code defaulting to having something like 2, 5, 8-9, 14 displayed when it should be 2, 5, 8, 9, 14.

Postgresql dense ranking to start at 2 if there is an initial tie at 1

So I have a table and a query that ranks the cost of items and doesn't allow ties at position 1: if there is a tie at position 1, the ranking starts at 2.
Here is the schema with a sample data
-- Sample applications with their cost; the two most expensive rows tie at 56.
create table applications (
    id   int,
    name char(10),
    cost int
);

insert into applications (id, name, cost) values
    (1,  'nfhfjs', 10),
    (2,  'oopdld', 20),
    (3,  'Wedass', 14),
    (4,  'djskck', 22),
    (5,  'laookd', 25),
    (6,  'mfjjf',  25),
    (7,  'vfhgg',  28),
    (8,  'nvopq',  29),
    (9,  'nfhfj',  56),
    (10, 'voapp',  56);
Here is the query
-- Question's attempt (not working as posted):
--  * start_tie is defined but never referenced in the outer FROM, so the
--    start_tie.* references in the outer SELECT have nothing to resolve to;
--  * the CASE compares start_tie.cost_rank (a rank) with
--    start_tie.next_app_cost (a cost value).
WITH start_tie AS (
SELECT
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
lead(cost,1) OVER (ORDER BY cost DESC) as next_app_cost
FROM
applications LIMIT 1
)
SELECT
*,
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
(CASE start_tie.cost_rank WHEN start_tie.next_app_cost THEN cost_rank+1 ELSE cost_rank END) AS right_cost_rank
FROM
applications;
my expected result is
id name cost cost_rank
10 voapp 56 2
9 nfhfj 56 2
8 nvopq 29 3
7 vfhgg 28 4
6 mfjjf 25 5
5 laookd 25 5
4 djskck 22 6
2 oopdld 20 7
3 Wedass 14 8
1 nfhfjs 10 9
Please modify the query to achieve the result.
SQL FIDDLE

All you need to do is to check if the highest cost is the same as the second-highest cost. And if that is the case, add 1 to all rank values:
-- Shift every dense rank by 1 when the two highest costs are equal.
with start_tie as (
    -- single row: 1 if the top cost equals the next one, otherwise 0
    select coalesce((cost = lead(cost) over by_cost)::int, 0) as tie_offset
    from applications
    window by_cost as (order by cost desc)
    order by cost desc
    limit 1
)
select
    a.*,
    dense_rank() over (order by a.cost desc) + s.tie_offset as cost_rank
from applications as a
cross join start_tie as s;
Example: http://rextester.com/EKSLJK65530
If the number of ties defines the offset to be used for the "new" ranking, the offset could be calculated using this:
-- Shift every dense rank by (number of rows sharing the highest cost) - 1.
with top_cost as (
    select max(cost) as cost
    from applications
),
start_tie as (
    select count(*) - 1 as tie_offset
    from applications as a1
    where a1.cost = (select cost from top_cost)
)
select
    a.*,
    dense_rank() over (order by a.cost desc) + s.tie_offset as cost_rank
from applications as a
cross join start_tie as s;

A tie at first place means that more than one row has rank 1; the query below then shifts all ranks by the number of tied rows minus one.
If the ranking should simply start at 2 regardless of how many rows are tied, replace r.cost_rank+x.c-1 with r.cost_rank+1.
-- r: every application with its dense rank by descending cost.
-- x: how many rows share rank 1.
with r as (
    select
        applications.*,
        dense_rank() over (order by cost desc) as cost_rank
    from applications
),
x as (
    select count(*) as c
    from r
    where cost_rank = 1
)
select
    r.*,
    -- when rank 1 is shared, push every rank up by the number of extra rows
    case
        when x.c > 1 then r.cost_rank + x.c - 1
        else r.cost_rank
    end as fixed
from r
cross join x;

Select all but sort by count in postgresql

I have a table myTable with a lot of columns (keep in mind that this table is very big), and one of those columns is a geometry point; we'll call it mySortColumn. I need to sort my select by how many rows share the same mySortColumn value.
One example could be this
myTable
id, mySortColumn
----------------
1, ASD12321F
2, ASD12321G
3, ASD12321F
4, ASD12321G
5, ASD12321H
6, ASD12321F
I have a query which can do what I want, the problem is the time. Actually it take like 30 seconds, and it seems like this:
-- Question's slow query: joins myTable to a per-point aggregate (only points
-- that occur more than once) by comparing X and Y coordinates.
-- <some filters> is a placeholder from the question, not valid SQL.
-- NOTE(review): COUNT(*) in the subquery has no alias; ORDER BY COUNT
-- presumably relies on the default output column name - confirm.
SELECT
id,
mySortColumn
FROM
myTable
JOIN (
SELECT
mySortColumn,
ST_Y(mySortColumn) AS lat,
ST_X(mySortColumn) AS lng,
COUNT(*)
FROM myTable
GROUP BY mySortColumn
HAVING COUNT(*) > 1
) AS myPosition ON (
ST_X(myTable.mySortColumn) = myPosition.lng
AND ST_Y(myTable.mySortColumn) = myPosition.lat
)
WHERE
<some filters>
ORDER BY COUNT DESC
The result must be this:
id, mySortColumn
----------------
1, ASD12321F
3, ASD12321F
6, ASD12321F
2, ASD12321G
4, ASD12321G
5, ASD12321H
I hope you can help me.

Here you are:
-- Order rows by how often their mySortColumn value occurs, most frequent first.
-- The two tie-breakers keep rows of equally frequent values together and make
-- the order deterministic (id ascending within each value).
select *
from myTable
order by count(*) over (partition by mySortColumn) desc,
         mySortColumn,
         id;
For more info about aggregate over () construction have a look at:
http://www.postgresql.org/docs/9.4/static/tutorial-window.html

Count distinct values with OVER(PARTITION BY id)

Is it possible to count distinct values in conjunction with window functions like OVER(PARTITION BY id)? Currently my query is as follows:
-- Question's query: numbers the rows per id_element by date and tries to attach
-- the number of distinct week_nb values per id_element to each row.
-- The COUNT(DISTINCT ...) OVER (...) call is what triggers the error quoted below.
SELECT congestion.date, congestion.week_nb, congestion.id_congestion,
congestion.id_element,
ROW_NUMBER() OVER(
PARTITION BY congestion.id_element
ORDER BY congestion.date),
COUNT(DISTINCT congestion.week_nb) OVER(
PARTITION BY congestion.id_element
) AS week_count
FROM congestion
WHERE congestion.date >= '2014.01.01'
AND congestion.date <= '2014.12.31'
ORDER BY id_element, date
However, when I try to execute the query I get the following error:
"COUNT(DISTINCT": "DISTINCT is not implemented for window functions"

No, as the error message states, DISTINCT is not implemented for window functions. Applying the info from this link to your case, you could use something like:
-- Pre-aggregate the distinct week count per element, then join it back onto
-- the detail rows.
WITH uniques AS (
    SELECT
        src.id_element,
        COUNT(DISTINCT src.week_nb) AS unique_references
    FROM congestion AS src
    WHERE src.date >= '2014.01.01'
      AND src.date <= '2014.12.31'
    GROUP BY src.id_element
)
SELECT
    c.date,
    c.week_nb,
    c.id_congestion,
    c.id_element,
    ROW_NUMBER() OVER (PARTITION BY c.id_element ORDER BY c.date),
    u.unique_references AS week_count
FROM congestion AS c
INNER JOIN uniques AS u
    ON u.id_element = c.id_element
WHERE c.date >= '2014.01.01'
  AND c.date <= '2014.12.31'
ORDER BY c.id_element, c.date
Depending on the situation you could also put a subquery straight into SELECT-list:
-- Same result with a correlated scalar subquery computing the distinct week
-- count for the current row's element.
SELECT
    c.date,
    c.week_nb,
    c.id_congestion,
    c.id_element,
    ROW_NUMBER() OVER (PARTITION BY c.id_element ORDER BY c.date),
    (
        SELECT COUNT(DISTINCT dist_con.week_nb)
        FROM congestion AS dist_con
        WHERE dist_con.id_element = c.id_element
          AND dist_con.date >= '2014.01.01'
          AND dist_con.date <= '2014.12.31'
    ) AS week_count
FROM congestion AS c
WHERE c.date >= '2014.01.01'
  AND c.date <= '2014.12.31'
ORDER BY c.id_element, c.date

If you are counting distinct numbers, you can use other aggregate functions to achieve the same effect, like so.
-- Counts distinct numbers per initial.id without COUNT(DISTINCT): collect the
-- partition's numbers into an array, sort it, drop duplicates, take its length.
-- NOTE(review): uniq() and sort() are not defined in this snippet; presumably
-- they come from the intarray extension - confirm it is installed.
select
initial.id,
initial.val,
joined.id,
array_length(uniq(sort(array_agg(joined.some_number) over (partition by initial.id))), 1) as distinct_count
from
(values (1,'a'), (2,'b'), (3,'c')) initial(id, val)
left join (values (1, 1),
(1, 1),
(1, 3),
(2, 2),
(2, 2),
(3, 3),
(3, 3),
(3, 3),
(3, 4)) joined(id, some_number) on joined.id = initial.id
;
id val id distinct_count
1 a 1 2
1 a 1 2
1 a 1 2
2 b 2 1
2 b 2 1
3 c 3 2
3 c 3 2
3 c 3 2
3 c 3 2

I find that the easiest way is to use a subquery/CTE and conditional aggregation:
-- Distinct week count per element via conditional aggregation: flag the first
-- row of every (id_element, week_nb) pair, then sum the flags per element.
SELECT
    c.date,
    c.week_nb,
    c.id_congestion,
    c.id_element,
    ROW_NUMBER() OVER (PARTITION BY c.id_element ORDER BY c.date),
    -- summing the first-row flags counts each week_nb once per element
    SUM(CASE WHEN c.seqnum = 1 THEN 1 ELSE 0 END)
        OVER (PARTITION BY c.id_element) AS week_count
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.id_element, src.week_nb ORDER BY src.date) AS seqnum
    FROM congestion src
    -- filter before numbering, so that the seqnum = 1 row of every pair is
    -- guaranteed to lie inside the reporting period
    WHERE src.date >= '2014.01.01'
      AND src.date <= '2014.12.31'
) c
ORDER BY
    c.id_element,
    c.date

Make the partitioned set smaller, down to the point where there are no duplicates of the counted field within a partition:
-- Plain COUNT (no DISTINCT) over a narrower partition: id_congestion is added
-- to the partition key so that the counter restarts where duplicates would occur.
SELECT
    c.date,
    c.week_nb,
    c.id_congestion,
    c.id_element,
    ROW_NUMBER() OVER (PARTITION BY c.id_element ORDER BY c.date),
    COUNT(c.week_nb) OVER (PARTITION BY c.id_element, c.id_congestion) AS week_count
FROM congestion AS c
WHERE c.date >= '2014.01.01'
  AND c.date <= '2014.12.31'
ORDER BY c.id_element, c.date

Since this is the first result that pops up from Google, I'll add this reproducible example, similar to Gordon's answer:
Let's first start with creating a sample table:
-- Inline sample data: (id_element, week_nb) pairs containing duplicates.
with test (id_element, week_nb) as (
    values
        (1, 'A'),
        (1, 'A'),
        (2, 'B'),
        (2, 'B'),
        (2, 'D'),
        (3, 'C'),
        (3, 'C'),
        (3, 'C'),
        (3, 'E'),
        (3, 'F')
)
select *
from test
This yields:
id_element week_nb
1 A
1 A
2 B
2 B
2 D
3 C
3 C
3 C
3 E
3 F
Then, doing something like:
-- Distinct week_nb count per id_element: mark one row of every
-- (id_element, week_nb) pair with 1 and sum the marks per element.
select
    flagged.id_element,
    flagged.week_nb,
    sum(flagged.first_row_in_sequence) over (partition by flagged.id_element) as distinct_week_nb_count
from (
    select
        t.id_element,
        t.week_nb,
        -- true for exactly one row per (id_element, week_nb) pair
        (row_number() over (partition by t.id_element, t.week_nb) = 1)::int as first_row_in_sequence
    from test as t
) as flagged
yields
id_element week_nb distinct_week_nb_count
1 A 1
1 A 1
2 B 2
2 B 2
2 D 2
3 C 3
3 C 3
3 C 3
3 E 3
3 F 3

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

postgres aggregate subset from group by rows - postgresql

Is this what you're looking for ? with last_5y as( select "user", max(ord) as ord from transactions where lag = '.5y'::interval group by "user" ) select t.user, sum(amount) from transactions t, last_5y t2 where t.user = t2.user and t.ord >= t2.ord group by t.user

Related

multiple named windows in a postgres query

How to add a dash between running numbers and comma between non-running numbers

Postgresql dense ranking to start at 2 if there is an initial tie at 1

Select all but sort by count in postgresql

Count distinct values with OVER(PARTITION BY id)

Categories

Resources