PostgreSQL sum some values together and don't for other - postgresql

-- Question's starting point: one output row per tab row (grouped by t.id),
-- with the amounts of the orders matched on o.deal = t.id summed up.
-- Child rows (non-NULL parent_id) are NOT folded into their parent here.
SELECT
t.id,
sum(o.amount),
t.parent_id
FROM tab t
-- NOTE(review): ORDER is a reserved word in PostgreSQL; presumably the real
-- table name differs or has to be written as "order" -- confirm.
LEFT JOIN order o ON o.deal = t.id
GROUP BY t.id
Current output:
id
sum
parent_id
1
10
2
10
3
15
5
4
30
5
5
0
6
0
8
7
0
8
8
20
Desired logic, if the row contains parent_id then skip it but add everything together in the sum field so for id 3,4,5 the total would be 45 and only the id 5 would be shown. There can be cases when the sums are in the "sub tabs" or in the "main tab" but everything should be summed together.
Desired output:
id
sum
parent_id
1
10
2
10
5
45
8
20
What I have tried so far is using sub-selects and playing around with GROUP BY. Can someone point me in the right direction?

Use coalesce().
-- Roll child rows up into their parent: a row with a parent_id is counted
-- under that parent, a row without one is counted under its own id.
WITH the_data (id, sum, parent_id) AS (
    VALUES
        (1, 10, NULL),
        (2, 10, NULL),
        (3, 15, 5),
        (4, 30, 5),
        (5, 0, NULL),
        (6, 0, 8),
        (7, 0, 8),
        (8, 20, NULL)
)
SELECT
    COALESCE(parent_id, id) AS id,
    SUM(sum)
FROM the_data
-- The grouping key is spelled out as an expression: a bare "id" in GROUP BY
-- would resolve to the input column, not to the output alias.
GROUP BY COALESCE(parent_id, id)
ORDER BY COALESCE(parent_id, id)
Read about the feature in the documentation.
Db<>fiddle.

Your query isn't valid in PostgreSQL:
-- Quoted from the question: groups by t.id only, while t.parent_id is
-- selected without being aggregated or listed in GROUP BY.
SELECT
t.id,
sum(o.amount),
t.parent_id
FROM tab t
-- NOTE(review): ORDER is a reserved word in PostgreSQL and would need to be
-- quoted as "order" if that is the real table name -- confirm.
LEFT JOIN order o ON o.deal = t.id
GROUP BY t.id
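Unlike MySQL (when ONLY_FULL_GROUP_BY is disabled), PostgreSQL does not allow arbitrary ungrouped columns in the SELECT list: every non-aggregated column must appear in GROUP BY, unless it is functionally dependent on a grouped primary key (allowed since PostgreSQL 9.1).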
Anyway, if you're using t.id in your GROUP BY clause, then each t.id will produce one row, so you'll always have 3 and 4 separated, for example.
It looks like you're trying to use the parent_id as the main criterion to group by, falling back on the id when the parent_id is NULL.
You could use COALESCE(t.parent_id, t.id) to get this value for each row, and then group using it.
For example:
-- Sum order amounts per "main" tab: rows that have a parent_id are folded into
-- that parent, rows without one stand for themselves.
SELECT
    COALESCE(t.parent_id, t.id) AS id,
    -- A group whose tabs have no matching orders would otherwise yield NULL.
    COALESCE(SUM(o.amount), 0) AS sum
FROM tab AS t
-- ORDER is a reserved word in PostgreSQL, so the table name must be quoted.
LEFT JOIN "order" AS o
    ON o.deal = t.id
GROUP BY COALESCE(t.parent_id, t.id)
-- Deterministic output order, matching the desired result in the question.
ORDER BY COALESCE(t.parent_id, t.id)

Related

Convert jsonb in PostgreSQL to rows without cycle

I have a JSON array stored in my Postgres database. The first table "Orders" looks like this:
order_id, basket_items_id
1, {1,2}
2, {3}
3, {1,2,3,1}
Second table "Items" looks like this:
item_id, price
1,5
2,3
3,20
Already tried to load data with multiple sql and select of different jsonb record, but this is not a silver bullet.
-- Question's attempt: total price of the items in order 3.
SELECT
sum(price)
FROM orders
INNER JOIN items on
-- NOTE(review): this compares the whole basket_items_id value (described in the
-- question as an array) with a single item_id, so individual basket entries
-- are never matched -- the array has to be expanded into rows first.
orders.basket_items_id = items.item_id
WHERE order_id = 3;
Want to get this as output:
order_id, basket_items_id, price
1, 1, 5
1, 2, 3
2, 3, 20
3, 1, 5
3, 2, 3
3, 3, 20
3, 1, 5
or this:
order_id, sum(price)
1, 8
2, 20
3, 33
demo:db<>fiddle
-- One output row per basket entry: expand each order's jsonb array into rows,
-- then look up the price of every expanded item id.
SELECT
    o.order_id,
    CAST(elems.value AS int) AS basket_items_id,
    i.price
FROM orders AS o
-- The set-returning function reads a column of the preceding table, i.e. it is
-- a lateral reference; written out explicitly here.
CROSS JOIN LATERAL jsonb_array_elements_text(o.basket_items_id) AS elems
LEFT JOIN items AS i
    -- elements come back as text, so they are cast before comparing
    ON i.item_id = CAST(elems.value AS int)
ORDER BY
    o.order_id,
    CAST(elems.value AS int),
    i.price
jsonb_array_elements_text expands the jsonb array into one row each element. With this you are able to join against your second table directly
Since the expanded array gives you text elements you have to cast them into integers using ::int
Of course you can GROUP and SUM aggregate this as well:
-- Total price per order: expand each order's jsonb array into one row per
-- element, join the item prices, then aggregate per order.
SELECT
    o.order_id,
    SUM(i.price)
FROM orders AS o
CROSS JOIN LATERAL jsonb_array_elements_text(o.basket_items_id) AS elems
LEFT JOIN items AS i
    -- elements come back as text, so they are cast before comparing
    ON i.item_id = CAST(elems.value AS int)
GROUP BY o.order_id
ORDER BY o.order_id
Is your orders.basket_items_id column of type jsonb or int[]?
If the type is jsonb you can use jsonb_array_elements_text to expand the column:
-- jsonb variant: first turn every basket array into one row per element,
-- then join those rows to the item prices.
WITH expanded AS (
    SELECT
        order_id,
        -- set-returning call in the select list: one output row per array element
        CAST(jsonb_array_elements_text(basket_items_id) AS int) AS basket_item_id
    FROM orders
)
SELECT
    expanded.order_id,
    expanded.basket_item_id,
    items.price
FROM expanded
INNER JOIN items
    ON items.item_id = expanded.basket_item_id
ORDER BY
    expanded.order_id,
    expanded.basket_item_id,
    items.price;
See this DB-Fiddle.
If the type is int[] (array of integers), you can run a similar query with the unnest function:
-- int[] variant: unnest every basket array into one row per element,
-- then join those rows to the item prices.
WITH expanded AS (
    SELECT
        order_id,
        -- set-returning call in the select list: one output row per array element
        unnest(basket_items_id) AS basket_item_id
    FROM orders
)
SELECT
    expanded.order_id,
    expanded.basket_item_id,
    items.price
FROM expanded
INNER JOIN items
    ON items.item_id = expanded.basket_item_id
ORDER BY
    expanded.order_id,
    expanded.basket_item_id,
    items.price;
See this DB-fiddle

PostGIS equivalent of ArcMap Union

What is the equivalent in PostGIS / PostgreSQL of the "Union" operation in ArcMap?
Say you have two shapefiles with two features each. (PostGIS equivalent: two tables with two rows with polygon geometries)
then the result would be 1 shapefile with 7 features. (PostGIS equivalent: Table with 7 rows with geometries)
I've looked at ST_Intersect, ST_Union and ST_Collect but can't find the right combination. Your help is much appreciated.
Here is a working query based on this answer from gis.stackexchange:
Read it from a) to d):
-- Overlay of all polygons in "rectangles": the steps run a) -> d).
WITH exterior_rings AS (
    -- a) First get the exterior ring from all geoms
    SELECT ST_ExteriorRing(geom) AS geom
    FROM rectangles
),
unioned_rings AS (
    -- b) Union all rings in one big geometry
    SELECT ST_Union(geom) AS geom
    FROM exterior_rings
),
dumped_polygons AS (
    -- c) Polygonize the unioned rings (returns a GEOMETRYCOLLECTION)
    --    and dump them to return individual geometries
    SELECT ST_Dump(ST_Polygonize(geom)) AS dump
    FROM unioned_rings
)
-- d) Extract the path number and the geom from the geometry dump
SELECT
    (dump).path[1] AS id,
    (dump).geom
FROM dumped_polygons
Result:
Many thanks to Michael Entin
-- Overlay of two polygon layers: every overlapping piece plus the parts of each
-- layer that the other layer does not cover.
-- NOTE(review): ST_UNION_AGG is a BigQuery GIS function, not a PostGIS one, so
-- this presumably targets BigQuery; in PostGIS the aggregate form of ST_Union
-- plays that role -- confirm before running it on PostgreSQL.
-- input data
with polys1 AS (
SELECT 1 df1, ST_GeogFromText('Polygon((0 0, 2 0, 2 2, 0 2, 0 0))') g
UNION ALL
SELECT 2, ST_GeogFromText('Polygon((2 2, 4 2, 4 4, 2 4, 2 2))')
),
polys2 AS (
SELECT 1 df2, ST_GeogFromText('Polygon((1 1, 3 1, 3 3, 1 3, 1 1))') g
UNION ALL
SELECT 2, ST_GeogFromText('Polygon((3 3, 5 3, 5 5, 3 5, 3 3))')
),
-- left and right unions
-- (each CTE returns a single row: all shapes of one layer merged into one)
union1 AS (
SELECT ST_UNION_AGG(g) FROM polys1
),
union2 AS (
SELECT ST_UNION_AGG(g) FROM polys2
),
-- various combinations of intersections
pairs AS (
-- pieces shared by one shape from each layer; both ids are set
SELECT df1, df2, ST_INTERSECTION(a.g, b.g) g FROM polys1 a, polys2 b WHERE ST_INTERSECTS(a.g, b.g)
UNION ALL
-- part of each polys1 shape outside the merged polys2 layer; df2 is NULL
SELECT df1, NULL, ST_DIFFERENCE(g, (SELECT * FROM union2)) g FROM polys1
UNION ALL
-- part of each polys2 shape outside the merged polys1 layer; df1 is NULL
SELECT NULL, df2, ST_DIFFERENCE(g, (SELECT * FROM union1)) g FROM polys2
)
-- drop pieces that turned out empty (e.g. a shape fully covered by the other layer)
SELECT * FROM pairs WHERE NOT ST_IsEmpty(g)

Limit query by count distinct column values

I have a table with people, something like this:
ID PersonId SomeAttribute
1 1 yellow
2 1 red
3 2 yellow
4 3 green
5 3 black
6 3 purple
7 4 white
Previously I was returning all of the persons to the API as separate objects. So if the user set the limit to 3, I just set the query's maxResults in Hibernate to 3 and returned:
{"PersonID": 1, "attr":"yellow"}
{"PersonID": 1, "attr":"red"}
{"PersonID": 2, "attr":"yellow"}
and if someone specifies a limit of 3 and page 2 (setMaxResults(3), setFirstResult(3)), it would be:
{"PersonID": 3, "attr":"green"}
{"PersonID": 3, "attr":"black"}
{"PersonID": 3, "attr":"purple"}
But now I want to select people and combine then into one json object to look like this:
{
"PersonID":3,
"attrs": [
{"attr":"green"},
{"attr":"black"},
{"attr":"purple"}
]
}
And here is the problem. Is there any possibility in postgresql or hibernate to set limit not by number of rows but to number of distinct people ids, because if user specifies limit to 4 I should return person1, 2, 3 and 4, but in my current limiting mechanism I will return person1 with 2 attributes, person2 and person3 with only one attribute. Same problem with pagination, now I can return half of a person3 array attrs on one page and another half on next page.
You can use row_number to simulate LIMIT:
-- Test data
-- Column names are quoted because they are mixed-case; the column types come
-- from the VALUES rows (the ::TEXT cast on the first row fixes the text column).
CREATE TABLE person ("ID", "PersonId", "SomeAttribute") AS
VALUES
    (1, 1, 'yellow'::TEXT),
    (2, 1, 'red'),
    (3, 2, 'yellow'),
    (4, 3, 'green'),
    (5, 3, 'black'),
    (6, 3, 'purple'),
    (7, 4, 'white');
-- Returning as a normal column (limit by someAttribute size):
-- keeps at most 3 attribute rows for every person.
SELECT
    "PersonId",
    "SomeAttribute",
    rownum
FROM (
    SELECT
        "PersonId",
        "SomeAttribute",
        -- Number the rows inside each person by "ID". Ordering by the partition
        -- key itself would leave the numbering (and so the kept rows) arbitrary.
        row_number() OVER (PARTITION BY "PersonId" ORDER BY "ID") AS rownum
    FROM person
) AS tmp
WHERE rownum <= 3;
-- Returning as a normal column (overall limit):
-- keeps the first 4 rows overall, regardless of which person they belong to.
SELECT
    "PersonId",
    "SomeAttribute",
    rownum
FROM (
    SELECT
        "PersonId",
        "SomeAttribute",
        -- "ID" breaks ties between rows of the same person, so the same rows
        -- are kept on every run.
        row_number() OVER (ORDER BY "PersonId", "ID") AS rownum
    FROM person
) AS tmp
WHERE rownum <= 4;
-- Returning as a JSON column (limit by someAttribute size):
-- one row per person with at most 3 attributes, as a JSON array of objects.
SELECT
    "PersonId",
    -- json_object_agg with a constant key would emit the same key once per
    -- attribute (duplicate keys in one object); an array of single-key objects
    -- keeps every attribute.
    json_agg(json_build_object('color', "SomeAttribute") ORDER BY rownum) AS attributes
FROM (
    SELECT
        "PersonId",
        "SomeAttribute",
        -- number by "ID" so that the 3 kept attributes are the same on every run
        row_number() OVER (PARTITION BY "PersonId" ORDER BY "ID") AS rownum
    FROM person
) AS tmp
WHERE rownum <= 3
GROUP BY "PersonId";
-- Returning as a JSON column (limit by person):
-- one row per person with all of that person's attributes as a JSON array of
-- objects; LIMIT therefore counts persons, not attribute rows.
SELECT
    "PersonId",
    -- json_object_agg with a constant key would emit the same key once per
    -- attribute (duplicate keys in one object); an array of single-key objects
    -- keeps every attribute.
    json_agg(json_build_object('color', "SomeAttribute") ORDER BY "ID") AS attributes
FROM person
GROUP BY "PersonId"
-- Without an ORDER BY the persons picked by LIMIT (and by any later OFFSET for
-- paging) are not guaranteed to be stable.
ORDER BY "PersonId"
LIMIT 4;
In this case, of course, you must use a native query, but this is a small trade-off IMHO.
More info here and here.
I'm assuming you have another Person table. With JPA, you should run the query on the Person table (the "one" side), not on PersonColor (the "many" side). The limit will then be applied to the number of Person rows.
If you don't have the Person table and can't modify the DB, what you can do is use SQL and Group By PersonId, and concatenate colors
-- One row per person with all of that person's colors collected into an array.
SELECT
    PersonId,
    array_agg(Color)
FROM my_table
GROUP BY PersonId
-- ORDER BY makes LIMIT return the same persons on every run.
ORDER BY PersonId
LIMIT 2
SQL Fiddle
Thank you guys. After I realized that it could not be done with one query, I just did something like:
temp_query = select distinct x.person_id from (my_original_query) x
with user specific page/per_page
and then:
my_original_query += " AND person_id in (temp_query_results)

Postgresql dense ranking to start at 2 if there is an initial tie at 1

So I have a table and a query that ranks the cost of items and doesn't allow ties at position 1: if there is a tie at position 1, the ranking starts at 2.
Here is the schema with a sample data
-- Sample schema: one row per application with its cost.
CREATE TABLE applications (
    id   integer,
    name character(10),
    cost integer
);

-- Sample rows; note the two applications sharing the highest cost (56).
INSERT INTO applications (id, name, cost)
VALUES
    (1, 'nfhfjs', 10),
    (2, 'oopdld', 20),
    (3, 'Wedass', 14),
    (4, 'djskck', 22),
    (5, 'laookd', 25),
    (6, 'mfjjf', 25),
    (7, 'vfhgg', 28),
    (8, 'nvopq', 29),
    (9, 'nfhfj', 56),
    (10, 'voapp', 56);
Here is the query
-- Question's attempt at "start the ranking at 2 when first place is tied".
WITH start_tie AS (
SELECT
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
lead(cost,1) OVER (ORDER BY cost DESC) as next_app_cost
FROM
-- NOTE(review): LIMIT 1 without an ORDER BY on the outer query does not
-- guarantee that the highest-cost row is the one kept.
applications LIMIT 1
)
SELECT
*,
DENSE_RANK() OVER(ORDER BY cost DESC) cost_rank,
-- NOTE(review): start_tie is referenced here but is not listed in FROM below,
-- and the CASE compares a rank (cost_rank) with a cost (next_app_cost).
(CASE start_tie.cost_rank WHEN start_tie.next_app_cost THEN cost_rank+1 ELSE cost_rank END) AS right_cost_rank
FROM
applications;
my expected result is
id name cost cost_rank
10 voapp 56 2
9 nfhfj 56 2
8 nvopq 29 3
7 vfhgg 28 4
6 mfjjf 25 5
5 laookd 25 5
4 djskck 22 6
2 oopdld 20 7
3 Wedass 14 8
1 nfhfjs 10 9
Please modify the query to achieve the result.
SQL FIDDLE
All you need to do is to check if the highest cost is the same as the second-highest cost. And if that is the case, add 1 to all rank values:
-- Dense-rank applications by cost (highest first); when the highest cost is
-- shared by more than one row, shift every rank by one so ranking starts at 2.
WITH start_tie AS (
    -- 1 when at least two rows share the maximum cost, otherwise 0.
    -- Counting the rows at the maximum does not depend on how a window sort and
    -- an outer ORDER BY ... LIMIT 1 happen to order tied rows.
    SELECT
        CASE WHEN COUNT(*) > 1 THEN 1 ELSE 0 END AS tie_offset
    FROM applications
    WHERE cost = (SELECT MAX(cost) FROM applications)
)
SELECT
    a.*,
    DENSE_RANK() OVER (ORDER BY a.cost DESC)
        + (SELECT tie_offset FROM start_tie) AS cost_rank
FROM applications AS a
-- Same row order as the expected result shown in the question.
ORDER BY a.cost DESC, a.id DESC;
Example: http://rextester.com/EKSLJK65530
If the number of ties defines the offset to be used for the "new" ranking, the offset could be calculated using this:
-- Dense-rank applications by cost (highest first), shifted by the number of
-- extra rows that share the highest cost (0 when first place is not tied).
WITH start_tie AS (
    -- rows at the maximum cost, minus one; always exactly one result row
    SELECT COUNT(*) - 1 AS tie_offset
    FROM applications AS top_cost
    WHERE top_cost.cost = (SELECT MAX(cost) FROM applications)
)
SELECT
    app.*,
    DENSE_RANK() OVER (ORDER BY app.cost DESC) + start_tie.tie_offset AS cost_rank
FROM applications AS app
CROSS JOIN start_tie;
A tie at first place means that more than one row has rank 1.
Replace r.cost_rank+x.c-1 with r.cost_rank+1 if the ranking should always start at 2, regardless of how many rows are tied for first place.
-- Adds a "fixed" rank: the dense rank by cost, pushed down by the number of
-- extra rows tied for first place (unchanged when first place is not tied).
WITH ranked AS (
    SELECT
        applications.*,
        DENSE_RANK() OVER (ORDER BY cost DESC) AS cost_rank
    FROM applications
),
first_place AS (
    -- how many rows hold rank 1; always exactly one result row
    SELECT COUNT(*) AS c
    FROM ranked
    WHERE cost_rank = 1
)
SELECT
    ranked.*,
    CASE
        WHEN first_place.c > 1 THEN ranked.cost_rank + first_place.c - 1
        ELSE ranked.cost_rank
    END AS fixed
FROM ranked
CROSS JOIN first_place;

Hierarchical query rollup Rollup

I have the following table:
parent_id child_id child_class
1 2 1
1 3 1
1 4 2
2 5 2
2 6 2
Parent_id represents a folder id. Child id represents either a child folder (where child_class=1) or child file (where child_class=2).
I'd like to get a rollup counter (bottom up) of all files only (child_class=2) the following way. for example if C is a leaf folder (no child folders) with 5 files, and B is a parent folder of C that has 4 files in it, the counter on C should say 5 and the counter on B should say 9 (=5 from C plus 4 files in B) and so forth recursively going bottom up taking into consideration sibling folders etc.
In the example above I expect the results below (notice 3 is a child folder with no files in it):
parent_id FilesCounter
3 0
2 2
1 3
I prefer an SQL query for performance but function is also possible.
I tried mixing a hierarchical query with rollup (SQL Server 2008 R2), with no success so far.
Please advise.
This CTE should do the trick... Here is the SQLFiddle.
-- Bottom-up file counter per folder (T-SQL: uses SELECT ... INTO and ISNULL).
-- Step 1: copy tbl into tbl2, adding child_count = number of rows under the
-- same parent_id whose child_class is not 1 (i.e. the parent's direct files).
SELECT parent_id, child_id, child_class,
(SELECT COUNT(*) FROM tbl a WHERE a.parent_id = e.parent_id AND child_class <> 1) AS child_count
INTO tbl2
FROM tbl e
-- Step 2: recursive CTE that carries the counts from the leaves upwards.
;WITH CTE (parent_id, child_id, child_class, child_count)
AS
(
-- Start with leaf nodes
-- (rows whose child_id never appears as a parent_id)
-- NOTE(review): NOT IN returns no rows if the subquery yields a NULL parent_id;
-- presumably parent_id is never NULL -- confirm.
SELECT parent_id, child_id, child_class, child_count
FROM tbl2
WHERE child_id NOT IN (SELECT parent_id from tbl)
UNION ALL
-- Recursively go up the chain
-- (the row whose child is the current parent gets the running count added to
-- its own direct-file count)
SELECT e.parent_id, e.child_id, e.child_class, e.child_count + d.child_count
FROM tbl2 e
INNER JOIN CTE AS d
ON e.child_id = d.parent_id
)
-- Statement that executes the CTE
-- FOLDERS = parent_ids that are nobody's child, plus every child that is itself
-- a folder (child_class = 1). Folders without any CTE row report 0; otherwise
-- the largest accumulated count is reported.
SELECT FOLDERS.parent_id, max(ISNULL(child_count,0)) FilesCounter
FROM (SELECT parent_id FROM tbl2 WHERE parent_id NOT IN (select child_id from tbl2)
UNION
SELECT child_id FROM tbl2 WHERE child_class = 1) FOLDERS
LEFT JOIN CTE ON FOLDERS.parent_id = CTE.parent_id
GROUP BY FOLDERS.parent_id
Zak's answer was close, but the root folder did not rollup well. The following does the work:
-- Counts, for every folder, the files (child_class = 2) anywhere in its subtree;
-- folders that have no children at all are reported with 0.
-- NOTE(review): the recursive CTE is written without the RECURSIVE keyword,
-- which SQL Server accepts; PostgreSQL would need WITH RECURSIVE.
WITH par_child AS (
    -- sample folder/file edges: (parent folder, child, 1 = folder / 2 = file)
    SELECT v.parent_id, v.child_id, v.child_class
    FROM (
        VALUES
            (1, 2, 1),
            (1, 3, 1),
            (1, 4, 2),
            (2, 5, 1),
            (2, 6, 2),
            (2, 10, 2),
            (3, 11, 2),
            (3, 7, 2),
            (5, 8, 2),
            (5, 9, 2),
            (5, 12, 1),
            (5, 13, 1)
    ) AS v (parent_id, child_id, child_class)
),
child_cnt AS (
    -- every edge starts as a direct child of its own parent (level 1) ...
    SELECT
        parent_id AS root_parent_id,
        parent_id,
        child_id,
        child_class,
        1 AS lvl
    FROM par_child
    UNION ALL
    -- ... and each descendant edge is attached to the same root, one level deeper
    SELECT
        up.root_parent_id,
        edge.parent_id,
        edge.child_id,
        edge.child_class,
        up.lvl + 1 AS lvl
    FROM par_child AS edge
    INNER JOIN child_cnt AS up
        ON edge.parent_id = up.child_id
),
distinct_folders AS (
    SELECT DISTINCT child_id AS folder_id
    FROM par_child
    WHERE child_class = 1
)
-- files found anywhere below each root folder
SELECT
    root_parent_id,
    COUNT(child_id) AS cnt
FROM child_cnt
WHERE child_class = 2
GROUP BY root_parent_id
UNION ALL
-- folders that never appear as a parent contain nothing: report them with 0
SELECT
    df.folder_id,
    0
FROM distinct_folders AS df
WHERE NOT EXISTS (
    SELECT 1
    FROM par_child AS pc
    WHERE pc.parent_id = df.folder_id
)