Sorting rows by children? - postgresql

I have this table:
-- One row per item. `parent` holds the id of the parent item (the question
-- uses parent = 0 for root items); `posted` holds the item's timestamp.
CREATE TABLE items (
    id     SERIAL PRIMARY KEY,
    data   TEXT,
    parent INTEGER,
    posted INTEGER
);
Each item has a piece of data, a timestamp, and a parent. I'd like to select the top 10 root items (parent = 0), sorted by the timestamp of the most recent child.
If item #1 has a child #2 that has a child #3, #3 is considered a child of #1.
How can I do this?

EDIT:
The query has been rewritten to
first sort the child items
get the root parent id and the rank for each item
select the top 10 parents
select the details for the top 10 parents
Common Table expressions have been used to incrementally select the data following the above steps.
-- Top 10 root items, ordered by how recently any of their descendants was posted.
WITH RECURSIVE seeds AS
(
    -- Every non-root item, ranked by recency (rank 1 = most recently posted).
    -- Each seed starts a downward walk and records its own parent as the
    -- candidate top-level parent. No ORDER BY here: row order inside a CTE
    -- does not affect the result.
    SELECT
        id,
        parent,
        parent AS topParentID,
        0 AS child_level,
        rank() OVER (ORDER BY posted DESC) AS child_rank
    FROM items
    WHERE parent <> 0
)
, c AS
(
    -- Walk from each seed down to all of its descendants. A descendant row
    -- carries the seed's candidate parent and the seed's rank, and counts
    -- how many levels below the seed it sits.
    SELECT
        id,
        parent,
        topParentID,
        child_level,
        child_rank
    FROM seeds
    UNION ALL
    SELECT
        T.id,
        T.parent,
        c.topParentID,
        c.child_level + 1,
        c.child_rank
    FROM items AS T
    INNER JOIN c ON T.parent = c.id
    -- Skip rows that are their own parent so they cannot recurse forever.
    WHERE T.id <> T.parent
)
, rank_level AS
(
    -- One row per item: the best rank seen on any walk that reached it and
    -- the depth of the longest walk (the one started by its top-most
    -- non-root ancestor). A single GROUP BY replaces the former pair of
    -- self-joins plus DISTINCT and yields the same rows.
    SELECT
        id,
        MIN(child_rank) AS child_rank,
        MAX(child_level) AS root_level
    FROM c
    GROUP BY id
)
, top_10_parents AS
(
    -- Keep only the longest walk per item so topParentID is the top-level
    -- parent, then take the best rank per top-level parent.
    SELECT
        c.topParentID AS id,
        MIN(rl.child_rank) AS id_rank
    FROM rank_level AS rl
    INNER JOIN c ON rl.id = c.id AND c.child_level = rl.root_level
    GROUP BY c.topParentID
    -- rank() assigns equal ranks to equal timestamps; the id tie-breaker
    -- makes the LIMIT deterministic.
    ORDER BY MIN(rl.child_rank), c.topParentID
    LIMIT 10
)
SELECT
    i.*
FROM items AS i
INNER JOIN top_10_parents AS tp ON tp.id = i.id
ORDER BY tp.id_rank, i.id;
SQL Fiddle
Reference:
WITH Queries (Common Table Expressions) on PostgreSQL Manual

Related

How to use aggregate functions when using recursive query in postgresql

When I run the recursive query below in PostgreSQL, it recurses through several levels and I get the following result:
WITH RECURSIVE report AS (
    -- One aggregated row per table1 row that has matching table2 rows.
    SELECT a.name, a.id, a.parentid, sum(b.id)
    FROM table1 a
    INNER JOIN table2 b ON a.id = b.table1id
    GROUP BY a.name, a.id, a.parentid
), report2 AS (
    -- Anchor: top-level rows (no parent) start at level 0.
    SELECT report.*, 0 AS lvl
    FROM report
    WHERE parentid IS NULL
    UNION ALL
    -- Recursive step: attach each child one level below its parent.
    SELECT child.*, parent.lvl + 1
    FROM report child
    JOIN report2 parent
        ON parent.id = child.parentid
)
SELECT * FROM report2
I want to sum the count column up to the top-most level, so my output should look like the example below.
What is the best way to achieve this?
If you calculate a path during recursion, like so:
WITH RECURSIVE report AS (
    -- Is summing b.id the right thing here?
    -- The aggregate is aliased as "count" because the roll-up query that
    -- consumes report2 reads this column as r.count.
    SELECT a.name, a.id, a.parentid, sum(b.id) AS count
    FROM table1 a
    INNER JOIN table2 b ON a.id = b.table1id
    GROUP BY a.name, a.id, a.parentid
), report2 AS (
    -- Anchor: top-level rows start at level 0 with a path holding only
    -- their own id.
    SELECT report.*, 0 AS lvl, array[report.id] AS path_array
    FROM report
    WHERE parentid IS NULL
    UNION ALL
    -- Recursive step: extend the parent's path with the child's id, so
    -- path_array[1] is always the id of the top-level row. The columns must
    -- be referenced through the FROM aliases (parent / child).
    SELECT child.*, parent.lvl + 1, parent.path_array || child.id
    FROM report child
    JOIN report2 parent
        ON parent.id = child.parentid
)
SELECT * FROM report2;
Do you really mean sum(b.id) and not count(*) in the report CTE?
You can get the sum of count for your top levels using this query as the main query from your recursion:
-- Roll each row's count up to the table1 row whose id is the first element
-- of that row's path.
SELECT
    top_level.name,
    SUM(node.count) AS total_count
FROM report2 AS node
INNER JOIN table1 AS top_level
    ON top_level.id = node.path_array[1]
GROUP BY top_level.name;

Strange Behaviour on Postgresql query

We created a view in Postgres and I am getting strange result.
View Name: event_puchase_product_overview
When I try to get records with *, I get the correct result. but when I try to get specific fields, I get wrong values.
I hope the screens attached here can explain the problem well.
-- Fetch every column of the view row with id 15065.
-- The view name is spelled "puchase", exactly as in its CREATE VIEW statement.
SELECT *
FROM event_puchase_product_overview
WHERE id = 15065;
-- Same filter, projecting only two of the view's columns.
SELECT
    v.id,
    v.departure_id
FROM event_puchase_product_overview AS v
WHERE v.id = 15065;
VIEW definition:
-- Flattens events together with their related products, purchase-order
-- lines, purchase orders, requisitions, suppliers and product attribute
-- values: one output row per joined combination.
CREATE OR REPLACE VIEW public.event_puchase_product_overview AS
-- NOTE(review): the window below has no ORDER BY, so the numbering is not
-- tied to the ORDER BY at the end of this view and may differ from one
-- execution plan to another; "id" is therefore not a stable key.
SELECT row_number() OVER () AS id,
e.id AS departure_id,
e.type AS event_type,
e.name,
p.id AS product_id,
pc.name AS product_type,
product_date.attribute AS option,
p.upcomming_date AS supply_date,
pr.date_end AS bid_deadline,
-- departure_qty: choose the event capacity column by product category.
-- Each branch reads ir_config_parameter.value for one key, strips the
-- surrounding square brackets, splits on ', ' and casts to integer[]; the
-- branch applies when pt.categ_id is in that list. Falls back to
-- maximum_seats when no list matches.
CASE
WHEN (pt.categ_id IN ( SELECT unnest(tt.category_ids) AS unnest
FROM ( SELECT string_to_array(btrim(ir_config_parameter.value, '[]'::text), ', '::text)::integer[] AS category_ids
FROM ir_config_parameter
WHERE ir_config_parameter.key::text = 'trip_product_flight.product_category_hotel'::text) tt)) THEN e.maximum_rooms
WHEN (pt.categ_id IN ( SELECT unnest(tt.category_ids) AS unnest
FROM ( SELECT string_to_array(btrim(ir_config_parameter.value, '[]'::text), ', '::text)::integer[] AS category_ids
FROM ir_config_parameter
WHERE ir_config_parameter.key::text = 'trip_product_flight.product_category_flight'::text) tt)) THEN e.maximum_seats
WHEN (pt.categ_id IN ( SELECT unnest(tt.category_ids) AS unnest
FROM ( SELECT string_to_array(btrim(ir_config_parameter.value, '[]'::text), ', '::text)::integer[] AS category_ids
FROM ir_config_parameter
WHERE ir_config_parameter.key::text = 'trip_product_flight.product_category_bike'::text) tt)) THEN e.maximum_bikes
ELSE e.maximum_seats
END AS departure_qty,
-- is_deadline: true when today's date is past the requisition's date_end
-- and the purchase order is still in state 'draft'.
CASE
WHEN now()::date > pr.date_end AND po.state::text = 'draft'::text THEN true
ELSE false
END AS is_deadline,
pl.product_qty::integer AS purchased_qty,
pl.comments,
pl.price_unit AS unit_price,
rp.id AS supplier,
po.id AS po_ref,
po.state AS po_state,
po.date_order AS po_date,
po.user_id AS operator,
pl.po_state_line AS line_status
FROM event_event e
LEFT JOIN product_product p ON p.related_departure = e.id
LEFT JOIN product_template pt ON pt.id = p.product_tmpl_id
LEFT JOIN product_category pc ON pc.id = pt.categ_id
LEFT JOIN purchase_order_line pl ON pl.product_id = p.id
LEFT JOIN purchase_order po ON po.id = pl.order_id
LEFT JOIN purchase_order_purchase_requisition_rel prr ON prr.purchase_order_id = po.id
LEFT JOIN purchase_requisition pr ON pr.id = prr.purchase_requisition_id
LEFT JOIN res_partner rp ON rp.id = po.partner_id
-- product_date: attribute value names per product, excluding attributes
-- named 'Date' or 'Departure'.
-- NOTE(review): the WHERE on pat.name also rejects rows where the LEFT JOINs
-- found no attribute (pat.name is NULL), so this subquery returns nothing
-- for products without attributes.
LEFT JOIN ( SELECT p_1.id AS product_id,
pav.name AS attribute
FROM product_product p_1
LEFT JOIN product_attribute_value_product_product_rel pa ON pa.prod_id = p_1.id
LEFT JOIN product_attribute_value pav ON pav.id = pa.att_id
LEFT JOIN product_attribute pat ON pat.id = pav.attribute_id
WHERE pat.name::text <> ALL (ARRAY['Date'::character varying, 'Departure'::character varying]::text[])) product_date ON product_date.product_id = p.id
-- Keep only active products whose id appears in mrp_bom_line.product_id.
-- NOTE(review): because these predicates test p, events without a matching
-- product are dropped even though product_product is LEFT JOINed.
WHERE (p.id IN ( SELECT DISTINCT mrp_bom_line.product_id
FROM mrp_bom_line)) AND p.active
ORDER BY e.id, pt.categ_id, p.id;
If I add new event_event or new product_product I'll get a new definition of row_number in my view, then the column ID of my view is not stable.
At the very least, you can't use row_number as the ID of the view.
If you insist on using row_number, order it by a creation date (ORDER BY "creation DATE"); that way all new records appear as the last rows of the view, and the correspondence between the ID (row_number) and the other columns does not change.
Hope that helps !
Very likely the execution plan of your query depends on the columns you select. Compare the execution plans!
Your id is generated using the row_number window function. Now window functions are executed before the ORDER BY clause, so the order will depend on the execution plan and hence on the columns you select.
Using row_number without an explicit ordering doesn't make any sense.
To fix that, don't use
row_number() OVER ()
but
row_number() OVER (ORDER BY e.id, pt.categ_id, p.id)
so that you have a reliable ordering.
In addition, you should omit the ORDER BY clause at the end.

Avoiding Order By in T-SQL

The sample query below is part of my main query. I found that the SORT operator in it accounts for 30% of the cost.
Avoiding the SORT would require creating indexes. Is there any other way to optimize this code?
-- Earliest T_Date (as a DATE) among the Status = 3 rows for r.ID whose
-- TableA_ID is greater than that of the latest-dated row with another
-- status (0 when no such row exists). "r" comes from the enclosing query.
SELECT TOP 1
    CONVERT(DATE, cur.T_Date) AS T_Date
FROM TableA AS cur
WHERE cur.ID = r.ID
    AND cur.Status = 3
    AND cur.TableA_ID > ISNULL(
        (
            SELECT TOP 1 prev.TableA_ID
            FROM TableA AS prev
            WHERE prev.ID = r.ID
                AND prev.Status <> 3
            ORDER BY prev.T_Date DESC
        ),
        0
    )
ORDER BY T_Date ASC
It looks like you can use NOT EXISTS rather than the sorts. I think you'll probably get a better performance boost by using a CTE or derived table instead of a scalar subquery.
-- Per ID: the earliest status = 3 date that has no later row with another
-- status. The "..." placeholders stand for the rest of the caller's query.
SELECT *
FROM r ... LEFT OUTER JOIN
(
    SELECT
        ID,
        MIN(t_date) AS min_date
    FROM TableA t1
    WHERE status = 3
        AND NOT EXISTS (
            SELECT 1
            FROM TableA t2
            WHERE t2.ID = t1.ID
                AND t2.status <> 3
                AND t2.t_date > t1.t_date
        )
    GROUP BY ID
) AS md ON md.ID = r.ID ...
or
-- Per ID: the earliest status = 3 date that has no later row with another
-- status, written as an anti-join. The "..." placeholders stand for the rest
-- of the caller's query.
select *
from r ... left outer join
(
    select t1.ID, min(t1.t_date) as min_date
    from TableA t1
    -- Every condition that defines a "later row with another status" belongs
    -- in ON. Testing t2.t_date in WHERE would discard the NULL-extended rows
    -- and the query could never return anything.
    left outer join TableA t2
        on t2.ID = t1.ID
        and t2.status <> 3
        and t2.t_date > t1.t_date
    where t1.status = 3
      -- Anti-join: keep only t1 rows for which no such t2 row was found.
      and t2.ID is null
    group by t1.ID
) as md on md.ID = r.ID ...
It also appears that you're relying on an identity column but it's not clear what those values mean. I'm basically ignoring it and using the date column instead.
Try this:
-- Earliest T_Date (as a DATE) among the Status = 3 rows for r.ID whose
-- TableA_ID is greater than the highest TableA_ID of any row with another
-- status (0 when there is none). "r" comes from the enclosing query.
SELECT TOP 1 CONVERT(DATE, a1.T_Date) AS T_Date
FROM TableA a1
LEFT JOIN (
    SELECT ID, MAX(TableA_ID) AS MaxAID
    FROM TableA
    WHERE Status <> 3
    GROUP BY ID
) a2 ON a2.ID = a1.ID
WHERE a1.ID = r.ID
    AND a1.Status = 3
    -- This filter must be in WHERE: inside the LEFT JOIN's ON clause it
    -- would only decide whether a2 is attached and never remove a1 rows.
    AND a1.TableA_ID > COALESCE(a2.MaxAID, 0)
ORDER BY T_Date ASC
The use of TOP 1 in combination with the unexplained r alias concerns me. There's almost certainly a MUCH better way to get this data into your results that doesn't involve doing this in a subquery (unless this is for an APPLY operation).

How should I add fields without adding them to a GROUP BY?

I have a SQL statement that works as-is. I get an area name and the minimum value within that area. next, I need to add in a key so I can actually do something with the results. The key is necessary since names and values are unlikely to be unique.
-- Minimum rndval per area name, over the points located within each area.
SELECT
    g.name,
    MIN(g.rndval)
FROM (
    SELECT
        p.rndval,
        a.name,
        p.id
    FROM points AS p
    INNER JOIN areas AS a
        ON ST_Within(p.geom, a.geom)
) AS g
GROUP BY g.name
When I add the Id field to the group by, the query returns multiple rows for each area, as expected since it's grouping by the name and id combination, and the results are no longer what I need. How should I add in the id field (p.id in the inner select)?
You can try:
-- Pair every point with the areas containing it, find the minimum rndval per
-- area name, then return the points that carry that minimum.
WITH located_points AS (
    SELECT
        p.rndval,
        a.name,
        p.id
    FROM points AS p
    INNER JOIN areas AS a
        ON ST_Within(p.geom, a.geom)
),
area_minimums AS (
    SELECT
        name,
        MIN(rndval) AS min_value
    FROM located_points
    GROUP BY name
)
SELECT DISTINCT
    lp.rndval,
    lp.name,
    lp.id
FROM located_points AS lp
INNER JOIN area_minimums AS am
    ON lp.rndval = am.min_value
    AND lp.name = am.name;
You can solve this quite elegantly with a window function:
-- Rank the points inside each area name by rndval and keep the rank-1 rows;
-- rank() keeps every point tied for the minimum.
SELECT
    ranked.name,
    ranked.rndval AS min,
    ranked.id
FROM (
    SELECT
        a.name,
        p.rndval,
        p.id,
        RANK() OVER (PARTITION BY a.name ORDER BY p.rndval) AS rnk
    FROM points AS p
    INNER JOIN areas AS a
        ON ST_Within(p.geom, a.geom)
) AS ranked
WHERE ranked.rnk = 1;

How to get the top most parent in PostgreSQL

I have a tree structure table with columns:
id,parent,name.
Given a tree A->B->C,
how can I get the ID of the top-most parent A, given C's ID?
Especially how to write SQL with "with recursive"?
Thanks!
-- Walk upward from node 'C' to the row that has no parent.
-- Each row of q holds one whole mytable row in the composite column "node".
WITH RECURSIVE q AS
(
    -- Anchor: the starting row.
    SELECT m AS node
    FROM mytable m
    WHERE m.id = 'C'
    UNION ALL
    -- Recursive step: fetch the parent of the row found in the previous step.
    -- q has no "parent" column of its own, so the field is read from the
    -- composite with (q.node).parent. The joined table uses the alias p, which
    -- differs from q's column name, so "p" means the newly joined row.
    SELECT p AS node
    FROM q
    JOIN mytable p
        ON p.id = (q.node).parent
)
SELECT (node).*
FROM q
WHERE (node).parent IS NULL
To implement recursive queries, you need a Common Table Expression (CTE).
This query computes ancestors of all parent nodes. Since we want just the top level, we select where level=0.
-- Walk down from every top-level row (parent IS NULL), carrying the id of
-- that top-level row along, then look up the row of interest.
WITH RECURSIVE Ancestors AS
(
    -- Anchor: each top-level row is its own root, at level 0.
    SELECT id, parent, id AS root_id, 0 AS level
    FROM YourTable
    WHERE parent IS NULL
    UNION ALL
    -- Recursive step: a child inherits its parent's root and sits one level
    -- deeper.
    SELECT child.id, child.parent, p.root_id, p.level + 1
    FROM YourTable child
    INNER JOIN Ancestors p ON p.id = child.parent
)
-- Filtering on level = 0 together with id = 'C' could only match when C is
-- itself a top-level row; root_id gives C's top-most parent at any depth.
SELECT a.root_id
FROM Ancestors a
WHERE a.id = 'C'
If you want to fetch all your data, then use an inner join on the id, e.g.
-- Main query to place after the Ancestors CTE. JOIN clauses must come before
-- WHERE.
SELECT YourTable.*
FROM Ancestors a
INNER JOIN YourTable ON YourTable.id = a.id
WHERE a.level = 0 AND a.id = 'C'
Assuming a table named "organization" with properties id, name, and parent_organization_id, here is what worked for me to get a list that included top level and parent level org ID's for each level.
-- For every organization, list the organization itself (depth 0) and all of
-- its descendants, each tagged with the starting organization's id and the
-- distance from it.
WITH RECURSIVE orgs AS (
    -- Anchor: every organization starts its own subtree.
    SELECT
        o.id AS top_org_id,
        NULL::bigint AS parent_org_id,
        o.id AS org_id,
        o.name,
        0 AS relative_depth
    FROM organization AS o
    UNION
    -- Recursive step: add the children of the organizations found so far.
    SELECT
        ancestor.top_org_id,
        child.parent_organization_id,
        child.id,
        child.name,
        ancestor.relative_depth + 1
    FROM organization AS child
    INNER JOIN orgs AS ancestor
        ON ancestor.org_id = child.parent_organization_id
)
SELECT
    top_org_id,
    parent_org_id,
    org_id,
    name,
    relative_depth
FROM orgs
ORDER BY top_org_id, relative_depth;