Having issues combining HAVING with WHERE on a very simple QUERY - postgresql

This is what I have tried so far:
SELECT group_id, player_id as winner_id
/*
,sum(M1.first_score + M2.second_score), sum(M2.first_score + M1.second_score)
*/
FROM players as P1
LEFT JOIN matches as M1
ON M1.first_player = P1.player_id
LEFT JOIN matches as M2
ON M2.second_player = P1.player_id
LEFT JOIN matches as M3
ON M3.first_player = P1.player_id
LEFT JOIN matches as M4
ON M4.second_player = P1.player_id
--WHERE P1.player_id is not null /*or P2.player_id is not null*/
GROUP BY group_id, P1.player_id
/*
HAVING
sum(M1.first_score + M2.second_score) > sum(M2.first_score + M1.second_score)
OR
sum(M3.first_score + M4.second_score) > sum(M4.first_score + M3.second_score)
*/
ORDER BY group_id ASC, player_id ASC
The results that I am getting are:
1,30
1,45
1,65
2,20
2,50
3,40
I know I am missing something very obvious as usual
This is my most recent
attempt
-- write your code in PostgreSQL 9.4
SELECT
group_id, player_id as winner_id
/*
,sum(M1.first_score + M2.second_score), sum(M2.first_score + M1.second_score)
*/
FROM players as P1
LEFT JOIN matches as M1
ON M1.first_player = P1.player_id
LEFT JOIN matches as M2
ON M2.second_player = P1.player_id
LEFT JOIN matches as M3
ON M3.first_player = P1.player_id
LEFT JOIN matches as M4
ON M4.second_player = P1.player_id
GROUP BY group_id, P1.player_id, M1,M2,M3,M4
/*
HAVING
(M1 is not null) OR (M2 is not null) OR (M3 is not null) OR (M4 is not null)
*/
/*
HAVING
sum(M1.first_score + M2.second_score) > sum(M2.first_score + M1.second_score)
OR
sum(M3.first_score + M4.second_score) > sum(M4.first_score + M3.second_score)
*/
ORDER BY group_id ASC
/*, player_id DESC
*/
How can I fix the query so that I can get the expected results

I don't have any PostgreSQL background but lets see if this works:
I would start this by simplifying it, by writing a query that first returns the total score by player:
SELECT player_id, SUM(score) score
FROM (
SELECT first_player as player_id, first_score as score
FROM matches
UNION ALL
SELECT second_player, second_score
FROM matches
)
GROUP BY player_id
Now, join that dataset to players to find the groups:
SELECT w.player_id, p.group_id, w.score
FROM
(
SELECT player_id, SUM(score) score
FROM (
SELECT first_player as player_id, first_score as score
FROM matches
UNION ALL
SELECT second_player, second_score
FROM matches
)
GROUP BY player_id
) as w
inner join players p
on p.player_id = w.player_id
Now we have all players, their total score and their group. We want to identify the winner by group? We can use ranking functions to do this:
SELECT
w.player_id,
p.group_id,
w.score,
RANK() OVER (PARTITION BY p.group_id ORDER BY score DESC) as group_placement
FROM
(
SELECT player_id, SUM(score) score
FROM (
SELECT first_player as player_id, first_score as score
FROM matches
UNION ALL
SELECT second_player, second_score
FROM matches
)
GROUP BY player_id
) as w
inner join players p
on p.player_id = w.player_id
Now we just pick out the top ones in each group (rank = 1) using WHERE
SELECT
player_id,
group_id
FROM
(
SELECT
w.player_id,
p.group_id,
w.score,
RANK() OVER (PARTITION BY p.group_id ORDER BY score DESC) as group_placement
FROM
(
SELECT player_id, SUM(score) score
FROM (
SELECT first_player as player_id, first_score as score
FROM matches
UNION ALL
SELECT second_player, second_score
FROM matches
)
GROUP BY player_id
) as w
inner join players p
on p.player_id = w.player_id
) as gp
WHERE group_placement = 1
Looks complicated? yes, but you can see have the final result is provided bit by bit. Each step of this is a 'subtable' and you can run and observe the data at each point.

Related

How to select from relationship based on the height value of field postgres?

i have three tables vehicles and trips and componentValues they are related to each other by
vehicles -> trips -> componentValues
vehicles Table : id, ...
trips Table: id, vehicle_id, ...
componentValues Table: id, trip_id, damage, ...
and i'm trying to get all the trips with the highest damage component form the componentValues table like this
SELECT *
FROM (select * from trips WHERE "trips"."vehicle_id" = '7') as t
LEFT JOIN (
select * from "component_values"
where trip_id = '85'
order by damage
desc nulls last limit 1
) as h on t.id = h.trip_id
how can i change the line where trip_id = '85' to be dynamic or is their another way to do this and many thanks in advance.
expected result:
UPDATE
i have did some query that get what i want but how can i improve it by not using sub queries in the select statement
select * ,
(select damage from "component_values" where trip_id = trips.id order by damage desc nulls last limit 1) as h_damage,
(select damage from "component_values" where trip_id = trips.id order by damage asc nulls first limit 1) as l_damage,
(select component_types.name from "component_values" left join component_types on component_values.component_type_id = component_types.id where trip_id = trips.id order by damage desc nulls last limit 1) as hc_damage,
(select component_types.name from "component_values" left join component_types on component_values.component_type_id = component_types.id where trip_id = trips.id order by damage asc nulls first limit 1) as lc_damage
from trips
WHERE trips."vehicle_id" = '7'
I think you want distinct on:
select distinct on (t.id) t.*, dv.damage
from trips t join
component_values cv
on cv.trip_id = t.id
where t.vehicle_id = 7 -- not sure if this is needed
order by t.id, cv.damage desc nulls last;
distinct on is usually the most efficient method in Postgres. You can also do this with window functions:
select distinct on (t.id) t.*, cv.damage
from trips t join
(select cv.*,
row_number() over (partition by cv.trip_id, cv.damage desc nulls last) as seqnum
from component_values cv
) cv
on cv.trip_id = t.id and cv.seqnum = 1
where t.vehicle_id = 7; -- not sure if this is needed
I think you want a lateral join.
SELECT *
FROM (select * from trips WHERE "trips"."vehicle_id" = '7') as t
LEFT JOIN lateral (
select * from "component_values"
where trip_id = t.id
order by damage
desc nulls last limit 1
) as h on true
Although I don't think there is a reason for the first subquery, so:
SELECT *
FROM trips
LEFT JOIN lateral (
select * from "component_values"
where trip_id = trips.id
order by damage
desc nulls last limit 1
) as h on true
WHERE "trips"."vehicle_id" = '7'

can I rank results got from different WHERE clause?

Say I want to select the posts that has certain tags or matches the keyword.
select t1.*
from (
select p.*, count(p.id) from plainto_tsquery('hElLo') AS q , post p
left join post_tag pt on pt.post_id = p.id
left join tag t on t.id = pt.tag_id
WHERE (tsv ## q) or t.id in (2,3)
group by p.id
) as t1
order by count desc, ts_rank_cd(t1.tsv, plainto_tsquery('hElLo')) desc
limit 5;
the above does select what I want. In tsv, I gave title A weight and description D weight. it now becomes pretty pointless when sorting by count because each entry has the same weight. Is it possible to do things like if this row is picked from t.id in (2,3), they get to sorted to the first, then sort by ts_rank_cd, or give each match tag 'A' weight, title become B weight and description is D?
Try CASE WHEN
select t1.*
from (
select p.*, count(p.id),
(CASE WHEN t.id in (2,3) THEN 1 ELSE 2 END) as ranking
from plainto_tsquery('hElLo') AS q , post p
left join post_tag pt on pt.post_id = p.id
left join tag t on t.id = pt.tag_id
WHERE (tsv ## q) or t.id in (2,3)
group by p.id
) as t1
order by count desc, ranking asc, ts_rank_cd(t1.tsv, plainto_tsquery('hElLo')) desc
limit 5;
Edited(Correct Answer):
select t1.*
from (
select p.*, count(p.id),
COUNT(1) filter(where t.id in (2,3)) ranking
from plainto_tsquery('hElLo') AS q , post p
left join post_tag pt on pt.post_id = p.id
left join tag t on t.id = pt.tag_id
WHERE (tsv ## q) or t.id in (2,3)
group by p.id
) as t1
order by count desc, ranking asc, ts_rank_cd(t1.tsv, plainto_tsquery('hElLo')) desc
limit 5;
I'm not sure why the count would be the same, but you can add more keys to the order by:
order by count desc,
(t.id in (2, 3)) desc,
ts_rank_cd(t1.tsv, plainto_tsquery('hElLo')) desc ;
The desc is because true > false, and you want the true values to be first.

Avoiding Order By in T-SQL

Below sample query is a part of my main query. I found SORT operator in below query is consuming 30% of the cost.
To avoid SORT, there is need of creation of Indexes. Is there any other way to optimize this code.
SELECT TOP 1 CONVERT( DATE, T_Date) AS T_Date
FROM TableA
WHERE ID = r.ID
AND Status = 3
AND TableA_ID >ISNULL((
SELECT TOP 1 TableA_ID
FROM TableA
WHERE ID = r.ID
AND Status <> 3
ORDER BY T_Date DESC
), 0)
ORDER BY T_Date ASC
Looks like you can use not exists rather than the sorts. I think you'll probably get a better performance boost by use a CTE or derived table instead of the a scalar subquery.
select *
from r ... left outer join
(
select ID, min(t_date) as min_date from TableA t1
where status = 3 and not exists (
select 1 from TableA t2
where t2.ID = t1.ID
and t2.status <> 3 and t2.t_date > t1.t_date
)
group by ID
) as md on md.ID = r.ID ...
or
select *
from r ... left outer join
(
select t1.ID, min(t1.t_date) as min_date
from TableA t1 left outer join TableA t2
on t2.ID = t1.ID and t2.status <> 3
where t1.status = 3 and t1.t_date < t2.t_date
group by t1.ID
having count(t2.ID) = 0
) as md on md.ID = r.ID ...
It also appears that you're relying on an identity column but it's not clear what those values mean. I'm basically ignoring it and using the date column instead.
Try this:
SELECT TOP 1 CONVERT( DATE, T_Date) AS T_Date
FROM TableA a1
LEFT JOIN (
SELECT ID, MAX(TableA_ID) AS MaxAID
FROM TableA
WHERE Status <> 3
GROUP BY ID
) a2 ON a2.ID = a1.ID AND a1.TableA_ID > coalesce(a2.MAXAID,0)
WHERE a1.ID = r.ID AND a1.Status = 3
ORDER BY T_Date ASC
The use of TOP 1 in combination with the unexplained r alias concern me. There's almost certainly a MUCH better way to get this data into your results that doesn't involve doing this in a sub query (unless this is for an APPLY operation).

Sorting rows by children?

I have this table:
CREATE TABLE items (
id SERIAL PRIMARY KEY,
data TEXT,
parent INT,
posted INT
);
Each item has a piece of data, a timestamp, and a parent. I'd like to select the top 10 root items (parent = 0), sorted by the timestamp of the most recent child.
If item #1 has a child #2 that has a child #3, #3 is considered a child of #1.
How can I do this?
EDIT:
The query has been rewritten to
first sort the child items
get the root parent id and the rank for each item
select the top 10 parents
select the details for the top 10 parents
Common Table expressions have been used to incrementally select the data following the above steps.
WITH recursive c AS
(
SELECT *
FROM seeds
UNION ALL
SELECT
T.id,
T.parent,
c.topParentID,
(c.child_level + 1),
c.child_rank
FROM items AS T
INNER JOIN c ON T.parent = c.id
WHERE T.id <> T.parent
)
, seeds AS
(
SELECT
id,
parent,
parent AS topParentID,
0 AS child_level,
rank() OVER (ORDER BY posted DESC) child_rank
FROM items
WHERE parent <> 0
ORDER BY posted DESC
)
, rank_level AS
(
SELECT DISTINCT
c2.id id,
c_ranks.min_child_rank child_rank,
c_roots.max_child_level root_level
FROM
(
SELECT
id,
MAX(child_level) max_child_level
FROM c
GROUP BY id
)
c_roots
INNER JOIN c c2 ON c_roots.id = c2.id
INNER JOIN
(
SELECT
id,
MIN(child_rank) min_child_rank
FROM c
GROUP BY id
)
c_ranks
ON c2.id = c_ranks.id
)
, top_10_parents AS
(
SELECT
c.topParentID id,
MIN(rl.child_rank) id_rank
FROM rank_level rl
INNER JOIN c ON rl.id = c.id AND c.child_level = rl.root_level
GROUP BY c.topParentID
ORDER BY MIN(rl.child_rank)
limit 10
)
SELECT
i.*
FROM
items i
INNER JOIN top_10_parents tp ON tp.id = i.id
ORDER BY tp.id_rank;
SQL Fiddle
Reference:
WITH Queries (Common Table Expressions) on PostgreSQL Manual

Is T-SQL (2005) RANK OVER(PARTITION BY) the answer?

I have a stored procedure that does paging for the front end and is working fine. I now need to modify that procedure to group by four columns of the 20 returned and then only return the row within each group that contains the lowest priority. So when resort_id, bedrooms, kitchen and checkin (date) all match then only return the row that has the min priority. I have to still maintain the paging functionality. The #startIndex and #upperbound are parms passed into the procedure from the front end for paging. I’m thinking that RANK OVER (PARTITION BY) is the answer I just can’t quite figure out how to put it all together.
SELECT I.id,
I.resort_id,
I.[bedrooms],
I.[kitchen],
I.[checkin],
I.[priority],
I.col_1,
I.col_2 /* ..... (more cols) */
FROM (
SELECT ROW_NUMBER() OVER(ORDER by checkin) AS rowNumber,
*
FROM Inventory
) AS I
WHERE rowNumber >= #startIndex
AND rowNumber < #upperBound
ORDER BY rowNumber
Example 2 after fix:
SELECT I.resort_id,
I.[bedrooms],
I.[kitchen],
I.[checkin],
I.[priority],
I.col_1,
I.col_2 /* ..... (more cols) */
FROM Inventory i
JOIN
(
SELECT ROW_NUMBER() OVER(ORDER BY h.checkin) as rowNumber, MIN(h.id) as id
FROM Inventory h
JOIN (
SELECT resort_id, bedrooms, kitchen, checkin, id, MIN(priority) as priority
FROM Inventory
GROUP BY resort_id, bedrooms, kitchen, checkin, id
) h2 on h.resort_id = h2.resort_id and
h.bedrooms = h2.bedrooms and
h.kitchen = h2.kitchen and
h.checkin = h2.checkin and
h.priority = h2.priority
GROUP BY h.resort_id, h.bedrooms, h.kitchen, h.checkin, h.priority
) AS I2
on i.id = i2.id
WHERE rowNumber >= #startIndex
AND rowNumber < #upperBound
ORDER BY rowNumber
I would accompish it this way.
SELECT I.resort_id,
I.[bedrooms],
I.[kitchen],
I.[checkin],
I.[priority],
I.col_1,
I.col_2 /* ..... (more cols) */
FROM Inventory i
JOIN
(
SELECT ROW_NUMBER(ORDER BY Checkin) as rowNumber, MIN(id) id
FROM Inventory h
JOIN (
SELECT resort_id, bedrooms, kitchen, checkin id, MIN(priority) as priority
FROM Inventory
GROUP BY resort_id, bedrooms, kitchen, checkin
) h2 on h.resort_id = h2.resort and
h.bedrooms = h2.bedrooms and
h.kitchen = h2.kitchen and
h.checkin = h2.checkin and
h.priority = h2.priority
GROUP BY h.resort_id, h.bedrooms, h.kitchen, h.checkin, h.priority
) AS I2
on i.id = i2.id
WHERE rowNumber >= #startIndex
AND rowNumber < #upperBound
ORDER BY rowNumber