Possible indexes on below query - postgresql

-- Returns every row whose parent (puser) is user 9 or any user reachable from
-- user 9 by following puser links downward. The GROUP BY only de-duplicates.
select
    uid, user_id, email, mno, orgnztn, status, utype, state,
    -- hh24/mi: in to_char, "mm" is the month and "hh" is a 12-hour clock
    to_char(cdate, 'yyyy-mm-dd hh24:mi:ss') as cdate
from schema.table_1
where puser in (
    with recursive rel_tree as (
        -- anchor: direct children of user 9
        select user_id, puser, 1 as level, uid
        from schema.table_1
        where puser = 9
        union all
        -- step: children of the previous level. The child's own uid (c.uid)
        -- must be emitted; emitting p.uid would feed the same parents back in
        -- and the recursion would never move down the tree.
        select c.user_id, c.puser, p.level + 1 as level, c.uid
        from schema.table_1 c
        join rel_tree p on c.puser = p.uid
    )
    select uid
    from rel_tree
    union
    select 9
)
group by
    uid, user_id, email, mno, orgnztn, status, utype, state,
    to_char(cdate, 'yyyy-mm-dd hh24:mi:ss');

probably slightly faster like this:
-- Walks the puser hierarchy downward from user 9 and returns each descendant
-- row once, carrying the display columns through the recursion.
with recursive rel_tree as (
    -- anchor: direct children of user 9
    select
        uid, user_id, email, mno, orgnztn, status, utype, state,
        -- hh24/mi: in to_char, "mm" is the month and "hh" is a 12-hour clock
        to_char(cdate, 'yyyy-mm-dd hh24:mi:ss') as cdate,
        puser, 1 as level
    from schema.table_1
    where puser = 9
    union all
    -- step: children of the previous level. Every column is qualified with c
    -- because rel_tree (p) exposes the same column names and unqualified
    -- references would be ambiguous.
    select
        c.uid, c.user_id, c.email, c.mno, c.orgnztn, c.status, c.utype, c.state,
        to_char(c.cdate, 'yyyy-mm-dd hh24:mi:ss') as cdate,
        c.puser, p.level + 1 as level
    from schema.table_1 c
    join rel_tree p on c.puser = p.uid
)
select
    uid, user_id, email, mno, orgnztn, status, utype, state,
    cdate
from rel_tree
group by
    uid, user_id, email, mno, orgnztn, status, utype, state,
    cdate;
The index you want is:
-- Both the anchor filter (puser = 9) and the recursive join (c.puser = p.uid)
-- look rows up by puser, so that is the column to index.
CREATE INDEX table_1_puser on schema.table_1(puser);

Related

How to group data by a related field as separate columns?

Say I have a dataset of survey responses like this:
survey_id
user_id
question
answer
1
1
fav_colour
red
1
2
fav_colour
blue
1
2
fav_fruit
orange
2
3
fav_sport
hockey
I want to be able to use a single query with a survey ID to query the rows by survey ID like so:
...where survey_id = 1
survey_id
user_id
fav_color
fav_fruit
1
1
red
NULL
1
2
blue
orange
...where survey_id = 2
survey_id
user_id
fav_sport
2
3
hockey
How can I perform a query to get each question for a survey as a column, listing each response under it?
Table:
-- One row per answer: which user answered which question of which survey.
create table survey (
    survey_id int,
    user_id   int,
    question  varchar(20),
    answer    varchar(20)
);
-- Explicit column list so the seed data does not depend on column order.
insert into survey (survey_id, user_id, question, answer)
values
    (1, 1, 'fav_colour', 'red'),
    (1, 2, 'fav_colour', 'blue'),
    (1, 2, 'fav_fruit',  'orange'),
    (2, 3, 'fav_sport',  'hockey');
SQL for Full Display:
-- Pivot the three known questions into columns: one CTE per question, then
-- full-join them on (survey_id, user_id) so a respondent still appears when
-- only some of the questions were answered.
WITH favcolor AS (
    SELECT survey_id, user_id, answer AS fav_color
    FROM survey
    WHERE question = 'fav_colour'
      AND answer IS NOT NULL
),
favsport AS (
    SELECT survey_id, user_id, answer AS fav_sport
    FROM survey
    WHERE question = 'fav_sport'
      AND answer IS NOT NULL
),
favfruit AS (
    SELECT survey_id, user_id, answer AS fav_fruit
    FROM survey
    WHERE question = 'fav_fruit'
      AND answer IS NOT NULL
)
-- USING merges the key columns of a FULL JOIN (first non-null side), so the
-- second join matches against the already-merged keys of the first.
SELECT
    survey_id,
    user_id,
    fc.fav_color,
    fs.fav_sport,
    ff.fav_fruit
FROM favcolor AS fc
FULL OUTER JOIN favsport AS fs USING (survey_id, user_id)
FULL OUTER JOIN favfruit AS ff USING (survey_id, user_id);
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
1 | 1 | red | |
1 | 2 | blue | | orange
2 | 3 | | hockey |
(3 rows)
SQL for a particular survey, say survey_id = 1 (the earlier SQL with a filter on survey_id):
-- Same pivot as the full display, restricted to survey 1: one CTE per
-- question, full-joined on (survey_id, user_id), then filtered on the merged
-- survey_id.
WITH favcolor AS (
    SELECT survey_id, user_id, answer AS fav_color
    FROM survey
    WHERE question = 'fav_colour'
      AND answer IS NOT NULL
),
favsport AS (
    SELECT survey_id, user_id, answer AS fav_sport
    FROM survey
    WHERE question = 'fav_sport'
      AND answer IS NOT NULL
),
favfruit AS (
    SELECT survey_id, user_id, answer AS fav_fruit
    FROM survey
    WHERE question = 'fav_fruit'
      AND answer IS NOT NULL
)
SELECT
    survey_id,
    user_id,
    fc.fav_color,
    fs.fav_sport,
    ff.fav_fruit
FROM favcolor AS fc
FULL OUTER JOIN favsport AS fs USING (survey_id, user_id)
FULL OUTER JOIN favfruit AS ff USING (survey_id, user_id)
-- unqualified survey_id is the merged join key, i.e. the pivoted row's survey
WHERE survey_id = 1 ;
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
1 | 1 | red | |
1 | 2 | blue | | orange
(2 rows)
For Survey_id=2:
-- Same pivot as the full display, restricted to survey 2: one CTE per
-- question, full-joined on (survey_id, user_id), then filtered on the merged
-- survey_id.
WITH favcolor AS (
    SELECT survey_id, user_id, answer AS fav_color
    FROM survey
    WHERE question = 'fav_colour'
      AND answer IS NOT NULL
),
favsport AS (
    SELECT survey_id, user_id, answer AS fav_sport
    FROM survey
    WHERE question = 'fav_sport'
      AND answer IS NOT NULL
),
favfruit AS (
    SELECT survey_id, user_id, answer AS fav_fruit
    FROM survey
    WHERE question = 'fav_fruit'
      AND answer IS NOT NULL
)
SELECT
    survey_id,
    user_id,
    fc.fav_color,
    fs.fav_sport,
    ff.fav_fruit
FROM favcolor AS fc
FULL OUTER JOIN favsport AS fs USING (survey_id, user_id)
FULL OUTER JOIN favfruit AS ff USING (survey_id, user_id)
-- unqualified survey_id is the merged join key, i.e. the pivoted row's survey
WHERE survey_id = 2 ;
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
2 | 3 | | hockey |
(1 row)
The tablefunc module contains a function (crosstab) that does this:
https://www.postgresql.org/docs/13/tablefunc.html

SqlAlchemy: groupby of a union Error: SELECT construct for inclusion in a UNION or other set construct expected

I am trying to run two queries against different tables, union them, and then do a GROUP BY with SUM.
Here is the PostgreSQL code snippet:
-- Per month / department / user: summed work hours (first branch) and summed
-- expenditure (second branch). Each branch pads the other branch's measures
-- with typed NULLs so the two result sets line up for the UNION.
-- "user" is a reserved word in PostgreSQL and must be double-quoted to be
-- used as a table name.
SELECT
    to_char(worklist.date, 'YYYY-MM') AS year_month,
    department.department_name AS department_name,
    "user".username AS user_name,
    SUM(worklist.hour) AS TotalHour,
    SUM(worklist.overtime_hour) AS overtime_TotalHour,
    CAST(NULL AS numeric(20)) AS total_price
FROM "user"
INNER JOIN worklist
    ON worklist.user_id = "user".id
INNER JOIN department
    ON department.id = "user".department_id
GROUP BY year_month, user_name, department_name
UNION
SELECT
    to_char(expenditure.date, 'YYYY-MM') AS year_month,
    department.department_name AS department_name,
    "user".username AS user_name,
    CAST(NULL AS numeric(20)) AS TotalHour,
    CAST(NULL AS numeric(20)) AS overtime_TotalHour,
    SUM(expenditure.price) AS total_price
FROM "user"
INNER JOIN department
    ON department.id = "user".department_id
INNER JOIN expenditure
    ON expenditure.user_id = "user".id
GROUP BY year_month, user_name, department_name
-- applies to the combined result of both branches
ORDER BY year_month DESC
query result:
PostgreSQL query result
I want to implement this in sqlalchemy and here is how i would approach it:
def get_allusers_monthly(db: Session):
    """Build the monthly work-hour / expenditure UNION as a subquery.

    Two grouped queries are built, one summing work hours and one summing
    expenditure, and combined with UNION. Each query pads the measures it
    does not compute with a typed NULL so both have the same columns:
    year_month, department_name, user_name, total_hour,
    total_overtime_hour, total_pric.

    Returns the UNION wrapped with ``.subquery()``; nothing is executed here.
    """
    # The queries must stay un-executed: calling .all() turns them into plain
    # lists of rows, which union() rejects with "SELECT construct ... expected".
    qry1 = (
        db.query(
            func.to_char(workhour.Workhour.date, 'YYYY-MM').label('year_month'),
            department.Department.department_name.label('department_name'),
            user.User.username.label('user_name'),
            func.sum(workhour.Workhour.hour).label('total_hour'),
            func.sum(workhour.Workhour.overtime_hour).label('total_overtime_hour'),
            # Typed NULL placeholder (mirrors CAST(NULL AS numeric(20)) in the
            # SQL); referencing Expenditure.price here would drag that table
            # into the FROM list as an unintended cross join.
            cast(None, Numeric(20)).label('total_pric'),
        )
        # Separate arguments are AND-ed by filter(). Python's `and` between two
        # column expressions keeps only one of them.
        .filter(
            user.User.id == workhour.Workhour.user_id,
            user.User.department_id == department.Department.id,
        )
        .group_by('year_month', 'department_name', 'user_name')
    )
    qry2 = (
        db.query(
            func.to_char(expen.Expenditure.date, 'YYYY-MM').label('year_month'),
            department.Department.department_name.label('department_name'),
            user.User.username.label('user_name'),
            cast(None, Numeric(20)).label('total_hour'),
            cast(None, Numeric(20)).label('total_overtime_hour'),
            func.sum(expen.Expenditure.price).label('total_pric'),
        )
        .filter(
            user.User.id == expen.Expenditure.user_id,
            user.User.department_id == department.Department.id,
        )
        .group_by('year_month', 'department_name', 'user_name')
    )
    # union() needs SELECT constructs, so hand it each query's statement.
    golden_set = union(qry1.statement, qry2.statement).subquery()
    return golden_set
Running it produces this error:
sqlalchemy.exc.ArgumentError: SELECT construct for inclusion in a UNION or other set construct expected
Where am I going wrong? Or is there a better way to implement this?
Can anybody please help?!

Postgres SELECT runs 3x faster than a function containing the same SELECT

I have a SELECT in Postgres:
-- For symbol XRPRUB: combine depth updates newer than the latest order-book
-- snapshot with that snapshot's own orders, then keep one row per price,
-- preferring the highest final_update_id.

-- Latest snapshot row for the symbol; looked up once and used by both branches.
WITH latest_book AS (
    SELECT id, last_update_id
    FROM order_book
    WHERE symbol_name = 'XRPRUB'
    ORDER BY last_update_id DESC
    LIMIT 1
),
levels AS (
    -- depth updates recorded after the latest snapshot
    SELECT price, quantity, is_ask, book_depth.final_update_id
    FROM order_depth
    LEFT JOIN book_depth ON book_depth_id = book_depth.id
    WHERE book_depth_id IN (
        SELECT id
        FROM book_depth
        WHERE final_update_id > (SELECT last_update_id FROM latest_book)
          AND symbol_name = 'XRPRUB'
    )
    UNION
    -- orders of the snapshot itself; order_book_id lands in the
    -- final_update_id column because UNION matches columns by position
    SELECT price, quantity, is_ask, order_book_id
    FROM "order"
    WHERE order_book_id = (SELECT id FROM latest_book)
)
-- No ordering inside the CTE: only this ORDER BY decides which row
-- DISTINCT ON keeps for each price.
SELECT DISTINCT ON (price) price, quantity, is_ask, final_update_id
FROM levels
ORDER BY price, final_update_id DESC;
It works for about 20 seconds.
But when I create a function with this SELECT, the function takes about 1 min 40 seconds. Can someone explain whether this is normal, or am I making a mistake somewhere?

Select specific lines in data according to last update [duplicate]

Name Value AnotherColumn
-----------
Pump 1 8000.0 Something1
Pump 1 10000.0 Something2
Pump 1 10000.0 Something3
Pump 2 3043 Something4
Pump 2 4594 Something5
Pump 2 6165 Something6
My table looks something like this. I would like to know how to select max value for each pump.
-- Rows that carry each pump's maximum value. When several rows tie on the
-- maximum, all of them are returned.
select a.name, a.value
from out_pumptable as a
inner join (
    -- grouped by the same column that is selected, so there is one maximum per pump
    select name, max(value) as value
    from out_pumptable
    group by name
) as g
    on g.name = a.name
   and g.value = a.value
This code does the job, but I get two entries for Pump 1, since it has two rows with the same value.
-- Highest value recorded for each pump name.
SELECT
    p.name,
    MAX(p.value)
FROM out_pumptable AS p
GROUP BY p.name
-- Number each pump's rows from highest value down and keep only the first.
SELECT
    ranked.name,
    ranked.value
FROM (
    SELECT
        p.name,
        p.value,
        ROW_NUMBER() OVER (PARTITION BY p.name ORDER BY p.value DESC) AS rn
    FROM out_pumptable AS p
) AS ranked
WHERE ranked.rn = 1
-- One row per pump: its maximum value, plus the greatest AnotherColumn among
-- the rows that tie on that maximum.
SELECT
    b.name,
    MAX(b.value) AS MaxValue,
    MAX(b.AnotherColumn) AS AnotherColumn
FROM out_pumptable AS b          -- the alias b is referenced throughout and must be declared here
INNER JOIN (
    -- maximum value per pump
    SELECT
        name,
        MAX(value) AS MaxValue
    FROM out_pumptable
    GROUP BY name
) AS a
    ON a.name = b.name
   AND a.MaxValue = b.value
GROUP BY b.name
Note that this would be far easier if you had a primary key. Here is an example:
-- Assuming a primary-key column PK: for each pump, pick the highest PK among
-- the rows holding that pump's maximum value, and return those full rows.
SELECT *
FROM out_pumptable AS c
WHERE c.PK IN (
    SELECT
        MAX(b.PK) AS MaxPK
    FROM out_pumptable AS b
    INNER JOIN (
        -- maximum value per pump
        SELECT
            name,
            MAX(value) AS MaxValue
        FROM out_pumptable
        GROUP BY name
    ) AS a
        ON a.name = b.name
       AND a.MaxValue = b.value
    -- one PK per pump; without this, MAX(PK) collapses to a single row overall
    GROUP BY b.name
)
-- Keep the rows whose Value equals the maximum Value recorded for the same
-- Name; the GROUP BY collapses exact duplicates.
SELECT
    p.Name,
    p.Value,
    p.AnotherColumn
FROM out_pumptable AS p
WHERE p.Value = (
    SELECT MAX(f.Value)
    FROM out_pumptable AS f
    WHERE f.Name = p.Name
)
GROUP BY
    p.Name,
    p.Value,
    p.AnotherColumn
Try it like this; it works:
-- One row per name, taken from the row with the highest value.
-- Grouping an ordered subquery by name while selecting ungrouped columns is
-- not valid standard SQL, and where an engine accepts it the row it returns is
-- not guaranteed. ROW_NUMBER() makes the choice explicit.
select name, value, AnotherColumn
from (
    select
        name,
        value,
        AnotherColumn,
        row_number() over (partition by name order by value desc) as rn
    from out_pumptable
) v
where v.rn = 1
Using an analytic (window) function is an easy way to find the max value of every group.
Documentation : https://learn.microsoft.com/en-us/sql/t-sql/functions/row-number-transact-sql?view=sql-server-ver15
-- Number each pump's rows from highest value down and keep the first one.
SELECT
    ranked.name,
    ranked.value,
    ranked.AnotherColumn
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn,  -- alias no longer shadows the function name
        name,
        value,
        AnotherColumn
    FROM out_pumptable           -- the table from the question (was a leftover "students")
) AS ranked                      -- a derived table needs an alias in SQL Server
WHERE ranked.rn = 1
-- One row per name: its maximum Value, plus the greatest AnotherColumn among
-- the rows that tie on that maximum.
SELECT
    t1.name,
    t1.Value,
    MAX(t1.AnotherColumn) AS AnotherColumn
FROM mytable AS t1
INNER JOIN (
    -- maximum Value per name
    SELECT name AS nameMax, MAX(Value) AS valueMax
    FROM mytable
    GROUP BY name
) AS t2
    ON t2.nameMax = t1.name
   AND t2.valueMax = t1.Value
-- Add any extra row filters in a WHERE clause here.
-- Every selected column is either grouped or aggregated, as standard SQL requires.
GROUP BY t1.name, t1.Value;
-- Each product with its maximum quantity. DISTINCT applies to the whole
-- select list and removes repeated (ProdId, Quantity) pairs.
SELECT DISTINCT
    d.ProdId,
    d.Quantity
FROM Dummy AS d
INNER JOIN (
    SELECT ProdId, MAX(Quantity) AS MaxQuantity
    FROM Dummy
    GROUP BY ProdId
) AS per_prod
    ON d.ProdId = per_prod.ProdId
   AND d.Quantity = per_prod.MaxQuantity
ORDER BY d.ProdId
this will give you the idea.

Identifying duplicates within a table: looking for query advice

So I am trying to identify duplicated contact records within an account, and I'm looking for the best way to do this. There is an account table and a contact table. Below is the query I've come up with to give me what I need, but I feel like there is probably a better/more efficient way to do this, so I'm looking for any feedback/advice. Thanks in advance!
-- Contacts for which more than one row shares the same
-- (ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL) combination.
SELECT *
FROM sysdba.CONTACT AS a WITH (NOLOCK)
WHERE EXISTS (
    SELECT
        b.ACCOUNTID,
        b.FIRSTNAME,
        b.LASTNAME,
        b.EMAIL
    FROM sysdba.CONTACT AS b WITH (NOLOCK)
    GROUP BY
        b.ACCOUNTID,
        b.FIRSTNAME,
        b.LASTNAME,
        b.EMAIL
    -- keep only the group matching the outer row, and only if it has duplicates
    HAVING COUNT(*) > 1
       AND a.ACCOUNTID = b.ACCOUNTID
       AND a.FIRSTNAME = b.FIRSTNAME
       AND a.LASTNAME = b.LASTNAME
       AND a.EMAIL = b.EMAIL
)
ORDER BY
    a.ACCOUNTID,
    a.FIRSTNAME,
    a.LASTNAME,
    a.EMAIL
Here is another way I can do this, but having to use DISTINCT seems ugly..
-- Self-join: a contact is a duplicate when a different CONTACTID shares its
-- account, names and email. DISTINCT removes the repeats produced when a
-- contact matches several others.
SELECT DISTINCT
    a.CONTACTID,
    a.FIRSTNAME,
    a.LASTNAME,
    a.EMAIL
FROM sysdba.CONTACT AS a WITH (NOLOCK)
INNER JOIN sysdba.CONTACT AS b WITH (NOLOCK)
    ON  a.ACCOUNTID = b.ACCOUNTID
    AND a.FIRSTNAME = b.FIRSTNAME
    AND a.LASTNAME = b.LASTNAME
    AND a.EMAIL = b.EMAIL
    AND a.CONTACTID <> b.CONTACTID
ORDER BY
    a.CONTACTID,
    a.FIRSTNAME,
    a.LASTNAME,
    a.EMAIL
When checking the execution plans for both, the first query is 37% compared to 63% for the second query, which is surprising, as I've always thought (apparently wrongly) that using joins is quicker than relying on a WHERE clause.
A quite common practice when trying to identify duplicates is to use windowed aggregate functions, such as COUNT() OVER (...) and ROW_NUMBER() OVER (...).
Below is the query that should return you groups of records, where there are more than one CONTACTID for the same ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL combination. In other words this query returns records, having duplicates, along with their duplicates:
-- Tag every contact with the size of its
-- (ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL) group, then keep the contacts whose
-- group has more than one member.
;WITH cteCONTACT AS (
    SELECT
        ACCOUNTID,
        FIRSTNAME,
        LASTNAME,
        EMAIL,
        CONTACTID,
        COUNT(*) OVER (
            PARTITION BY ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL
        ) AS CNT
    FROM sysdba.CONTACT
)
SELECT
    ACCOUNTID,
    FIRSTNAME,
    LASTNAME,
    EMAIL,
    CONTACTID
FROM cteCONTACT
WHERE CNT > 1;
And the following query should return only the duplicates, without the original records they duplicate:
-- Number the contacts inside each (ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL)
-- group by ascending CONTACTID and return everything after the first, i.e.
-- the extra copies but not the lowest-CONTACTID row of each group.
;WITH cteCONTACT AS (
    SELECT
        ACCOUNTID,
        FIRSTNAME,
        LASTNAME,
        EMAIL,
        CONTACTID,
        ROW_NUMBER() OVER (
            PARTITION BY ACCOUNTID, FIRSTNAME, LASTNAME, EMAIL
            ORDER BY CONTACTID
        ) AS NUM
    FROM sysdba.CONTACT
)
SELECT
    ACCOUNTID,
    FIRSTNAME,
    LASTNAME,
    EMAIL,
    CONTACTID
FROM cteCONTACT
WHERE NUM > 1;