Improve performance on CTE with sub-queries - tsql

I have a table with this structure:
WorkerID Value GroupID Sequence Validity
1 '20%' 1 1 2018-01-01
1 '10%' 1 1 2017-06-01
1 'Yes' 1 2 2017-06-01
1 '2018-01-01' 2 1 2017-06-01
1 '17.2' 2 2 2017-06-01
2 '10%' 1 1 2017-06-01
2 'No' 1 2 2017-06-01
2 '2016-03-01' 2 1 2017-06-01
2 '15.9' 2 2 2017-06-01
This structure was created so that the client can create customized data for a worker. For example Group 1 can be something like "Salary" and Sequence is one value that belongs to that Group like "Overtime Compensation". The column Value is a VARCHAR(150) field and the correct validation and conversation is done in another part of the application.
The Validity column exist mainly for historical reasons.
Now I would like to show, for the different workers, the information in a grid where each row should be one worker (displaying the one with the most recent Validity):
Worker 1_1 1_2 2_1 2_2
1 20% Yes 2018-01-01 17.2
2 10% No 2016-03-01 15.9
To accomplish this I created a CTE that looks like this:
WITH CTE_worker_grid
AS
(
SELECT
worker,
/* 1 */
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 1_1,
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 1_2,
/* 2 */
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 2_1,
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 2_2
)
GO
This produces the correct result but it's very slow as it creates this grid for over 18'000 worker with almost 30 Groups and up to 20 Sequences in each Group.
How could one speed up the process of a CTE of this magnitude? Should CTE even be used? Can the sub-queries be changed or re-factored out to speed up the execution?

Use a PIVOT!
+----------+---------+---------+------------+---------+
| WorkerId | 001_001 | 001_002 | 002_001 | 002_002 |
+----------+---------+---------+------------+---------+
| 1 | 20% | Yes | 2018-01-01 | 17.2 |
| 2 | 10% | No | 2016-03-01 | 15.9 |
+----------+---------+---------+------------+---------+
SQL Fiddle: http://sqlfiddle.com/#!18/6e768/1
CREATE TABLE WorkerAttributes
(
WorkerID INT NOT NULL
, [Value] VARCHAR(50) NOT NULL
, GroupID INT NOT NULL
, [Sequence] INT NOT NULL
, Validity DATE NOT NULL
)
INSERT INTO WorkerAttributes
(WorkerID, Value, GroupID, Sequence, Validity)
VALUES
(1, '20%', 1, 1, '2018-01-01')
, (1, '10%', 1, 1, '2017-06-01')
, (1, 'Yes', 1, 2, '2017-06-01')
, (1, '2018-01-01', 2, 1, '2017-06-01')
, (1, '17.2', 2, 2, '2017-06-01')
, (2, '10%', 1, 1, '2017-06-01')
, (2, 'No', 1, 2, '2017-06-01')
, (2, '2016-03-01', 2, 1, '2017-06-01')
, (2, '15.9', 2, 2, '2017-06-01')
;WITH CTE_WA_RANK
AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY WorkerID, GroupID, [Sequence] ORDER BY Validity DESC) AS VersionNumber
, WA.WorkerID
, WA.GroupID
, WA.[Sequence]
, WA.[Value]
FROM
WorkerAttributes AS WA
),
CTE_WA
AS
(
SELECT
WA_RANK.WorkerID
, RIGHT('000' + CAST(WA_RANK.GroupID AS VARCHAR(3)), 3)
+ '_'
+ RIGHT('000' + CAST(WA_RANK.[Sequence] AS VARCHAR(3)), 3) AS SMART_KEY
, WA_RANK.[Value]
FROM
CTE_WA_RANK AS WA_RANK
WHERE
WA_RANK.VersionNumber = 1
)
SELECT
WorkerId
, [001_001] AS [001_001]
, [001_002] AS [001_002]
, [002_001] AS [002_001]
, [002_002] AS [002_002]
FROM
(
SELECT
CTE_WA.WorkerId
, CTE_WA.SMART_KEY
, CTE_WA.[Value]
FROM
CTE_WA
) AS WA
PIVOT
(
MAX([Value])
FOR
SMART_KEY IN
(
[001_001]
, [001_002]
, [002_001]
, [002_002]
)
) AS PVT

Related

How to write a select query for displaying data on a table in another way using Postgresql?

I want to write a select query to pick data from a table which is shown in this image below,PICTURE_1
1.Table Containing Data
and display it like this image in this link below, PICTURE_2
2.Result of the query
About the data: The first picture shows data logged into a table for 2 seconds from 3 IDs(1,2&3) having 2 sub IDs (aa&bb). Values and timestamp are also displayed in the picture. The table conatins only 3 column as shown in PICTURE_1. Could you guys help me write a query to display data in the table to get displayed as shown in the second image using Postgresql?. You can extract ID name using substring function. The language that Im using is plpgsql. Any ideas/logic also will be good.Thank you for your time.
Please try this. Here row value has been shown in column wise and also use CTE.
-- PostgreSQL(v11)
WITH cte_t AS (
SELECT LEFT(name, 1) id
, RIGHT(name, POSITION('.' IN REVERSE(name)) - 1) t_name
, value
, time_stamp
FROM test
)
SELECT id
, time_stamp :: DATE "date"
, time_stamp :: TIME "time"
, MAX(CASE WHEN t_name = 'aa' THEN value END) "aa"
, MAX(CASE WHEN t_name = 'bb' THEN value END) "bb"
FROM cte_t
GROUP BY id, time_stamp
ORDER BY date, time, id;
Please check from url https://dbfiddle.uk/?rdbms=postgres_11&fiddle=6d35047560b3f83e6c906584b23034e9
Check this query dbfiddle
with cte (name, value, timeStamp) as (values
('1.aa', 1, '2021-08-20 10:10:01'),
('2.aa', 2, '2021-08-20 10:10:01'),
('3.aa', 3, '2021-08-20 10:10:01'),
('1.bb', 4, '2021-08-20 10:10:01'),
('2.bb', 5, '2021-08-20 10:10:01'),
('3.bb', 6, '2021-08-20 10:10:01'),
('1.aa', 7, '2021-08-20 10:10:02'),
('2.aa', 8, '2021-08-20 10:10:02'),
('3.aa', 9, '2021-08-20 10:10:02'),
('1.bb', 0, '2021-08-20 10:10:02'),
('2.bb', 1, '2021-08-20 10:10:02'),
('3.bb', 2, '2021-08-20 10:10:02')
), sub_cte as (
select split_name[1] as id, split_name[2] as name, value, tt::date as date, tt::time as time from (
select
regexp_split_to_array(name, '\.') split_name,
value,
to_timestamp(timestamp, 'YYYY-MM-DD HH:MI:SS') as tt
from cte
) foo
)
select id, date, time, a.value as aa, b.value as bb from sub_cte a
left join (
select * from sub_cte where name = 'bb'
) as b using (id, date, time)
where a.name = 'aa'
Result
id | date | time | aa | bb
----+------------+----------+----+----
1 | 2021-08-20 | 10:10:01 | 1 | 4
2 | 2021-08-20 | 10:10:01 | 2 | 5
3 | 2021-08-20 | 10:10:01 | 3 | 6
1 | 2021-08-20 | 10:10:02 | 7 | 0
2 | 2021-08-20 | 10:10:02 | 8 | 1
3 | 2021-08-20 | 10:10:02 | 9 | 2
(6 rows)

Iterate through records to build hierarchy

Please consider the following tables. They describe a schools' hierarchy and the notes per student.
users
-------------------------------------------------------------
root_id obj_id obj_type obj_ref_id obj_role
-------------------------------------------------------------
1 2 student 7 learn
1 3 student 7 learn
1 1 principal 1 lead
1 4 mentor 1 train teachers
1 5 trainee 4 learn teaching
1 6 trainee 4 learn teaching
1 7 teacher 1 teach
2 8 student 9 learn
2 9 principal 9 lead
notes
--------------------------------------------------------------
note_id obj_id note
--------------------------------------------------------------
1 2 foo
2 2 bar
3 2 baz
4 3 lorem
5 8 ipsum
I need to write out the hierarchy and number of notes per user as follows:
-------------------------------------------------------------------------------------------
obj_id notes obj_path
-------------------------------------------------------------------------------------------
1 0 principal 1 (lead)
2 3 student 2 (learn) > teacher 7 (teach) > principal 1 (lead)
3 1 student 3 (learn) > teacher 7 (teach) > principal 1 (lead)
4 0 mentor 4 (train teachers) > principal 1 (lead)
5 0 trainee 5 (learn teaching) > mentor 4 (train teachers) > principal 1 (lead)
6 0 trainee 6 (learn teaching) > mentor 4 (train teachers) > principal 1 (lead)
7 0 teacher 7 (teach) > principal 1 (lead)
8 1 student 8 (learn) > principal 2 (lead)
9 0 principal 9 (lead)
For this, I understand that I need to use a loop as follows:
declare cur cursor for
select obj_id from users order by root_id
open cur
declare #obj_id int
fetch next from cur into #id
while (##FETCH_STATUS = 0)
begin
select obj_role from users where obj_id = #obj_id
fetch next from cur into #obj_id
end
close cur
deallocate cur
This is what I have until now, but I do not understand how to go from here. Can someone help me on my way?
Understand that using a cursor is going to process each individual record one by one.
Recursive CTE would be a better solution:
Sql server CTE and recursion example
CTE Recursion to get tree hierarchy
How does a Recursive CTE run, line by line?
Something like:
DECLARE #User TABLE
(
[root_id] INT
, [obj_id] INT
, [Obj_type] NVARCHAR(100)
, [obj_ref_id] INT
, [obj_role] NVARCHAR(100)
);
DECLARE #Notes TABLE
(
[note_id] INT
, [obj_id] INT
, [note] NVARCHAR(255)
);
INSERT INTO #Notes (
[note_id]
, [obj_id]
, [note]
)
VALUES ( 1, 2, 'foo ' )
, ( 2, 2, 'bar ' )
, ( 3, 2, 'baz ' )
, ( 4, 3, 'lorem' )
, ( 5, 8, 'ipsum' );
INSERT INTO #User (
[root_id]
, [obj_id]
, [Obj_type]
, [obj_ref_id]
, [obj_role]
)
VALUES ( 1, 2, 'student', 7, 'learn' )
, ( 1, 3, 'student', 7, 'learn' )
, ( 1, 1, 'principal', 1, 'lead' )
, ( 1, 4, 'mentor', 1, 'train teachers' )
, ( 1, 5, 'trainee', 4, 'learn teaching' )
, ( 1, 6, 'trainee', 4, 'learn teaching' )
, ( 1, 7, 'teacher', 1, 'teach' )
, ( 2, 8, 'student', 9, 'learn' )
, ( 2, 9, 'principal', 9, 'lead' );
WITH [Hierarchy]
AS ( SELECT [obj_id] AS [root_obj]
, [obj_ref_id] AS [root_obj_ref]
, [obj_id]
, [obj_ref_id]
, CONVERT(
NVARCHAR(MAX)
, [Obj_type] + ' ' + CONVERT(NVARCHAR, [obj_id]) + ' ('
+ [obj_role] + ')'
) AS [obj_path]
FROM #User
UNION ALL
SELECT [a].[root_obj]
, [a].[root_obj_ref]
, [b].[obj_id]
, [b].[obj_ref_id]
, [a].[obj_path] + ' > ' + [b].[Obj_type]
+ CONVERT(NVARCHAR, [b].[obj_id]) + ' (' + [b].[obj_role] + ')' AS [obj_path]
FROM [Hierarchy] [a]
INNER JOIN #User [b]
ON [b].[obj_id] = [a].[obj_ref_id]
WHERE [a].[obj_id] <> [a].[obj_ref_id] ) --Here, basically continue the recursion while the record isn't referencing itself. The final will include that self referencing record.
SELECT [Hierarchy].[root_obj] AS [obj_id]
, (
SELECT COUNT(*)
FROM #Notes
WHERE [obj_id] = [Hierarchy].[root_obj]
) AS [notes] --Here we'll go out and get the count of notes.
, [Hierarchy].[obj_path]
FROM [Hierarchy]
WHERE [Hierarchy].[obj_id] = [Hierarchy].[obj_ref_id] --Then we only went those records built up to the final record that was referencing itself.
ORDER BY [Hierarchy].[root_obj];

Postgres query : using previous dynamically created colum value in next

I'm trying to implement what I have in code as a postgres query.
The following example isn't exactly what we're trying to do but I hope it shows how I'm trying to use the value from a previously calculated row in the next.
A sample table to help me demonstrate what I'm trying to do :
test=# select * from test ;
id | field1 | field2 | field3 | score
----+--------+--------+--------+-------
1 | 1 | 3 | 2 | 1.25
2 | 1 | -1 | 1 |
3 | 2 | 1 | 5 |
4 | 3 | -2 | 4 |
Here's the query in progress:
select id,
coalesce (
score,
case when lag_field3 = 2 then 0.25*(3*field1+field2) end
) as new_score
from (
select id, field1, field2, field3, score,
lag (field3) over (order by id) as lag_field3
from test
) inner1 ;
Which returns what I want so far ...
id | new_score
----+-----------
1 | 1.25
2 | 0.5
3 |
4 |
The next iteration of the query:
select id,
coalesce (
score,
case when lag_field3 = 2 then 0.25*(3*field1+field2) end,
case when field1 = 2 then 0.75 * lag (new_score) end
) as new_score
from (
select id, field1, field2, field3, score,
lag (field3) over (order by id) as lag_field3
from test
) inner1 ;
The difference is this :
case when field1 = 2 then 0.75 * lag (new_score) end
I know and understand why this won't work.
I've aliased the calculated field as new_score and when field1 = 2, I want 0.75 * the previous rows new_score value.
I understand that new_score is an alias and can't be used.
Is there some way I can accomplish this? I could try to copy that expression, wrap a lag around it, alias that as something else and try to work with that but that would get very messy.
Any ideas?
Many thanks.
Postgres lets you use windows in CASE statements. Probably you were missing the OVER (ORDER BY id) part. You can also define different windows but you can't use windows in conjunction with GROUP BY. Also, it won't let you use annidate windows, so you have to write down some subqueries or CTEs.
Here's the query:
SELECT id, COALESCE(tmp_score,
CASE
WHEN field1 = 2
THEN 0.75 * LAG(tmp_score) OVER (ORDER BY id)
-- missing ELSE statement here
END
) AS new_score
FROM (
SELECT id, field1,
COALESCE (
score,
CASE
WHEN LAG(field3) OVER (ORDER BY id) = 2
THEN 0.25*(3*field1+field2)
END
) AS tmp_score
FROM test
) inner1
The code to create and populate the table:
CREATE TABLE test(
id int,
field1 int,
field2 int,
field3 int,
score numeric
);
INSERT INTO test VALUES
(1, 1, 3, 2, 1.25),
(2, 1, -1, 1, NULL),
(3, 2, 1, 5, NULL),
(4, 3, -2, 4, NULL);
The query returns this output:
id | new_score
----+-----------
1 | 1.25
2 | 0.50
3 | 0.3750
4 |

First and second time appearing row id in PostgreSQL

Suppose we have a list of ids with date. And we want to know when the ids appeared for the first and the second time. About the first time, I have created a query that is
SELECT year, mon, COUNT(id) AS sum_first_id
FROM (
SELECT DISTINCT
ON (id) DATE, id
FROM TABLE
GROUP BY 2, 1
) AS foo
GROUP BY 2, 1
ORDER BY 1, 2;
I think that this works. But how could I find when the ids appear for the second time?
Let's say you have the table table_x:
select *
from table_x
order by 1, 2
id | date
----+------------
1 | 2015-06-04
1 | 2015-06-05
1 | 2015-06-14
2 | 2015-06-05
2 | 2015-06-08
2 | 2015-06-10
2 | 2015-06-17
2 | 2015-06-22
(8 rows)
To select n first element in groups use row_number() function:
select id, date
from (
select id, date, row_number() over (partition by id order by date) rn
from table_x
order by 1, 2
) sub
where rn <= 2
id | date
----+------------
1 | 2015-06-04
1 | 2015-06-05
2 | 2015-06-05
2 | 2015-06-08
(4 rows)
It does not appear that your query is correct.
SELECT year, mon, COUNT(id) AS sum_first_id -- what is year, mon?
FROM (
SELECT DISTINCT
ON (id) DATE, id
FROM TABLE
GROUP BY 2, 1 -- should be order by 2, 1
) AS foo
GROUP BY 2, 1
ORDER BY 1, 2;

t-sql group by category and get top n values

Imagine I have this table:
Month | Person | Value
----------------------
Jan | P1 | 1
Jan | P2 | 2
Jan | P3 | 3
Feb | P1 | 5
Feb | P2 | 4
Feb | P3 | 3
Feb | P4 | 2
...
How can I build a t-sql query to get the top 2 value rows and a third with the sum of others?
Something like this:
RESULT:
Month | Person | Value
----------------------
Jan | P3 | 3
Jan | P2 | 2
Jan | Others | 1 -(sum of the bottom value - in this case (Jan, P1, 1))
Feb | P1 | 5
Feb | P2 | 4
Feb | Others | 5 -(sum of the bottom values - in this case (Feb, P3, 3) and (Feb, P4, 2))
Thanks
In the assumption you are using SQL Server 2005 or higher, using a CTE would do the trick.
Attach a ROW_NUMBER to each row, starting with the highest value, resetting for each month.
SELECT the top 2 rows for each month from this query (rownumber <= 2)
UNION with the remaining rows (rownumber > 2)
SQL Statement
;WITH Months (Month, Person, Value) AS (
SELECT 'Jan', 'P1', 1 UNION ALL
SELECT 'Jan', 'P2', 2 UNION ALL
SELECT 'Jan', 'P3', 3 UNION ALL
SELECT 'Feb', 'P1', 5 UNION ALL
SELECT 'Feb', 'P2', 4 UNION ALL
SELECT 'Feb', 'P3', 3 UNION ALL
SELECT 'Feb', 'P4', 2
),
q AS (
SELECT Month
, Person
, Value
, RowNumber = ROW_NUMBER() OVER (PARTITION BY Month ORDER BY Value DESC)
FROM Months
)
SELECT Month
, Person
, Value
FROM (
SELECT Month
, Person
, Value
, RowNumber
FROM q
WHERE RowNumber <= 2
UNION ALL
SELECT Month
, Person = 'Others'
, SUM(Value)
, MAX(RowNumber)
FROM q
WHERE RowNumber > 2
GROUP BY
Month
) q
ORDER BY
Month DESC
, RowNumber
Kudo's go to Andriy for teaching me some new tricks.
;WITH atable (Month, Person, Value) AS (
SELECT 'Jan', 'P1', 1 UNION ALL
SELECT 'Jan', 'P2', 2 UNION ALL
SELECT 'Jan', 'P3', 3 UNION ALL
SELECT 'Feb', 'P1', 5 UNION ALL
SELECT 'Feb', 'P2', 4 UNION ALL
SELECT 'Feb', 'P3', 3 UNION ALL
SELECT 'Feb', 'P4', 2
),
numbered AS (
SELECT
Month, Person, Value,
rownum = ROW_NUMBER() OVER (PARTITION BY Month ORDER BY Value DESC)
FROM atable
),
grouped AS (
SELECT
Month, Person, Value,
Grp = CASE WHEN rownum < 3 THEN rownum ELSE 3 END
FROM numbered
)
SELECT
Month,
Person = CASE Grp WHEN 3 THEN 'Others' ELSE MAX(Person) END,
Value = SUM(Value)
FROM grouped
GROUP BY Month, Grp
ORDER BY Month DESC, Grp
WITH NTable AS
(
SELECT [Month],
Person,
Value,
ROW_NUMBER() OVER (PARTITION BY [Month] ORDER BY Value DESC)
AS Rownumber
FROM MyTable
)
SELECT t.[Month],
CASE Rownumber WHEN 1 THEN t.Person WHEN 2 THEN t.Person ELSE 'Others' END As Person,
SUM(t.Value) As [Sum]
FROM NTable t
GROUP BY t.[Month], CASE Rownumber WHEN 1 THEN t.Person WHEN 2 THEN t.Person ELSE 'Others' END
ORDER BY t.[Month]