Update Column 1 Based on time and repeating value in Column B

Update Column 1 Based on time and repeating value in Column B - tsql

Table A: ID (Identity) Name State StateTimestamp Course
1 ABC C 1/1/2001 ?
2 ABC A 1/5/2001 OO
3 ABC B 2/3/2001 OO
4 ABC A 2/4/2001 PP
5 ABC D 2/5/2001 PP
6 ABC A 2/12/2001 QQ
7 ABC A 2/18/2001 RR
8 ABC z 2/20/2001 ?
9 XYZ C 1/1/2001 ?
10 XYZ A 1/14/2001 ?
11 XYZ D 1/16/2001 ?
12 XYZ A 1/17/2001 ?
13 XYZ z 1/31/2001 ?
ID is a unique column. Each state belongs to a Name and has a timestamp associated with it, in increasing order.
Update the Course for each row based on State A, such that when State A occurs for the first time for a Name, Course is updated
to 'OO'; when State A occurs for the 2nd time for a Name, Course is updated to 'PP'; and when State A occurs for the 3rd time, Course is updated
to 'QQ'. The number of repetitions of State A can be unlimited.
The Course for all of the states having date > State A and date < the consecutive State A - i.e. after one pending state
and before the following pending state (e.g. B, 2/3/2001 in the above table) - has to be updated to the same Course as the
preceding A state (i.e. OO). Your ideas will be much appreciated.
Create table As:
-- Demo table for the question: one row per state change of a Name.
-- ID is generated by the engine (starts at 1, increments by 1).
CREATE TABLE TABLEA
(
    ID             INT IDENTITY(1,1)
  , Name           VARCHAR(155)
  , State          VARCHAR(155)
  , StateTimestamp DATETIME
  , Course         VARCHAR(155)
);
Populate As:
-- Sample rows for TABLEA. ID is omitted so the identity column generates it.
-- Dates use the unseparated YYYYMMDD form so they parse the same way under any
-- session language / DATEFORMAT setting (the originals were m/d/yyyy).
INSERT INTO TABLEA (Name, State, StateTimestamp, Course)
VALUES
    ('ABC', 'C', '20010101', '? ')
  , ('ABC', 'A', '20010105', '?')
  , ('ABC', 'B', '20010203', '?')
  , ('ABC', 'A', '20010204', '?')
  , ('ABC', 'D', '20010205', '?')
  , ('ABC', 'A', '20010212', '?')
  , ('ABC', 'A', '20010218', '?')
  , ('ABC', 'z', '20010220', '? ')
  , ('XYZ', 'C', '20010101', '? ')
  , ('XYZ', 'A', '20010114', '? ')
  , ('XYZ', 'D', '20010116', '? ')
  , ('XYZ', 'A', '20010117', '? ')
  , ('XYZ', 'z', '20010131', '? ');
Desired Result:
Table A:
ID (Identity) Name State StateTimestamp Course
1 ABC C 1/1/2001 ?
2 ABC A 1/5/2001 OO
3 ABC B 2/3/2001 OO
4 ABC A 2/4/2001 PP
5 ABC D 2/5/2001 PP
6 ABC A 2/12/2001 QQ
7 ABC A 2/18/2001 RR
8 ABC z 2/20/2001 RR
9 XYZ C 1/1/2001 XX
10 XYZ A 1/14/2001 YY
11 XYZ D 1/16/2001 YY
12 XYZ A 1/17/2001 ZZ
13 XYZ z 1/31/2001 ZZ

Related

Display column values in a row in DB2

Is there a way to display some column values in a single row, followed by the remaining columns? I have written the query below; it returns 3 rows as output, but it should return only 1.
-- Attribute values of asset '100': one output row per matching assetspec attribute.
-- The WHERE predicate on spec.ASSETATTRID rejects the NULL-extended rows of the
-- outer join, so only assets with at least one matching attribute are returned.
SELECT
    a.assetnum,
    a.serialnum,
    spec.alnvalue,
    spec.assetattrid
FROM asset AS a
LEFT OUTER JOIN assetspec AS spec
    ON  a.assetnum         = spec.assetnum
    AND a.classstructureid = spec.classstructureid
WHERE a.assetnum = '100'
  AND spec.ASSETATTRID IN ('XXX', 'YYY', 'ZZZ')
OUTPUT
ASSETNUM SERIALNUM ALNVALUE ASSETATTRID
100 123 A XXX
100 123 B YYY
100 123 C ZZZ
EXPECTED OUTPUT
ASSETNUM SERIALNUM XXX YYY ZZZ
100 123 A B C

Try this:
-- Collapse the attribute rows into one row per asset (conditional aggregation):
-- each MAX() sees ALNVALUE only for its own attribute id and NULL otherwise.
WITH tab (ASSETNUM, SERIALNUM, ALNVALUE, ASSETATTRID) AS (
    -- Inline sample rows matching the OUTPUT shown in the question.
    VALUES
        (100, 123, 'A', 'XXX'),
        (100, 123, 'B', 'YYY'),
        (100, 123, 'C', 'ZZZ')
)
SELECT
    ASSETNUM,
    SERIALNUM,
    MAX(CASE WHEN ASSETATTRID = 'XXX' THEN ALNVALUE END) AS XXX,
    MAX(CASE WHEN ASSETATTRID = 'YYY' THEN ALNVALUE END) AS YYY,
    MAX(CASE WHEN ASSETATTRID = 'ZZZ' THEN ALNVALUE END) AS ZZZ
FROM tab
GROUP BY
    ASSETNUM,
    SERIALNUM;

Improve performance on CTE with sub-queries

I have a table with this structure:
WorkerID Value GroupID Sequence Validity
1 '20%' 1 1 2018-01-01
1 '10%' 1 1 2017-06-01
1 'Yes' 1 2 2017-06-01
1 '2018-01-01' 2 1 2017-06-01
1 '17.2' 2 2 2017-06-01
2 '10%' 1 1 2017-06-01
2 'No' 1 2 2017-06-01
2 '2016-03-01' 2 1 2017-06-01
2 '15.9' 2 2 2017-06-01
This structure was created so that the client can create customized data for a worker. For example, Group 1 can be something like "Salary", and Sequence is one value that belongs to that Group, like "Overtime Compensation". The column Value is a VARCHAR(150) field, and the correct validation and conversion is done in another part of the application.
The Validity column exists mainly for historical reasons.
Now I would like to show, for the different workers, the information in a grid where each row should be one worker (displaying the one with the most recent Validity):
Worker 1_1 1_2 2_1 2_2
1 20% Yes 2018-01-01 17.2
2 10% No 2016-03-01 15.9
To accomplish this I created a CTE that looks like this:
-- Sketch from the question: a grid with one column per (GroupID, Sequence) pair,
-- each column taking the Value with the most recent Validity.
-- NOTE(review): as posted this is not runnable: the outer SELECT has no FROM
-- clause, and no statement follows the CTE definition before GO.
-- NOTE(review): none of the subqueries has a predicate tying it to the outer
-- "worker", so as written each returns the same value for every row; a
-- correlation on the worker id was presumably omitted from the post - confirm.
-- NOTE(review): aliases such as 1_1 start with a digit and would need to be
-- delimited (e.g. [1_1]) to be valid T-SQL identifiers.
WITH CTE_worker_grid
AS
(
SELECT
worker,
/* 1 */
(
-- Latest Value for group 1, sequence 1.
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 1_1,
(
-- Latest Value for group 1, sequence 2.
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 1_2,
/* 2 */
(
-- Latest Value for group 2, sequence 1.
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 2_1,
(
-- Latest Value for group 2, sequence 2.
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 2_2
)
GO
This produces the correct result, but it's very slow, as it creates this grid for over 18,000 workers with almost 30 Groups and up to 20 Sequences in each Group.
How could one speed up the process of a CTE of this magnitude? Should CTE even be used? Can the sub-queries be changed or re-factored out to speed up the execution?

Use a PIVOT!
+----------+---------+---------+------------+---------+
| WorkerId | 001_001 | 001_002 | 002_001 | 002_002 |
+----------+---------+---------+------------+---------+
| 1 | 20% | Yes | 2018-01-01 | 17.2 |
| 2 | 10% | No | 2016-03-01 | 15.9 |
+----------+---------+---------+------------+---------+
SQL Fiddle: http://sqlfiddle.com/#!18/6e768/1
-- Demo table for the pivot answer: one row per version of a worker attribute,
-- identified by (GroupID, Sequence) and versioned by Validity.
CREATE TABLE WorkerAttributes
(
    WorkerID   INT          NOT NULL
  , [Value]    VARCHAR(150) NOT NULL  -- widened to the VARCHAR(150) stated in the question
  , GroupID    INT          NOT NULL
  , [Sequence] INT          NOT NULL
  , Validity   DATE         NOT NULL
);
-- Sample rows from the question. Worker 1 has two versions of attribute 1/1
-- (2018-01-01 and 2017-06-01); every other attribute has a single version.
INSERT INTO WorkerAttributes
    (WorkerID, [Value], GroupID, [Sequence], Validity)
VALUES
    (1, '20%',        1, 1, '2018-01-01'),
    (1, '10%',        1, 1, '2017-06-01'),
    (1, 'Yes',        1, 2, '2017-06-01'),
    (1, '2018-01-01', 2, 1, '2017-06-01'),
    (1, '17.2',       2, 2, '2017-06-01'),
    (2, '10%',        1, 1, '2017-06-01'),
    (2, 'No',         1, 2, '2017-06-01'),
    (2, '2016-03-01', 2, 1, '2017-06-01'),
    (2, '15.9',       2, 2, '2017-06-01')
-- One row per worker: the newest Value of each (GroupID, Sequence) pair, pivoted into columns.
;WITH ranked_values AS
(
    -- Number the versions of every worker/group/sequence attribute, newest Validity first.
    SELECT
        src.WorkerID,
        src.GroupID,
        src.[Sequence],
        src.[Value],
        ROW_NUMBER() OVER (
            PARTITION BY src.WorkerID, src.GroupID, src.[Sequence]
            ORDER BY src.Validity DESC
        ) AS VersionNumber
    FROM WorkerAttributes AS src
),
latest_values AS
(
    -- Keep only the newest version and build the zero-padded 'GGG_SSS' pivot key.
    SELECT
        rv.WorkerID,
        RIGHT('000' + CAST(rv.GroupID AS VARCHAR(3)), 3)
            + '_'
            + RIGHT('000' + CAST(rv.[Sequence] AS VARCHAR(3)), 3) AS SMART_KEY,
        rv.[Value]
    FROM ranked_values AS rv
    WHERE rv.VersionNumber = 1
)
SELECT
    WorkerId,
    [001_001],
    [001_002],
    [002_001],
    [002_002]
FROM
(
    -- PIVOT groups by every column that is neither aggregated nor spread,
    -- so the source is restricted to exactly these three columns.
    SELECT
        lv.WorkerId,
        lv.SMART_KEY,
        lv.[Value]
    FROM latest_values AS lv
) AS pivot_source
PIVOT
(
    MAX([Value])
    FOR SMART_KEY IN ([001_001], [001_002], [002_001], [002_002])
) AS PVT

SELECT, format rows with different columns into a single row that share an ID

I'm trying to format one SELECT statement so that it outputs a resultset with combined values over a few columns.
I have a resultset like this:
ID VID PID VALUE
1 x 1 a
2 y 1 A
3 y 2 B
4 x 2 b
5 y 3 C
6 x 3 c
7 x 4 d
8 y 4 D
9 x 5 e
10 y 5 E
Can I format one SELECT statement to effectively join the values with duplicate PIDs into a single row? I'm only really interested in PID and VALUE, e.g.
PID VALUE1 VALUE2
1 a A
2 b B
3 c C
4 d D
5 e E
Otherwise, should I be using actual JOINs with queries acting on the same table?
I tried to use CASE, but I can only get as far as a resultset like this:
ID VID PID VALUE1 VALUE2
1 x 1 a NULL
2 y 1 NULL A
3 y 2 NULL B
4 x 2 b NULL
5 y 3 NULL C
6 x 3 c NULL
7 x 4 d NULL
8 y 4 NULL D
9 x 5 e NULL
10 y 5 NULL E
The query I'm using looks somewhat like this.
-- Splits VALUE into two columns by VID; still one output row per source row,
-- so each row carries a NULL in the column that does not match its VID.
SELECT
    ID,
    VID,
    PID,
    CASE VID WHEN 'x' THEN VALUE END AS VALUE1,
    CASE VID WHEN 'y' THEN VALUE END AS VALUE2
FROM BIGTABLE
WHERE VID IN ('x', 'y')
  AND PID IN (1, 2, 3, 4, 5)
There are many values of PID and VID other than just 1-5 and x & y, so I'm selecting them that way from the whole table.

Do you mean like this? It's called "conditional aggregation."
WITH
    resultset (id, vid, pid, value) AS (
        SELECT  1, 'x', 1, 'a' FROM dual UNION ALL
        SELECT  2, 'y', 1, 'A' FROM dual UNION ALL
        SELECT  3, 'y', 2, 'B' FROM dual UNION ALL
        SELECT  4, 'x', 2, 'b' FROM dual UNION ALL
        SELECT  5, 'y', 3, 'C' FROM dual UNION ALL
        SELECT  6, 'x', 3, 'c' FROM dual UNION ALL
        SELECT  7, 'x', 4, 'd' FROM dual UNION ALL
        SELECT  8, 'y', 4, 'D' FROM dual UNION ALL
        SELECT  9, 'x', 5, 'e' FROM dual UNION ALL
        SELECT 10, 'y', 5, 'E' FROM dual
    )
-- The CTE above only simulates the input rows for testing; it is not part of the solution.
-- The actual query starts here: one row per pid, with the 'x' value and the 'y'
-- value side by side (MIN ignores the NULLs produced for non-matching vid).
SELECT
    pid,
    MIN(CASE vid WHEN 'x' THEN value END) AS value1,
    MIN(CASE vid WHEN 'y' THEN value END) AS value2
FROM resultset
-- Add the WHERE conditions from the original attempt here if they are needed.
GROUP BY pid
ORDER BY pid
;
PID VALUE1 VALUE2
--- ------ ------
1 a A
2 b B
3 c C
4 d D
5 e E

Postgresql: only keep unique values from integer array

Let's say I have an array of integers
1 6 6 3 3 8 4 4
It will always be of the form n*(pairs of numbers) + 2 (unique numbers).
Is there an efficient way of keeping only the 2 unique values (i.e. the 2 with a single occurrence)?
Here, I would like to get 1 and 8.
So far, this is what I have:
-- Count how often each array element occurs, then keep the two least frequent.
WITH node_counts AS (
    SELECT
        node_id,
        COUNT(*) AS occurrences
    FROM unnest(ARRAY[1, 6, 6, 3, 3, 8, 4, 4]) AS node_id
    GROUP BY node_id
)
SELECT node_id
FROM node_counts
ORDER BY occurrences
LIMIT 2;

You are very close, I think:
-- Keep only the array elements that occur exactly once.
SELECT node_id
FROM unnest(ARRAY[1, 6, 6, 3, 3, 8, 4, 4]) AS node_id
GROUP BY node_id
HAVING COUNT(*) = 1;
You can group these back into an array, if you like, using array_agg().

question with a query

Table1
sub-id ref-id Name
1 1 Project 1
2 1 Project 2
3 2 Project 3
4 2 Project 4
Table2
sub-id ref-id log_stamp Recepient log_type
----------------------------------------------------
1 1 06/06/2011 person A 1
1 1 06/14/2011 person B 2
1 1 06/16/2011 person C 2
1 1 06/17/2011 person D 3
2 1 06/18/2011 person E 2
2 1 06/19/2011 person F 2
3 2 06/20/2011 person G 1
4 2 06/23/2011 person H 3
Result
Name ref-id start_date Recepient latest_comment Recepient completion_date Receipient
Project1 1 06/06/2011 person A 06/19/2011 person F 06/17/2011 person D
Project3 2 06/20/2011 person G NULL NULL 06/23/2011 person H
log_type of 1 stands for start_date
log_type of 2 stands for latest_comment
log_type of 3 stands for completion_date
The Name of the project is just the top-most name in the same ref-id group.
This is what I have tried so far:
-- Attempt from the question: pivot log_stamp by log type (as posted; not runnable as written).
-- NOTE(review): the tables are aliased sb / log in the FROM clause but are
-- referenced as Table1 / Table2 everywhere else.
;with T as (select
-- NOTE(review): ref-id is not delimited here, so it parses as "ref minus id";
-- the join condition below delimits the similar name as [sub-id].
Table2.ref-id,
Table2.log_stamp,
-- NOTE(review): the WHEN ... END lines have no CASE keyword; presumably
-- "case log.log_type" was intended - confirm.
Table2 log.log_type
when 1 then '1'
when 2 then '2'
when 3 then '3'
end as title
from
Submission sb inner join submission_log log on Table1.[sub-id] = Table2.[sub-id]
)
select * from T
-- NOTE(review): the IN list names [5],[6],[9],[11] although the expression above
-- only produces '1', '2' and '3'; the PIVOT clause is also missing its closing
-- parenthesis and alias in the post.
pivot (
max(log_stamp)
for title IN ([1],[2],[3],[5],[6],[9],[11])

I was unable to do it as a pivot; I don't think it is possible as described.
-- Sample data held in table variables. Table variables are declared with an @
-- prefix; "DECLARE #name TABLE" is not valid T-SQL.
-- Dates use the unseparated YYYYMMDD form so they do not depend on the session's
-- language / DATEFORMAT setting.
DECLARE @table1 TABLE (sub_id INT, ref_id INT, name VARCHAR(50))
INSERT @table1 (sub_id, ref_id, name) VALUES (1, 1, 'Project 1')
INSERT @table1 (sub_id, ref_id, name) VALUES (2, 1, 'Project 2')
INSERT @table1 (sub_id, ref_id, name) VALUES (3, 2, 'Project 3')
INSERT @table1 (sub_id, ref_id, name) VALUES (4, 2, 'Project 4')

DECLARE @table2 TABLE (sub_id INT, ref_id INT, log_stamp DATETIME, recepient VARCHAR(10), logtype INT)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1, 1, '20110606', 'person A', 1)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1, 1, '20110614', 'person B', 2)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1, 1, '20110616', 'person C', 2)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (1, 1, '20110617', 'person D', 3)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (2, 1, '20110618', 'person E', 2)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (2, 1, '20110619', 'person F', 2)
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (3, 2, '20110620', 'person G', 1)
-- NOTE: the question's Table2 lists this log row under sub-id 4. It is entered
-- here with sub_id 3; the query below joins per sub_id, so with sub_id 4 it
-- would not be attached to Project 3.
INSERT @table2 (sub_id, ref_id, log_stamp, recepient, logtype) VALUES (3, 2, '20110623', 'person H', 3)

-- a: every log row joined to its project, numbered newest-first within each
--    (project, log type).
-- b: only the newest log row per (project, log type).
;WITH a AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY t1.sub_id, t1.ref_id, t1.name, t2.logtype
            ORDER BY log_stamp DESC
        ) AS RN,
        t1.sub_id,
        t1.ref_id,
        t1.name,
        t2.Recepient,
        t2.logtype,
        log_stamp
    FROM @table1 AS t1
    INNER JOIN @table2 AS t2
        ON  t1.ref_id = t2.ref_id
        AND t1.sub_id = t2.sub_id
),
b AS (
    SELECT RN, sub_id, ref_id, name, Recepient, logtype, log_stamp
    FROM a
    WHERE RN = 1
)
-- One row per project that has a start entry (logtype 1). The latest comment
-- (logtype 2) and the completion (logtype 3) are optional, hence the LEFT JOINs
-- with the logtype filter inside ON.
SELECT
    b1.name,
    b1.ref_id,
    b1.log_stamp AS start_date,
    b1.Recepient,
    b2.log_stamp AS latest_comment,
    b2.Recepient,
    b3.log_stamp AS completion_date,
    b3.Recepient
FROM b AS b1
LEFT JOIN b AS b2
    ON  b1.sub_id = b2.sub_id
    AND b1.ref_id = b2.ref_id
    AND b2.logtype = 2
LEFT JOIN b AS b3
    ON  b1.sub_id = b3.sub_id
    AND b1.ref_id = b3.ref_id
    AND b3.logtype = 3
WHERE b1.logtype = 1
Result:
name ref_id start_date Recepient latest_comment Recepient completion_date Recepient
------------ ----------- ----------------------- ---------- ----------------------- ---------- ----------------------- ----------
Project 1 1 2011-06-06 00:00:00.000 person A 2011-06-16 00:00:00.000 person C 2011-06-17 00:00:00.000 person D
Project 3 2 2011-06-20 00:00:00.000 person G NULL NULL 2011-06-23 00:00:00.000 person H