TSQL combine multiple records - tsql

I have a table that contains:
Code ID1 ID2
Blue 1 2
Blue 3
Blue 4
Green 1 5
Green 10
Green 12
I need the result set:
Code ID1 ID2
Blue 1 2
Blue 1 3
Blue 1 4
Blue 1 5
Green 1 5
Green 1 10
Green 1 12
The number in ID1 does not always start with a 1.
Any ideas?? I am stumped!

Drop Table T1
Create table T1( Code varchar(10), Id1 int, Id2 int )
Insert T1 Values
('Blue', 11, 2),
('Blue', 12, 3),
('Blue', NULL, 4),
('Blue', 15, 5),
('Blue', NULL, 6),
('Green', 23, 21),
('Green', 24, 24),
('Green',NULL, 25),
('Green',NULL, 28),
('Green', 27, 29),
('Red', 35, 33),
('Red', NULL, 36),
('Red', NULL, 38)
SELECT
A.Code, ISNULL(A. Id1,B.LastId) Id1 , A.Id2
FROM T1 A
CROSS APPLY(
Select Top 1 Id1 as LastId
From T1
where A.Code = T1.Code and Id2 <= A.Id2
order by id1 desc
) B

Related

Tree structure in tsql - How get data by the special class in branches

I have structure like that(as example):
ID ClassId Name Parent
--------------------------------------
1 12 Boss
2 13 Manager1 1
3 13 Manager2 1
4 13 Manager3 1
5 14 SubManager1 3
6 15 UnderSubManager1 5
7 16 Worker1 2
8 16 Worker2 6
9 14 SubManager2 4
10 16 Worker3 9
Than, we have this:
Boss->Manager1->Worker1
Boss->Manager2->SubManager1->UnderSubManager1->Worker2
Boss->Manager3->SubManager2->Worker3
I need query, that give me a this reult:
Boss->Manager1->worker1
Boss->Manager2->worker2
Boss->Manager3->worker3
I try do this witch CTE using ClassId but with poor result :(
Assuming you want to show the 2 top levels (Boss, and ManagerX), and then the lowest level (WorkerX) -
create table #tmp (ID int, ClassID int, Name varchar(32), Parent int)
go
insert into #tmp (ID, ClassID, Name, Parent)
values
(1, 12, 'Boss', null)
, (2, 13, 'Manager1', 1)
, (3, 13, 'Manager2', 1)
, (4, 13, 'Manager3', 1)
, (5, 14, 'SubManager1', 2)
, (6, 15, 'UnderSubManager1', 5)
, (7, 16, 'Worker1', 2)
, (8, 16, 'Worker2', 6)
, (9, 14, 'SubManager2', 4)
, (10, 16, 'Worker3', 9)
go
with cte as (
select t.ID, t.ClassID, t.Name, t.Parent
, Path = cast(case when t.ClassID in (12, 13) then t.Name else '' end as varchar(max))
, NestLevel = 0
, IsWorker = case t.ClassID when 16 then 1 else 0 end
from #tmp t
where t.Parent is null
union all
select t.ID, t.ClassID, t.Name, t.Parent
, Path = cte.Path + cast(case when t.ClassID in (12, 13, 16) then '->' + t.Name else '' end as varchar(max))
, NestLevel = cte.NestLevel + 1
, IsWorker = case t.ClassID when 16 then 1 else 0 end
from #tmp t
inner join cte on t.Parent = cte.ID
)
select cte.Path
from cte
where cte.IsWorker = 1
order by cte.Path
drop table #tmp
go
The result:
Boss->Manager1->Worker1
Boss->Manager1->Worker2
Boss->Manager3->Worker3

Improve performance on CTE with sub-queries

I have a table with this structure:
WorkerID Value GroupID Sequence Validity
1 '20%' 1 1 2018-01-01
1 '10%' 1 1 2017-06-01
1 'Yes' 1 2 2017-06-01
1 '2018-01-01' 2 1 2017-06-01
1 '17.2' 2 2 2017-06-01
2 '10%' 1 1 2017-06-01
2 'No' 1 2 2017-06-01
2 '2016-03-01' 2 1 2017-06-01
2 '15.9' 2 2 2017-06-01
This structure was created so that the client can create customized data for a worker. For example Group 1 can be something like "Salary" and Sequence is one value that belongs to that Group like "Overtime Compensation". The column Value is a VARCHAR(150) field and the correct validation and conversation is done in another part of the application.
The Validity column exist mainly for historical reasons.
Now I would like to show, for the different workers, the information in a grid where each row should be one worker (displaying the one with the most recent Validity):
Worker 1_1 1_2 2_1 2_2
1 20% Yes 2018-01-01 17.2
2 10% No 2016-03-01 15.9
To accomplish this I created a CTE that looks like this:
WITH CTE_worker_grid
AS
(
SELECT
worker,
/* 1 */
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 1_1,
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 1
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 1_2,
/* 2 */
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 1
ORDER BY w.Validity DESC
) AS 2_1,
(
SELECT top 1 w.Value
FROM worker_values AS w
WHERE w.GroupID = 2
AND w.Sequence = 2
ORDER BY w.Validity DESC
) AS 2_2
)
GO
This produces the correct result but it's very slow as it creates this grid for over 18'000 worker with almost 30 Groups and up to 20 Sequences in each Group.
How could one speed up the process of a CTE of this magnitude? Should CTE even be used? Can the sub-queries be changed or re-factored out to speed up the execution?
Use a PIVOT!
+----------+---------+---------+------------+---------+
| WorkerId | 001_001 | 001_002 | 002_001 | 002_002 |
+----------+---------+---------+------------+---------+
| 1 | 20% | Yes | 2018-01-01 | 17.2 |
| 2 | 10% | No | 2016-03-01 | 15.9 |
+----------+---------+---------+------------+---------+
SQL Fiddle: http://sqlfiddle.com/#!18/6e768/1
CREATE TABLE WorkerAttributes
(
WorkerID INT NOT NULL
, [Value] VARCHAR(50) NOT NULL
, GroupID INT NOT NULL
, [Sequence] INT NOT NULL
, Validity DATE NOT NULL
)
INSERT INTO WorkerAttributes
(WorkerID, Value, GroupID, Sequence, Validity)
VALUES
(1, '20%', 1, 1, '2018-01-01')
, (1, '10%', 1, 1, '2017-06-01')
, (1, 'Yes', 1, 2, '2017-06-01')
, (1, '2018-01-01', 2, 1, '2017-06-01')
, (1, '17.2', 2, 2, '2017-06-01')
, (2, '10%', 1, 1, '2017-06-01')
, (2, 'No', 1, 2, '2017-06-01')
, (2, '2016-03-01', 2, 1, '2017-06-01')
, (2, '15.9', 2, 2, '2017-06-01')
;WITH CTE_WA_RANK
AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY WorkerID, GroupID, [Sequence] ORDER BY Validity DESC) AS VersionNumber
, WA.WorkerID
, WA.GroupID
, WA.[Sequence]
, WA.[Value]
FROM
WorkerAttributes AS WA
),
CTE_WA
AS
(
SELECT
WA_RANK.WorkerID
, RIGHT('000' + CAST(WA_RANK.GroupID AS VARCHAR(3)), 3)
+ '_'
+ RIGHT('000' + CAST(WA_RANK.[Sequence] AS VARCHAR(3)), 3) AS SMART_KEY
, WA_RANK.[Value]
FROM
CTE_WA_RANK AS WA_RANK
WHERE
WA_RANK.VersionNumber = 1
)
SELECT
WorkerId
, [001_001] AS [001_001]
, [001_002] AS [001_002]
, [002_001] AS [002_001]
, [002_002] AS [002_002]
FROM
(
SELECT
CTE_WA.WorkerId
, CTE_WA.SMART_KEY
, CTE_WA.[Value]
FROM
CTE_WA
) AS WA
PIVOT
(
MAX([Value])
FOR
SMART_KEY IN
(
[001_001]
, [001_002]
, [002_001]
, [002_002]
)
) AS PVT

Summing arrays in conjunction with GROUP BY

I've got some periodic counter data (like once a second) from different objects that I wish to combine into an hourly total.
If I do it with separate column names, it's pretty straightforward:
CREATE TABLE ts1 (
id INTEGER,
ts TIMESTAMP,
count0 integer,
count1 integer,
count2 integer
);
INSERT INTO ts1 VALUES
(1, '2017-12-07 10:37:48', 10, 20, 50),
(2, '2017-12-07 10:37:48', 13, 7, 88),
(1, '2017-12-07 10:37:49', 12, 23, 34),
(2, '2017-12-07 10:37:49', 11, 13, 46),
(1, '2017-12-07 10:37:50', 8, 33, 80),
(2, '2017-12-07 10:37:50', 9, 3, 47),
(1, '2017-12-07 10:37:51', 17, 99, 7),
(2, '2017-12-07 10:37:51', 9, 23, 96);
SELECT id, date_trunc('hour', ts + '1 hour') nts,
sum(count0), sum(count1), sum(count2)
FROM ts1 GROUP BY id, nts;
id | nts | sum | sum | sum
----+---------------------+-----+-----+-----
1 | 2017-12-07 11:00:00 | 47 | 175 | 171
2 | 2017-12-07 11:00:00 | 42 | 46 | 277
(2 rows)
The problem is that different objects have different numbers of counts (though each particular object's rows -- ones sharing the same ID -- all have the same number of counts). Hence I want to use an array.
The corresponding table looks like this:
CREATE TABLE ts2 (
id INTEGER,
ts TIMESTAMP,
counts INTEGER[]
);
INSERT INTO ts2 VALUES
(1, '2017-12-07 10:37:48', ARRAY[10, 20, 50]),
(2, '2017-12-07 10:37:48', ARRAY[13, 7, 88]),
(1, '2017-12-07 10:37:49', ARRAY[12, 23, 34]),
(2, '2017-12-07 10:37:49', ARRAY[11, 13, 46]),
(1, '2017-12-07 10:37:50', ARRAY[8, 33, 80]),
(2, '2017-12-07 10:37:50', ARRAY[9, 3, 47]),
(1, '2017-12-07 10:37:51', ARRAY[17, 99, 7]),
(2, '2017-12-07 10:37:51', ARRAY[9, 23, 96]);
I have looked at this answer https://stackoverflow.com/a/24997565/1076479 and I get the general gist of it, but I cannot figure out how to get the correct rows summed together when I try to combine it with the grouping by id and timestamp.
For example, with this I get all the rows, not just the ones with matching id and timestamp:
SELECT id, date_trunc('hour', ts + '1 hour') nts, ARRAY(
SELECT sum(elem) FROM ts2 t, unnest(t.counts)
WITH ORDINALITY x(elem, rn) GROUP BY rn ORDER BY rn
) FROM ts2 GROUP BY id, nts;
id | nts | array
----+---------------------+--------------
1 | 2017-12-07 11:00:00 | {89,221,448}
2 | 2017-12-07 11:00:00 | {89,221,448}
(2 rows)
FWIW, I'm using postgresql 9.6
The problem with you original query is that you're summing all elements, because GROUP BY id, nts is executed in outer query. Combining a CTE with LATERAL JOIN would do the trick:
WITH tmp AS (
SELECT
id,
date_trunc('hour', ts + '1 hour') nts,
sum(elem) AS counts
FROM
ts2
LEFT JOIN LATERAL unnest(counts) WITH ORDINALITY x(elem, rn) ON TRUE
GROUP BY
id, nts, rn
)
SELECT id, nts, array_agg(counts) FROM tmp GROUP BY id, nts

PostgreSQL, mixing SUM horizontaly and vertically

I have a temporary table which is result of previously heavy combined data from which I have to create html document to show.
This table in short illustrates situation:
DROP TABLE IF EXISTS temp11;
CREATE TABLE temp11 (t_idx int PRIMARY KEY, mydate text, myclass int, mypercent double precision, valpercent double precision, valclass double precision);
INSERT INTO temp11
(t_idx, mydate, myclass, mypercent, valpercent, valclass) VALUES
(1, '01.01.2014', 1, 10, 10, 1),
(2, '01.01.2014', 2, 20, 20, 4),
(3, '01.01.2014', 2, 20, 50, 10),
(4, '01.01.2014', 1, 10, 17, 1.7),
(5, '02.01.2014', 2, 20, 40, 8),
(6, '02.01.2014', 1, 10, 18, 1.8),
(7, '02.01.2014', 2, 20, 50, 10),
(8, '03.01.2014', 1, 10, 10, 1),
(9, '03.01.2014', 2, 20, 40, 8),
(10, '03.01.2014', 1, 10, 20, 2),
(11, '03.01.2014', 2, 20, 30, 6);
Now I have a query for grouping and summing that into dates and valclasses:
SELECT mydate, myclass, mypercent,
SUM(valpercent) AS sumvalpercent,
SUM(valclass) AS sumvalclass,
SUM(valpercent+valclass) AS sum_row
FROM temp11
GROUP BY mydate, myclass, mypercent
ORDER BY mydate;
Result of this query is expectable:
"01.01.2014" 2 20 70 14.0 84.0
"01.01.2014" 1 10 27 2.7 29.7
"02.01.2014" 1 10 18 1.8 19.8
"02.01.2014" 2 20 90 18.0 108.0
"03.01.2014" 2 20 70 14.0 84.0
"03.01.2014" 1 10 30 3.0 33.0
But needs are a bit extended.
Is it possible to do with PostgreSQL that in same process after every date I get vertically SUM of data inside that date and after all, at the end, SUM of data from all dates so result will look like this:
"01.01.2014" 2 20 70 14.0 84.0
"01.01.2014" 1 10 27 2.7 29.7
97 16.7 113.7
"02.01.2014" 1 10 18 1.8 19.8
"02.01.2014" 2 20 90 18.0 108.0
108 19.8 127.8
"03.01.2014" 2 20 70 14.0 84.0
"03.01.2014" 1 10 30 3.0 33.0
100 17.0 117.0
305 53.5 358.5
If this is possible such (or similar), how that query should look like with showed data?
The simplest way I can think of is to use UNION ALL to get all the desired output at once.
If you leave out the fact that the dates are shown (needed for the order by clause) this query gives the requested output in the simplest way.
SELECT mydate, myclass, mypercent,
SUM(valpercent) AS sumvalpercent,
SUM(valclass) AS sumvalclass,
SUM(valpercent+valclass) AS sum_row
FROM temp11
GROUP BY mydate, myclass, mypercent
UNION ALL
SELECT mydate || ' total', null, null,
SUM(valpercent) AS sumvalpercent,
SUM(valclass) AS sumvalclass,
SUM(valpercent+valclass) AS sum_row
FROM temp11
GROUP BY mydate
UNION ALL
SELECT 'Total', null, null,
SUM(valpercent) AS sumvalpercent,
SUM(valclass) AS sumvalclass,
SUM(valpercent+valclass) AS sum_row
FROM temp11
ORDER BY mydate;
Here's a fiddle
Perhaps it can be rewritten more elegantly using WITH
EDIT:
This will be more efficient because it only traverses through temp11 table just once. Then it only uses the temporary table temp100 which has much fewer rows for the additional totals (no more than one row per day). The UNIONs still remain and the logic is still the same.
WITH temp100 (mydate,myclass,mypercent, sumvalpercent,sumvalclass,sum_row) as (
SELECT mydate, myclass, mypercent,
SUM(valpercent) AS sumvalpercent,
SUM(valclass) AS sumvalclass,
SUM(valpercent+valclass) AS sum_row
FROM temp11
GROUP BY mydate, myclass, mypercent
)
SELECT mydate,myclass,mypercent, sumvalpercent,sumvalclass,sum_row
FROM temp100
UNION ALL
SELECT mydate || ' total' as mydate, null, null, SUM(sumvalpercent), SUM(sumvalclass), SUM(sum_row)
FROM temp100
GROUP BY mydate
UNION ALL
SELECT 'Total' as mydate, null, null, SUM(sumvalpercent), SUM(sumvalclass), SUM(sum_row)
FROM temp100
ORDER BY mydate;
This is the fiddle

T-SQL pulling multiple columns of data from a single column field

I am trying to pull 3 columns of data from one field. basically i have a field with for arguments sake a table with the following data:
Color,
Model,
Year of a car.
It is itemized as ID4 is Color, ID5 is Model and ID6 is Year. I can pull one data set with no problem using a filter, ex. Filter = 4, 5 or 6. But I cannot pull multiples as I just get the headers and no data at all.
Assuming you are using SQL Server 2005+, and your question really is "how do you break one column in a table into multiple named columns based on another field in the same table", here is a simple example patterned after your question.
Give this dataset:
declare #tbl table (id int, tag char(3), data varchar(255))
insert into #tbl values
(1, 'ID4', 'Red'), (1, 'ID5', 'Toyota'), (1, 'ID6', '1999'),
(2, 'ID4', 'Blue'), (2, 'ID5', 'Honda'), (2, 'ID6', '2000'),
(3, 'ID4', 'Green'), (3, 'ID5', 'Nissan'), (3, 'ID6', '2004'),
(4, 'ID4', 'Red'), (4, 'ID5', 'Nissan'), (4, 'ID6', '1990'),
(5, 'ID4', 'Black'), (5, 'ID5', 'Toyota'), (5, 'ID6', '2002')
A simple select statement returns this data:
select * from #tbl
id tag data
1 ID4 Red
1 ID5 Toyota
1 ID6 1999
2 ID4 Blue
2 ID5 Honda
2 ID6 2000
3 ID4 Green
3 ID5 Nissan
3 ID6 2004
4 ID4 Red
4 ID5 Nissan
4 ID6 1990
5 ID4 Black
5 ID5 Toyota
5 ID6 2002
This pivot query returns the data -- one row per car -- with Color, Model and Year as their own columns:
select id, [ID4] as 'Color', [ID5] as 'Model', [ID6] as 'Year'
from (select id, tag, data from #tbl) as p
pivot (max(data) for tag in ([ID4], [ID5], [ID6])) as pvt
order by pvt.id
This is how the output looks:
id Color Model Year
1 Red Toyota 1999
2 Blue Honda 2000
3 Green Nissan 2004
4 Red Nissan 1990
5 Black Toyota 2002