TSQL Pivot -- So Simple Yet Kicking My Butt - tsql

I cannot for the life of me figure out why my pivot is not grouping the Elementary data onto one row...what am I doing wrong? I have tried rewriting the query by not grouping the source data, grouping all the source data...is it because the pivot value is a string(varchar) value?
This is so embarrassing...
Create Table #temp
(
endYear int,
schoolLevelDescription varchar(25),
staffCt int,
staffADA decimal(5,1),
trend varchar(10),
srt int
)
Insert Into #temp Select 2018, 'Elementary', 2729, 93.2, 'red', 1
Insert Into #temp Select 2018, 'Combo', 169, 91.3, 'green', 4
Insert Into #temp Select 2018, 'High', 1467, 94.6, 'red', 3
Insert Into #temp Select 2019, 'Elementary', 61, 94.8, null, 1
Insert Into #temp Select 2018, 'Middle', 1027, 94.5, 'red', 2
Select schoolLevelDescription,
[2018] y1,
IsNull([2019], '-') y2,
IsNull([2020], '-') y3,
IsNull([2021], '-') y4,
trend
From (
Select endYear,
schoolLevelDescription,
max(cast(staffCt as varchar(5)) + ' ('
+ cast(staffADA as varchar(5)) + '%)') val,
trend,
srt
From #temp
Where endYear >= 2018
Group by endYear, schoolLevelDescription, trend, srt
) src
Pivot (
max(val)
For endYear in ([2018], [2019], [2020], [2021])
) pvt
Order by srt
Drop Table #temp

Related

pivot or reshapre sql [duplicate]

I've been tasked with coming up with a means of translating the following data:
date category amount
1/1/2012 ABC 1000.00
2/1/2012 DEF 500.00
2/1/2012 GHI 800.00
2/10/2012 DEF 700.00
3/1/2012 ABC 1100.00
into the following:
date ABC DEF GHI
1/1/2012 1000.00
2/1/2012 500.00
2/1/2012 800.00
2/10/2012 700.00
3/1/2012 1100.00
The blank spots can be NULLs or blanks, either is fine, and the categories would need to be dynamic. Another possible caveat to this is that we'll be running the query in a limited capacity, which means temp tables are out. I've tried to research and have landed on PIVOT but as I've never used that before I really don't understand it, despite my best efforts to figure it out. Can anyone point me in the right direction?
Dynamic SQL PIVOT:
create table temp
(
date datetime,
category varchar(3),
amount money
)
insert into temp values ('1/1/2012', 'ABC', 1000.00)
insert into temp values ('2/1/2012', 'DEF', 500.00)
insert into temp values ('2/1/2012', 'GHI', 800.00)
insert into temp values ('2/10/2012', 'DEF', 700.00)
insert into temp values ('3/1/2012', 'ABC', 1100.00)
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX);
SET #cols = STUFF((SELECT distinct ',' + QUOTENAME(c.category)
FROM temp c
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT date, ' + #cols + ' from
(
select date
, amount
, category
from temp
) x
pivot
(
max(amount)
for category in (' + #cols + ')
) p '
execute(#query)
drop table temp
Results:
Date ABC DEF GHI
2012-01-01 00:00:00.000 1000.00 NULL NULL
2012-02-01 00:00:00.000 NULL 500.00 800.00
2012-02-10 00:00:00.000 NULL 700.00 NULL
2012-03-01 00:00:00.000 1100.00 NULL NULL
Dynamic SQL PIVOT
Different approach for creating columns string
create table #temp
(
date datetime,
category varchar(3),
amount money
)
insert into #temp values ('1/1/2012', 'ABC', 1000.00)
insert into #temp values ('2/1/2012', 'DEF', 500.00)
insert into #temp values ('2/1/2012', 'GHI', 800.00)
insert into #temp values ('2/10/2012', 'DEF', 700.00)
insert into #temp values ('3/1/2012', 'ABC', 1100.00)
DECLARE #cols AS NVARCHAR(MAX)='';
DECLARE #query AS NVARCHAR(MAX)='';
SELECT #cols = #cols + QUOTENAME(category) + ',' FROM (select distinct category from #temp ) as tmp
select #cols = substring(#cols, 0, len(#cols)) --trim "," at end
set #query =
'SELECT * from
(
select date, amount, category from #temp
) src
pivot
(
max(amount) for category in (' + #cols + ')
) piv'
execute(#query)
drop table #temp
Result
date ABC DEF GHI
2012-01-01 00:00:00.000 1000.00 NULL NULL
2012-02-01 00:00:00.000 NULL 500.00 800.00
2012-02-10 00:00:00.000 NULL 700.00 NULL
2012-03-01 00:00:00.000 1100.00 NULL NULL
I know this question is older but I was looking thru the answers and thought that I might be able to expand on the "dynamic" portion of the problem and possibly help someone out.
First and foremost I built this solution to solve a problem a couple of coworkers were having with inconstant and large data sets needing to be pivoted quickly.
This solution requires the creation of a stored procedure so if that is out of the question for your needs please stop reading now.
This procedure is going to take in the key variables of a pivot statement to dynamically create pivot statements for varying tables, column names and aggregates. The Static column is used as the group by / identity column for the pivot(this can be stripped out of the code if not necessary but is pretty common in pivot statements and was necessary to solve the original issue), the pivot column is where the end resultant column names will be generated from, and the value column is what the aggregate will be applied to. The Table parameter is the name of the table including the schema (schema.tablename) this portion of the code could use some love because it is not as clean as I would like it to be. It worked for me because my usage was not publicly facing and sql injection was not a concern. The Aggregate parameter will accept any standard sql aggregate 'AVG', 'SUM', 'MAX' etc. The code also defaults to MAX as an aggregate this is not necessary but the audience this was originally built for did not understand pivots and were typically using max as an aggregate.
Lets start with the code to create the stored procedure. This code should work in all versions of SSMS 2005 and above but I have not tested it in 2005 or 2016 but I can not see why it would not work.
create PROCEDURE [dbo].[USP_DYNAMIC_PIVOT]
(
#STATIC_COLUMN VARCHAR(255),
#PIVOT_COLUMN VARCHAR(255),
#VALUE_COLUMN VARCHAR(255),
#TABLE VARCHAR(255),
#AGGREGATE VARCHAR(20) = null
)
AS
BEGIN
SET NOCOUNT ON;
declare #AVAIABLE_TO_PIVOT NVARCHAR(MAX),
#SQLSTRING NVARCHAR(MAX),
#PIVOT_SQL_STRING NVARCHAR(MAX),
#TEMPVARCOLUMNS NVARCHAR(MAX),
#TABLESQL NVARCHAR(MAX)
if isnull(#AGGREGATE,'') = ''
begin
SET #AGGREGATE = 'MAX'
end
SET #PIVOT_SQL_STRING = 'SELECT top 1 STUFF((SELECT distinct '', '' + CAST(''[''+CONVERT(VARCHAR,'+ #PIVOT_COLUMN+')+'']'' AS VARCHAR(50)) [text()]
FROM '+#TABLE+'
WHERE ISNULL('+#PIVOT_COLUMN+','''') <> ''''
FOR XML PATH(''''), TYPE)
.value(''.'',''NVARCHAR(MAX)''),1,2,'' '') as PIVOT_VALUES
from '+#TABLE+' ma
ORDER BY ' + #PIVOT_COLUMN + ''
declare #TAB AS TABLE(COL NVARCHAR(MAX) )
INSERT INTO #TAB EXEC SP_EXECUTESQL #PIVOT_SQL_STRING, #AVAIABLE_TO_PIVOT
SET #AVAIABLE_TO_PIVOT = (SELECT * FROM #TAB)
SET #TEMPVARCOLUMNS = (SELECT replace(#AVAIABLE_TO_PIVOT,',',' nvarchar(255) null,') + ' nvarchar(255) null')
SET #SQLSTRING = 'DECLARE #RETURN_TABLE TABLE ('+#STATIC_COLUMN+' NVARCHAR(255) NULL,'+#TEMPVARCOLUMNS+')
INSERT INTO #RETURN_TABLE('+#STATIC_COLUMN+','+#AVAIABLE_TO_PIVOT+')
select * from (
SELECT ' + #STATIC_COLUMN + ' , ' + #PIVOT_COLUMN + ', ' + #VALUE_COLUMN + ' FROM '+#TABLE+' ) a
PIVOT
(
'+#AGGREGATE+'('+#VALUE_COLUMN+')
FOR '+#PIVOT_COLUMN+' IN ('+#AVAIABLE_TO_PIVOT+')
) piv
SELECT * FROM #RETURN_TABLE'
EXEC SP_EXECUTESQL #SQLSTRING
END
Next we will get our data ready for the example. I have taken the data example from the accepted answer with the addition of a couple of data elements to use in this proof of concept to show the varied outputs of the aggregate change.
create table temp
(
date datetime,
category varchar(3),
amount money
)
insert into temp values ('1/1/2012', 'ABC', 1000.00)
insert into temp values ('1/1/2012', 'ABC', 2000.00) -- added
insert into temp values ('2/1/2012', 'DEF', 500.00)
insert into temp values ('2/1/2012', 'DEF', 1500.00) -- added
insert into temp values ('2/1/2012', 'GHI', 800.00)
insert into temp values ('2/10/2012', 'DEF', 700.00)
insert into temp values ('2/10/2012', 'DEF', 800.00) -- addded
insert into temp values ('3/1/2012', 'ABC', 1100.00)
The following examples show the varied execution statements showing the varied aggregates as a simple example. I did not opt to change the static, pivot, and value columns to keep the example simple. You should be able to just copy and paste the code to start messing with it yourself
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','sum'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','max'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','avg'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','min'
This execution returns the following data sets respectively.
Updated version for SQL Server 2017 using STRING_AGG function to construct the pivot column list:
create table temp
(
date datetime,
category varchar(3),
amount money
);
insert into temp values ('20120101', 'ABC', 1000.00);
insert into temp values ('20120201', 'DEF', 500.00);
insert into temp values ('20120201', 'GHI', 800.00);
insert into temp values ('20120210', 'DEF', 700.00);
insert into temp values ('20120301', 'ABC', 1100.00);
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX);
SET #cols = (SELECT STRING_AGG(category,',') FROM (SELECT DISTINCT category FROM temp WHERE category IS NOT NULL)t);
set #query = 'SELECT date, ' + #cols + ' from
(
select date
, amount
, category
from temp
) x
pivot
(
max(amount)
for category in (' + #cols + ')
) p ';
execute(#query);
drop table temp;
There's my solution cleaning up the unnecesary null values
DECLARE #cols AS NVARCHAR(MAX),
#maxcols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX)
select #cols = STUFF((SELECT ',' + QUOTENAME(CodigoFormaPago)
from PO_FormasPago
order by CodigoFormaPago
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
select #maxcols = STUFF((SELECT ',MAX(' + QUOTENAME(CodigoFormaPago) + ') as ' + QUOTENAME(CodigoFormaPago)
from PO_FormasPago
order by CodigoFormaPago
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT CodigoProducto, DenominacionProducto, ' + #maxcols + '
FROM
(
SELECT
CodigoProducto, DenominacionProducto,
' + #cols + ' from
(
SELECT
p.CodigoProducto as CodigoProducto,
p.DenominacionProducto as DenominacionProducto,
fpp.CantidadCuotas as CantidadCuotas,
fpp.IdFormaPago as IdFormaPago,
fp.CodigoFormaPago as CodigoFormaPago
FROM
PR_Producto p
LEFT JOIN PR_FormasPagoProducto fpp
ON fpp.IdProducto = p.IdProducto
LEFT JOIN PO_FormasPago fp
ON fpp.IdFormaPago = fp.IdFormaPago
) xp
pivot
(
MAX(CantidadCuotas)
for CodigoFormaPago in (' + #cols + ')
) p
) xx
GROUP BY CodigoProducto, DenominacionProducto'
t #query;
execute(#query);
The below code provides the results which replaces NULL to zero in the output.
Table creation and data insertion:
create table test_table
(
date nvarchar(10),
category char(3),
amount money
)
insert into test_table values ('1/1/2012','ABC',1000.00)
insert into test_table values ('2/1/2012','DEF',500.00)
insert into test_table values ('2/1/2012','GHI',800.00)
insert into test_table values ('2/10/2012','DEF',700.00)
insert into test_table values ('3/1/2012','ABC',1100.00)
Query to generate the exact results which also replaces NULL with zeros:
DECLARE #DynamicPivotQuery AS NVARCHAR(MAX),
#PivotColumnNames AS NVARCHAR(MAX),
#PivotSelectColumnNames AS NVARCHAR(MAX)
--Get distinct values of the PIVOT Column
SELECT #PivotColumnNames= ISNULL(#PivotColumnNames + ',','')
+ QUOTENAME(category)
FROM (SELECT DISTINCT category FROM test_table) AS cat
--Get distinct values of the PIVOT Column with isnull
SELECT #PivotSelectColumnNames
= ISNULL(#PivotSelectColumnNames + ',','')
+ 'ISNULL(' + QUOTENAME(category) + ', 0) AS '
+ QUOTENAME(category)
FROM (SELECT DISTINCT category FROM test_table) AS cat
--Prepare the PIVOT query using the dynamic
SET #DynamicPivotQuery =
N'SELECT date, ' + #PivotSelectColumnNames + '
FROM test_table
pivot(sum(amount) for category in (' + #PivotColumnNames + ')) as pvt';
--Execute the Dynamic Pivot Query
EXEC sp_executesql #DynamicPivotQuery
OUTPUT :
A version of Taryn's answer with performance improvements:
Data
CREATE TABLE dbo.Temp
(
[date] datetime NOT NULL,
category nchar(3) NOT NULL,
amount money NOT NULL,
INDEX [CX dbo.Temp date] CLUSTERED ([date]),
INDEX [IX dbo.Temp category] NONCLUSTERED (category)
);
INSERT dbo.Temp
([date], category, amount)
VALUES
({D '2012-01-01'}, N'ABC', $1000.00),
({D '2012-01-02'}, N'DEF', $500.00),
({D '2012-01-02'}, N'GHI', $800.00),
({D '2012-02-10'}, N'DEF', $700.00),
({D '2012-03-01'}, N'ABC', $1100.00);
Dynamic pivot
DECLARE
#Delimiter nvarchar(4000) = N',',
#DelimiterLength bigint,
#Columns nvarchar(max),
#Query nvarchar(max);
SET #DelimiterLength = LEN(REPLACE(#Delimiter, SPACE(1), N'#'));
-- Before SQL Server 2017
SET #Columns =
STUFF
(
(
SELECT
[text()] = #Delimiter,
[text()] = QUOTENAME(T.category)
FROM dbo.Temp AS T
WHERE T.category IS NOT NULL
GROUP BY T.category
ORDER BY T.category
FOR XML PATH (''), TYPE
)
.value(N'text()[1]', N'nvarchar(max)'),
1, #DelimiterLength, SPACE(0)
);
-- Alternative for SQL Server 2017+ and database compatibility level 110+
SELECT #Columns =
STRING_AGG(CONVERT(nvarchar(max), QUOTENAME(T.category)), N',')
WITHIN GROUP (ORDER BY T.category)
FROM
(
SELECT T2.category
FROM dbo.Temp AS T2
WHERE T2.category IS NOT NULL
GROUP BY T2.category
) AS T;
IF #Columns IS NOT NULL
BEGIN
SET #Query =
N'SELECT [date], ' +
#Columns +
N'
FROM
(
SELECT [date], amount, category
FROM dbo.Temp
) AS S
PIVOT
(
MAX(amount)
FOR category IN (' +
#Columns +
N')
) AS P;';
EXECUTE sys.sp_executesql #Query;
END;
Execution plans
Results
date
ABC
DEF
GHI
2012-01-01 00:00:00.000
1000.00
NULL
NULL
2012-01-02 00:00:00.000
NULL
500.00
800.00
2012-02-10 00:00:00.000
NULL
700.00
NULL
2012-03-01 00:00:00.000
1100.00
NULL
NULL
CREATE TABLE #PivotExample(
[ID] [nvarchar](50) NULL,
[Description] [nvarchar](50) NULL,
[ClientId] [smallint] NOT NULL,
)
GO
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc1',1008)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc2',2000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc3',3000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc4',4000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc1',5000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc2',6000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc3', 7000)
SELECT * FROM #PivotExample
--Declare necessary variables
DECLARE #SQLQuery AS NVARCHAR(MAX)
DECLARE #PivotColumns AS NVARCHAR(MAX)
--Get unique values of pivot column
SELECT #PivotColumns= COALESCE(#PivotColumns + ',','') + QUOTENAME([Description])
FROM (SELECT DISTINCT [Description] FROM [dbo].#PivotExample) AS PivotExample
--SELECT #PivotColumns
--Create the dynamic query with all the values for
--pivot column at runtime
SET #SQLQuery =
N' -- Your pivoted result comes here
SELECT ID, ' + #PivotColumns + '
FROM
(
-- Source table should in a inner query
SELECT ID,[Description],[ClientId]
FROM #PivotExample
)AS P
PIVOT
(
-- Select the values from derived table P
SUM(ClientId)
FOR [Description] IN (' + #PivotColumns + ')
)AS PVTTable'
--SELECT #SQLQuery
--Execute dynamic query
EXEC sp_executesql #SQLQuery
Drop table #PivotExample
Fully generic way that will work in non-traditional MS SQL environments (e.g. Azure Synapse Analytics Serverless SQL Pools) - it's in a SPROC but no need to use as such...
-- DROP PROCEDURE IF EXISTS
if object_id('dbo.usp_generic_pivot') is not null
DROP PROCEDURE dbo.usp_generic_pivot
GO;
CREATE PROCEDURE dbo.usp_generic_pivot (
#source NVARCHAR (100), -- table or view object name
#pivotCol NVARCHAR (100), -- the column to pivot
#pivotAggCol NVARCHAR (100), -- the column with the values for the pivot
#pivotAggFunc NVARCHAR (20), -- the aggregate function to apply to those values
#leadCols NVARCHAR (100) -- comma seprated list of other columns to keep and order by
)
AS
BEGIN
DECLARE #pivotedColumns NVARCHAR(MAX)
DECLARE #tsql NVARCHAR(MAX)
SET #tsql = CONCAT('SELECT #pivotedColumns = STRING_AGG(qname, '','') FROM (SELECT DISTINCT QUOTENAME(', #pivotCol,') AS qname FROM ',#source, ') AS qnames')
EXEC sp_executesql #tsql, N'#pivotedColumns nvarchar(max) out', #pivotedColumns out
SET #tsql = CONCAT ( 'SELECT ', #leadCols, ',', #pivotedColumns,' FROM ',' ( SELECT ',#leadCols,',',
#pivotAggCol,',', #pivotCol, ' FROM ', #source, ') as t ',
' PIVOT (', #pivotAggFunc, '(', #pivotAggCol, ')',' FOR ', #pivotCol,
' IN (', #pivotedColumns,')) as pvt ',' ORDER BY ', #leadCols)
EXEC (#tsql)
END
GO;
-- TEST EXAMPLE
EXEC dbo.usp_generic_pivot
#source = '[your_db].[dbo].[form_answers]',
#pivotCol = 'question',
#pivotAggCol = 'answer',
#pivotAggFunc = 'MAX',
#leadCols = 'candidate_id, candidate_name'
GO;

Order rows in a sequence and fill gaps for missing rows

I got a problem regarding missing rows in a table that is giving me a headache.
As base data, I have the following table:
declare #table table
(
id1 int,
id2 int,
ch char(1) not null,
val int
)
insert into #table values (1112, 121, 'A', 12)
insert into #table values (1351, 121, 'A', 13)
insert into #table values (1411, 121, 'B', 81)
insert into #table values (1312, 7, 'C', 107)
insert into #table values (1401, 2, 'A', 107)
insert into #table values (1454, 2, 'D', 107)
insert into #table values (1257, 6, 'A', 1)
insert into #table values (1269, 6, 'B', 12)
insert into #table values (1335, 6, 'C', 12)
insert into #table values (1341, 6, 'D', 5)
insert into #table values (1380, 6, 'A', 3)
The output should be ordered by id2 and follow a fixed sequence of ch, which should repeat until next id2 begins.
Sequence:
'A'
'B'
'C'
'D'
If the sequence or the pattern is interrupted, it should fill the missing rows with null, so that i get this result table:
id1 id2 ch val
----------------------------
1112 121 'A' 12
NULL 121 'B' NULL
NULL 121 'C' NULL
NULL 121 'D' NULL
1351 121 'A' 13
1411 121 'B' 81
NULL 121 'C' NULL
NULL 121 'D' NULL
NULL 7 'A' NULL
NULL 7 'B' NULL
1312 7 'C' 107
NULL 7 'D' NULL
1401 2 'A' 107
NULL 2 'B' NULL
NULL 2 'C' NULL
1454 2 'D' 107
and so on...
What I'm looking for is a way to do this without iterations.
I hope someone can help!
Thanks in advance!
A solution might be this:
declare #table table ( id1 int, id2 int, ch char(1) not null, val int )
insert into #table values (1112, 121, 'A', 12)
,(1351, 121, 'A', 13),(1411, 121, 'B', 81),(1312, 7, 'C', 107),(1401, 2, 'A', 107)
,(1454, 2, 'D', 107),(1257, 6, 'A', 1),(1269, 6, 'B', 12),(1335, 6, 'C', 12)
,(1341, 6, 'D', 5),(1380, 6, 'A', 3)
;with foo as
(select
*
,row_number() over (partition by id2 order by id1) rwn
,ascii(isnull(lag(ch,1) over (partition by id2 order by id1),'A'))-ascii('A') prev
,count(*) over (partition by id2,ch) nr
,ascii(ch)-ascii('A') cur
from #table
)
,bar as
(
select
*,case when cur<=prev and rwn>1 then 4 else 0 end + cur-prev step
from foo
)
,foobar as
(
select *,sum(step) over (partition by id2 order by id1 rows unbounded preceding) rownum
from bar
)
,iterations as
(
select id2,max(nr) nr from foo
group by id2
)
,blanks as
(
select
id2,ch chnr,char(ch+ascii('A') )ch,ROW_NUMBER() over (partition by id2 order by c.nr,ch)-1 rownum,c.nr
from iterations a
inner join (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) c(nr)
on c.nr<=a.nr
cross join (values (0),(1),(2),(3)) b(ch)
)
select
b.id1,a.id2,a.ch,b.val
from blanks a
left join foobar b
on a.id2=b.id2 and a.rownum=b.rownum
order by a.id2,a.rownum
I first make the query "foo" which looks at the row number and gets the previous value for ch for each id2.
"bar" then finds how many missing values there are between the rows. For instance If the previous was an A and the current is a c then there are 2. If the previous was an A and the current is an A, then there are 4!
"foobar" then adds the steps, thus numbering the original rows, where they should be in the final output.
"iterations" counts the number of times the "ABCD" rows should appear.
"BLANKS" then is all the final rows, that is for each id2, it outputs all the "ABCD" rows that should be in the final output, and numbers them in rownum
Finally I left join "foobar" with "BLANKS" on id2 and rownum. Thus we get the correct number of rows, and the places where there are values in the original is output.
If you can manage to add an extra column in your table, that defines which [id2] are part from the same sequence you can try this:
declare #table table
(
id1 int,
id2 int,
ch char(1) not null,
val int,
category int -- extra column
)
insert into #table values (1112, 121, 'A', 12, 1)
insert into #table values (1351, 121, 'A', 13, 2)
insert into #table values (1411, 121, 'B', 81, 2)
insert into #table values (1312, 7, 'C', 107, 3)
insert into #table values (1401, 2, 'A', 107, 4)
insert into #table values (1454, 2, 'D', 107, 4)
insert into #table values (1257, 6, 'A', 1, 5)
insert into #table values (1269, 6, 'B', 12, 5)
insert into #table values (1335, 6, 'C', 12, 5)
insert into #table values (1341, 6, 'D', 5, 5)
insert into #table values (1380, 6, 'A', 3, 5)
DECLARE #sequence table (seq varchar(1))
INSERT INTO #sequence values ('A'), ('B'), ('C'), ('D')
SELECT b.id1, a.id2, a.seq, b.val, a.category
INTO #T1
FROM (
SELECT *
FROM #table
CROSS JOIN #sequence
) A
LEFT JOIN (
SELECT * FROM #table
) B
ON 1=1
AND a.id1 = b.id1
AND a.id2 = b.id2
AND a.seq = b.ch
AND a.val = b.val
;WITH rem_duplicates AS (
SELECT *, dup = ROW_NUMBER() OVER (PARTITION by id2, seq, category ORDER BY id1 DESC)
FROM #T1
) DELETE FROM rem_duplicates WHERE dup > 1
SELECT * FROM #T1 ORDER BY id2 DESC, category ASC, seq ASC
DROP TABLE #T1
I'm little confused by your output, try this:
Update
DECLARE #table TABLE
(
row INT IDENTITY(1, 1) ,
id1 INT ,
id2 INT ,
ch CHAR(1) NOT NULL ,
val INT
);
DECLARE #Sequence TABLE ( ch3 CHAR(1) NOT NULL );
INSERT INTO #Sequence
VALUES ( 'A' );
INSERT INTO #Sequence
VALUES ( 'B' );
INSERT INTO #Sequence
VALUES ( 'C' );
INSERT INTO #Sequence
VALUES ( 'D' );
INSERT INTO #table
VALUES ( 1112, 121, 'A', 12 );
INSERT INTO #table
VALUES ( 1351, 121, 'A', 13 );
INSERT INTO #table
VALUES ( 1411, 121, 'B', 81 );
INSERT INTO #table
VALUES ( 1312, 7, 'C', 107 );
INSERT INTO #table
VALUES ( 1401, 2, 'A', 107 );
INSERT INTO #table
VALUES ( 1454, 2, 'D', 107 );
INSERT INTO #table
VALUES ( 1257, 6, 'A', 1 );
INSERT INTO #table
VALUES ( 1269, 6, 'B', 12 );
INSERT INTO #table
VALUES ( 1335, 6, 'C', 12 );
INSERT INTO #table
VALUES ( 1341, 6, 'D', 5 );
INSERT INTO #table
VALUES ( 1380, 6, 'A', 3 );
SELECT r.id1 ,
fin.id2 ,
ch3 ,
r.val
FROM ( SELECT *
FROM ( SELECT CASE WHEN r.chd - l.chd = 1 THEN 0
ELSE 1
END [gap in sq] ,
l.*
FROM ( SELECT id2 ,
ASCII(ch) chd ,
ch ,
val ,
id1 ,
row
FROM #table
) AS l
LEFT JOIN ( SELECT id2 ,
ASCII(ch) chd ,
row
FROM #table
) AS r ON l.row = r.row - 1
) AS temp ,
#Sequence s
WHERE temp.[gap in sq] = 1
OR ( temp.[gap in sq] = 0
AND s.ch3 = temp.ch
)
) AS fin
LEFT JOIN #table r ON r.id2 = fin.id2
AND r.id1 = fin.id1
AND r.ch = fin.ch3

Select Max Date in either case statement or where clause

I have 2 columns 1) id (int) and 2) Date. I want to select id on max date. Grouping results in returning both id's/more than one id. Instead i only want to retrieve id on max date .
I am sure there is easier way to do this however below should work fine.
-- create sample data
create table #temp(ID int, courseID int, end_date datetime)
go
insert into #temp
select 1 , 11 , getdate()
union
select 1, 12, getdate()-20
union
select 1, 13, getdate()-40
union
select 2, 13, getdate()-70
union
select 2, 14, getdate()-80
-- create temp table to calculate correct date
select id, max(end_date) as correctDate
into #temp2
from #temp
group by id
-- final desired outup
select #temp2.id , #temp.courseID
from #temp2
inner join #temp
on #temp2.id = #temp.id
and #temp2.correctDate = #temp.end_date
-- drop temp tables
drop table #temp
drop table #temp2
give me a shout if you have any questions
Simpler alternative
-- create sample data
create table #temp(id int, courseID int, end_date datetime)
go
insert into #temp
select
1 , 11 , getdate()
union
select
1, 12, getdate()-20
union
select
1, 13, getdate()-40
union
select
2, 13, getdate()-70
union
select
2, 14, getdate()-80
SELECT * FROM(
SELECT DENSE_RANK() OVER(PARTITION BY id ORDER BY end_date DESC ) sira, id,courseID,end_date FROM #temp
) t WHERE sira = 1
-- drop temp tables
drop table #temp
drop table #temp2

TSQL Pivoting Issue - looking for better approach

This is a T-SQL related question. I am using SQL Server 2012.
I have a table like this:
I would like to have output like this:
Explanation:
For each employee, there will be a row. An employee has one or more assignments. Batch Id specifies this. Based on the batch Id, the column names will change (e.g. Country 1, Country 2 etc.).
Approach so far:
Un-pivot the source table like the following:
select
EmpId, 'Country ' + cast(BatchId as varchar) as [ColumnName],
Country as [ColumnValue]
from
SourceTable
UNION
select
EmpId, 'Pass ' + cast(BatchId as varchar) as [ColumnName],
Pass as [ColumnValue]
from
SourceTable
which gives each column's values as rows. Then, this result can be pivoted to get the desired output.
Questions:
Is there a better way of doing this?
At the moment, I know there will be fixed amount of batches, but, for future, if I like to make the pivoting part dynamic, what is the best approach?
Using tools like SSIS or SSRS, is it easier to handle the pivot dynamically?
Screw doing it in SQL.
Let SSRS do the work for you with a MATRIX. It will PIVOT for you without having to create dynamic SQL to handle the terrible limitation of needing to know all the columns.
For your data, you would have EMP ID as the ROW Group and PASS as your column grouping.
https://msdn.microsoft.com/en-us/library/dd207149.aspx
There are many possible solutions to achieve what you want (search for Dynamic Pivot on multiple columns)
SqlFiddleDemo
Warning: I assume that columns Country and Pass are NOT NULL
CREATE TABLE SourceTable(EmpId INT, BatchId INT,
Country NVARCHAR(100) NOT NULL, Pass NVARCHAR(5) NOT NULL);
INSERT INTO SourceTable(EmpId, BatchId, Country, Pass)
VALUES
(100, 1, 'UK', 'M'), (200, 2, 'USA', 'U'),
(100, 2, 'Romania', 'M'), (100, 3, 'India', 'MA'),
(100, 4, 'Hongkong', 'MA'), (300, 1, 'Belgium', 'U'),
(300, 2, 'Poland', 'U'), (200, 1, 'Australia', 'M');
/* Get Number of Columns Groups Country1..Country<MaxCount> */
DECLARE #max_count INT
,#sql NVARCHAR(MAX) = ''
,#columns NVARCHAR(MAX) = ''
,#i INT = 0
,#i_s NVARCHAR(10);
WITH cte AS
(
SELECT EmpId
,[cnt] = COUNT(*)
FROM SourceTable
GROUP BY EmpId
)
SELECT #max_count = MAX(cnt)
FROM cte;
WHILE #i < #max_count
BEGIN
SET #i += 1;
SET #i_s = CAST(#i AS NVARCHAR(10));
SET #columns += N',MAX(CASE WHEN [row_no] = ' + #i_s + ' THEN Country END) AS Country' + #i_s +
',MAX(CASE WHEN [row_no] = ' + #i_s + ' THEN Pass END) AS Pass' + #i_s;
END
SELECT #sql =
N';WITH cte AS (
SELECT EmpId, Country, Pass, [row_no] = ROW_NUMBER() OVER (PARTITION BY EmpId ORDER BY BatchId)
FROM SourceTable)
SELECT EmpId ' + #columns + N'
FROM cte
GROUP BY EmpId';
/* Debug */
/* SELECT #sql */
EXEC(#sql);
Or:
SQLFiddleDemo2
DECLARE #cols NVARCHAR(MAX),
#sql NVARCHAR(MAX) = '';
;WITH cte(col_name, rn) AS(
SELECT DISTINCT col_name = col_name + CAST(BatchId AS VARCHAR(10)),
rn = ROW_NUMBER() OVER(PARTITION BY EmpId ORDER BY BatchId)
FROM SourceTable
CROSS APPLY (VALUES ('Country', Country), ('Pass', Pass)) AS c(col_name, val)
)
SELECT #cols = STUFF((SELECT ',' + QUOTENAME(col_name)
FROM cte
ORDER BY rn /* If column order is important for you */
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
, 1, 1, '');
SET #sql =
N';WITH cte AS
(
SELECT EmpId, col_name = col_name + CAST(BatchId AS VARCHAR(10)), val
FROM SourceTable
CROSS APPLY (VALUES (''Country'', Country), (''Pass'', Pass)) AS c(col_name, val)
)
SELECT *
FROM cte
PIVOT
(
MAX(val)
FOR col_name IN (' + #cols + ')
) piv';
EXEC(#sql);

T-SQL Average of Table Subset

I have a table (in SQL Server 2005) of daily weather data for a single location which includes these columns:
LogDate DATETIME
HighTemp INT
Temp6MonthHighAverage INT
LogDate and HighTemp have data. HighTemp6MonthAverage will be populated with, as the name suggests, the average high temperature for the 6 months ending in LogDate.
There are similar requirements for LowTemp, as well as humidity and several other items, for data spanning decades.
I find myself thinking in circles. Can I derive this average for each row in an UPDATE statement using set operations, or do I need to implement a solution with cursors? I will appreciate any suggestions.
-- select
select HighTemp, LogDate,(select AVG(HighTemp)
from tbl where
DATEDIFF(MONTH, LogDate, t1.LogDate) between 0 and 6)
from tbl t1
-- update
update t1 set Temp6MonthHighAverage = (select AVG(HighTemp)
from tbl where
DATEDIFF(MONTH, LogDate, t1.LogDate) between 0 and 6)
from tbl t1
You can certainly do this with a simple UPDATE:
UPDATE table SET Temp6MonthHighAverage =
(SELECT AVG(HighTemp) FROM table t2 WHERE
t2.LogDate <= table.LogDate
AND t2.LogDate > DATEADD(m, -6, table.LogDate)
)
To avoid re-calculating constantly (since the past will not change), just add a WHERE Temp6MonthHighAverage IS NULL at the end and the same UPDATE can be run as needed to fill in the gaps as new dates are added.
Have a look at something like this
DECLARE #Table TABLE(
LogDate DATETIME,
HighTemp INT,
Temp6MonthHighAverage INT
)
INSERT INTO #Table SELECT '01 Jan 2000', 15, NULL
INSERT INTO #Table SELECT '01 May 2000', 14, NULL
INSERT INTO #Table SELECT '01 Jun 2000', 13, NULL
INSERT INTO #Table SELECT '01 Jul 2000', 12, NULL
INSERT INTO #Table SELECT '01 Dec 2000', 17, NULL
SELECT *
FROM #Table
;WITH DistinctDates AS (
SELECT DATEADD(month,-6,LogDate) StartDate,
LogDate EndDate,
HighTemp
FROM #Table
)
, Aggregates AS (
SELECT dd.EndDate LogDate,
dd.HighTemp,
MAX(t.HighTemp) Temp6MonthHighAverage
FROM DistinctDates dd LEFT JOIN
#Table t ON t.LogDate BETWEEN dd.StartDate AND dd.EndDate
GROUP BY dd.EndDate,
dd.HighTemp
)
UPDATE #Table
SET Temp6MonthHighAverage = a.Temp6MonthHighAverage
FROM #Table t INNER JOIN
Aggregates a ON t.LogDate = a.LogDate
SELECT *
FROM #Table