Postgres Jsonb aggregation - postgresql

I am trying to achieve following (Result Required) output from POSTGRES jsonb columns, but not getting desired result using "jsonb_agg" function.
I went through this postgres document https://www.postgresql.org/docs/12/functions-json.html, but no luck here.
Also am not that good in json data in postgres, so please suggest good resource for json formatting related stuff for postgres.
City
JColA
JColB
NY
[{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":20.12,"full_name":null},{"id":"ID2","name":"ID2_NAME","type":"ID2_TYPE","amount":11.55,"full_name":null},{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":5.45,"full_name":null}]
[{"key":"key1","value":"1"},{"key":"key2","value":"2"},{"key":"key3","value":"3"}]
DC
[{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":1.5,"full_name":null},{"id":"ID3","name":"ID3_NAME","type":"ID3_TYPE","amount":1.2,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":1,"full_name":null}]
[{"key":"key1","value":"1"},{"key":"key1","value":"2"},{"key":"key1","value":"3"}]
DL
[{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":1.5,"full_name":null},{"id":"ID2","name":"ID2_NAME","type":"ID2_TYPE","amount":1.2,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":1,"full_name":null}]
[{"key":"key1","value":"2"},{"key":"key2","value":"2"},{"key":"key3","value":"4"}]
NY
[{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":4.5,"full_name":null},{"id":"ID2","name":"ID2_NAME","type":"ID2_TYPE","amount":2.2,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":6,"full_name":null}]
[{"key":"key4","value":"2"},{"key":"key2","value":"5"},{"key":"key2","value":"4"}]
DC
[{"id":"ID3","name":"ID3_NAME","type":"ID3_TYPE","amount":2.5,"full_name":null},{"id":"ID3","name":"ID3_NAME","type":"ID3_TYPE","amount":2.2,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":2,"full_name":null}]
[{"key":"key1","value":"2"},{"key":"key2","value":"2"},{"key":"key3","value":"4"}]
Required Result
City
AggJSonColA
AggJsonColB
NY
[{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":30.07,"full_name":null},{"id":"ID2","name":"ID2_NAME","type":"ID2_TYPE","amount":13.75,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":6,"full_name":null}]
[{"key":"key1","value":"1"},{"key":"key2","value":"11"},{"key":"key3","value":"3"}, {"key":"key4","value":"2"}]
DC
[{"id":"ID1","name":"ID1_NAME","type":"ID1_TYPE","amount":1.5,"full_name":null},{"id":"ID3","name":"ID3_NAME","type":"ID3_TYPE","amount":5.9,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":3,"full_name":null}]
[{"key":"key1","value":"8"},{"key":"key2","value":"2"},{"key":"key3","value":"4"}]
DL
[{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":1.5,"full_name":null},{"id":"ID2","name":"ID2_NAME","type":"ID2_TYPE","amount":1.2,"full_name":null},{"id":"ID4","name":"ID4_NAME","type":"ID4_TYPE","amount":1,"full_name":null}]
[{"key":"key1","value":"2"},{"key":"key2","value":"2"},{"key":"key3","value":"4"}]

You need to break out the arrays with jsonb_to_recordset, rebuild the objects using jsonb_build_object, and aggregate them back up with jsonb_agg
SELECT
A.City,
A.JColA,
B.JColB
FROM (
SELECT
City,
jsonb_agg(JColA) AS JColA
FROM (
SELECT
t.City,
json_build_object(
'id', id,
'name', name,
'type', type,
'amount', SUM(amount),
'full_name', full_name
) AS JColA
FROM YourTable t,
LATERAL jsonb_to_recordset(t.JColA)
AS arr(id varchar(10), name varchar(100), type varchar(100), amount decimal (18,2), full_name varchar(100))
GROUP BY
t.City, arr.id, arr.name, arr.type, arr.full_name
) A
GROUP BY
City
) A
JOIN (
SELECT
City,
jsonb_agg(JColB) AS JColB
FROM (
SELECT
t.City,
json_build_object(
'key', "key",
'value', SUM(value)
) AS JColB
FROM YourTable t,
LATERAL jsonb_to_recordset(t.JColB)
AS arr("key" varchar(10), value int)
GROUP BY
t.City, arr."key"
) B
GROUP BY
City
) B ON B.City = A.City;
I feel it's easier to requery the original table again, however if you want to avoid that, you could first aggregate all the arrays together by City, break them back out and re-aggregate.
SELECT
t.City,
(
SELECT
jsonb_agg(JColA)
FROM (
SELECT
json_build_object(
'id', id,
'name', name,
'type', type,
'amount', SUM(amount),
'full_name', full_name
) AS JColA
FROM jsonb_array_elements(t.JColA) AS outerArr,
LATERAL jsonb_to_recordset(outerArr)
AS arr(id varchar(10), name varchar(100), type varchar(100), amount decimal (18,2), full_name varchar(100))
GROUP BY
arr.id, arr.name, arr.type, arr.full_name
) A
) AS JColA,
(
SELECT
jsonb_agg(JColB)
FROM (
SELECT
json_build_object(
'key', "key",
'value', SUM(arr.value)
) AS JColB
FROM jsonb_array_elements(t.JColB) AS outerArr,
LATERAL jsonb_to_recordset(outerArr)
AS arr("key" varchar(10), value int)
GROUP BY
arr."key"
) B
) AS JColB
FROM (
SELECT
t.City,
jsonb_agg(JColA) AS JColA,
jsonb_agg(JColB) AS JColB
FROM YourTable t
GROUP BY
t.City
) t;
db<>fiddle

Related

SQL query with SUM, ORDER BY, with limited values

I am having a troubles to get expected result.
I have successfully joined 2 tables (database is SQL Server), however I would like to have something more.
Excel - it's table which contains list of PartNumbers(GBC) with corresponding Quantity of this part needed for build.
I'm joining this Excel with my Inventory database to retrieve information about what I have, what is missing, what I need to purchase.
Current query:
string sqlCheck = #"SELECT e.GBC, e.Replaced, e.Description, Barcode, Location, Bookstand, Quantity, Buildneed, p.Quantity - e.Buildneed as Afterbuild FROM Parts p Right JOIN Excel e ON e.GBC = p.GBC ORDER BY GBC ASC, Quantity DESC";
Results as below image:
It is needed to use ALL duplicated GBC but in specific order.
First I need to take GBC(86911) with Quantity = 100, this should result in Afterbuild = 0.
But in column Buildneed I see that I need 768 in total, so next step would be to take GBC(86911) with quantity = 500, this should result in Afterbuild 0, and in this line I would like to see new column called Totals which will be equal to -168 (which will mean I need to buy 168 pcs of this part).
I can have many same parts with different quantities
I always want to start from lowest quantity on particular duplicated GBC
This should not show me more GBC if Buildneed value will be exceeded
Expected output as on below image:
I have added column 'UseInOrder' - it is not neccessery but would be awesome if its possible also, it will point me that I will need to use all parts from each in that order.
As suggested, below table definitions:
CREATE TABLE [dbo].[Excel] (
[GBC] INT NULL,
[Description] VARCHAR (50) NULL,
[Buildneed] INT NULL,
[Replaced] VARCHAR (50) NULL
);
CREATE TABLE [dbo].[Parts] (
[Barcode] INT IDENTITY (201900001, 1) NOT NULL,
[GBC] INT NULL,
[Description] VARCHAR (50) NULL,
[Location] VARCHAR (50) NULL,
[Bookstand] VARCHAR (50) NULL,
[Value] VARCHAR (50) NULL,
[Quantity] INT NULL,
[MQuantity] INT NULL,
[Manufacturer1] VARCHAR (50) NULL,
[MPN1] VARCHAR (50) NULL,
[Manufacturer2] VARCHAR (50) NULL,
[MPN2] VARCHAR (50) NULL,
[Manufacturer3] VARCHAR (50) NULL,
[MPN3] VARCHAR (50) NULL,
CONSTRAINT [PK_Parts] PRIMARY KEY CLUSTERED ([Barcode] ASC)
);
EDIT Sample data
declare #tblParts table(
GBC int,
Barcode varchar(256),
[Location] varchar(256),
Quantity int
)
declare #tblPartsUsed table(
GBC int,
Replaced varchar(1) default '',
[Description] varchar(50),
Buildneed int
)
insert into #tblParts (GBC,[Description], Barcode, [Location], Quantity)
select 86911, 'CAP_CER,10nF,0603,10%,100V,X7R' ,201901200, 'JD-01/  14' ,500
union all
select 86911, 'CAP_CER,10nF,0603,10%,100V,X7R' ,201901166, 'ESB-03' ,100
union all
select 99529, 'DIO_ZENR,5,6V,2%,MM3Z5V6ST1G,SOD323' ,201901024, 'ESB-01' ,100
union all
select 128082, 'CAP_CER,100nF,0603,10%,50V,X7R, poly' ,201901120, 'JD-01/  3' ,500
union all
select 128082, 'CAP_CER,100nF,0603,10%,50V,X7R, poly' ,201901121, 'JD-01/  3' ,500
union all
select 168078, 'CAP_CER,470nF,0805,10%,50V,X7R' ,201901207, 'JD-01/  19' ,170
union all
select 168078, 'CAP_CER,470nF,0805,10%,50V,X7R' ,201901152, 'ESB-03' ,140
union all
select 196881, 'BJT,C,SMBT3946DW1T1G,SOT363' ,201901085, 'ESB-02' ,100
union all
select 199296, 'BJT_DIG,C,SMUN5311DW1T1G,SOT363' ,201901083, 'ESB-02' ,100
union all
select 207735, 'DIO_LED, NFSA123DT' ,201902132, 'KRK' ,10
insert into #tblPartsUsed(GBC, [Description], Buildneed)
select 71744, 'RES_TF,10k,0402,1%,0,1W,100PPM/C' ,192
union all
select 71746, 'RES_TF,10k,0603,1%,0,1W,100PPM/C' ,168
union all
select 76527, 'CAP_CER,10nF,0402,10%,50V,X7R' ,288
union all
select 86911, 'CAP_CER,10nF,0603,10%,100V,X7R' ,1464
union all
select 92854, 'RES_TF,30k,0603,1%,0,1W,100PPM/C' ,72
union all
select 93018, 'RES_TF,68k,0603,1%,0,1W,100PPM/C' ,72
union all
select 95241, 'RES_TF,2k2,0402,1%,0,1W,100PPM/C' ,192
union all
select 95549, 'RES_TF,47k,0603,1%,0,1W,100PPM/C' ,72
union all
select 99529, 'DIO_ZENR,5,6V,2%,MM3Z5V6ST1G,SOD323' ,72
union all
select 112117, 'RES_TF,2k2,0603,1%,0,1W,100PPM/C' ,96
union all
select 126486, 'RES_TF,0R,0603' ,24
union all
select 128082, 'CAP_CER,100nF,0603,10%,50V,X7R, poly' ,72
union all
select 168078, 'CAP_CER,470nF,0805,10%,50V,X7R' ,72
union all
select 196200, 'BJT_DIG,N,PDTC114EU,SOT323' ,72
union all
select 196881, 'BJT,C,SMBT3946DW1T1G,SOT363' ,144
union all
select 199296, 'BJT_DIG,C,SMUN5311DW1T1G,SOT363' ,504
union all
select 199302, 'RES_TF,100R,0603,10%,0,1W,200PPM/C' ,72
union all
select 202047, 'UNI,N,PMPB215ENEA,DFN2020MD-6' ,72
union all
select 202054, 'DIO_LED,SPMWHT346EA3' ,648
union all
select 203509, 'CONN_HEADER,MOLEX,5023521100' ,24
union all
select 207735, 'DIO_LED, NFSA123DT' ,648
union all
select 207843, 'Thermistor,10k,0603,1%,NTC' ,24
union all
select 208252, 'FOOTPRINT_BOARD-IN,MOLEX,350220011' ,48
union all
select 212145, 'DIO_SIGN,200V,250mA,50nS,BAV21WSQ-7-F,SOD323' ,72
Based on this date, output should be like on below image of table:
Try this
EDIT2 (after change initial data)
SELECT distinct u.GBC,p2.Replaced, p2.Description, IIF(u.Afterbuild>0, MIN(p1.Barcode) OVER(PARTITION BY u.GBC), p1.Barcode) as Barcode, p1.Location, u.Quantity, u.Buildneed,
CASE
when (u.Total <0 AND p1.Barcode is NULL) OR (SUM(u.Quantity) OVER(PARTITION BY u.GBC) - u.Buildneed)>0 then u.Afterbuild
else 0
end
as Afterbuild,
CASE
when u.Total <0 AND p1.Barcode is NULL then ABS(u.Total)
when (SUM(u.Quantity) OVER(PARTITION BY u.GBC) - u.Buildneed)>0 then u.Total
else u.Total
END as Total FROM
(
SELECT distinct b.GBC, b.Quantity, b.Buildneed,
case
when b.Total <0 then null
when b.Total >=0 AND MIN(b.Quantity) OVER(PARTITION BY b.GBC) - b.Buildneed < 0 AND b.Quantity = MAX(b.Quantity) OVER(PARTITION BY b.GBC) then SUM(b.Quantity) OVER(PARTITION BY b.GBC) - b.Buildneed
when b.Total >=0 AND MIN(b.Quantity) OVER(PARTITION BY b.GBC) - b.Buildneed >= 0 AND b.Quantity = MIN(b.Quantity) OVER(PARTITION BY b.GBC) then MIN(b.Quantity) OVER(PARTITION BY b.GBC) - b.Buildneed
END AS Afterbuild,
case
when b.Total >=0 then null
when b.Total <0 AND b.RowNumber = MAX(b.RowNumber) OVER(PARTITION BY b.GBC) then b.Total
END AS Total
FROM
(
select r.GBC, r.Quantity, r.Buildneed,
(SUM(r.Quantity) OVER(PARTITION BY r.GBC)) - r.Buildneed as Total, ROW_NUMBER() OVER(order by r.GBC, r.Quantity) as RowNumber
from
(
SELECT e.GBC, ISNULL(Quantity,0) as Quantity, ISNULL(Buildneed,0) as Buildneed FROM #tblParts p RIGHT JOIN #tblPartsUsed e ON e.GBC = p.GBC
) as r
) as b
) as u
left join #tblParts as p1 on u.GBC = p1.GBC and ISNULL(u.Quantity,0) = ISNULL(p1.Quantity,0)
left join #tblPartsUsed as p2 on u.GBC = p2.GBC and ISNULL(u.Buildneed,0) = ISNULL(p2.Buildneed,0)
where (u.Afterbuild is not null or u.Total is not null or u.Quantity - u.Buildneed < 0)
order by u.GBC, u.Quantity
As per #RomaRuzich this question needs expected results in a table format.
Also the Parts and Excel table structure is needed with some data to clarify the question.
Made some assumptions and created script with output results.
declare #tblParts table(
GBC int,
Barcode varchar(20),
[Location] varchar(20),
Bookstand varchar(10) default '',
Quantity int
)
declare #tblPartsUsed table(
GBC int,
Replaced varchar(1) default '',
[Description] varchar(50),
Buildneed int
)
insert into #tblParts(GBC, Barcode, [Location], Quantity)
select 72223, '', '', 0
union all
select 86911, '201901200','JD-01/',500
union all
select 86911, '201901166','JD-01/ 14', 100
insert into #tblPartsUsed(GBC, [Description], Buildneed)
select '72223', 'RES_TF', 60
union all
select '86911', 'CAP_CER, 10nf,0603', 768
union all
select '86911', 'CAP_CER, 10nf,0603', 768
SELECT distinct e.GBC, e.Replaced, e.[Description], Barcode, [Location],
Bookstand, Quantity, Buildneed,
p.Quantity - e.Buildneed as Afterbuild,
x.TotalQuantity - Buildneed as Totals
FROM #tblParts p Right JOIN #tblPartsUsed e ON e.GBC = p.GBC
left join (select GBC, sum(Quantity) TotalQuantity from #tblParts group by GBC) x <br/>on e.GBC = x.GBC
ORDER BY e.GBC ASC, p.Quantity ASC

SQL Server split overlapping date ranges

I need to split date ranges that overlap. I have a primary table (I've called it Employment for this example), and I need to return all Begin-End date ranges for a person from this table. I also have multiple sub tables (represented by Car and Food), and I want to return the value that was active in the sub tables during the times given in the main tables. This will involve splitting the main table date ranges when a sub table item changes.
I don't want to return sub table information for dates not in the main tables.
DECLARE #Employment TABLE
( Person_ID INT, Employment VARCHAR(50), Begin_Date DATE, End_Date DATE )
DECLARE #Car TABLE
( Person_ID INT, Car VARCHAR(50), Begin_Date DATE, End_Date DATE )
DECLARE #Food TABLE
( Person_ID INT, Food VARCHAR(50), Begin_Date DATE, End_Date DATE )
INSERT INTO #Employment ( [Person_ID], [Employment], [Begin_Date], [End_Date] )
VALUES ( 123 , 'ACME' , '1986-01-01' , '1990-12-31' )
, ( 123 , 'Office Corp' , '1995-05-15' , '1998-10-03' )
, ( 123 , 'Job 3' , '1998-10-04' , '2999-12-31' )
INSERT INTO #Car ( [Person_ID] , [Car] , [Begin_Date] , [End_Date] )
VALUES ( 123, 'Red Car', '1986-05-01', '1997-06-23' )
, ( 123, 'Blue Car', '1997-07-03', '2999-12-31' )
INSERT INTO #Food ( [Person_ID], [Food], [Begin_Date], [End_Date] )
VALUES ( 123, 'Eggs', '1997-01-01', '1997-03-09' )
, ( 123, 'Donuts', '2001-02-23', '2001-02-25' )
For the above data, the results should be:
Person_ID Employment Food Car Begin_Date End_Date
123 ACME 1986-01-01 1986-04-30
123 ACME Red Car 1986-05-01 1990-12-31
123 Office Corp Red Car 1995-05-15 1996-12-31
123 Office Corp Eggs Red Car 1997-01-01 1997-03-09
123 Office Corp Red Car 1997-03-10 1997-06-23
123 Office Corp 1997-06-24 1997-07-02
123 Office Corp Blue Car 1997-07-03 1998-10-03
123 Job 3 Blue Car 1998-10-04 2001-02-22
123 Job 3 Donuts Blue Car 2001-02-23 2001-02-25
123 Job 3 Blue Car 2001-02-26 2999-12-31
The first row is his time working for ACME, where he didn't have a car or a weird food obsession. In the second row, he purchased a car, and still worked at ACME. In the third row, he changed jobs to Office Corp, but still has the Red Car. Note how we're not returning data during his unemployment gap, even though he had the Red Car. We only want to know what was in the Car and Food tables during the times there are values in the Employment table.
I found a solution for SQL Server 2012 that uses the LEAD/LAG functions to accomplish this, but I'm stuck with 2008 R2.
To change the 2012 solution from that blog to work with 2008, you need to replace the LEAD in the following
with
ValidDates as …
,
ValidDateRanges1 as
(
select EmployeeNo, Date as ValidFrom, lead(Date,1) over (partition by EmployeeNo order by Date) ValidTo
from ValidDates
)
There are a number of ways to do this, but one example is a self join to the same table + 1 row (which is effectively what a lead does). One way to do this is to put a rownumber on the previous table (so it is easy to find the next row) by adding another intermediate CTE (eg ValidDatesWithRowno). Then do a left outer join to that table where EmployeeNo is the same and rowno = rowno + 1, and use that value to replace the lead. If you wanted a lead 2, you would join to rowno + 2, etc. So the 2008 version would look something like
with
ValidDates as …
,
ValidDatesWithRowno as --This is the ValidDates + a RowNo for easy self joining below
(
select EmployeeNo, Date, ROW_NUMBER() OVER (ORDER BY EmployeeNo, Date) as RowNo from ValidDates
)
,
ValidDateRanges1 as
(
select VD.EmployeeNo, VD.Date as ValidFrom, VDLead1.Date as ValidTo
from ValidDatesWithRowno VD
left outer join ValidDatesWithRowno VDLead1 on VDLead1.EmployeeNo = VD.EmployeeNo
and VDLead1.RowNo = VD.RowNo + 1
)
The rest of the solution described looks like it will work like you want on 2008.
Here is the answer I came up with. It works, but it's not very pretty.
It goes it two waves, first splitting any overlapping Employment/Car dates, then running the same SQL a second time add the Food dates and split any overlaps again.
DECLARE #Employment TABLE
( Person_ID INT, Employment VARCHAR(50), Begin_Date DATE, End_Date DATE )
DECLARE #Car TABLE
( Person_ID INT, Car VARCHAR(50), Begin_Date DATE, End_Date DATE )
DECLARE #Food TABLE
( Person_ID INT, Food VARCHAR(50), Begin_Date DATE, End_Date DATE )
INSERT INTO #Employment ( [Person_ID], [Employment], [Begin_Date], [End_Date] )
VALUES ( 123 , 'ACME' , '1986-01-01' , '1990-12-31' )
, ( 123 , 'Office Corp' , '1995-05-15' , '1998-10-03' )
, ( 123 , 'Job 3' , '1998-10-04' , '2999-12-31' )
INSERT INTO #Car ( [Person_ID] , [Car] , [Begin_Date] , [End_Date] )
VALUES ( 123, 'Red Car', '1986-05-01', '1997-06-23' )
, ( 123, 'Blue Car', '1997-07-03', '2999-12-31' )
INSERT INTO #Food ( [Person_ID], [Food], [Begin_Date], [End_Date] )
VALUES ( 123, 'Eggs', '1997-01-01', '1997-03-09' )
, ( 123, 'Donuts', '2001-02-23', '2001-02-25' )
DECLARE #Person_ID INT = 123;
--A table to hold date ranges that need to be merged together
DECLARE #DatesToMerge TABLE
(
ID INT,
Person_ID INT,
Date_Type VARCHAR(10),
Begin_Date DATETIME,
End_Date DATETIME
)
INSERT INTO #DatesToMerge
SELECT ROW_NUMBER() OVER(ORDER BY [Car])
, Person_ID
, 'Car'
, Begin_Date
, End_Date
FROM #Car
WHERE Person_ID = #Person_ID
INSERT INTO #DatesToMerge
SELECT ROW_NUMBER() OVER(ORDER BY [Employment])
, Person_ID
, 'Employment'
, Begin_Date
, End_Date
FROM #Employment
WHERE Person_ID = #Person_ID;
--A table to hold the merged #Employment and Car records
DECLARE #EmploymentAndCar TABLE
(
RowNumber INT,
Person_ID INT,
Begin_Date DATETIME,
End_Date DATETIME
)
;
WITH CarCTE AS
(--This CTE grabs just the Car rows so we can compare and split dates from them
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM #DatesToMerge
WHERE Date_Type = 'Car'
),
NewRowsCTE AS
( --This CTE creates just new rows starting after the Car dates for each #Employment date range
SELECT a.ID,
a.Person_ID,
a.Date_Type,
DATEADD(DAY,1,b.End_Date) AS Begin_Date,
a.End_Date
FROM #DatesToMerge a
INNER JOIN CarCTE b
ON a.Begin_Date <= b.Begin_Date
AND a.End_Date > b.Begin_Date
AND a.End_Date > b.End_Date -- This is needed because if both the Car and #Employment end on the same date, there is split row after
),
UnionCTE AS
( -- This CTE merges the new rows with the existing ones
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM #DatesToMerge
UNION ALL
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM NewRowsCTE
),
FixEndDateCTE AS
(
SELECT CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.Begin_Date) AS FixID,
MIN(d.Begin_Date) AS Begin_Date
FROM UnionCTE c
LEFT OUTER JOIN CarCTE d
ON c.Begin_Date < d.Begin_Date
AND c.End_Date >= d.Begin_Date
WHERE c.Date_Type <> 'Car'
GROUP BY CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.Begin_Date)
),
Finalize AS
(
SELECT ROW_NUMBER() OVER (ORDER BY e.Begin_Date) AS RowNumber,
e.Person_ID,
e.Begin_Date,
CASE WHEN f.Begin_Date IS NULL THEN e.End_Date
ELSE DATEADD (DAY,-1,f.Begin_Date)
END AS EndDate
FROM UnionCTE e
LEFT OUTER JOIN FixEndDateCTE f
ON (CONVERT (CHAR,e.ID)+CONVERT (CHAR,e.Begin_Date)) = f.FixID
)
INSERT INTO #EmploymentAndCar ( RowNumber, Person_ID, Begin_Date, End_Date )
SELECT F.RowNumber
, F.Person_ID
, F.Begin_Date
, F.EndDate
FROM Finalize F
INNER JOIN #Employment Employment
ON F.Begin_Date BETWEEN Employment.Begin_Date AND Employment.End_Date AND Employment.Person_ID = #Person_ID
ORDER BY F.Begin_Date
--------------------------------------------------------------------------------------------------
--Now that the Employment and Car dates have been merged, empty the DatesToMerge table
DELETE FROM #DatesToMerge;
--Reload the DatesToMerge table with the newly-merged Employment and Car records,
--and the Food records that still need to be merged
INSERT INTO #DatesToMerge
SELECT RowNumber
, Person_ID
, 'PtBCar'
, Begin_Date
, End_Date
FROM #EmploymentAndCar
WHERE Person_ID = #Person_ID
INSERT INTO #DatesToMerge
SELECT ROW_NUMBER() OVER(ORDER BY [Food])
, Person_ID
, 'Food'
, Begin_Date
, End_Date
FROM #Food
WHERE Person_ID = #Person_ID
;
WITH CarCTE AS
(--This CTE grabs just the Food rows so we can compare and split dates from them
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM #DatesToMerge
WHERE Date_Type = 'Food'
),
NewRowsCTE AS
( --This CTE creates just new rows starting after the Food dates for each Employment date range
SELECT a.ID,
a.Person_ID,
a.Date_Type,
DATEADD(DAY,1,b.End_Date) AS Begin_Date,
a.End_Date
FROM #DatesToMerge a
INNER JOIN CarCTE b
ON a.Begin_Date <= b.Begin_Date
AND a.End_Date > b.Begin_Date
AND a.End_Date > b.End_Date -- This is needed because if both the Food and Car/Employment end on the same date, there is split row after
),
UnionCTE AS
( -- This CTE merges the new rows with the existing ones
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM #DatesToMerge
UNION ALL
SELECT ID,
Person_ID,
Date_Type,
Begin_Date,
End_Date
FROM NewRowsCTE
),
FixEndDateCTE AS
(
SELECT CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.Begin_Date) AS FixID,
MIN(d.Begin_Date) AS Begin_Date
FROM UnionCTE c
LEFT OUTER JOIN CarCTE d
ON c.Begin_Date < d.Begin_Date
AND c.End_Date >= d.Begin_Date
WHERE c.Date_Type <> 'Food'
GROUP BY CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.Begin_Date)
),
Finalize AS
(
SELECT ROW_NUMBER() OVER (ORDER BY e.Begin_Date) AS RowNumber,
e.Person_ID,
e.Begin_Date,
CASE WHEN f.Begin_Date IS NULL THEN e.End_Date
ELSE DATEADD (DAY,-1,f.Begin_Date)
END AS EndDate
FROM UnionCTE e
LEFT OUTER JOIN FixEndDateCTE f
ON (CONVERT (CHAR,e.ID)+CONVERT (CHAR,e.Begin_Date)) = f.FixID
)
SELECT DISTINCT
F.Person_ID
, Employment
, Car
, Food
, F.Begin_Date
, F.EndDate
FROM Finalize F
INNER JOIN #Employment Employment
ON F.Begin_Date BETWEEN Employment.Begin_Date AND Employment.End_Date AND Employment.Person_ID = #Person_ID
LEFT JOIN #Car Car
ON Car.[Begin_Date] <= F.Begin_Date
AND Car.[End_Date] >= F.[EndDate]
AND Car.Person_ID = #Person_ID
LEFT JOIN #Food Food
ON Food.[Begin_Date] <= F.[Begin_Date]
AND Food.[End_Date] >= F.[EndDate]
AND Food.Person_ID = #Person_ID
ORDER BY F.Begin_Date
If anyone has a more elegant solution, I will be happy to accept their answer.

derived column in Order by in partition

Using Sql Server 2008 R2.
Where there is more than 1 row of type demographic change, I need to delete all but 1 per person, but the types of demographic changes are weighted, with some more important than others. I don't know what the data will hold but if a more important one exists for a particular Contact, I want it to rise to the top.
I tried:
;WITH cte AS
(
SELECT lastname, firstname, FieldChanged,
Case 'FieldChanged'
When 'firstname' then 0
When 'lastname' then 0
When 'ssn' then 1
When 'xyz' then 5
End as "Weight"
, ROW_NUMBER() OVER (PARTITION BY D2.ContactId, D2.ContractId ORDER BY weight asc) AS demorow
FROM MyDATA d2
where d2.FieldChanged in ('firstname', 'lastname', 'ssn', 'xyz')
)
SELECT *
FROM cte
WHERE demorow > 1
This gives me an error: Invalid column name 'weight'.
I think I can't use APPLY since there's no unique key in the source table, which is not under my control.
Update:
CREATE TABLE dbo.MyTempTable
(firstname varchar(25) NOT NULL,
lastname varchar(25) NOT NULL,
FieldChanged varchar(25),
ContactId uniqueidentifier,
ContractId uniqueidentifier
)
GO
Insert into dbo.mytemptable
(firstname ,
lastname ,
FieldChanged ,
ContactId ,
ContractId)
Values
('john', 'smith', 'ssn', '688CB150-C7FD-E511-8709-00155D070201', '688CB150-C7FD-E511-8709-00155D070202')
, ('john', 'smith', 'xyz', '688CB150-C7FD-E511-8709-00155D070201', '688CB150-C7FD-E511-8709-00155D070202')
, ('mary', 'doe', 'xyz', '688CB150-C7FD-E511-8709-00155D070203', '688CB150-C7FD-E511-8709-00155D070202')
, ('mary', 'doe', 'firstname', '688CB150-C7FD-E511-8709-00155D070203', '688CB150-C7FD-E511-8709-00155D070202')
, ('mary', 'doe', 'lastname', '688CB150-C7FD-E511-8709-00155D070203', '688CB150-C7FD-E511-8709-00155D070202')
, ('mary', 'doe', 'ssn', '688CB150-C7FD-E511-8709-00155D070203', '688CB150-C7FD-E511-8709-00155D070202')
For this data I'd want John Smith's and Mary Doe's respective xyz rows to be selected, as less important than their name change rows.
Update 2:
I think this works:
;WITH cte AS
(
SELECT lastname, firstname, FieldChanged,
Case FieldChanged
When 'firstname' then 0
When 'lastname' then 0
When 'ssn' then 5
When 'xyz' then 1
else 9
End as "Weight",
ContactId, ContractID
FROM edi..MyDATA d2
where d2.FieldChanged in ('firstname', 'lastname', 'ce_ssn', 'Policy Number')
),
cte2 As
(
SELECT *
, ROW_NUMBER() OVER (PARTITION BY ContactId, ContractId ORDER BY weight asc) AS demorow
FROM cte
)
SELECT *
FROM cte2
WHERE demorow > 1
Column aliases are assigned after all of the other clauses of a SELECT expression are executed (except for ORDER BY clauses, but not ORDER BY expressions), so you cannot use them within the same SELECT expression, only outside of them (or in an ORDER BY clause).
Here's a quick fix:
;WITH cte AS
(
SELECT lastname, firstname, FieldChanged,
Case FieldChanged
When 'firstname' then 0
When 'lastname' then 0
When 'ssn' then 1
When 'xyz' then 5
End as "Weight",
ContactId, ContractID
FROM MyDATA d2
where d2.FieldChanged in ('firstname', 'lastname', 'ssn', 'xyz')
),
cte2 As
(
SELECT *
, ROW_NUMBER() OVER (PARTITION BY ContactId, ContractId ORDER BY weight asc) AS demorow
FROM cte
)
SELECT *
FROM cte2
WHERE demorow > 1
replace "weight" in the order by with the full CASE statement. Or put the main query (without order by) in a sub query and the row number in the outer query. YOu should then be able to access the "weight" column in order by.

PostgreSQL grouping

I would like to group values according to values in over columns.
This is an example:
I would like to get the output:
{{-30,-50,20},{-20,30,60},{-30,NULL or other value, 20}}
I managed to arrive to:
SELECT array_agg("val")
FROM my_table
WHERE "t_id" = 1
GROUP BY "m_id";
{{-30,-50,20},{-20,30,60},{-30,20}}
What would be the best approach?
create table my_table (
t_id int,
m_id int,
s_id int,
val int
);
insert into my_table (t_id, m_id, s_id, val) values
(1,1,1,-30),
(1,1,2,-50),
(1,1,3,20),
(1,2,1,-20),
(1,2,2,30),
(1,2,3,60),
(1,3,1,-30),
(1,3,3,20);
select array_agg(val order by s_id)
from
my_table t
right join
(
(
select distinct t_id, m_id
from my_table
) a
cross join
(
select distinct s_id
from my_table
) b
) s using (t_id, m_id, s_id)
where t_id = 1
group by m_id
order by m_id
;
array_agg
---------------
{-30,-50,20}
{-20,30,60}
{-30,NULL,20}

How to use Common Table Expression with parameters?

I have a stored procedure with 2 CTEs. The second CTE has a parameter
WITH path_sequences
AS
(
),
WITH categories
AS
(
... WHERE CategoryId = #CategoryId
// I dont know how to get this initial parameter inside the CTE
)
SELECT * FROM path_sequences p
JOIN categories c
ON p.CategoryId = c.CategoryId
The initial parameter that I need to get inside the second TCE is p.CategoryId. How do I do that without having to create another stored procedure to contain the second CTE?
Thanks for helping
You can create table valued function
create function ftCategories
(
#CategoryID int
)
returns table
as return
with categories as (
... WHERE CategoryId = #CategoryId
)
select Col1, Col2 ...
from categories
and use it as
SELECT *
FROM path_sequences p
cross apply ftCategories(p.CategoryId) c
I have created simple query using your code. You can use it like -
DECLARE #CategoryId INT
SET #CategoryId = 1
;WITH path_sequences
AS
(
SELECT 1 CategoryId
),
categories
AS
(
SELECT 1 CategoryId WHERE 1 = #CategoryId
)
SELECT * FROM path_sequences p
JOIN categories c
ON p.CategoryId = c.CategoryId
This syntax is for External Aliases:
-- CTES With External Aliases:
WITH Sales_CTE (SalesPersonID, SalesOrderID, SalesYear)
AS
-- Define the CTE query.
(
SELECT SalesPersonID, SalesOrderID, YEAR(OrderDate) AS SalesYear
FROM Sales.SalesOrderHeader
WHERE SalesPersonID IS NOT NULL
)
The only way to add parameters is to use scope variables like so:
--Declare a variable:
DECLARE #category INT
WITH
MyCTE1 (exName1, exName2)
AS
(
SELECT <SELECT LIST>
FROM <TABLE LIST>
--Use the variable as 'a parameter'
WHERE CategoryId = #CategoryId
)
First remove the second WITH, separate each cte with just a comma. Next you can add parameters like this:
DECLARE #category INT; -- <~~ Parameter outside of CTEs
WITH
MyCTE1 (col1, col2) -- <~~ were poorly named param1 and param2 previously
AS
(
SELECT blah blah
FROM blah
WHERE CategoryId = #CategoryId
),
MyCTE2 (col1, col2) -- <~~ were poorly named param1 and param2 previously
AS
(
)
SELECT *
FROM MyCTE2
INNER JOIN MyCTE1 ON ...etc....
EDIT (and CLARIFICATION):
I have renamed the columns from param1 and param2 to col1 and col2 (which is what I meant originally).
My example assumes that each SELECT has exactly two columns. The columns are optional if you want to return all of the columns from the underlying query AND those names are unique. If you have more or less columns than what is being SELECTed you will need to specify names.
Here is another example:
Table:
CREATE TABLE Employee
(
Id INT NOT NULL IDENTITY PRIMARY KEY CLUSTERED,
FirstName VARCHAR(50) NOT NULL,
LastName VARCHAR(50) NOT NULL,
ManagerId INT NULL
)
Fill table with some rows:
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Donald', 'Duck', 5)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Micky', 'Mouse', 5)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Daisy', 'Duck', 5)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Fred', 'Flintstone', 5)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Darth', 'Vader', null)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Bugs', 'Bunny', null)
INSERT INTO Employee
(FirstName, LastName, ManagerId)
VALUES
('Daffy', 'Duck', null)
CTEs:
DECLARE #ManagerId INT = 5;
WITH
MyCTE1 (col1, col2, col3, col4)
AS
(
SELECT *
FROM Employee e
WHERE 1=1
AND e.Id = #ManagerId
),
MyCTE2 (colx, coly, colz, cola)
AS
(
SELECT e.*
FROM Employee e
INNER JOIN MyCTE1 mgr ON mgr.col1 = e.ManagerId
WHERE 1=1
)
SELECT
empsWithMgrs.colx,
empsWithMgrs.coly,
empsWithMgrs.colz,
empsWithMgrs.cola
FROM MyCTE2 empsWithMgrs
Notice in the CTEs the columns are being aliased. MyCTE1 exposes columns as col1, col2, col3, col4 and MyCTE2 references MyCTE1.col1 when it references it. Notice the final select uses MyCTE2's column names.
Results:
For anyone still struggling with this, the only thing you need to is terminate your declaration of variables with a semicolon before the CTE. Nothing else is required.
DECLARE #test AS INT = 42;
WITH x
AS (SELECT #test AS 'Column')
SELECT *
FROM x
Results:
Column
-----------
42
(1 row affected)