Group nested array of objects by property - MongoDB
I have seen a lot of questions of this type suggesting $unwind and $group stages, but I have not managed to get what I want yet.
After a fairly long aggregation pipeline I almost have what I need. If required I will post my entire initial document structure and pipeline, but I am not sure it is necessary since it is pretty big.
So far I have got up to this step:
$group:
{
    _id:
    {
        Frequency:"$Frequency",
        LowLimit:"$LowLimit",
        HighLimit:"$HighLimit",
        Units:"$Units",
        Params:"$Params"
    },
    count:{$sum:1},
    Steps:
    {
        $push:
        {
            FPGAVersion:"$FPGAVersion",
            Result:"$Result",
            PassFail:"$PassFail",
            KeyIndex:"$KeyIndex",
            CatalogNumber:"$CatalogNumber",
            AppVer:"$AtsVersion",
            Id:"$Id"
        }
    }
}
Which produces documents like this:
_id:{
Frequency:"XXX"
LowLimit:"220.11"
HighLimit:"285.89"
Units:""
Params:""
count:16
}
Steps:
[
{
FPGAVersion:"XXX"
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
FPGAVersion:"XXX"
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
FPGAVersion:"XXX"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
What I need is for the main grouping to stay the same, but I want the inner array to also be grouped by FPGAVersion, like this:
_id:{
Frequency:"XXX"
LowLimit:"220.11"
HighLimit:"285.89"
Units:""
Params:""
count:16
}
Steps:
[
{
FPGAVersion:"XXX"
Steps:
[
{
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
{
FPGAVersion:"YYY"
Steps:
[
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
]
EDIT
A complete document example (some unnecessary data removed):
{
"_id":{
"$oid":"60fd4cdbbbbc873d1c831df4"
},
"Index":0,
"KeyIndex":"25042021173703bb12345678",
"SerialNumber":"XXX",
"UnitName":"Unit A",
"CatalogNumber":"XXX",
"StartWorkDate":{
"$date":"2021-04-25T17:37:03.000Z"
},
"FinishWorkDate":{
"$date":"2021-04-25T17:44:14.000Z"
},
"WorkTime":"00:07:10",
"TotalTests":4,
"FailedTestsCount":0,
"PassedTestsCount":4,
"Status":"PASS",
"AppVersion":"XXX",
"FPGAVersion":"XXX",
"Tests":[
{
"TestName":"Test A",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Duration":"00:00:01",
"TotalSteps":9,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"0.45",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.40",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"0.20",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.10",
"HighLimit":"0.40",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.50",
"HighLimit":"1.70",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.30",
"HighLimit":"2.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"1.10",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0",
"HighLimit":"0.04",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message.."
},
{
"Type":"Info",
"Message":"Log Message.."
}
]
},
{
"TestName":"Test B",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Duration":"00:00:00",
"TotalSteps":1,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Frequency":" ",
"LowLimit":"AC",
"HighLimit":"AC",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"FPGA Version"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
},
{
"TestName":"Test C",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Duration":"00:01:44",
"TotalSteps":4,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Frequency":"XXX",
"LowLimit":"69.00",
"HighLimit":"89.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 6500"
},
{
"TestDate":{
"$date":"2021-04-25T14:39:57.000Z"
},
"Frequency":"XXX",
"LowLimit":"89.00",
"HighLimit":"109.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2B00"
},
{
"TestDate":{
"$date":"2021-04-25T14:40:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"394.00",
"HighLimit":"414.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B500"
},
{
"TestDate":{
"$date":"2021-04-25T14:41:11.000Z"
},
"Frequency":"XXX",
"LowLimit":"699.00",
"HighLimit":"719.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B370"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message"
},
{
"Type":"Info",
"Message":"Log Message"
}
]
},
{
"TestName":"Test D",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Duration":"00:01:42",
"TotalSteps":6,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Frequency":"XXX",
"LowLimit":"107.00",
"HighLimit":"127.00",
"Units":" ",
"Result":"0",
"PassFail":" ",
"Params":"Value 5100"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"227.00",
"HighLimit":"247.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 4800"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:47.000Z"
},
"Frequency":"XXX",
"LowLimit":"282.00",
"HighLimit":"302.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2000"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:05.000Z"
},
"Frequency":"XXX",
"LowLimit":"462.00",
"HighLimit":"482.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2D00"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:19.000Z"
},
"Frequency":"XXX",
"LowLimit":"517.00",
"HighLimit":"537.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 1570"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:37.000Z"
},
"Frequency":"XXX",
"LowLimit":"697.00",
"HighLimit":"717.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2500"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
}
]
}
The complete pipeline so far (an example output document is shown above, before the edit):
[{$match: {
$and:[
{StartWorkDate:{ $gte:ISODate("2019-02-04T11:15:15.000+00:00")}},
{FinishWorkDate:{ $lte:ISODate("2022-04-05T14:15:15.000+00:00")}},
{UnitName:{$eq:"XXX"}},
{Component:{$eq:"TYYY"}},
{FPGAVersion:{$in:["XXX",
"YYY",
"ZZZ"]}},
{"Tests.TestName":{$eq:"Test A"} }
]
}}, {$unset: "Tests.Logs"}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AppVersion",
FPGAVersion:"$FPGAVersion",
Tests:
{
$filter:
{
input: '$Tests',
as: 'test',
cond: {$eq: ['$$test.TestName', 'Test A']}
}
}
}}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AtsVersion",
Id:"$_id",
FPGAVersion:"$FPGAVersion",
Steps:{
$reduce:{
input:"$Tests.Steps",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}}, {$unwind: {
path: "$Steps"
}}, {$project: {
FPGAVersion:1,
Id:1,
KeyIndex:1,
SerialNumber:1,
CatalogNumber:1,
AtsVersion:1,
Frequency:"$Steps.Frequency",
LowLimit:"$Steps.LowLimit",
HighLimit:"$Steps.HighLimit",
TestDate:"$Steps.TestDate",
Params:"$Steps.Params",
Units:"$Steps.Units",
Result:"$Steps.Result",
PassFail:"$Steps.PassFail",
}}, {$group: {
_id:
{
FPGAVersion:"$FPGAVersion",
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params"
},
count:{$sum:1},
Steps:
{
$push:
{
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
FPGAVersion:"$FPGAVersion",
Id:"$Id"
}
}
}}, {$project: {
_id:0,
FPGAVersion:"$_id.FPGAVersion",
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params",
Steps:"$Steps",
"Total":
{
$size:"$Steps"
},
Passed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res"," "]
}
}
}
},
Failed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res","*"]
}
}
}
}
}}]
Add the FPGAVersion field to the _id of your existing $group stage.
Then add a second $group stage that groups by your original fields and constructs the array of FPGAVersion and Steps:
{
$group: {
_id: {
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params",
FPGAVersion:"$FPGAVersion"
},
count: { $sum: 1 },
Steps: {
$push: {
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
Id:"$Id"
}
}
}
},
{
$group: {
_id: {
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params"
},
count: { $sum: "$count" },
Steps: {
$push: {
FPGAVersion: "$_id.FPGAVersion",
Steps: "$Steps"
}
}
}
}
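If you keep the trailing $project from your pipeline, note that the pass/fail values now sit one level deeper (Steps.Steps.PassFail), so the Passed/Failed counts have to flatten the nested arrays first, and the FPGAVersion:"$_id.FPGAVersion" projection goes away. Here is a minimal, untested sketch of how that final stage could look, reusing the field names from the pipeline above; adjust as needed:
{$project: {
    _id: 0,
    Frequency: "$_id.Frequency",
    LowLimit: "$_id.LowLimit",
    HighLimit: "$_id.HighLimit",
    Units: "$_id.Units",
    Params: "$_id.Params",
    Steps: "$Steps",
    Total: "$count",
    Passed: {
        $size: {
            $filter: {
                // "$Steps.Steps" is now an array of arrays, so flatten it first
                input: {
                    $reduce: {
                        input: "$Steps.Steps",
                        initialValue: [],
                        in: { $concatArrays: ["$$value", "$$this"] }
                    }
                },
                as: "s",
                cond: { $eq: ["$$s.PassFail", " "] }
            }
        }
    },
    Failed: {
        $size: {
            $filter: {
                input: {
                    $reduce: {
                        input: "$Steps.Steps",
                        initialValue: [],
                        in: { $concatArrays: ["$$value", "$$this"] }
                    }
                },
                as: "s",
                cond: { $eq: ["$$s.PassFail", "*"] }
            }
        }
    }
}}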