Group nested objects array by property - mongodb

I saw a lot of questions of this type suggesting $unwind and $group stages, but I haven't managed to get what I want yet.
After a quite long aggregation pipeline I almost got what I need. If required I'll post my entire initial document structure and pipeline, but I'm not sure it's necessary; it's pretty big.
So far I got up to this step:
$group:
{
    _id:
    {
        Frequency: "$Frequency",
        LowLimit: "$LowLimit",
        HighLimit: "$HighLimit",
        Units: "$Units",
        Params: "$Params"
    },
    count: { $sum: 1 },
    Steps:
    {
        $push:
        {
            FPGAVersion: "$FPGAVersion",
            Result: "$Result",
            PassFail: "$PassFail",
            KeyIndex: "$KeyIndex",
            CatalogNumber: "$CatalogNumber",
            AppVer: "$AtsVersion",
            Id: "$Id"
        }
    }
}
Which produces documents like this:
_id:{
Frequency:"XXX"
LowLimit:"220.11"
HighLimit:"285.89"
Units:""
Params:""
}
count:16
Steps:
[
{
FPGAVersion:"XXX"
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
FPGAVersion:"XXX"
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
FPGAVersion:"XXX"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
What I need is for the main grouping to stay the same, but I want the inner array to also be grouped by FPGAVersion, like this:
_id:{
Frequency:"XXX"
LowLimit:"220.11"
HighLimit:"285.89"
Units:""
Params:""
}
count:16
Steps:
[
{
FPGAVersion:"XXX"
Steps:
[
{
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
{
FPGAVersion:"YYY"
Steps:
[
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
]
EDIT
A complete document example (some unnecessary data removed):
{
"_id":{
"$oid":"60fd4cdbbbbc873d1c831df4"
},
"Index":0,
"KeyIndex":"25042021173703bb12345678",
"SerialNumber":"XXX",
"UnitName":"Unit A",
"CatalogNumber":"XXX",
"StartWorkDate":{
"$date":"2021-04-25T17:37:03.000Z"
},
"FinishWorkDate":{
"$date":"2021-04-25T17:44:14.000Z"
},
"WorkTime":"00:07:10",
"TotalTests":4,
"FailedTestsCount":0,
"PassedTestsCount":4,
"Status":"PASS",
"AppVersion":"XXX",
"FPGAVersion":"XXX",
"Tests":[
{
"TestName":"Test A",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Duration":"00:00:01",
"TotalSteps":9,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"0.45",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.40",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"0.20",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.10",
"HighLimit":"0.40",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.50",
"HighLimit":"1.70",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.30",
"HighLimit":"2.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"1.10",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0",
"HighLimit":"0.04",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message.."
},
{
"Type":"Info",
"Message":"Log Message.."
},
]
},
{
"TestName":"Test B",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Duration":"00:00:00",
"TotalSteps":1,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Frequency":" ",
"LowLimit":"AC",
"HighLimit":"AC",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"FPGA Version"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
},
{
"TestName":"Test C",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Duration":"00:01:44",
"TotalSteps":4,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Frequency":"XXX",
"LowLimit":"69.00",
"HighLimit":"89.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 6500"
},
{
"TestDate":{
"$date":"2021-04-25T14:39:57.000Z"
},
"Frequency":"XXX",
"LowLimit":"89.00",
"HighLimit":"109.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2B00"
},
{
"TestDate":{
"$date":"2021-04-25T14:40:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"394.00",
"HighLimit":"414.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B500"
},
{
"TestDate":{
"$date":"2021-04-25T14:41:11.000Z"
},
"Frequency":"XXX",
"LowLimit":"699.00",
"HighLimit":"719.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B370"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message"
},
{
"Type":"Info",
"Message":"Log Message"
}
]
},
{
"TestName":"Test D",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Duration":"00:01:42",
"TotalSteps":6,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Frequency":"XXX",
"LowLimit":"107.00",
"HighLimit":"127.00",
"Units":" ",
"Result":"0",
"PassFail":" ",
"Params":"Value 5100"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"227.00",
"HighLimit":"247.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 4800"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:47.000Z"
},
"Frequency":"XXX",
"LowLimit":"282.00",
"HighLimit":"302.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2000"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:05.000Z"
},
"Frequency":"XXX",
"LowLimit":"462.00",
"HighLimit":"482.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2D00"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:19.000Z"
},
"Frequency":"XXX",
"LowLimit":"517.00",
"HighLimit":"537.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 1570"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:37.000Z"
},
"Frequency":"XXX",
"LowLimit":"697.00",
"HighLimit":"717.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2500"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
}
]
}
The complete pipeline so far (an example output document is shown above, before the edit):
[{$match: {
$and:[
{StartWorkDate:{ $gte:ISODate("2019-02-04T11:15:15.000+00:00")}},
{FinishWorkDate:{ $lte:ISODate("2022-04-05T14:15:15.000+00:00")}},
{UnitName:{$eq:"XXX"}},
{Component:{$eq:"TYYY"}},
{FPGAVersion:{$in:["XXX",
"YYY",
"ZZZ"]}},
{"Tests.TestName":{$eq:"Test A"} }
]
}}, {$unset: "Tests.Logs"}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AppVersion",
FPGAVersion:"$FPGAVersion",
Tests:
{
$filter:
{
input: '$Tests',
as: 'test',
cond: {$eq: ['$$test.TestName', 'Test A']}
}
}
}}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AtsVersion",
Id:"$_id",
FPGAVersion:"$FPGAVersion",
Steps:{
$reduce:{
input:"$Tests.Steps",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}}, {$unwind: {
path: "$Steps"
}}, {$project: {
FPGAVersion:1,
Id:1,
KeyIndex:1,
SerialNumber:1,
CatalogNumber:1,
AtsVersion:1,
Frequency:"$Steps.Frequency",
LowLimit:"$Steps.LowLimit",
HighLimit:"$Steps.HighLimit",
TestDate:"$Steps.TestDate",
Params:"$Steps.Params",
Units:"$Steps.Units",
Result:"$Steps.Result",
PassFail:"$Steps.PassFail",
}}, {$group: {
_id:
{
FPGAVersion:"$FPGAVersion",
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params"
},
count:{$sum:1},
Steps:
{
$push:
{
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
FPGAVersion:"$FPGAVersion",
Id:"$Id"
}
}
}}, {$project: {
_id:0,
FPGAVersion:"$_id.FPGAVersion",
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params",
Steps:"$Steps",
"Total":
{
$size:"$Steps"
},
Passed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res"," "]
}
}
}
},
Failed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res","*"]
}
}
}
}
}}]

Add the FPGAVersion field to the _id of your first $group stage.
Then, in a second $group stage, group by your required fields and construct the array of FPGAVersion and Steps fields:
{
$group: {
_id: {
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params",
FPGAVersion:"$FPGAVersion"
},
count: { $sum: 1 },
Steps: {
$push: {
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
Id:"$Id"
}
}
}
},
{
$group: {
_id: {
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params"
},
count: { $sum: "$count" },
Steps: {
$push: {
FPGAVersion: "$_id.FPGAVersion",
Steps: "$Steps"
}
}
}
}
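If these two stages are dropped into the pipeline above in place of the existing single $group, the trailing $project needs a matching adjustment, since the grouping keys now sit under _id without FPGAVersion and Steps is a nested array. A rough sketch (the Total/Passed/Failed counters are left out here; they would have to be computed per FPGAVersion, for example in the first $group, because "$Steps.PassFail" no longer resolves to a flat array of strings):
{
    $project: {
        _id: 0,
        Frequency: "$_id.Frequency",
        LowLimit: "$_id.LowLimit",
        HighLimit: "$_id.HighLimit",
        Units: "$_id.Units",
        Params: "$_id.Params",
        count: "$count",
        Steps: "$Steps" // now an array of { FPGAVersion, Steps: [...] } documents
    }
}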

Related

MongoDB aggregate error (MongoError: BSONObj size: 20726581 (0x13C4335) is invalid.)

I got the error below while executing the MongoDB aggregate function.
It is not working even though I set allowDiskUse to true. Currently, I am using MongoDB Atlas M10.
May I have a solution, please?
MongoError: BSONObj size: 20726581 (0x13C4335) is invalid. Size must be between 0 and 16793600(16MB) First element: _id: "ecba-d187-0635-84c1-db59"
The data info is like this:
COLLECTION SIZE: 21.45MB
TOTAL DOCUMENTS: 41051
INDEXES TOTAL SIZE: 560KB
I am using the code below.
router.get("/load_agg", async (req, res) => {
if (req.query.projectName) {
var dbName = req.query.projectName;
var appId = req.query.appId;
var viewName = req.query.viewName;
var model = ItemGroup(dbName, appId + "_" + viewName);
await model.aggregate([
{
$match: {
parent: 0
}
},
{
$graphLookup: {
from: appId + "_" + viewName + "s",
startWith: "$id",
connectFromField: "id",
connectToField: "parent",
depthField: "level",
as: "data"
}
},
{
$unset: [
"data._id",
"data.createdAt",
"data.updatedAt",
"data.updateBy"
]
},
{
$unwind: {
path: "$data",
preserveNullAndEmptyArrays: true
}
},
{
$sort: {
"data.level": -1
}
},
{
$group: {
_id: "$id",
parent: {
$first: "$parent"
},
value: {
$first: "$value"
},
type: {
$first: "$type"
},
data: {
$push: "$data"
}
}
},
{
$addFields: {
data: {
$reduce: {
input: "$data",
initialValue: {
level: -1,
presentData: [],
prevData: []
},
in: {
$let: {
vars: {
prev: {
$cond: [
{
$eq: [
"$$value.level",
"$$this.level"
]
},
"$$value.prevData",
"$$value.presentData"
]
},
current: {
$cond: [
{
$eq: [
"$$value.level",
"$$this.level"
]
},
"$$value.presentData",
[]
]
}
},
in: {
level: "$$this.level",
prevData: "$$prev",
presentData: {
$concatArrays: [
"$$current",
[
{
$mergeObjects: [
"$$this",
{
data: {
$filter: {
input: "$$prev",
as: "e",
cond: {
$eq: [
"$$e.parent",
"$$this.id"
]
}
}
}
}
]
}
]
]
}
}
}
}
}
}
}
},
{
$addFields: {
data: "$data.presentData"
}
}
]).allowDiskUse(true).then(data => {
if (data) {
res.send({
code: 200,
message: "Successful!",
data: data
});
} else {
res.send({ code: 500, message: "Invalid Info!" });
}
}).catch(function (error) {
console.log(error); // Failure
res.send({ code: 500, message: "Data loading fail!" });
});
} else {
res.send({ code: 500, message: "Invalid Info!" });
}
});
Atlas screenshot:
https://i.stack.imgur.com/AFtzP.png
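Note that allowDiskUse lets blocking stages spill to disk, but it does not raise the 16MB BSON limit that applies to every single document flowing through the pipeline, so the $group stage that pushes every $graphLookup result into one data array is the likely place the limit is hit. A minimal sketch, assuming the later $reduce only needs the id, parent, level, value and type fields (field names taken from the code above), of pushing a trimmed subdocument instead of the whole $data document:
{
    $group: {
        _id: "$id",
        parent: { $first: "$parent" },
        value: { $first: "$value" },
        type: { $first: "$type" },
        data: {
            // push only the fields the $reduce stage actually reads,
            // instead of the full $data subdocument
            $push: {
                id: "$data.id",
                parent: "$data.parent",
                level: "$data.level",
                value: "$data.value",
                type: "$data.type"
            }
        }
    }
}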

List of objects to nested objects using aggregate

I have the following documents in a collection:
[
{
"category":"category1",
"type":"type1",
"item":"item1",
"name":"testname",
"settings":{
"enable":"true",
"mode":"1"
},
"status":"active"
},
{
"category":"category1",
"type":"type1",
"item":"item1",
"name":"testname2",
"settings":{
"enable":"true",
"mode":"1"
},
"status":"inactive"
},
{
"category":"category1",
"type":"type1",
"item":"item2",
"name":"testname3",
"settings":{
"enable":"true",
"mode":"1"
},
"status":"active"
},
{
"category":"category2",
"type":"type2",
"item":"item3",
"name":"testname4",
"settings":{
"enable":"true",
"mode":"1"
},
"status":"active"
},
{
"category":"category3",
"type":"type4",
"item":"item5",
"name":"testname5",
"settings":{
"enable":"true",
"mode":"1"
},
"status":"active"
}
]
I want to convert this into nested objects with four levels. I'm expecting output like below:
{
"category1":{
"type1":{
"item1":{
"active":[
{
"name":"testname",
"settings":{
"enable":"true",
"mode":"1"
}
}
],
"inactive":[
{
"name":"testname2",
"settings":{
"enable":"true",
"mode":"1"
}
}
]
},
"item2":{
"active":[
{
"name":"testname3",
"settings":{
"enable":"true",
"mode":"1"
}
}
]
}
}
},
"category2":{
"type2":{
"item3":{
"active":[
{
"name":"testname4",
"settings":{
"enable":"true",
"mode":"1"
}
}
]
}
}
},
"category3":{
"type4":{
"item4":{
"active":[
{
"name":"testname5",
"settings":{
"enable":"true",
"mode":"1"
}
}
]
}
}
}
}
I'm able to convert into two levels with aggregate and the replaceRoot option, but I'm unable to convert into four levels after replacing the root. How can I achieve this output using a MongoDB aggregation?
I'm not sure exactly how your pipeline looks, as you didn't include it; however, here is how I would do it by using $arrayToObject, $group and $replaceRoot to manipulate the structure into the required format:
db.collection.aggregate([
{
$group: {
_id: {
category: "$category",
type: "$type",
item: "$item",
status: "$status"
},
data: {
$push: {
name: "$name",
settings: "$settings"
},
}
}
},
{
$group: {
_id: {
category: "$_id.category",
type: "$_id.type",
item: "$_id.item"
},
statusData: {
$push: {
data: "$data",
status: "$_id.status"
}
}
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
{
_id: "$_id"
},
{
data: {
"$arrayToObject": {
$map: {
input: "$statusData",
as: "datum",
in: {
k: "$$datum.status",
v: "$$datum.data"
}
}
}
}
}
]
}
}
},
{
$sort: {
"_id.item": 1
}
},
{
$group: {
_id: {
category: "$_id.category",
type: "$_id.type"
},
items: {
$push: {
"$arrayToObject": [
[
{
k: "$_id.item",
v: "$data"
}
]
]
}
}
}
},
{
$sort: {
"_id.type": 1
}
},
{
$replaceRoot: {
newRoot: {
"$arrayToObject": [
[
{
k: "$_id.category",
v: {
"$arrayToObject": [
[
{
k: "$_id.type",
v: {
"$mergeObjects": "$items"
}
}
]
]
}
}
]
]
}
}
}
])
Mongo Playground
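For reference, $arrayToObject accepts either an array of { k, v } documents or an array of [key, value] pairs; the $map over statusData above builds the first form, roughly like this for the sample data:
// input built by the $map
[
    { k: "active",   v: [ { name: "testname",  settings: { enable: "true", mode: "1" } } ] },
    { k: "inactive", v: [ { name: "testname2", settings: { enable: "true", mode: "1" } } ] }
]
// $arrayToObject turns it into
{
    "active":   [ { "name": "testname",  "settings": { "enable": "true", "mode": "1" } } ],
    "inactive": [ { "name": "testname2", "settings": { "enable": "true", "mode": "1" } } ]
}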

Mongoose aggregate with nested collection

I have the following schemas.
First: AdditionalFieldSchema
const AdditionalFieldSchema = new Schema({
names: [
{
type: Schema.Types.ObjectId,
ref: "multiLanguageContent",
required: true
}
],
...
});
Second: MultiLanguageContentSchema
const MultiLanguageContentSchema = new Schema({
value: {
type: String,
required: true
},
...
});
and I have the following Mongoose aggregate query.
The goal is to fetch every employee with their additionalField attached.
const employees = await Employee.aggregate([
{
$match: {
status: "active",
$nor: [
{ email: "blablabla" },
{ email: "blobloblo" }
]
}
},
{
$lookup: {
from: AdditionalField.collection.name,
let: { af_id: "$_id" },
pipeline: [{
$match: {
$expr: {
$and: [
{ $eq: ["$ownership", "$$af_id"] },
{ $eq: ["$status", "active"] },
{ $eq: ["$functionalType", "blablabla"] }
]
}
}
}],
as: "afs"
}
},
{ $unwind: "$afs" },
{ $unwind: "$afs.names" },
{
$group: {
_id: "$_id",
email: { $first: "$email" },
afs: {
$push: {
value: "$afs.value",
names: "$afs.names"
}
}
}
}
]);
The query I run to test this aggregate function.
query TestQuery {
testQuery
{
email
afs {
names {
value
}
value
}
}
}
The result I have.
"data": {
"testQuery": [
{
"email": "blablabla#test.com"
"afs": [
{
"names": [
{
"value": "Name 1"
}
],
"value": "Value 1"
},
{
"names": [
{
"value": "Name 2"
}
],
"value": "Value 2"
},
...
]
},
...
The result is good; I have the data I want.
But I would like to have a result like this below:
"data": {
"testQuery": [
{
"email": "blablabla#test.com",
"afs": [
{
"names": "Name 1",
"value": "Value 1"
},
{
"names": "Name 2",
"value": "Value 2"
},
...
]
},
...
It seems like { $unwind: "$afs.names" } is not working.
Any ideas?
Thanks, Flo
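One way this could be approached (a sketch only, assuming the multiLanguageContent documents live in a collection named "multilanguagecontents" and that each names ObjectId should simply be resolved to its value string): resolve the reference with an extra $lookup after the two $unwind stages, then push the looked-up value instead of the raw ObjectId.
{ $unwind: "$afs" },
{ $unwind: "$afs.names" },
{
    // collection name is an assumption; MultiLanguageContent.collection.name could be used instead
    $lookup: {
        from: "multilanguagecontents",
        localField: "afs.names",
        foreignField: "_id",
        as: "nameDoc"
    }
},
{ $unwind: "$nameDoc" },
{
    $group: {
        _id: "$_id",
        email: { $first: "$email" },
        afs: {
            $push: {
                value: "$afs.value",
                names: "$nameDoc.value" // flat string instead of an unresolved reference
            }
        }
    }
}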

Parent child query in mongodb

I have a collection with this data:
[
{
_id: "id1",
parentId: null,
text: "text 1"
},
{
_id: "id2",
parentId: null,
text: "text 2"
},
{
_id: "id3",
parentId: null,
text: "text 3"
},
{
_id: "id4",
parentId: "id1",
text: "text 4"
},
{
_id: "id5",
parentId: "id1",
text: "text 5"
},
{
_id: "id6",
parentId: "id2",
text: "text 6"
},
{
_id: "id7",
parentId: "id5",
text: "text 7"
}
]
I want every child to come after its parent, like this:
[
{
_id: "id1",
parentId: null,
text: "text 1"
},
{
_id: "id4",
parentId: "id1",
text: "text 4"
},
{
_id: "id5",
parentId: "id1",
text: "text 5"
},
{
_id: "id7",
parentId: "id5",
text: "text 7"
},
{
_id: "id2",
parentId: null,
text: "text 2"
},
{
_id: "id6",
parentId: "id2",
text: "text 6"
},
{
_id: "id3",
parentId: null,
text: "text 3"
}
]
but I didn't find any query for this in MongoDB. Is that possible?
I use this schema to store comments and I need to sort them like that.
For the SQL equivalent I found this video on YouTube: https://www.youtube.com/watch?v=yA-YqKBNyNc
Here is a solution using the aggregation framework:
db.test.aggregate([{
    $addFields: {
        parentIdId: {
            $concat: [{
                $ifNull: [{
                    $toString: "$parentId"
                }, ""]
            }, {
                $toString: "$_id"
            }]
        }
    }
}, {
    $sort: {
        parentIdId: 1
    }
}])
The result:
{"_id":"id1","parentId":null,"text":"text 1","parentIdId":"id1"}
{"_id":"id4","parentId":"id1","text":"text 4","parentIdId":"id1id4"}
{"_id":"id5","parentId":"id1","text":"text 5","parentIdId":"id1id5"}
{"_id":"id2","parentId":null,"text":"text 2","parentIdId":"id2"}
{"_id":"id6","parentId":"id2","text":"text 6","parentIdId":"id2id6"}
{"_id":"id3","parentId":null,"text":"text 3","parentIdId":"id3"}
{"_id":"id7","parentId":"id5","text":"text 7","parentIdId":"id5id7"}
In this solution, I create a new field in your query result that is the concatenation of parentId and _id, and then I sort on it.
If you want to remove this technical field, you can add this pipeline stage:
{$project: {parentIdId: 0}}
Your final query would be:
db.test.aggregate([{
$addFields: {
parentIdId: {
$concat: [{
$ifNull: [{
$toString: "$parentId"
}, ""]
}, {
$toString: "$_id"
}]
}
}
}, {
$sort: {
parentIdId: 1
}
}, {
$project: {
parentIdId: 0
}
}])

How could I remove duplicated items (complex objects) from an array

In each document,
the records field is an array containing many duplicated objects,
and buy_items also contains many duplicated items.
How could I clean up the duplicated items?
Original document:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
},
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
},
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
Expected Output:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 "
]
},
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
With Michael's solution, the output might look like this:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"date": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
You can remove duplicated objects using the aggregation framework
db.collection.aggregate(
[
{ $unwind: "$records" },
{ $unwind: "$records.buy_items" },
{ $group: { "_id": {id: "$_id", date: "$records.DATE" }, buy_items: { $addToSet: "$records.buy_items" }}},
{ $group: {"_id": "$_id.id", records: { $push: {"date": "$_id.date", "buy_items": "$buy_items" }}}}, { $sort: { "records.0.date": 1 }} ,
{ $out: "collection" }
]
)
The $out operator lets you write your aggregation result to the specified collection, or replace your existing collection.
Even better, using "Bulk" operations:
var bulk = db.collection.initializeOrderedBulkOp(),
count = 0;
db.collection.aggregate([
{ "$unwind": "$records" },
{ "$project": {
"date": "$records.DATE",
"buy_items": { "$setIntersection": "$records.buy_items" }
}},
{ "$unwind": "$buy_items" },
{ "$group": {
"_id": { "id": "$_id", "date": "$date" },
"buy_items": { "$addToSet": "$buy_items" }
}},
{ "$group": {
"_id": "$_id.id",
"records": { "$push": {
"date": "$_id.date",
"buy_items": "$buy_items"
}}
}}
]).forEach(function(doc) {
bulk.find({"_id": doc._id}).updateOne({
"$set": { "records": doc.records }
});
count++;
if (count % 500 == 0) {
bulk.execute();
bulk = db.collection.initializeOrderedBulkOp();
}
})
if (count % 500 != 0)
bulk.execute();
Result:
{
"_id" : "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records" : [
{
"date" : ISODate("2012-12-08T00:00:00Z"),
"buy_items" : [
" ",
"1234 ",
"5210 "
]
},
{
"date" : ISODate("1996-02-08T00:00:00Z"),
"buy_items" : [
"5210 "
]
}
]
}
If you want to update your current collection without creating a new collection and dropping the previous one, I tried this, but you will have to run two different update commands.
First, update records with distinct like this:
db.collectionName.update({},{"$set":{"records":db.collectionName.distinct('records')}})
and second, update buy_items with distinct like this:
db.collectionName.update({},{"$set":{"records.0.buy_items":db.collectionName.distinct('records.buy_items')}})
If you want to avoid the two update queries, then follow Michael's answer.
You could try using the forEach() method of the find() cursor to iterate over each document's properties, check for uniqueness, and filter distinct values as follows:
db.collection.find().forEach(function(doc){
var records = [], seen = {};
doc.records.forEach(function (item){
var uniqueBuyItems = item["buy_items"].filter(function(i, pos) {
return item["buy_items"].indexOf(i) == pos;
});
item["buy_items"] = uniqueBuyItems;
if (JSON.stringify(item["buy_items"]) !== JSON.stringify(seen["buy_items"])) {
records.push(item);
seen["buy_items"] = item["buy_items"];
}
});
doc.records = records;
db.collection.save(doc);
})