MongoDB Need to $push array in nested foreach loops - mongodb

I have 3 collections: parents, children and links with subset of data like:
Parents:
{ "_id": 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000 },
{ "_id": 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100 },
{ "_id": 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 },
{ "_id": 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000 },
{ "_id": 5, "PID" : 5, "Pname" : "George", "Sal" : 10000 }
Children:
{ "_id" : 1, "CID" : 1, "Cname" : "Ronney", "Age" : 10 },
{ "_id" : 2, "CID" : 2, "Cname" : "Mo", "Age" : 11 },
{ "_id" : 3, "CID" : 3, "Cname" : "Adam", "Age" : 13 },
{ "_id" : 4, "CID" : 4, "Cname" : "Eve", "Age" : 21 },
{ "_id" : 5, "CID" : 5, "Cname" : "Johny", "Age" : 19 },
{ "_id" : 6, "CID" : 6, "Cname" : "Sammy", "Age" : 25 },
{ "_id" : 7, "CID" : 7, "Cname" : "Sammy", "Age" : 23 }
Links:
{ "_id" : 1, "PID" : 1, "CID" : 1 },
{ "_id" : 2, "PID" : 1, "CID" : 3 },
{ "_id" : 3, "PID" : 2, "CID" : 5 },
{ "_id" : 4, "PID" : 2, "CID" : 7 },
{ "_id" : 5, "PID" : 2, "CID" : 2 },
{ "_id" : 6, "PID" : 4, "CID" : 4 },
{ "_id" : 7, "PID" : 5, "CID" : 6 }
I need to $push an array of children names into the parents collection using the links collection, which ties parent id to child id. So, for example, Parent 1 will have:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : ["Ronney", "Adam"] }
I think I can use nested forEach loops to achieve this, but I am confused about how.
Any help would be greatly appreciated!

You can forego the usage of "nested loops" with any MongoDB version supporting the $lookup aggregation pipeline operation. This will allow the "joining" of the data from multiple sources into one result set.
Given the collections "parents", "children" and "links", you want to perform two $lookup operations followed by $unwind statements to get the related data from "links" first and then join that to the "children".
Finally for convenience you can $group to get the child names into an array for each parent:
// Produce one document per parent _id holding an array of that parent's child names.
db.parents.aggregate([
// Join each parent to its rows in "links" on the shared PID field.
{ "$lookup": {
"from": "links",
"localField": "PID",
"foreignField": "PID",
"as": "children"
}},
// One document per (parent, link) pair; parents with no joined rows drop out here.
{ "$unwind": "$children" },
// Replace the joined link with the matching "children" document(s), looked up by CID.
{ "$lookup": {
"from": "children",
"localField": "children.CID",
"foreignField": "CID",
"as": "children"
}},
// One document per (parent, child) pair.
{ "$unwind": "$children" },
// Collapse back to one document per parent, collecting the child names.
{ "$group": {
"_id": "$_id",
"children": { "$push": "$children.Cname" }
}}
])
Which on your data sample gives output like:
{ "_id" : 4, "children" : [ "Eve" ] }
{ "_id" : 5, "children" : [ "Sammy" ] }
{ "_id" : 2, "children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 1, "children" : [ "Ronney", "Adam" ] }
Now you want to use that data output as a basis to loop and update the appropriate "parent" documents. Taking into consideration that the actual data can and will likely be much larger:
// Write each parent's list of child names back onto the parent document.
// Updates are queued and sent with bulkWrite() in batches of at most 500
// operations, so the number of round trips stays small on large collections.
let ops = [];
db.parents.aggregate([
  // Join each parent to its rows in "links" on the shared PID field.
  { "$lookup": {
    "from": "links",
    "localField": "PID",
    "foreignField": "PID",
    "as": "children"
  }},
  { "$unwind": "$children" },
  // Replace the joined link with the matching "children" document(s), looked up by CID.
  { "$lookup": {
    "from": "children",
    "localField": "children.CID",
    "foreignField": "CID",
    "as": "children"
  }},
  { "$unwind": "$children" },
  // One result per parent _id with the collected child names.
  { "$group": {
    "_id": "$_id",
    "children": { "$push": "$children.Cname" }
  }}
]).forEach(doc => {
  // Append in place; rebuilding the array with spread on every iteration
  // would copy the whole pending batch each time.
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": {
        "$push": { "Children": { "$each": doc.children } }
      }
    }
  });
  // Flush a full batch and start a new one.
  if ( ops.length >= 500 ) {
    db.parents.bulkWrite(ops);
    ops = [];
  }
});
// Flush whatever is left over from the last, partial batch.
if ( ops.length !== 0 ) {
  db.parents.bulkWrite(ops);
  ops = [];
}
Which then amends your data as:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : [ "Ronney", "Adam" ] }
{ "_id" : 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100, "Children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 }
{ "_id" : 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000, "Children" : [ "Eve" ] }
{ "_id" : 5, "PID" : 5, "Pname" : "George", "Sal" : 10000, "Children" : [ "Sammy" ] }
The alternative to using the $push operator would be to use $set instead to write the whole array at once. But it is generally safer to use $push or $addToSet, which accounts for the "possibility" that an array may already exist at the location, and the intent is to "add" rather than "overwrite".

Related

Simplifying graphLookup output in MongoDB

I have a collection "people" in the form:
{ "_id" : 1, "name" : "Grandma"}
{ "_id" : 2, "name" : "Mum", "parentID": "1"}
{ "_id" : 3, "name" : "Uncle", "parentID": "1"}
{ "_id" : 4, "name" : "Kid", "parentID": "2"}
{ "_id" : 5, "name" : "Sister", "parentID": "2"}
To get the ancestors of a certain person (let's say Kid), I can use a simple match and graphLookup as follows:
people.aggregate([
{$match: {_id: "3"}},
{$graphLookup:
{
from: "people",
startWith: "$parentID",
connectFromField: "parentID",
connectToField: "_id",
as: "ancestors"
}
}
])
which will return
{ "_id" : 3, "name" : "Kid", "parentID": "2", "ancestors": [
{ "_id" : 1, "name" : "Grandma"},
{ "_id" : 2, "name" : "Mum", "parentID": "1"}]
}
Where I am stuck is how to refactor this output data into a single layered array, such that:
array = [
{ "_id" : 1, "name" : "Grandma"},
{ "_id" : 2, "name" : "Mum", "parentID": "1"},
{ "_id" : 3, "name" : "Kid", "parentID": "2"}
]
(array order isn't important).
Any help would be greatly appreciated!
Just need to change startWith from parentID to _id, this will return ancestors with current document
$project to show required fields
// Return the matched person together with all of their ancestors in one array.
result = people.aggregate([
  { $match: { _id: "3" } },
  {
    $graphLookup: {
      // "from" must name the collection to traverse; here that is the same
      // "people" collection the pipeline runs on.
      from: "people",
      // Starting from the document's own _id (instead of parentID) makes the
      // matched document itself part of the "ancestors" result.
      startWith: "$_id",
      connectFromField: "parentID",
      connectToField: "_id",
      as: "ancestors"
    }
  },
  {
    // Keep only the flattened array.
    $project: {
      _id: 0,
      ancestors: 1
    }
  }
])
Playground
Access array by:
finalResult = result[0]['ancestors']

Limit the output of each bucket in `$bucketAuto` aggregation stage of MongoDB

Consider a collection user with the following documents:
{"id": 1, "name": "John", "designation": "customer"}
{"id": 2, "name": "Alison", "designation": "manager"}
{"id": 3, "name": "Sam", "designation": "customer"}
{"id": 4, "name": "George", "designation": "salesperson"}
{"id": 5, "name": "Will", "designation": "salesperson"}
{"id": 6, "name": "Daffney", "designation": "customer"}
{"id": 7, "name": "Julie", "designation": "salesperson"}
{"id": 8, "name": "Elliot", "designation": "customer"}
{"id": 9, "name": "Bruno", "designation": "customer"}
{"id": 10, "name": "Omar", "designation": "customer"}
{"id": 11, "name": "Sid", "designation": "customer"}
{"id": 12, "name": "Nelson", "designation": "manager"}
In the following operation, input documents are grouped into three buckets according to the values in the designation field:
db.users.aggregate([
{
"$bucketAuto": {
groupBy: "$designation",
buckets: 5,
output: {
"count": { $sum: 1 },
"users" : {
$push: {
"name": "$name"
},
}
}
}
}
])
Following are the results of this operation:
/* 1 */
{
"_id" : {"min" : "customer", "max" : "manager"},
"count" : 7.0,
"users" : [
{"name" : "John"},
{"name" : "Sam"},
{"name" : "Daffney"},
{"name" : "Elliot"},
{"name" : "Bruno"},
{"name" : "Omar"},
{"name" : "Sid"}
]
}
/* 2 */
{
"_id" : {"min" : "manager", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "Nelson"},
{"name" : "Alison"}
]
}
/* 3 */
{
"_id" : {"min" : "salesperson", "max" : "salesperson"},
"count" : 3.0,
"users" : [
{"name" : "George"},
{"name" : "Will"},
{"name" : "Julie"}
]
}
What I wanted to do was limit the number of results in the "users" attribute of the resulting documents to 2, something like this:
/* 1 */
{
"_id" : {"min" : "customer", "max" : "manager"},
"count" : 2.0,
"users" : [
{"name" : "John"},
{"name" : "Sam"}
]
}
/* 2 */
{
"_id" : {"min" : "manager", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "Nelson"},
{"name" : "Alison"}
]
}
/* 3 */
{
"_id" : {"min" : "salesperson", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "George"},
{"name" : "Will"}
]
}
Is there some way I can do that?
I am not sure it is possible within $bucketAuto itself, but you can use $slice to take a limited number of elements from the array and $size to get the number of elements in the array.
Add these stages after the $bucketAuto stage:
{
$addFields: {
users: { $slice: ["$users", 2] }
}
},
{
$addFields: {
count: { $size: "$users" }
}
}
Playground

What is $$ROOT in MongoDB aggregate and how it works?

I am watching a tutorial and I can understand how this aggregate works, but what is the use of pings and $$ROOT in it?
client = pymongo.MongoClient(MY_URL)
pings = client['mflix']['watching_pings']
cursor = pings.aggregate([
{
"$sample": { "size": 50000 }
},
{
"$addFields": {
"dayOfWeek": { "$dayOfWeek": "$ts" },
"hourOfDay": { "$hour": "$ts" }
}
},
{
"$group": { "_id": "$dayOfWeek", "pings": { "$push": "$$ROOT" } }
},
{
"$sort": { "_id": 1 }
}
]);
Let's assume that our collection looks like below:
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
Now based on the history field you want to group and insert the whole documents in to an array field 'items'. Here $$ROOT variable will be helpful.
So, the aggregation query to achieve the above will be:
db.collection.aggregate([{
$group: {
_id: '$history',
items: {$push: '$$ROOT'}
}
}])
It will result in following output:
{
"_id" : ISODate("2020-05-12T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-13T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-16T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
]
}
I hope it helps.

Compare two array fields in same document

I have two different arrays of data, and I need to merge the matching objects from both arrays into single objects. How can I do this with the MongoDB aggregation framework?
Here are my two arrays:
{"ids" : [
{
"_id" : ObjectId("5ba8d8dfaa988532967029af"),
"level" : 2,
"completed" : 5,
"asset" : ObjectId("5ba8caa1aa98853296702989")
},
{
"_id" : ObjectId("5ba8d8dfaa988532967029b0"),
"level" : 2,
"completed" : 3,
"asset" : ObjectId("5ba8caf6aa9885329670298a")
},
{
"_id" : ObjectId("5ba8d8dfaa988532967029b1"),
"level" : 2,
"asset" : ObjectId("5ba8cb09aa9885329670298b")
}]}
{"total" : [
{
"total" : 1,
"asset" : ObjectId("5ba8caa1aa98853296702989"),
"level" : 2
},
{
"total" : 1,
"asset" : ObjectId("5ba8caf6aa9885329670298a"),
"level" : 2
},
{
"total" : 1,
"asset" : ObjectId("5ba8cb09aa9885329670298b"),
"level" : 2
}]}
Both arrays are shown above. I want to merge only those objects whose asset and level are the same.
Expected Result:[{
"level" : 2,
"asset" : ObjectId("5ba8caa1aa98853296702989"),
"total" : 1,
"completed" : 5,
},{
"level" : 2,
"asset" : ObjectId("5ba8caf6aa9885329670298a"),
"total" : 1,
"completed" : 3,
}]
If you already have the two arrays in your application, you can do it like this. But if you need to do it in MongoDB, you should use $mergeObjects in the aggregation pipeline.
var a = [
{
"_id" : "5ba8d8dfaa988532967029af",
"level" : 2,
"completed" : 5,
"asset" : "5ba8caa1aa98853296702989"
},
{
"_id" : "5ba8d8dfaa988532967029b0",
"level" : 2,
"completed" : 3,
"asset" : "5ba8caf6aa9885329670298a"
}];
var b = [
{
"total" : 1,
"asset" : "5ba8caa1aa98853296702989",
"level" : 2
},
{
"total" : 1,
"asset" : "5ba8caf6aa9885329670298a",
"level" : 2
}];
// Collects the merged records produced by extend().
const output = [];

/**
 * Merge records from `obj` and `src` that share the same `asset` and `level`,
 * appending each merged record to the module-level `output` array.
 *
 * Records are matched by their (asset, level) pair rather than by array
 * position, so the two arrays may differ in length and ordering. Merged
 * records are emitted in `src` order; on conflicting keys the `src` value
 * wins. Records without a counterpart are skipped.
 *
 * @param {Array<Object>} obj - Records to merge into (each with `asset` and `level`).
 * @param {Array<Object>} src - Records to merge from (each with `asset` and `level`).
 * @returns {void} Results are pushed onto `output`.
 */
function extend(obj, src) {
  // Composite lookup key; the NUL separator keeps "a" + "bc" distinct from "ab" + "c".
  const keyOf = (entry) => `${entry.asset}\u0000${entry.level}`;

  // Index `obj` once so each `src` record is matched in constant time.
  const byKey = new Map();
  obj.forEach((entry) => {
    const key = keyOf(entry);
    // Keep the first record for a key if `obj` contains duplicates.
    if (!byKey.has(key)) {
      byKey.set(key, entry);
    }
  });

  src.forEach((entry) => {
    const match = byKey.get(keyOf(entry));
    if (match !== undefined) {
      output.push({ ...match, ...entry });
    }
  });
}
extend(a, b);
console.log(output)
You can try below aggregation
db.collection.aggregate([
{ "$project": {
"Result": {
"$map": {
"input": "$total",
"as": "c",
"in": {
"total": "$$c.total",
"level": "$$c.level",
"asset": "$$c.asset",
"completed": {
"$arrayElemAt": [
"$ids.completed",
{ "$indexOfArray": ["$ids.asset", "$$c.asset"] }
]
}
}
}
}
}}
])
Output
[
{
"Result": [
{
"asset": ObjectId("5ba8caa1aa98853296702989"),
"completed": 5,
"level": 2,
"total": 1
},
{
"asset": ObjectId("5ba8caf6aa9885329670298a"),
"completed": 3,
"level": 2,
"total": 1
},
{
"asset": ObjectId("5ba8cb09aa9885329670298b"),
"level": 2,
"total": 1
}
]
}
]

Group using the value from two possible fields

Let's say I have a match collection in the following format
{user1: "a", user2: "b"},
{user1: "a", user2: "c"},
{user1: "b", user2: "d"},
{user1: "b", user2: "c"},
{user1: "b", user2: "e"},
{user1: "c", user2: "f"}
I would like to know which user appears most often (either in user1 or user2). The result should be in this format, ordered by the number of occurrences.
{"user": "b", count:4},
{"user": "c", count:3},
{"user": "a", count:2},
{"user": "d", count:1},
{"user": "f", count:1},
{"user": "e", count:1}
Is there a way I can group on the value of two fields?
Something like match.aggregate({$group: {_id: {$or:["user1","user2"]}}, count:{$sum:1}})
// Count how often each user appears in either user1 or user2, most frequent first.
db.match.aggregate([
  // Put both user fields of a document into a single array.
  {$project: { user: [ "$user1", "$user2" ]}},
  // One document per array element, i.e. per user appearance.
  {$unwind: "$user"},
  // Count appearances per user.
  {$group: {_id: "$user", count: {$sum:1}}},
  // The question asks for results ordered by number of occurrences.
  {$sort: {count: -1}}
])
First stage projects each document into array of users
{user: ["a", "b"]},
{user: ["a", "c"]},
{user: ["b", "d"]},
...
Next we unwind arrays
{user:"a"},
{user:"b"},
{user:"a"},
{user:"c"},
{user:"b"},
...
And simple grouping at the end
Basically the concept is to $map onto an array and work from there:
db.collection.aggregate([
{ "$project": {
"_id": 0,
"user": { "$map": {
"input": ["A","B"],
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el", "A" ] },
"then": "$user1",
"else": "$user2"
}
}
}}
}},
{ "$unwind": "$user" },
{ "$group": {
"_id": "$user",
"count": { "$sum": 1 }
}}
])
Let us take an example and go through
db.users_data.find();
{
"_id" : 1,
"user1" : "a",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T08:52:32.434Z")
},
{
"_id" : 2,
"user1" : "a",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T09:52:32.434Z")
},
{
"_id" : 3,
"user1" : "b",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 4,
"user1" : "b",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 5,
"user1" : "a",
"user2" : "aa",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
{
"_id" : 6,
"user1" : "a",
"user2" : "ab",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
Then
// Count documents per distinct (user1, user2) pair.
db.users_data.aggregate([
  {"$group" : {_id:{user1:"$user1",user2:"$user2"}, count:{$sum:1}}}
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa" }, "count" : 2}
{ "_id" : { "user1" : "a", "user2" : "ab" }, "count" : 2}
{ "_id" : { "user1" : "b", "user2" : "aa" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab" }, "count" : 1}
Thus grouping by multiple ids is possible
Now one more variation
// Count documents per distinct (user1, user2, status) combination.
db.users_data.aggregate([
  {"$group" : {_id:{user1:"$user1",user2:"$user2",status:"$status"}, count:{$sum:1}}}
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "aa","status":"OLD" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"OLD" }, "count" : 1}