Aggregate Populate array of ids with Their Documents - mongodb

I'm struggling with some aggregation functions in MongoDB.
I want to populate the book documents into an author's document, which currently holds only the book ids as an array of strings, like this:
Author Document
{
"_id" : "10",
"full_name" : "Joi Dark",
"books" : ["100", "200", "351"],
}
And other documents (books) :
{
"_id" : "100",
"title" : "Node.js In Action",
"ISBN" : "121215151515154",
"date" : "2015-10-10"
}
So as a result I want this:
{
"_id" : "10",
"full_name" : "Joi Dark",
"books" : [
{
"_id" : "100",
"title" : "Node.js In Action",
"ISBN" : "121215151515154",
"date" : "2015-10-10"
},
{
"_id" : "200",
"title" : "Book 2",
"ISBN" : "1212151454515154",
"date" : "2015-10-20"
},
{
"_id" : "351",
"title" : "Book 3",
"ISBN" : "1212151454515154",
"date" : "2015-11-20"
}
],
}

Use $lookup which retrieves data from the nominated collection in from based on the matching of the localField to the foreignField:
// Replace each author's "books" array of ids with the matching documents.
// Assumes the book documents live in a collection named "books" - adjust
// "from" if your collection is named differently.
db.authors.aggregate([
  { "$lookup": {
    // "from" is a plain collection name, not a "$"-prefixed field path.
    "from": "books",
    "foreignField": "_id",
    "localField": "books",
    // Same name as the existing field, so the id array is overwritten.
    "as": "books"
  }}
])
The as is where in the document to write an "array" containing the related documents. If you specify an existing property ( such as is done here ) then that property is overwritten with the new array content in output.
If your MongoDB version is older than 3.4, you may need to $unwind the array of "books" as the localField first:
// Variant for servers where localField cannot be an array: unwind first so
// that each document carries a single book id, then join on it.
// Assumes the book documents live in a collection named "books".
db.authors.aggregate([
  { "$unwind": "$books" },
  { "$lookup": {
    // "from" is a plain collection name, not a "$"-prefixed field path.
    "from": "books",
    "foreignField": "_id",
    "localField": "books",
    "as": "books"
  }}
])
Which creates a new document for each array member in the original document, therefore use $unwind again and $group to create the original form:
// Unwind the id array, join each id to its book, then rebuild one document
// per author with the joined books collected back into an array.
// Assumes the book documents live in a collection named "books".
db.authors.aggregate([
  { "$unwind": "$books" },
  { "$lookup": {
    // "from" is a plain collection name, not a "$"-prefixed field path.
    "from": "books",
    "foreignField": "_id",
    "localField": "books",
    "as": "books"
  }},
  // $lookup always writes an array; flatten it to the single matched book.
  { "$unwind": "$books" },
  { "$group": {
    "_id": "$_id",
    "full_name": { "$first": "$full_name" },
    "books": { "$push": "$books" }
  }}
])
If in fact your _id values in the foreign collection are of ObjectId type, but you have values in the localField which are "string" versions of that, then you need to convert the data so the types match. There is no other way.
Run something like this through the shell to convert:
// Rewrite every author's "books" array so each entry becomes an ObjectId,
// sending the updates to the server in batches of 500 operations.
var ops = [];

db.authors.find().forEach(function (author) {
  // valueOf() is applied to each entry before it is handed to ObjectId().
  author.books = author.books.map(function (id) {
    return new ObjectId(id.valueOf());
  });

  ops.push({
    "updateOne": {
      "filter": { "_id": author._id },
      "update": { "$set": { "books": author.books } }
    }
  });

  // Keep accumulating until a full batch is ready.
  if (ops.length < 500) {
    return;
  }
  db.authors.bulkWrite(ops);
  ops = [];
});

// Flush whatever is left over from the last, partial batch.
if (ops.length) {
  db.authors.bulkWrite(ops);
  ops = [];
}
That will convert all the values in the "books" array into real ObjectId values that can actually match in a $lookup operation.

Just adding on top of the previous answer. If your input consists of an array of strings and you want to convert them to ObjectIds, you can achieve this by using a projection, followed by a map and the $toObjectId method.
// Convert the string ids in "books" to ObjectIds on the fly, then join.
// Assumes the book documents live in a collection named "books".
db.authors.aggregate([
  // NOTE: $project keeps only _id and the fields listed here; list any other
  // author fields you need (e.g. full_name: 1) alongside "books".
  { $project: {
    books: {
      $map: {
        input: '$books',
        as: 'book',
        in: { $toObjectId: '$$book' }
      }
    }
  }},
  { $lookup: {
    // "from" is a plain collection name, not a "$"-prefixed field path.
    from: "books",
    foreignField: "_id",
    localField: "books",
    as: "books"
  }}
])
Ideally, your database would be formatted in such a manner that your aggregates are stored as ObjectIds, but in the case where that is not an option, this poses as a viable solution.

Related

How to lookup through an Array in MongoDB and Project Names From a Certain Collection

I have two collections, one named Exports and one named Service. Inside the Exports collection there is an object that holds inside of it an array of servicesIds.
I want to aggregate and look up the ids listed in servicesIds of the Exports collection against the _ids of the Service collection to find the names of the services.
The structure of the each document for the two collection is as follows:
Exports:
{
"_id" : "818a2c4fc4",
"companyId" : "7feb1812d8",
"filter" : {
"servicesIds" : [
"0111138dc679d",
"0c18c499435e9",
],
},
"_created_at" : ISODate("2019-10-27T09:06:03.102+0000"),
"_updated_at" : ISODate("2019-10-27T09:06:05.099+0000"),
}
Service:
An example of one document; its _id is referenced as a foreign key in the servicesIds array inside the filter object of an Exports document:
{
"_id" : "0111138dc679d",
"name" : "Bay Services",
"character" : "B",
"company" : {
"id" : "f718a1c385",
"name" : "xxx"
},
"active" : true,
"tags" : [
],
"_created_at" : ISODate("2020-04-09T06:36:14.442+0000"),
"_updated_at" : ISODate("2020-06-06T03:52:16.770+0000"),
}
How can i do that?
Here is what I tried, but it keeps giving me an error reading
Mongo Server error '$in requires an array as a second argument, found: missing' on server
Here is my code:
db.getCollection("Exports").aggregate([
{
"$match": { "companyId":"818a2c4fc4" },
},
{
"$lookup": {
"from": "Service",
"let":{ id : "$_id" },
"pipeline": [
{
"$match":
{
"$expr":
{
"$in": ["$$id","$filter.servicesIds"]
}
}
}
],
"as":"services"
}
},
])
$unwind the array first. Alternatively, you can edit your question to add the expected result you want, and then I will correct my answer.
// For one company's exports: emit one document per service id, then attach
// the Service documents whose _id equals that id under "docs".
db.getCollection("Exports").aggregate([
  { $match: { companyId: "7feb1812d8" } },
  { $unwind: "$filter.servicesIds" },
  { $lookup: {
    from: "Service",
    localField: "filter.servicesIds",
    foreignField: "_id",
    as: "docs"
  }}
])
https://mongoplayground.net/p/l2VweVYz1Fy

How to get Child data after filtering dataset of Parents in a single mongo collection?

I have a single mongo collection called "CourseCollection" and it contains both parent and child doc. Any document with the key "Parent" is a child doc and a parent can have multiple child doc.
{
"_id" : "abracadavra",
"Name" : "abracadavra",
"Description" : "",
"Type" : "Spell",
"Parent" : {
"_id" : "Magic",
"Type" : "Course",
"Name" : "Magic"
}
},
{
"_id" : "Magic",
"Name" : "Magic",
"Type" : "Course",
"Access" : [
{
"_id" : "2sssdw5oe",
"Name" : "Abc"
},
{
"_id" : "4fddfye42",
"Name" : "Xyz"
}
]
}
What I'm trying to do is, based on the Access of Parent doc, I'm trying to get all the child doc.
Existing and working solution:
The solution that I have currently is to perform 2 queries.
Query 1. Get all the courses that the user has access to.
db.getCollection("CourseCollection").find({"Type": "Course", "Access._id": {"$in": ["2sssdw5oe"]}})
Query 2. Since I'm using Python, I do a list comprehension to get only the IDs of the course and then perform another query with this list
db.getCollection("CourseCollection").find({"Type": "Spell", "Parent._id": {"$in": course_list_id}})
Is there a way to get the child data after filtering out the parent in a single query. I also tried aggregation but only the results of the previous stage are passed to the next stage.
I guess you're trying to do something like this:
// Start from the child ("Spell") documents, self-join each one to its parent
// by Parent._id, and keep only children whose joined parent is a Course that
// grants access to the given id.
db.getCollection("CourseCollection").aggregate([
  { $match: { Type: "Spell" } },
  { $lookup: {
    from: "CourseCollection",
    localField: "Parent._id",
    foreignField: "_id",
    as: "Parents"
  }},
  { $match: {
    Parents: {
      // Both conditions must hold on the same joined parent document.
      $elemMatch: {
        Type: "Course",
        "Access._id": { $in: ["2sssdw5oe"] }
      }
    }
  }}
])
You can achieve the same result doing this too:
// Same result as filtering after the join, but the parent filter runs inside
// the $lookup sub-pipeline; children with no qualifying parent end up with an
// empty "Parents" array and are dropped by the final stage.
db.getCollection("CourseCollection").aggregate([
  { $match: { Type: "Spell" } },
  { $lookup: {
    from: "CourseCollection",
    localField: "Parent._id",
    foreignField: "_id",
    as: "Parents",
    pipeline: [
      { $match: {
        Type: "Course",
        "Access._id": { $in: ["2sssdw5oe"] }
      }}
    ]
  }},
  // "Parents.0" exists only when the array has at least one element.
  { $match: { "Parents.0": { $exists: true } } }
])

How to aggregate array of ObjectId pairs with their relevant collection

I have a course collection in which I am allotting teachers for each subject of that course. The allotment is saved as an array of JSON please take a look at the reference doc below.
{
"_id" : ObjectId("5cc7d72d8e165005cbef939e"),
"isAssigned" : true,
"name" : "11",
"section" : "A",
"allotment" : [
{
"subject" : ObjectId("5cc3f7cc88e95a0c8e8ccd7d"),
"teacher" : ObjectId("5cbee0e37a3c852868ec9797")
},
{
"subject" : ObjectId("5cc3f80e88e95a0c8e8ccd7e"),
"teacher" : ObjectId("5cbee10c7a3c852868ec9798")
}
]
}
I am trying to match the subject and teacher fields along with their docs from two different collections. I could get them in two different arrays, but I couldn't get them structured as in my expected output.
Doc in teachers collection
{
_id: ObjectId("5cbee0e37a3c852868ec9797"),
name: "Alister"
}
Doc in subject
{
_id: ObjectId("5cc3f7cc88e95a0c8e8ccd7d"),
name: "English",
code: "EN"
}
Query I tried
Course.aggregate([
{"$match": matchQuery},
{"$lookup": {
"from": "subjects",
"localField": "allotment.subject",
"foreignField": "_id",
"as": "subjectInfo"
}
},
{"$lookup": {
"from": "teachers",
"localField": "allotment.teacher",
"foreignField": "_id",
"as": "teacherInfo"}
},
])
Output of that Query
{
isAssigned: true
name: "11"
section: "A"
subjectInfo:[
{_id: "5cc3f7cc88e95a0c8e8ccd7d", name:"English", code:"EN"}
{_id: "5cc3f80e88e95a0c8e8ccd7e", name: "Science", code:"SC"}
]
teacherInfo:[
{_id: ObjectId("5cbee0e37a3c852868ec9797"),name: "Alister"},
{ _id: ObjectId("5cbee10c7a3c852868ec9798"),name: "Frank"}
]
}
Expected output
{
"_id" : ObjectId("5cc7d72d8e165005cbef939e"),
"isAssigned" : true,
"name" : "11",
"section" : "A",
"allotment" : [
{
"subject" : {
_id: ObjectId("5cc3f7cc88e95a0c8e8ccd7d"),
name: "English",
code: "EN"
}
"teacher" : {
_id: ObjectId("5cbee0e37a3c852868ec9797"),
name: "Alister"
}
},
{
"subject" : {
_id: ObjectId("5cc3f80e88e95a0c8e8ccd7e"),
name: "Science",
code: "SC"
}
"teacher" : {
_id: ObjectId("5cbee10c7a3c852868ec9798"),
name: "Frank"
}
}
]
}
Just unwind the array before the lookups:
// Unwind "allotment" so each document carries one subject/teacher pair, then
// join that pair's subject and teacher documents.
Course.aggregate([
  {"$match": matchQuery},
  {"$unwind": "$allotment"},
  {"$lookup": {
    "from": "subjects",
    "localField": "allotment.subject",
    "foreignField": "_id",
    "as": "subjectInfo"
  }},
  {"$lookup": {
    "from": "teachers",
    "localField": "allotment.teacher",
    "foreignField": "_id",
    "as": "teacherInfo"
  }}
])
if you want to re-group after that to restore expected format you can add:
{ $group : {
_id: "$_id",
name: {$first: "$name"},
section: {$first: "$section},
isAssigned: {$first: "$isAssigned},
allotment: {$push: {teacher: "$teacherInfo.0", subject: "$subjectInfo.0"}}
I'm assuming teacherInfo and subjectInfo are never empty, if this is not the case you should add a $match to filter empty ones.
Take a look at $lookup aggregation stage which lets you join collections. There's a plenty of examples on the usage in the documentation.
EDIT: Here's the complete pipeline that should provide the expected result:
// Full pipeline: unwind "allotment", replace the subject and teacher ids with
// their joined documents, then regroup into one document per course.
courses.aggregate(
  [
    { "$unwind": { "path": "$allotment" } },
    {
      "$lookup": {
        "from": "subjects",
        "localField": "allotment.subject",
        "foreignField": "_id",
        "as": "allotment.subject"
      }
    },
    {
      "$lookup": {
        "from": "teachers",
        "localField": "allotment.teacher",
        "foreignField": "_id",
        "as": "allotment.teacher"
      }
    },
    {
      // $lookup writes arrays; keep only the first (single) match of each.
      "$addFields": {
        "allotment.subject": { "$arrayElemAt": ["$allotment.subject", 0] },
        "allotment.teacher": { "$arrayElemAt": ["$allotment.teacher", 0] }
      }
    },
    {
      "$group": {
        "_id": "$_id",
        "isAssigned": { "$first": "$isAssigned" },
        "name": { "$first": "$name" },
        "section": { "$first": "$section" },
        // $push keeps every pair; $addToSet would silently drop duplicate
        // pairs and leaves the element order unspecified.
        "allotment": { "$push": "$allotment" }
      }
    }
  ]
)
First you have to $unwind the allotment array and apply $lookup for subject, then repeat the same for teachers, and finally apply $group to combine the results back into an array. See the aggregate query below, which I have tried and which is working for me.
// Unwind "allotment", join subject and teacher in place (flattening each
// joined array with $unwind), then regroup into one document per course.
Course.aggregate([
  { "$match": matchQuery },
  { "$unwind": "$allotment" },
  {
    "$lookup": {
      "from": "subjects",
      "localField": "allotment.subject",
      "foreignField": "_id",
      "as": "allotment.subject"
    }
  },
  // Note: this $unwind drops pairs whose subject lookup found nothing.
  { "$unwind": "$allotment.subject" },
  {
    "$lookup": {
      "from": "teachers",
      "localField": "allotment.teacher",
      "foreignField": "_id",
      "as": "allotment.teacher"
    }
  },
  // Note: this $unwind drops pairs whose teacher lookup found nothing.
  { "$unwind": "$allotment.teacher" },
  {
    "$group": {
      "_id": "$_id",
      "isAssigned": { "$first": "$isAssigned" },
      "name": { "$first": "$name" },
      "section": { "$first": "$section" },
      // $push keeps every pair; $addToSet would silently drop duplicate
      // pairs and leaves the element order unspecified.
      "allotment": { "$push": "$allotment" }
    }
  }
])

Parent child reversal in the result

I am using MongoDB 3.4.
Have 2 collection as follows.
Collection 1:- type
{
"_id": {
"$numberLong": "1234"
},
"name" : "board1",
"type" : "electronic"
},
{
"_id": {
"$numberLong": "1235"
},
"name" : "board2",
"type" : "electronic"
}
Collection 2:- products
{
"_id": {
"$numberLong": "9876"
},
"types" : [
"1234",
"1235",
"1238"
]
},
{
"_id": {
"$numberLong": "9875"
},
"types" : [
"1234",
"1238"
]
}
Type collection will have multiple types and each product in products collection will have multiple types.
There can be multiple document with different ids for the same type in type collection. And, product collection might have types array with different Ids of same type or different type.
I would like to get all the ids of type electronic and find the products which has id in the types array for each product.
I want result like the below one.
{
"_id": {
"$numberLong": "1234"
},
"name" : "board1",
"products" : [
"9876",
"9875"
]
},
{
"_id": {
"$numberLong": "1235"
},
"name" : "board2",
"products" : [
"9876",
"9875"
]
}
Currently, I am making so many calls, like for each type id, get all products.
Is there any other simple way with single query using $lookup or any other mechanism?
You can try below aggregation in mongodb 3.6 and above
// For every "electronic" type, collect the _ids of the products whose
// "types" array contains that type's _id (MongoDB 3.6+ pipeline form).
db.types.aggregate([
  { "$match": { "type" : "electronic" }},
  { "$lookup": {
    // The question's second collection is "products".
    "from": "products",
    "let": { "typeId": "$_id" },
    "pipeline": [
      { "$match": { "$expr": { "$in": ["$$typeId", "$types"] }}}
    ],
    "as": "products"
  }},
  // Reduce the joined product documents to just their _id values.
  { "$addFields": {
    "products": "$products._id"
  }}
])
You can try the below aggregation in mongodb 3.4
// MongoDB 3.4 form: join each "electronic" type to the products whose "types"
// array contains the type's _id, then keep only the product _ids.
db.types.aggregate([
  { "$match": { "type" : "electronic" }},
  { "$lookup": {
    // The question's second collection is "products".
    "from": "products",
    "localField": "_id",
    "foreignField": "types",
    "as": "products"
  }},
  { "$addFields": {
    "products": "$products._id"
  }}
])
In MongoDB 3.4 you can use $lookup and then $project with $map to get _id from products:
// Join each "electronic" type to the products referencing it, then project
// the joined documents down to an array of their _id values.
db.types.aggregate([
  { $match: { type: "electronic" } },
  { $lookup: {
    from: "products",
    localField: "_id",
    foreignField: "types",
    as: "products"
  }},
  { $project: {
    // field1 / field2 stand in for whichever type fields should be kept.
    field1: 1,
    field2: 1,
    products: {
      $map: { input: "$products", as: "p", in: "$$p._id" }
    }
  }}
])

$geoWithin with Foreign Collection on $lookup

I have two collections, Members and MobileUserLocations. Each user's locations (there can be several) are saved in MobileUserLocations, with userId as the foreign field.
Members:
{
_id: ObjectId("591553ffa4233a181506880c"),
userName: "Test user"
}
MobileUserLocations:
{ _id: ObjectId("59156070a4233a1815068b6b"),
userId: ObjectId("591553ffa4233a181506880c"),
location: {type: "Point", coordinates: [76.9121, 10.2232]},
updatedOn: 2017-05-12T07:12:48.626Z,
status: 1
},
{ _id: ObjectId("59156070a4233a1815068b6b"),
userId: ObjectId("591553ffa4233a181506880c"),
location: {type: "Point", coordinates: [76.8121, 10.1232]},
updatedOn: 2017-05-12T07:12:48.626Z,
status: 1
}
I want to get the Members who are within a radius - say 5km with reference to a particular geo point - say: [10.0132295, 76.3630502] (lat,lng format).
I tried this:
collection.aggregate([
{$match: {_id: { $ne: options.accessToken.userId }},
{ "$lookup": {
"localField": "_id",
"from": "MobileUserLocations",
"foreignField": "userId",
"as": "userLocInfo"
}
},
{
$project: {
_id: 1,
userLocInfo: {
"$filter": {
"input": "$userLocInfo",
"as": "userLoc",
"cond": {
"$eq": [ "$$userLoc.status", -1],
"$$userLoc.location": {"$geoWithin": {"$centerSphere": [[76.3630502, 10.0132295], 5 / 3963.2]}}
}
}
}
}
},
{$unwind: "$userLocInfo"}
]
But I am not getting any results. If I remove the $geoWithin from the filter cond, I get results; otherwise I do not. However, if I query the collections individually, I get the result.
Does anyone know what the issue is?
That does not work because $geoWithin is not a "logical operator", but it's a "query operator" and can only be used in an aggregation pipeline using $match. Fortunately for you, that is really what you want. Though you don't yet see why:
// Join members to their locations, flatten the join, and filter the joined
// locations with ordinary query operators in a $match stage.
collection.aggregate([
  { $match: { _id: { $ne: options.accessToken.userId } } },
  { $lookup: {
    from: "MobileUserLocations",
    localField: "_id",
    foreignField: "userId",
    as: "userLocInfo"
  }},
  { $unwind: "$userLocInfo" },
  { $match: {
    "userLocInfo.status": -1,
    // NOTE(review): compared against a string literal - confirm the stored
    // type of updatedOn matches.
    "userLocInfo.updatedOn": "2017-05-12T12:11:04.183Z",
    "userLocInfo.location": {
      $geoWithin: {
        $centerSphere: [[76.3630502, 10.0132295], 5 / 3963.2]
      }
    }
  }}
])
There's a really good reason for that aside from it's the only way it works. To understand, look at the "explain" output:
{
"$lookup" : {
"from" : "MobileUserLocations",
"as" : "userLocInfo",
"localField" : "_id",
"foreignField" : "userId",
"unwinding" : {
"preserveNullAndEmptyArrays" : false
},
"matching" : {
"$and" : [
{
"status" : {
"$eq" : -1.0
}
},
{
"updatedOn" : {
"$eq" : "2017-05-12T12:11:04.183Z"
}
},
{
"location" : {
"$geoWithin" : {
"$centerSphere" : [
[
76.3630502,
10.0132295
],
0.00126160678239806
]
}
}
}
]
}
}
}
What that shows you is that both the $unwind and following $match get absorbed into the $lookup stage itself. This means that the $geoWithin and other conditions are actually executed on the foreign collection "before" the results are returned.
This is how $lookup deals with resulting joins that can possibly breach the 16MB limit. It's also the most efficient way you can presently "filter" results of the join.
So that's what you really want to do here instead.
Based on the data in your question, this statement:
// Join members to their locations and keep only the joined locations that
// fall inside the given sphere.
db.members.aggregate([
  { $lookup: {
    from: "MobileUserLocations",
    localField: "_id",
    foreignField: "userId",
    as: "userLocInfo"
  }},
  { $unwind: "$userLocInfo" },
  { $match: {
    "userLocInfo.location": {
      $geoWithin: {
        $centerSphere: [[76.9121, 10.2232], 5 / 3963.2]
      }
    }
  }}
])
Filters out the one location in $lookup that matches the constraint:
/* 1 */
{
"_id" : ObjectId("591553ffa4233a181506880c"),
"userName" : "Test user",
"userLocInfo" : {
"_id" : ObjectId("59c3c37359f55d64d6e30297"),
"userId" : ObjectId("591553ffa4233a181506880c"),
"location" : {
"type" : "Point",
"coordinates" : [
76.9121,
10.2232
]
},
"updatedOn" : ISODate("2017-05-12T07:12:48.626Z"),
"status" : 1.0
}
}