I'm new to MongoDB and I'm trying to aggregate the complete details of students by resolving their references to other collections.
students collection structure:
{
"_id" : ObjectId("5cc973dd008221192148177a"),
"name" : "James Paulson",
"teachers" : [
ObjectId("5cc973dd008221192148176f"),
ObjectId("5cc973dd0082211921481771")
],
"attenders": [
ObjectId("5cc973dd0082211921481732"),
ObjectId("5cc973dd008221192148173f")
]
}
staff collection structure:
{
"_id" : ObjectId("5cc973dd008221192148176f"),
"name" : "John Paul",
"subject" : [
"english",
"maths"
]
}
{
"_id" : ObjectId("5cc973dd0082211921481771"),
"name" : "Pattrick",
"subject" : [
"physics",
"history"
]
}
{
"_id" : ObjectId("5cc973dd0082211921481732"),
"name" : "Roger",
"subject" : [
"sweeper"
]
}
{
"_id" : ObjectId("5cc973dd008221192148173f"),
"name" : "Ken",
"subject" : [
"dentist"
]
}
This is the query I used for the retrieval of all teacher details of a particular student.
Query:
// Asker's attempt: emit one document per teacher id, then join that id against `staff`.
db.getCollection("students").aggregate([
  { "$unwind": "$teachers" },
  {
    "$lookup": {
      "from": "staff",
      "localField": "teachers",
      "foreignField": "_id",
      "as": "teachers"
    }
  }
]);
Result:
{
"_id" : ObjectId("5cc973dd008221192148177a"),
"name" : "James Paulson",
"teachers" : [
{
"_id" : ObjectId("5cc973dd008221192148176f"),
"name" : "John Paul",
"subject" : [
"english",
"maths"
]
},
{
"_id" : ObjectId("5cc973dd0082211921481771"),
"name" : "Pattrick",
"subject" : [
"physics",
"history"
]
}
],
"attenders": [
ObjectId("5cc973dd0082211921481732"),
ObjectId("5cc973dd008221192148173f")
]
}
As you can see, the attenders array is similar to teachers; the only difference is the field name in the students collection. How can I apply a similar query to the second field (attenders)? Also, is there any way to select only specific fields from the second collection (for example, only _id and name from the staff collection)?
Any help on this would be greatly appreciated.
You can use below aggregation with mongodb 3.6 and above
First, you don't need $unwind here, because the field already contains an array of ObjectIds. To select specific fields from the referenced collection, use the $lookup form that takes a pipeline and $project the fields inside it.
// Resolve both reference arrays of each student against the `staff` collection.
// No $unwind is needed: `teachers` and `attenders` already hold arrays of ObjectIds.
db.getCollection('students').aggregate([
  // Replace the teacher ids with the matching staff documents.
  { "$lookup": {
    "from": "staff",
    "let": { "teachers": "$teachers" },
    "pipeline": [
      { "$match": { "$expr": { "$in": [ "$_id", "$$teachers" ] } } },
      // Keep only `name`; `_id` is included by default.
      { "$project": { "name": 1 } }
    ],
    "as": "teachers"
  }},
  // Attenders are stored in `staff` as well, so they are looked up in the same collection.
  { "$lookup": {
    "from": "staff",
    "let": { "attenders": "$attenders" },
    "pipeline": [
      { "$match": { "$expr": { "$in": [ "$_id", "$$attenders" ] } } }
    ],
    "as": "attenders"
  }}
])
Related
I have a collection like below :
`{
"topics" : [
{
"id" : "2",
"name" : "Test1",
"owner" : [
"123"
]
},
{
"id" : "3",
"name" : "Test2",
"owner" : [
"123",
"456"
]
}
]
}`
Since this data is in a single document and I want only the elements that match on owner, I am using the query below (with $filter in an aggregation), but I am getting 0 matching elements.
Query :
Thanks in advance...!!
// Asker's attempt: keep only the topics whose owner appears in the given list.
db.getCollection("topics").aggregate([
  { "$match": { "topics.owner": { "$in": ["123", "456"] } } },
  {
    "$project": {
      "topics": {
        "$filter": {
          "input": "$topics",
          "as": "topic",
          // `$$topic.owner` is itself an array, so $in tests that whole array as one element.
          "cond": { "$in": ["$$topic.owner", ["123", "456"]] }
        }
      },
      "_id": 0
    }
  }
])
This query should produce below output :
{
"topics" : [
{
"id" : "2",
"name" : "Test1",
"owner" : ["123"]
},
{
"id" : "3",
"name" : "Test2",
"owner" : ["123","456"]
}
]
}
As topic.owner is an array, you can't use $in directly: it checks whether the whole owner array is itself an element of the input array, not whether the two arrays share a value.
Instead, you should do as below:
$filter - Filter the document in the topics array.
1.1. $gt - Check whether the result of 1.1.1 is greater than 0.
1.1.1. $size - Get the size of the array returned by 1.1.1.1.
1.1.1.1. $setIntersection - Intersect the topic.owner array with the input array.
{
"$project": {
"topics": {
"$filter": {
"input": "$topics",
"as": "topic",
"cond": {
$gt: [
{
$size: {
$setIntersection: [
"$$topic.owner",
[
"123",
"456"
]
]
}
},
0
]
}
}
},
"_id": 0
}
}
Demo # Mongo Playground
// Alternative without $filter: flatten the topics, flag the ones owned by "123" or "456",
// keep the flagged ones and rebuild the array per document.
db.getCollection('topics').aggregate([
  {"$unwind":"$topics"},
  {"$addFields":{
    // true when either wanted owner id is present in this topic's owner array
    "rest":{"$or":[{"$in":["123","$topics.owner"]},{"$in":["456","$topics.owner"]}]}
  }},
  {"$match":{
    "rest":true
  }},
  {"$group":{
    "_id":"$_id",
    "topics":{"$push":"$topics"}
  }}
])
I need to fast count related documents.
So, I have four collections
groups
{ "_id" : "g1", "name" : "group1" }
{ "_id" : "g2", "name" : "group2" }
courses
{ "_id" : "c1", "name" : "course1", "group_id" : "g1" }
{ "_id" : "c2", "name" : "course2", "group_id" : "g2" }
topics
{ "_id" : "t1", "name" : "top1c11", "course_id" : "c1" }
{ "_id" : "t2", "name" : "top1c12", "course_id" : "c1" }
{ "_id" : "t3", "name" : "top1c21", "course_id" : "c2" }
lessons
{ "_id" : "l1", "name" : "lesson111", "topic_id" : "t1" }
{ "_id" : "l2", "name" : "lesson112", "topic_id" : "t1" }
{ "_id" : "l3", "name" : "lesson121", "topic_id" : "t2" }
{ "_id" : "l4", "name" : "lesson211", "topic_id" : "t3" }
I need count all lessons of the specific group.
I tried to run the following aggregation, but it took so long that I gave up waiting for a response. (It does work for a small amount of data.)
// Count the lessons that belong to group "g1" by walking lesson -> topic -> course.
// Note: the pipeline starts from `lessons`, so every lesson is joined before the group filter applies.
db.getCollection('lessons').aggregate([
  {
    "$lookup": {
      "from": "topics",
      "let": { "topicId": "$topic_id" },
      "pipeline": [
        {
          "$match": { "$expr": { "$eq": [ "$_id", "$$topicId" ] } }
        },
        {
          "$lookup": {
            "from": "courses",
            // A topic references its course through `course_id` (topics have no `topic_id`).
            "let": { "courseId": "$course_id" },
            "pipeline": [
              // Courses are identified by `_id`; comparing two missing fields would match every course.
              { "$match": { "$expr": { "$eq": [ "$_id", "$$courseId" ] } } }
            ],
            "as": "course"
          }
        },
        {
          "$unwind": "$course"
        }
      ],
      "as": "topic"
    }
  },
  {
    "$unwind": "$topic"
  },
  {
    "$match": {
      "topic.course.group_id": "g1"
    }
  },
  {
    "$group": {
      // Group on the field that actually exists: the group id reached through topic.course.
      "_id": "$topic.course.group_id",
      "amount": { "$sum": 1 }
    }
  }
])
I believe this aggregation can be optimized, but I'm not sure whether the aggregation framework is a good approach for this purpose. If it is, how can I optimize the aggregation?
Size of collections (test data):
courses: 30000
topics: 200000
lessons: 30000000
Now I use simple nested loops in my code to count lessons. This takes 10 seconds (for 3000 topics of the certain group).
Query1
not nested lookups (lookup and unwind)
match the group
lookup and unwind 3x, last lookup only counts the lessons, and uses pipeline lookup
group by group _id, to get the total lessons
Indexes that you need (all the foreignField)
courses.group_id
topics.course_id
lessons.topic_id
Test code here
// Query1: total lessons of group "g1" using flat (non-nested) $lookup + $unwind stages.
groups.aggregate([
  { "$match": { "_id": "g1" } },

  // group -> courses, one document per course
  {
    "$lookup": {
      "from": "courses",
      "localField": "_id",
      "foreignField": "group_id",
      "as": "courses"
    }
  },
  { "$unwind": "$courses" },

  // course -> topics, one document per topic
  {
    "$lookup": {
      "from": "topics",
      "localField": "courses._id",
      "foreignField": "course_id",
      "as": "topics"
    }
  },
  { "$unwind": "$topics" },

  // topic -> lessons: the sub-pipeline collapses the joined lessons into a single count document
  {
    "$lookup": {
      "from": "lessons",
      "localField": "topics._id",
      "foreignField": "topic_id",
      "pipeline": [
        { "$group": { "_id": null, "lessons": { "$sum": 1 } } },
        { "$set": { "id": "$_id", "_id": "$$REMOVE" } }
      ],
      "as": "lessons"
    }
  },

  // Turn the one-element (or empty) array into a plain number.
  {
    "$set": {
      "lessons": {
        "$cond": {
          "if": { "$eq": ["$lessons", []] },
          "then": 0,
          "else": { "$arrayElemAt": ["$lessons.lessons", 0] }
        }
      }
    }
  },

  // Add the per-topic counts back up per group.
  { "$group": { "_id": "$_id", "totalLessons": { "$sum": "$lessons" } } }
])
Query2
nested lookups (without unwind)
code is the same, just nested
Test code here
// Query2: total lessons of group "g1" using nested $lookup pipelines (no $unwind).
groups.aggregate([
  { "$match": { "_id": "g1" } },
  {
    "$lookup": {
      "from": "courses",
      "localField": "_id",
      "foreignField": "group_id",
      "pipeline": [
        {
          "$lookup": {
            "from": "topics",
            "localField": "_id",
            "foreignField": "course_id",
            "pipeline": [
              {
                // topic -> lessons, collapsed into a single count document
                "$lookup": {
                  "from": "lessons",
                  "localField": "_id",
                  "foreignField": "topic_id",
                  "pipeline": [
                    { "$group": { "_id": null, "lessons": { "$sum": 1 } } },
                    { "$set": { "id": "$_id", "_id": "$$REMOVE" } }
                  ],
                  "as": "lessons"
                }
              },
              {
                // Turn the one-element (or empty) array into a plain number per topic.
                "$set": {
                  "lessons": {
                    "$cond": [
                      { "$eq": ["$lessons", []] },
                      0,
                      { "$arrayElemAt": ["$lessons.lessons", 0] }
                    ]
                  }
                }
              }
            ],
            "as": "topics"
          }
        },
        // One output document per course, holding that course's lesson total.
        { "$project": { "_id": 0, "totalLessons": { "$sum": "$topics.lessons" } } }
      ],
      "as": "courses"
    }
  },
  {
    "$set": {
      "courses": "$$REMOVE",
      // Sum over ALL courses of the group; taking only element 0 would count a single course.
      // $sum over an empty array yields 0, so no separate empty-array branch is needed.
      "totalLessons": { "$sum": "$courses.totalLessons" }
    }
  }
])
If you can send some feedback on which one was faster.
If it is very fast for one group, you could remove the $match to compute it for all groups, or relax the $match to let several groups through.
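Solution from Takis's comment: Query1, adapted for MongoDB 4.2 (where $lookup cannot combine localField/foreignField with a pipeline, so the join condition moves into a $match with $expr).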
// Query1 for MongoDB 4.2: same flat pipeline, but the lessons join is expressed with let + $expr.
groups.aggregate([
  { "$match": { "_id": "g1" } },

  // group -> courses, one document per course
  {
    "$lookup": {
      "from": "courses",
      "localField": "_id",
      "foreignField": "group_id",
      "as": "courses"
    }
  },
  { "$unwind": "$courses" },

  // course -> topics, one document per topic
  {
    "$lookup": {
      "from": "topics",
      "localField": "courses._id",
      "foreignField": "course_id",
      "as": "topics"
    }
  },
  { "$unwind": "$topics" },

  // topic -> lessons: match on the bound topic id, then collapse into a single count document
  {
    "$lookup": {
      "from": "lessons",
      "let": { "ptopic": "$topics._id" },
      "pipeline": [
        { "$match": { "$expr": { "$eq": ["$$ptopic", "$topic_id"] } } },
        { "$group": { "_id": null, "lessons": { "$sum": 1 } } },
        { "$set": { "id": "$_id", "_id": "$$REMOVE" } }
      ],
      "as": "lessons"
    }
  },

  // Turn the one-element (or empty) array into a plain number.
  {
    "$set": {
      "lessons": {
        "$cond": {
          "if": { "$eq": ["$lessons", []] },
          "then": 0,
          "else": { "$arrayElemAt": ["$lessons.lessons", 0] }
        }
      }
    }
  },

  // Add the per-topic counts back up per group.
  { "$group": { "_id": "$_id", "totalLessons": { "$sum": "$lessons" } } }
])
I have 2 collections student_details and subject_details where each student can have multiple subjects which I am storing in student_details collection as reference array.
Now I need to fetch Student details along with the filtered subjects where subject_details.status=ACTIVE.
How can I achieve this using $elemMatch for $ref objects.
I was using something like below but it is not returning any records.
// Asker's attempt: match students that have a subject reference into subject_details with status ACTIVE.
db.getCollection("student_details").find({
  "subjects": {
    "$elemMatch": { "$ref": "subject_details", "status": "ACTIVE" }
  }
})
student_details
================
{
"_id" : "STD-1",
"name" : "XYZ",
"subjects" : [
{
"$ref" : "subject_details",
"$id" : "SUB-1"
},
{
"$ref" : "subject_details",
"$id" : "SUB-2"
},
{
"$ref" : "subject_details",
"$id" : "SUB-3"
}
]
}
subject_details
===============
{
"_id" : "SUB-1",
"name" : "MATHEMATICS",
"status" : "ACTIVE"
}
{
"_id" : "SUB-2",
"name" : "PHYSICS",
"status" : "ACTIVE"
}
{
"_id" : "SUB-3",
"name" : "CHEMISTRY",
"status" : "INACTIVE"
}
DBRefs are troublesome when used in lookups, but you can work around that with the following aggregation pipeline:
// Join each student's DBRef subjects against subject_details and keep only the ACTIVE ones.
db.student_details.aggregate([
  // One document per subject reference.
  { "$unwind": "$subjects" },

  // $objectToArray turns the reference into key/value pairs; element 1 is taken as the
  // foreign key (the `$id` entry, given the `$ref`-then-`$id` order shown in the sample data).
  {
    "$set": {
      "fk": { "$arrayElemAt": [{ "$objectToArray": "$subjects" }, 1] }
    }
  },

  {
    "$lookup": {
      "from": "subject_details",
      "localField": "fk.v",
      "foreignField": "_id",
      "as": "subject"
    }
  },

  { "$match": { "subject.status": "ACTIVE" } },

  // Rebuild one document per student with the resolved subjects.
  {
    "$group": {
      "_id": "$_id",
      "name": { "$first": "$name" },
      "subjects": { "$push": { "$arrayElemAt": ["$subject", 0] } }
    }
  }
])
the resulting object would be like so:
{
"_id": "STD-1",
"name": "XYZ",
"subjects": [
{
"_id": "SUB-1",
"name": "MATHEMATICS",
"status": "ACTIVE"
},
{
"_id": "SUB-2",
"name": "PHYSICS",
"status": "ACTIVE"
}
]
}
Because they are in two collections, you need $lookup to bring them together. Before that, I believe you need to $unwind the subjects array. This is untested code, so treat it as general advice rather than a full answer: the aggregation pipeline lets you do this in stages.
I am assuming you abbreviated the documents for the post. If subject_details really has just three fields, your schema would be better served, in the NoSQL world, by embedding that information in student_details and using one collection rather than a normalized relational approach.
I am struggling with the newish (lovely) lookup operator in MongoDB. I have 3 collections:
artists
{
"_id" : ObjectId("5b0d2b2c7ac4792df69a9942"),
"name" : "Dream Theater",
"started_in" : NumberInt(1985),
"active" : true,
"country" : "US",
"current_members" : [
ObjectId("5b0d2a7c7ac4792df69a9941")
],
"previous_members" : [
ObjectId("5b0d2bf57ac4792df69a9954")
],
"albums" : [
ObjectId("5b0d16ee7ac4792df69a9924"),
ObjectId("5b0d47667ac4792df69a9994")
],
"genres" : [
"prog metal",
"prog rock"
]
}
Albums
{
"_id" : ObjectId("5b0d16ee7ac4792df69a9924"),
"title" : "Images and words",
"released" : ISODate("1992-07-07T00:00:00.000+0000"),
"songs" : [
ObjectId("5b0d15ab7ac4792df69a9916"),
ObjectId("5b0d15ee7ac4792df69a991e"),
ObjectId("5b0d2db37ac4792df69a995d"),
ObjectId("5b0d2dbe7ac4792df69a995e"),
ObjectId("5b0d2dcb7ac4792df69a995f"),
ObjectId("5b0d2dd87ac4792df69a9960"),
ObjectId("5b0d2de27ac4792df69a9961"),
ObjectId("5b0d2dec7ac4792df69a9962")
],
"type" : "LP"
}
{
"title" : "Awake",
"released" : ISODate("1994-10-04T00:00:00.000+0000"),
"songs" : [
ObjectId("5b0d470d7ac4792df69a9991")
],
"type" : "LP",
"_id" : ObjectId("5b0d47667ac4792df69a9994")
}
Songs
{
"_id" : ObjectId("5b0d15ab7ac4792df69a9916"),
"title" : "Pull me under"
}
{
"_id" : ObjectId("5b0d15ee7ac4792df69a991e"),
"title" : "Another day"
}
{
"title" : "Take the time",
"_id" : ObjectId("5b0d2db37ac4792df69a995d")
}
{
"title" : "Surrounded",
"_id" : ObjectId("5b0d2dbe7ac4792df69a995e")
}
{
"title" : "Metropolis - part I",
"_id" : ObjectId("5b0d2dcb7ac4792df69a995f")
}
{
"title" : "Under a glass moon",
"_id" : ObjectId("5b0d2dd87ac4792df69a9960")
}
{
"title" : "Wait for sleep",
"_id" : ObjectId("5b0d2de27ac4792df69a9961")
}
{
"title" : "Learning to live",
"_id" : ObjectId("5b0d2dec7ac4792df69a9962")
}
{
"title" : "6:00",
"_id" : ObjectId("5b0d470d7ac4792df69a9991")
}
I can easily do an aggregation with $lookup to get the detailed albums array, but how do I get also the detailed songs in the corresponding albums?
I would like to extend the following query:
// Asker's starting point: replace the album ids of each artist with the album documents.
db.artists.aggregate([
  {
    "$lookup": {
      "from": "albums",
      "localField": "albums",
      "foreignField": "_id",
      "as": "albums"
    }
  }
]).pretty()
If you have mongodb version 3.6 then you can try with nested $lookup aggregation...
// Nested $lookup: resolve the albums of each artist and, inside each album, its songs.
db.collection.aggregate([
  {
    "$lookup": {
      "from": Albums.collection.name,
      "let": { "albums": "$albums" },
      "pipeline": [
        // Albums whose _id is listed in the artist's `albums` array.
        { "$match": { "$expr": { "$in": ["$_id", "$$albums"] } } },
        {
          "$lookup": {
            "from": Songs.collection.name,
            "let": { "songs": "$songs" },
            "pipeline": [
              // Songs whose _id is listed in the album's `songs` array.
              { "$match": { "$expr": { "$in": ["$_id", "$$songs"] } } }
            ],
            "as": "songs"
          }
        }
      ],
      "as": "albums"
    }
  }
])
And for long-winded explanation you can go through $lookup multiple levels without $unwind?
Or If you have mongodb version prior to 3.6
// Variant without pipeline-style $lookup: join albums, flatten, join songs, then regroup per artist.
db.collection.aggregate([
  {
    "$lookup": {
      "from": Albums.collection.name,
      "localField": "albums",
      "foreignField": "_id",
      "as": "albums"
    }
  },

  // One document per album so the songs can be joined per album.
  { "$unwind": "$albums" },

  {
    "$lookup": {
      "from": Songs.collection.name,
      "localField": "albums.songs",
      "foreignField": "_id",
      "as": "albums.songs"
    }
  },

  // Reassemble the artist document, pushing each album (with its resolved songs) back into an array.
  {
    "$group": {
      "_id": "$_id",
      "name": { "$first": "$name" },
      "started_in": { "$first": "$started_in" },
      "active": { "$first": "$active" },
      "country": { "$first": "$country" },
      "albums": {
        "$push": {
          "_id": "$albums._id",
          "title": "$albums.title",
          "released": "$albums.released",
          "type": "$albums.type",
          "songs": "$albums.songs"
        }
      }
    }
  }
])
I have a document like this in a collection called diagnoses:
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : ObjectId("58345e0e996d340bd8126149"),
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : ObjectId("5836bc0b291918eb42966320"),
"instructions" : "Take 1 daily, after meals."
}
]
}
The drug id inside the prescription object array is from a separate collection called drugs, see sample document below :
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
}
I am trying to create a mongodb query using the native node.js driver to get a result like this:
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" :
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
...
]
}
Any advice on how to approach a similar result like this is much appreciated, thanks.
Using MongoDB 3.6 and newer (the $mergeObjects operator used below was introduced in 3.6)
With the aggregation framework, the $lookup operator supports an array as the localField, so the prescription array does not need to be unwound first:
// Embed the full drug document into every prescription entry of each diagnosis.
db.diagnoses.aggregate([
  // Treat a missing/null prescription as an empty list so $map below always receives an array.
  { "$addFields": {
    "prescription": { "$ifNull" : [ "$prescription", [ ] ] }
  } },
  // Fetch every referenced drug at once into the temporary `drugs` array.
  { "$lookup": {
    "from": "drugs",
    "localField": "prescription.drug",
    "foreignField": "_id",
    "as": "drugs"
  } },
  { "$addFields": {
    "prescription": {
      "$map": {
        "input": "$prescription",
        "in": {
          "$mergeObjects": [
            "$$this",
            { "drug": {
              "$let": {
                // Position of this entry's drug id among the looked-up drugs, or -1 when absent.
                "vars": {
                  "idx": { "$indexOfArray": [ "$drugs._id", "$$this.drug" ] }
                },
                "in": {
                  // Guard the -1 case: $arrayElemAt with -1 would return the LAST drug,
                  // silently attaching the wrong document. Keep the original id instead.
                  "$cond": [
                    { "$gte": [ "$$idx", 0 ] },
                    { "$arrayElemAt": [ "$drugs", "$$idx" ] },
                    "$$this.drug"
                  ]
                }
              }
            } }
          ]
        }
      }
    }
  } },
  // Drop the temporary lookup array.
  { "$project": { "drugs": 0 } }
])
For older MongoDB versions:
You can create a pipeline that first flattens the prescription array using the $unwind operator, followed by a $lookup pipeline step that does a "left outer join" on the "drugs" collection. Apply another $unwind operation on the array created in the "joined" field. Finally, $group the previously flattened documents, since the first $unwind operator outputs one document for each element in the prescription array.
Assembling the above pipeline, run the following aggregate operation:
// Variant built on $unwind/$group: flatten the prescriptions, join each drug, then regroup.
db.diagnoses.aggregate([
  // Carry the needed fields forward and default a missing/null prescription to an empty array.
  {
    "$project": {
      "patientid": 1,
      "doctorid": 1,
      "medicalcondition": 1,
      "diagnosis": 1,
      "addmissiondate": 1,
      "dischargedate": 1,
      "bhtno": 1,
      "prescription": { "$ifNull": ["$prescription", []] }
    }
  },

  // One document per prescription entry.
  { "$unwind": { "path": "$prescription", "preserveNullAndEmptyArrays": true } },

  // Replace the drug id with an array holding the matching drug document.
  {
    "$lookup": {
      "from": "drugs",
      "localField": "prescription.drug",
      "foreignField": "_id",
      "as": "prescription.drug"
    }
  },

  // Unwrap the joined array into a single embedded document.
  // NOTE(review): this $unwind does not preserve empty arrays, so documents without a
  // matching drug are dropped here - confirm that is intended.
  { "$unwind": "$prescription.drug" },

  // Reassemble one document per diagnosis.
  {
    "$group": {
      "_id": "$_id",
      "patientid": { "$first": "$patientid" },
      "doctorid": { "$first": "$doctorid" },
      "medicalcondition": { "$first": "$medicalcondition" },
      "diagnosis": { "$first": "$diagnosis" },
      "addmissiondate": { "$first": "$addmissiondate" },
      "dischargedate": { "$first": "$dischargedate" },
      "bhtno": { "$first": "$bhtno" },
      "prescription": { "$push": "$prescription" }
    }
  }
])
Sample Output
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : {
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : {
"_id" : ObjectId("5836bc0b291918eb42966320"),
"genericname" : "Paracetamol Tab 100mg",
"type" : "Y",
"isbrand" : false
},
"instructions" : "Take 1 daily, after meals."
}
]
}
In MongoDB 3.6 or later versions
It seems that
$lookup will overwrite the original array instead of merging it.
A working solution (a workaround, if you prefer) is to create a different field,
and then merge two fields, as shown below:
// Look the drugs up into a separate field, then build `merged_drug_info`:
// a copy of `prescription` in which each `drug` id is replaced by the drug document.
db.diagnoses.aggregate([
  { "$lookup": {
    "from": "drugs",
    "localField": "prescription.drug",
    "foreignField": "_id",
    "as": "prescription_drug_info"
  } },
  { "$addFields": {
    "merged_drug_info": {
      "$map": {
        "input": "$prescription",
        "in": {
          "$mergeObjects": [
            "$$this",
            { "drug": {
              "$let": {
                // Locate this entry's drug by id; prescription entries carry the id in `drug`,
                // and the index passed to $arrayElemAt must be an integer position.
                "vars": {
                  "idx": {
                    "$indexOfArray": [ "$prescription_drug_info._id", "$$this.drug" ]
                  }
                },
                "in": {
                  // -1 means "not found"; $arrayElemAt would then return the last drug,
                  // so fall back to the original id instead.
                  "$cond": [
                    { "$gte": [ "$$idx", 0 ] },
                    { "$arrayElemAt": [ "$prescription_drug_info", "$$idx" ] },
                    "$$this.drug"
                  ]
                }
              }
            } }
          ]
        }
      }
    }
  } }
])
This would add two more fields and the name of the desired field
will be merged_drug_info. We can then add $project stage to filter
out excessive fields and $set stage to rename the field:
...
{ "$set": { "prescription": "$merged_drug_info" } },
{ "$project": { "prescription_drug_info": 0, "merged_drug_info": 0 } }
...