How do I use mongo aggregation to lookup data in a subdocument? - mongodb

I want to $lookup data from a subdocument in another collection. I have survey answers, and I want to group them by the question category's name.
The survey documents look like this:
{
_id: new ObjectId("62555be60401f0a21553da9a"),
name: 'new survey',
questions: [
{
text: 'question 1',
category_id: new ObjectId("62555be60401f0a21553da99"),
options: [Array],
_id: new ObjectId("62555be60401f0a21553da9c"),
},
...
}
Category collection is just name and _id:
{
_id: new ObjectId("62555be60401f0a21553da99"),
name: "category name"
}
I have answer data like this:
[
{
answers: {
k: '62555be60401f0a21553da9c',
v: new ObjectId("62555880da8fb89651f6a292")
},
},
{
answers: {
k: '62555880da8fb89651f6a29b',
v: new ObjectId("62555880da8fb89651f6a29e")
},
}
...
]
k is a string that matches to the _id in the survey.questions array.
I'd like to get the resulting data like this:
[
{
answers: {
k: 'question 1',
v: new ObjectId("62555880da8fb89651f6a292")
},
category: 'category name'
},
{
answers: {
k: 'question 2',
v: new ObjectId("62555880da8fb89651f6a29e")
},
category: 'other category name'
}
...
]
any help would be greatly appreciated!
I think I could probably figure out the category part, but I cannot figure out how to use $lookup to get info from a subdocument. From the docs, I'm guessing it's maybe some pipeline within a lookup. Pretty stumped though.

You can do something like this, using a pipeline to match only surveys that have questions._id that matches the answer.k value
// For every answer document, resolve the answered question inside its survey,
// then resolve that question's category, and emit the shape requested in the
// question: { answers: { k: <question text>, v: <answer value> }, category: <category name> }.
db.answer.aggregate([
  {
    // answers.k stores the question's _id as a string, so it is converted to an
    // ObjectId once and reused inside the sub-pipeline.
    $lookup: {
      from: "survey",
      let: { questionId: { $toObjectId: "$answers.k" } },
      pipeline: [
        // Keep only surveys that contain the answered question.
        { $match: { $expr: { $in: ["$$questionId", "$questions._id"] } } },
        // Reduce each matching survey to just the answered question, so the
        // outer pipeline does not have to carry the whole survey around.
        {
          $project: {
            _id: 0,
            question: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: "$questions",
                    as: "item",
                    cond: { $eq: ["$$item._id", "$$questionId"] }
                  }
                },
                0
              ]
            }
          }
        }
      ],
      as: "details"
    }
  },
  {
    // First (normally only) matched question; the field stays absent when no
    // survey matched, so unmatched answers still flow through.
    $addFields: { question: { $arrayElemAt: ["$details.question", 0] } }
  },
  {
    $lookup: {
      from: "Category",
      localField: "question.category_id",
      foreignField: "_id",
      as: "cat"
    }
  },
  {
    $project: {
      _id: 0,
      answers: {
        // Replace the question id with its text; fall back to the raw id when
        // the question could not be resolved.
        k: { $ifNull: ["$question.text", "$answers.k"] },
        v: "$answers.v"
      },
      // Emitted as `category` (not `name`) to match the requested output.
      category: { $arrayElemAt: ["$cat.name", 0] }
    }
  }
])
As you can see on the playground
Maybe it is possible to filter the results during the $lookup in order to simplify the rest of the query

Related

MongoDB: Optimal joining of one to many relationship

Here is a hypothetical case of orders and products.
'products' collection
[
{
"_id": "61c53eb76eb2dc65de621bd0",
"name": "Product 1",
"price": 80
},
{
"_id": "61c53efca0a306c3f1160754",
"name": "Product 2",
"price": 10
},
... // truncated
]
'orders' collection:
[
{
"_id": "61c53fb7dca0579de038cea8", // order id
"products": [
{
"_id": "61c53eb76eb2dc65de621bd0", // references products._id
"quantity": 1
},
{
"_id": "61c53efca0a306c3f1160754",
"quantity": 2
},
]
}
]
As you can see, an order owns a list of product ids. When I pull an order's details I also need the product details combined like so:
{
_id: ObjectId("61c53fb7dca0579de038cea8"),
products: [
{
_id: ObjectId("61c53eb76eb2dc65de621bd0"),
quantity: 1,
name: 'Product 1',
price: 80
},
{
_id: ObjectId("61c53efca0a306c3f1160754"),
quantity: 2,
name: 'Product 2',
price: 10
},
... // truncated
]
}
Here is the aggregation pipeline I came up with:
db.orders.aggregate([
{
$match: {_id: ObjectId('61c53fb7dca0579de038cea8')}
},
{
$unwind: {
path: "$products"
}
},
{
$lookup: {
from: 'products',
localField: 'products._id',
foreignField: '_id',
as: 'productDetail'
}
},
{
$unwind: {
path: "$productDetail"
}
},
{
$group: {
_id: "$_id",
products: {
$push: {$mergeObjects: ["$products", "$productDetail"]}
}
}
}
])
Given how the data is organized, I doubt that the pipeline stages are optimal and suspect it could be done better (for example, by reducing the number of stages). Any suggestions?
As already mentioned in comments the design is poor. You can avoid multiple $unwind and $group, usually the performance should be better with this:
// Fetch one order and enrich each entry of its products array with the
// matching document from the products collection, without $unwind/$group.
db.orders.aggregate([
// NOTE(review): _id is matched as a plain string here, whereas the question's
// own pipeline uses ObjectId(...) — confirm the stored type.
{ $match: { _id: "61c53fb7dca0579de038cea8" } },
{
// localField points into the products array; the joined product documents
// are collected in productDetail.
$lookup: {
from: "products",
localField: "products._id",
foreignField: "_id",
as: "productDetail"
}
},
{
$project: {
products: {
// Walk the order's products and merge each one with its looked-up detail.
$map: {
input: "$products",
as: "product",
in: {
$mergeObjects: [
"$$product",
{
// First productDetail entry whose _id equals this product's _id.
// NOTE(review): $first as an array expression needs a recent server
// version (4.4+ per the MongoDB docs) — confirm.
$first: {
$filter: {
input: "$productDetail",
cond: { $eq: [ "$$this._id", "$$product._id" ] }
}
}
}
]
}
}
}
}
}
])
Mongo Playground

aggregation lookup and match a nested array

Hello i am trying to join two collections...
#COLLECTION 1
const valuesSchema= new Schema({
value: { type: String },
})
const categoriesSchema = new Schema({
name: { type: String },
values: [valuesSchema]
})
mongoose.model('categories', categoriesSchema )
#COLLECTION 2
const productsSchema = new Schema({
name: { type: String },
description: { type: String },
categories: [{
type: mongoose.Schema.Types.ObjectId,
ref: 'categories',
}]
})
mongoose.model('productos', productsSchema )
Now, what I intend to do is join these collections and get an output like this.
#Example Product Document
{
name: 'My laptop',
description: 'Very ugly laptop',
categories: ['5f55949054f3f31db0491b5c','5f55949054f3f31db0491b5b'] // these are _id of valuesSchema
}
#Expected Output
{
name: 'My laptop',
description: 'Very ugly laptop',
categories: [{value: 'Laptop'}, {value: 'PC'}]
}
This is what i tried.
{
$lookup: {
from: "categories",
let: { "categories": "$categories" },
as: "categories",
pipeline: [
{
$match: {
$expr: {
$in: [ '$values._id','$$categories']
},
}
},
]
}
}
but this query is not matching... Any help please?
You can try,
$lookup with categories
$unwind deconstruct values array
$match categories id with value id
$project to show required field
// Replace each product's categories array (ids of nested values) with the
// matching { value } documents taken from categories.values.
db.products.aggregate([
{
$lookup: {
from: "categories",
// Expose the product's categories array to the sub-pipeline as $$cat.
let: { cat: "$categories" },
// The result overwrites the original categories field.
as: "categories",
pipeline: [
// One document per nested value, so values._id is a scalar for $in below.
{ $unwind: "$values" },
{ $match: { $expr: { $in: ["$values._id", "$$cat"] } } },
{
// Keep only the value string of each matched entry.
$project: {
_id: 0,
value: "$values.value"
}
}
]
}
}
])
Playground
Since you are already using the uncorrelated-subquery (pipeline) form of $lookup, you can achieve this easily: use $unwind to flatten the values array, then $match on it. To rebuild the array we use $group, and $reduce then walks over each group's array and concatenates the values into a single list.
// Pipeline (stage array only): look up the category values referenced by a
// product, regroup them per category, then flatten them into one array.
[
{
$lookup: {
from: "categories",
let: {
"categories": "$categories"
},
as: "categories",
pipeline: [
{
// One document per nested value so values._id can be tested with $in.
$unwind: "$values"
},
{
$match: {
$expr: {
$in: [
"$values._id",
"$$categories"
]
},
}
},
{
// Rebuild one document per category holding only its matched values.
$group: {
_id: "$_id",
values: {
$addToSet: "$values"
}
}
}
]
}
},
{
$project: {
categories: {
// Concatenate the values arrays of all matched categories into one list.
$reduce: {
input: "$categories",
initialValue: [],
in: {
$concatArrays: [
"$$this.values",
"$$value"
]
}
}
}
}
}
]
Working Mongo template

MongoDB aggregate lookup with nested array

I have a complicated structure I am trying to "join".
The best way to describe it is that I have "Favorite Teams" stored with a user, as an array of name/IDs - however they are stored in a nested object. I want to return the users Favorite Teams Players WITH the team.
Here are the data models
PLAYERS
{
_id:
team_id:
name:
position:
}
TEAMS
{
_id:
name:
}
USER
{
_id:
name:
favs: {
mascots: [{
_id:
name:
}],
teams: [{
_id:
name:
}],
}
}
I have an array of Team IDs from the user.favs.teams - and what I want back is the players with their team name.
This is the current aggregation I am using - it is returning the players but not the teams...I am pretty sure I need to unwind, or similar.
players.aggregate([
{
$match: {
team_id: {
$in: [--array of team ID's--]
}
}
},
{
$lookup: {
from: 'teams',
localField: 'team_id',
foreignField: '_id',
as: 'players_team'
}
},
{
$project: {
_id: 1,
name: 1,
position: 1,
'players_team[0].name': 1
}
}
])
What I am getting back...
_id: 5c1b37b6fd15241940b11111
name:"Bob"
position:"Test"
team_id:5c1b37b6fd15241940b441dd
player_team:[
_id:5c1b37b6fd15241940b441dd
name:"Team A"
...other fields...
]
What I WANT to get back...
_id: 5c1b37b6fd15241940b11111
name:"Bob"
position:"Test"
team_id:5c1b37b6fd15241940b441dd
player_team: "Team A"
Use Below $lookup (Aggregation)
// Return each player with its team's name flattened into player_team.
db.players.aggregate([
{
$lookup: {
from: "teams",
let: { teamId: "$team_id" },
pipeline: [
{
// Team whose _id equals the player's team_id.
$match: { $expr: { $eq: [ "$_id", "$$teamId" ] } }
},
{
// Drop the team's _id from the joined document.
$project: { _id: 0 }
}
],
as: "players_team"
}
},
{
// Rebuild the root document from the selected player fields plus
// player_team, the name of the first joined team.
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
"_id": "$_id",
"name": "$name",
"position": "$position",
"team_id": "$team_id"
},
{
player_team: { $arrayElemAt: [ "$players_team.name", 0 ] }
}
]
}
}
}
])
Note that this requires MongoDB 3.6 or later, because the let/pipeline form of $lookup used here was introduced in MongoDB 3.6.

$lookup when foreignField is in nested array

I have two collections :
Student
{
_id: ObjectId("657..."),
name:'abc'
},
{
_id: ObjectId("593..."),
name:'xyz'
}
Library
{
_id: ObjectId("987..."),
book_name:'book1',
issued_to: [
{
student: ObjectId("657...")
},
{
student: ObjectId("658...")
}
]
},
{
_id: ObjectId("898..."),
book_name:'book2',
issued_to: [
{
student: ObjectId("593...")
},
{
student: ObjectId("594...")
}
]
}
I want to make a Join to Student collection that exists in issued_to array of object field in Library collection.
I would like to make a query to student collection to get the student data as well as in library collection, that will check in issued_to array if the student exists or not if exists then get the library document otherwise not.
I have tried the $lookup of MongoDB 3.6, but I didn't succeed.
db.student.aggregate([{$match:{_id: ObjectId("593...")}}, $lookup: {from: 'library', let: {stu_id:'$_id'}, pipeline:[$match:{$expr: {$and:[{"$hotlist.clientEngagement": "$$stu_id"]}}]}])
But it throws an error; please help me with this. I also looked at other questions asked on Stack Overflow, like question on stackoverflow,
question2 on stackoverflow, but these are comparing simple fields, not arrays of objects. Please help me.
I am not sure I understand your question entirely but this should help you:
// Fetch one student and attach, as result, the library documents whose
// issued_to.student equals the student's _id.
db.student.aggregate([{
$match: { _id: ObjectId("657...") }
}, {
$lookup: {
from: 'library',
localField: '_id' ,
// foreignField is a path into the issued_to array of sub-documents.
foreignField: 'issued_to.student',
as: 'result'
}
}])
If you want to only get the all book_names for each student you can do this:
// Fetch one student and attach, as result, only the book_name of every
// library document issued to that student.
db.student.aggregate([{
$match: { _id: ObjectId("657657657657657657657657") }
}, {
$lookup: {
from: 'library',
// Expose the student's _id to the sub-pipeline as $$stu_id.
let: { 'stu_id': '$_id' },
pipeline: [{
$unwind: '$issued_to' // $expr cannot digest arrays so we need to unwind which hurts performance...
}, {
$match: { $expr: { $eq: [ '$issued_to.student', '$$stu_id' ] } }
}, {
$project: { _id: 0, "book_name": 1 } // only include the book_name field
}],
as: 'result'
}
}])
This might not be a very good answer, but if you can change your schema of Library to:
{
_id: ObjectId("987..."),
book_name:'book1',
issued_to: [
ObjectId("657..."),
ObjectId("658...")
]
},
{
_id: ObjectId("898..."),
book_name:'book2',
issued_to: [
ObjectId("593..."),
ObjectId("594...")
]
}
Then when you do:
// Single pipeline stage (fragment): join the ids stored in issued_to against
// student._id and store the matched students in issued_to_students.
{
$lookup: {
from: 'student',
localField: 'issued_to',
foreignField: '_id',
as: 'issued_to_students', // this creates a new field without overwriting your original 'issued_to'
}
},
You should get, based on your example above:
{
_id: ObjectId("987..."),
book_name:'book1',
issued_to_students: [
{ _id: ObjectId("657..."), name: 'abc', ... },
{ _id: ObjectId("658..."), name: <name of this _id>, ... }
]
},
{
_id: ObjectId("898..."),
book_name:'book2',
issued_to_students: [
{ _id: ObjectId("593..."), name: 'xyz', ... },
{ _id: ObjectId("594..."), name: <name of this _id>, ... }
]
}
You need to $unwind the issued_to from library collection to match the issued_to.student with _id
// Fetch one student and attach, as issued_to, one library document per
// issued_to entry that references that student.
// `id`, `mongoose` and the `Library` model are defined outside this snippet.
db.student.aggregate([
{ "$match": { "_id": mongoose.Types.ObjectId(id) } },
{ "$lookup": {
// Collection name is taken from the Library model rather than hard-coded.
"from": Library.collection.name,
"let": { "studentId": "$_id" },
"pipeline": [
// One document per issued_to entry so issued_to.student is a scalar.
{ "$unwind": "$issued_to" },
{ "$match": { "$expr": { "$eq": [ "$issued_to.student", "$$studentId" ] } } }
],
"as": "issued_to"
}}
])

How to resolve the many-to-many relation keeping the order of ID array in mongoDB

I have two collections posts and tags on mongoDB.
There is a many-to-many relationship between these collections.
A post can belong to some tags, and a tag can contain some posts.
I am looking for an efficient query method to join posts to tags keeping the order of postIds.
If the data schema is inappropriate, I can change it.
The mongoDB version is 3.6.5
Sample data
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001' },
{ _id: 'post002', title: 'this is post002' },
{ _id: 'post003', title: 'this is post003' }
])
db.tags.insertMany([
{ _id: 'tag001', postIds: ['post003', 'post001', 'post002'] }
])
Desired result
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post003", "title": "this is post003" },
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" }
]
}
What I tried
I tried a query which use $lookup.
db.tags.aggregate([
{ $lookup: {
from: 'posts',
localField: 'postIds',
foreignField: '_id',
as: 'posts'
}}
])
However, I got a result which is different from what I want.
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" },
{ "_id": "post003", "title": "this is post003" }
]
}
In MongoDB you would attempt to model your data such that you avoid joins (as in $lookups) altogether, e.g. by storing the tags alongside the posts.
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001', tags: [ "tag001", "tag002" ] },
{ _id: 'post002', title: 'this is post002', tags: [ "tag001" ] },
{ _id: 'post003', title: 'this is post003', tags: [ "tag002" ] }
])
With this structure in place you could get the desired result like this:
// Build one document per tag from posts that store their tags inline:
// { _id: <tag>, postIds: [<post _id>, ...], posts: [<post document>, ...] }.
db.posts.aggregate([
  // One document per (post, tag) pair.
  { $unwind: "$tags" },
  {
    // Regroup by tag, collecting both the post ids and the full post documents.
    // NOTE(review): the order inside each group follows the order in which
    // documents reach $group — add a $sort beforehand if a specific order is
    // required.
    $group: {
      _id: "$tags",
      // Named postIds (not postsIds) to match the desired result.
      postIds: { $push: "$_id" },
      posts: { $push: "$$ROOT" }
    }
  }
])
In this case, I would doubt that you even need the postIds field in the result as it would be contained in the posts array anyway.
You can use a combination of $map and $filter to re-order elements in the posts array in a projection stage:
// Join tags with their posts, then rebuild the posts array so that its
// order follows the order of postIds.
db.tags.aggregate([
{ $lookup: {
from: 'posts',
localField: 'postIds',
foreignField: '_id',
as: 'posts'
} },
{ $project: {
_id: 1,
postIds: 1,
// For each id in postIds, pick the first looked-up post with that _id.
posts: { $map: {
input: "$postIds",
as: "postId",
in: {
$arrayElemAt: [ { $filter: {
input: "$posts",
as: "post",
cond: { $eq: ["$$post._id", "$$postId"] }
} }, 0 ]
}
} }
} }
])
The missing posts will be filled with null to keep index consistent with postIds.