Lookup for a referenced document in an array of embedded documents - mongodb

I got two collections.
One contains an array of objects. These objects own a field with an id to a document in another collection.
The goal is to "replace" the ref by the document. Sounds simple but I have no clue how to achieve this.
E.G.
Collection "Product"
{ "_id": 1,
"alias": "ProductA"
},
{ "_id": 2,
"alias": "ProductC"
}
Collection "Order"
{ "_id": 5765,
"cart": [
{
"product": 1,
"qty": 7
}, {
"product": 2,
"qty": 6
}
]
}
What I need by a query is this:
{ "_id": 5765,
"cart": [
{
"product": {
"_id": 1,
"alias": "ProductA"
},
"qty": 7
}, {
"product": {
"_id": 2,
"alias": "ProductC"
},
"qty": 6
}
]
}
I tried a simple lookup, but the array then contains only the products. What do I need to change?
{
$lookup: {
from: "products",
let: {
tmp: "$cart.product"
},
pipeline: [
{
$match: {
$expr: {
$in: ["$_id", "$$tmp"]
}
}
}
],
as: "cart.product"
}
}
Thanks for your help.

I added a new $addFields stage to transform the output from the $lookup stage - it gets the desired output:
// Replace each cart entry's product id with the full product document.
db.order.aggregate([
  // Stage 1: load every product referenced by the cart into a helper array "products".
  {
    $lookup: {
      from: "product",
      let: { productIds: "$cart.product" },
      pipeline: [
        { $match: { $expr: { $in: ["$_id", "$$productIds"] } } }
      ],
      as: "products"
    }
  },
  // Stage 2: rebuild "cart" entry by entry, picking the looked-up product whose _id
  // equals the entry's product reference. The helper array "products" is not removed
  // by this pipeline and stays on the output document.
  {
    $addFields: {
      cart: {
        $map: {
          input: "$cart",
          as: "entry",
          in: {
            product: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: "$products",
                    as: "candidate",
                    cond: { $eq: ["$$entry.product", "$$candidate._id"] }
                  }
                },
                0
              ]
            },
            qty: "$$entry.qty"
          }
        }
      }
    }
  }
]).pretty()

Related

$lookup as into array does not insert into each object of the array

I have source collection like:
[
{
"_id": "0xeAAB59269bD1bA8522E8e5E0FE510F7aa4d47A09",
"id": "0xeAAB59269bD1bA8522E8e5E0FE510F7aa4d47A09",
"gameItemsWithQty": [
{
"gameItem": { "_id": 1 },
"qty": 1000
},
{
"gameItem": { "_id": 2 },
"qty": 1000
},
{
"gameItem": { "_id": 3 },
"qty": 1000
},
{
"gameItem": { "_id": 4 },
"qty": 1000
}
]
},
]
and game items collection like:
[
{
"_id": 1,
"id": 1,
"name": "Stamina Refill"
},
{
"_id": 2,
"id": 2,
"name": "Large Stamina Refill"
},
{
"_id": 3,
"id": 3,
"name": "XL Large Stamina Refill"
},
{
"_id": 4,
"id": 4,
"name": "Large Stamina Refill"
}
]
When performing lookup from first onto second collection:
{
from: 'game-items',
localField: 'gameItemsWithQty.gameItem._id',
foreignField: '_id',
as: 'gameItemsWithQty.gameItem'
}
It nests gameItem array into gameItemsWithQty field.
"_id": ...,
"id": ...,
"gameItemsWithQty": {
"gameItem": {
0: ...,
1: ...,
2: ...,
3: ...
}
}
I need it to nest results into each respective gameItem object inside of each object in gameItemsWithQty.
"_id": ...,
"id": ...,
"gameItemsWithQty": {
0: {
gameItem: ...
qty: ...
},
1: {
gameItem: ...
qty: ...
},
...
}
How do I correct this aggregation pipeline to achieve that?
$lookup - Lookup and return the result as gameItems array.
$set - Set gameItemsWithQty field.
2.1. $map - Iterate the element in gameItemsWithQty array and returns new array.
2.1.1. $mergeObjects - Merge current iterate document with the result 2.1.2.
2.1.2. Document with gameItem field. Merge the result of 2.1.2.1 with the gameItem field of current iterate document.
2.1.2.1. Get the first document (via $first) from the filtered array by matching the _ids.
// Embed the looked-up game item into each element of gameItemsWithQty.
db.source.aggregate([
  // 1. Fetch all referenced game items into a temporary "gameItems" array.
  {
    $lookup: {
      from: "game-items",
      localField: "gameItemsWithQty.gameItem._id",
      foreignField: "_id",
      as: "gameItems"
    }
  },
  // 2. Rewrite every array element: keep its own fields and replace "gameItem" with
  //    the matching looked-up document merged with the element's original gameItem.
  {
    $set: {
      gameItemsWithQty: {
        $map: {
          input: "$gameItemsWithQty",
          as: "entry",
          in: {
            $mergeObjects: [
              "$$entry",
              {
                gameItem: {
                  $mergeObjects: [
                    {
                      $first: {
                        $filter: {
                          input: "$gameItems",
                          as: "candidate",
                          cond: { $eq: ["$$entry.gameItem._id", "$$candidate._id"] }
                        }
                      }
                    },
                    "$$entry.gameItem"
                  ]
                }
              }
            ]
          }
        }
      }
    }
  },
  // 3. Drop the temporary lookup result.
  { $unset: "gameItems" }
])
Demo # Mongo Playground

Mongo query for lookup array of keys which is itself an item in a nested array

My first collection is as below, I am searching the document with the email and match the particular jobid inside the jobs array. Then insert the document of second collection by matching _id with jobs.Process.profile_id.
{
"_id": {
"$oid": "6229d3cfdbfc81a8777e4821"
},
"jobs": [
{
"job_ID": {
"$oid": "62289ded8079821eb24760e0"
},
"Process": [
{
"profile_id": {
"$oid": "6285e571681188e83d434797"
}
},
{
"profile_id": {
"$oid": "6285e571681188e83d434799"
}
}
],
},
{
"job_ID": {
"$oid": "6228a252fb4554dd5c48202a"
},
"Process": [
{
"profile_id": {
"$oid": "62861067dc9771331e61df5b"
}
}
],
},
{
"job_ID": {
"$oid": "622af1c391b290d34701af9f"
},
"Process": [
""
],
}
],
"email": "********#gmail.com"
}
and my second collection is, I need to insert this document in my first collection by matching with jobs.Process.profile_id.
{
"_id": {
"$oid": "6285e571681188e83d434797"
},
"Name": "Lakshdwanan",
"Location":"California"
}
I have tried with query,
aggregate([
{ $match: { email: email } },
{
$lookup: {
from: 'user__profiles',
localField: 'jobs.Process.profile_id',
foreignField: '_id',
as: 'jobings',
},
},
{
$addFields: {
jobings: {
$map: {
input: {
$filter: {
input: '$jobs',
as: 'm',
cond: {
$eq: ['$$m.job_ID', objInstance],
},
},
},
as: 'm',
in: {
$mergeObjects: [
{
$arrayElemAt: [
{
$filter: {
input: '$jobings',
cond: {
$eq: ['$$this._id', '$$m.Process.profile_id'],
},
},
},
0,
],
},
'$$m',
],
},
},
},
},
},
{
$project: {
jobings: 1,
_id: 0,
},
},
]);
My output should only display second collection document based on the first collection document matching.
EDIT: If you want the data for a specific job only, it is better to $filter the jobs before the $lookup step. After the $lookup, just $unwind and format:
// Return the user__profiles documents referenced by one specific job of one user.
// `email` and `objInstance` are supplied by the calling code.
db.firstCol.aggregate([
  // Select the user's document.
  { $match: { email: email } },
  // Keep only the job whose job_ID equals objInstance, so the lookup below
  // only sees that job's profile ids.
  {
    $project: {
      _id: 0,
      jobs: {
        $filter: {
          input: "$jobs",
          as: "job",
          cond: { $eq: ["$$job.job_ID", objInstance] }
        }
      }
    }
  },
  // Join the remaining profile ids against user__profiles.
  {
    $lookup: {
      from: "user__profiles",
      localField: "jobs.Process.profile_id",
      foreignField: "_id",
      as: "jobings"
    }
  },
  // Emit each joined profile as its own top-level document.
  { $project: { _id: 0, profile: "$jobings" } },
  { $unwind: "$profile" },
  { $replaceRoot: { newRoot: "$profile" } }
])
Playground
The jobs.Process.profile_id is the user__profiles _id, so no need to merge anything...The results are documents from user__profiles collection "as is" but they can be formatted as wanted..._id key name can be renamed profile_id easily.

MongoDB $lookup on nested document, limit and count the retrieved data

I would like to get a count of all notifications that haven't been read by a User ("A", "B", "C", etc.) for each subRoom. Taking into account that there could be millions of notification documents and hundreds of subRoom elements in the Rooms collection, I need to limit it. For that reason I've limited the $lookup to the first 100 elements and then check whether those notifications have been read or not by a User. I did it using documents (roomId) in $lookup, but I can't do it using subdocuments (subRoom.id).
Notifications Collection is indexed using a Compound of (roomId: 1, timestamp: -1)
Notifications Collection: (id corresponds to notification id and roomId is the link to Rooms collection)
[{
"_id": "XXX",
"id": "1",
"read": ["A", "B", "C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},{
"_id": "XXX",
"id": "2",
"read": ["C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},
...
]
Rooms Collection:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"image": "XXX",
"name": "XXX"
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"image": "XXX",
"name": "XXX"
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"name": "XXX"
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Code working using roomId instead subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{ $project: {_id: 0, id: 1, notRead: {
$size: {
$filter: {
input: "$notifications",
cond: {
$not: {
$in: [
"A",
"$$this.read"
]
}
}
}
}
},
}
Code NOT WORKING using subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$subRoom.id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{
$addFields: {
items: {
$map: {
input: { $zip: { inputs: ["$subRoom", "$messages"] } },
in: { $mergeObjects: "$$this" },
},
},
},
},
.
. projection
.
Expected Result:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"notRead": 50 //e.g
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"notRead": 35 //e.g
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"notRead": 5 //e.g
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Finally, and very importantly, I want a scalable solution that can be done with big data.
Thank you very much in advance.
$unwind deconstruct subRoom array with preserve null and empty array property
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$addFields to get count to notifications using $sum
$group by _id and reconstruct the subRoom array with required fields in result
// Count, per subRoom, the notifications that user "A" has not read ($unwind variant).
db.rooms.aggregate([
  // One document per subRoom; rooms with a null or empty subRoom array are kept.
  {
    $unwind: {
      path: "$subRoom",
      preserveNullAndEmptyArrays: true
    }
  },
  // Count this subRoom's notifications whose "read" array does not contain "A".
  // The collection name must match the real collection ("notifications").
  {
    $lookup: {
      from: "notifications",
      let: { id: "$subRoom.id" },
      pipeline: [
        {
          $match: {
            $and: [
              { $expr: { $eq: ["$$id", "$roomId"] } },
              { read: { $ne: "A" } }
            ]
          }
        },
        // Collapse the matches into at most one { count } document.
        {
          $group: {
            _id: null,
            count: { $sum: 1 }
          }
        }
      ],
      as: "subRoom.notRead"
    }
  },
  // Turn the zero-or-one element lookup result into a plain number.
  {
    $addFields: {
      "subRoom.notRead": { $sum: "$subRoom.notRead.count" }
    }
  },
  // Reassemble one document per room with its subRoom array.
  {
    $group: {
      _id: "$_id",
      name: { $first: "$name" },
      id: { $first: "$id" },
      subRoom: { $push: "$subRoom" }
    }
  }
])
Playground
Second option without using $unwind stage,
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$map to iterate loop of subRoom array
$filter to iterate loop of return result from lookup notifications count and get current subRoom document
$let to declare a variable n and assign above filtered result to it and return $sum from count
$mergeObjects to merge current object of subRoom and new field notRead
// Count, per subRoom, the notifications that user "A" has not read (no $unwind).
db.rooms.aggregate([
  // One lookup per room: collect unread counts grouped by roomId for all of the
  // room's subRoom ids. The collection name must match the real collection
  // ("notifications").
  {
    $lookup: {
      from: "notifications",
      let: { id: "$subRoom.id" },
      pipeline: [
        {
          $match: {
            $and: [
              { $expr: { $in: ["$roomId", "$$id"] } },
              { read: { $ne: "A" } }
            ]
          }
        },
        {
          $group: {
            _id: "$roomId",
            count: { $sum: 1 }
          }
        }
      ],
      as: "notRead"
    }
  },
  // Attach to every subRoom the count whose _id equals the subRoom's id.
  {
    $project: {
      id: 1,
      name: 1,
      subRoom: {
        $map: {
          input: "$subRoom",
          as: "s",
          in: {
            $mergeObjects: [
              "$$s",
              {
                notRead: {
                  $let: {
                    vars: {
                      // Count documents belonging to the current subRoom.
                      n: {
                        $filter: {
                          input: "$notRead",
                          cond: { $eq: ["$$this._id", "$$s.id"] }
                        }
                      }
                    },
                    // Sum the filtered counts into a single number.
                    in: { $sum: "$$n.count" }
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
])
Playground

mongodb `$lookup` or `join` with attributes inside array of objects

I have this object that coming from mongodb
[
{
"_id": "5eaf2fc88fcee1a21ea0d94d",
"migration_customer_union_id": 517,
"__v": 0,
"account": 1,
"createdAt": "2020-05-03T20:55:36.335Z",
"customerUnion": "5eaf2fc7698de8321ccd841d",
"shaufel_customers": [
{
"percent": 50,
"_id": "5eaf2fc8698de8321ccd881f",
"customer": "5eaf2fb9698de8321ccd68c0"
},
{
"percent": 50,
"_id": "5eaf2fc9698de8321ccd8a9d",
"customer": "5eaf2fb9698de8321ccd68c0"
}
],
}
]
you can notice inside shaufel_customers array there is an attribute named customer which I want to use it to join with customers document, so that's what I am doing (wrote this code with help of stackoverflow :) )
const aggregate = await CustomerUnionCustomer.aggregate(
[
{
$match: {migration_customer_union_id: 517}
},
{
$lookup: {
from: 'customers',
localField: 'shaufel_customers.customer',
foreignField: '_id',
as: 'customers',
}
},
{
$project: {
shaufel_customer_union_id: 1,
customerUnion: '$customerUnions',
shaufel_customers: {
$map: {
input: "$customers",
as: "c",
in: {
$mergeObjects: [
"$$c",
{
$arrayElemAt: [{
$filter: {
input: "$shaufel_customers",
cond: {$eq: ["$$this.customer", "$$c._id"]}
}
}, 0]
},
]
}
},
}
}
},
{
"$project": { // this project just to get some specific values inside shaufel_customers
'_id': 0,
"shaufel_customers": {
"$map": {
"input": "$shaufel_customers",
"as": "customer",
"in": {
"customer_id": "$$customer.shaufel_customer_id",
"percent": "$$customer.percent"
}
}
}
}
}
]
)
when executing this code I am getting the following response
[
{
"shaufel_customers": [
{
"customer_id": "869",
"percent": 50
}
]
}
]
you can notice I got one object, although there was two objects inside the original array above, and that's because the customer attribute above has the same ObjectId value 5eaf2fb9698de8321ccd68c0 and that's what I want to ask. I want to get the same two objects even if the ids are the same, so the result I am expecting here is
[
{
"shaufel_customers": [
{
"customer_id": "869",
"percent": 50
},
{
"customer_id": "869",
"percent": 50
},
]
}
]
How should I do that :(
You need to reverse your $map and iterate over shaufel_customers instead of customers - this will return two results:
{
$project: {
shaufel_customer_union_id: 1,
customerUnion: '$customerUnions',
shaufel_customers: {
$map: {
input: "$shaufel_customers",
as: "sc",
in: {
$mergeObjects: [
"$$c",
{
$arrayElemAt: [{
$filter: {
input: "$customers",
cond: {$eq: ["$$this._id", "$$sc.customer"]}
}
}, 0]
},
]
}
},
}
}
},

MongoDB Aggregation - Lookup pipeline not returning any documents

I'm having a hard time getting $lookup with a pipeline to work in MongoDB Compass.
I have the following collections:
Toys
Data
[
{
"_id": {
"$oid": "5d233c3bb173a546386c59bb"
},
"type": "multiple",
"tags": [
""
],
"searchFields": [
"Jungle Stampers - Two",
""
],
"items": [
{
"$oid": "5d233c3cb173a546386c59bd"
},
{
"$oid": "5d233c3cb173a546386c59be"
},
{
"$oid": "5d233c3cb173a546386c59bf"
},
{
"$oid": "5d233c3cb173a546386c59c0"
},
{
"$oid": "5d233c3cb173a546386c59c1"
},
{
"$oid": "5d233c3cb173a546386c59c2"
},
{
"$oid": "5d233c3cb173a546386c59c3"
},
{
"$oid": "5d233c3cb173a546386c59c4"
}
],
"name": "Jungle Stampers - Two",
"description": "",
"status": "active",
"category": {
"$oid": "5cfe727cac920000086b880e"
},
"subCategory": "Stamp Sets",
"make": "",
"defaultCharge": null,
"defaultOverdue": null,
"sizeCategory": {
"$oid": "5d0cfde57561e107c88fbde3"
},
"ageFrom": {
"$numberInt": "24"
},
"ageTo": {
"$numberInt": "120"
},
"images": [
{
"_id": {
"$oid": "5d233c3bb173a546386c59bc"
},
"id": {
"$oid": "5d233c39b173a546386c59ba"
},
"url": "/toyimages/5d233c39b173a546386c59ba.jpg",
"thumbUrl": "/toyimages/thumbs/tn_5d233c39b173a546386c59ba.jpg"
}
],
"__v": {
"$numberInt": "2"
}
}
]
Loans
Data
[
{
"_id": {
"$oid": "5e1f1661b712215978c746d9"
},
"tags": [],
"member": {
"$oid": "5e17495e4f81ab3f900dbb63"
},
"source": "admin portal - potter1#gmail.com",
"items": [
{
"id": {
"$oid": "5e1f160eb712215978c746d5"
},
"status": "new",
"_id": {
"$oid": "5e1f1661b712215978c746db"
},
"toy": {
"$oid": "5d233c3bb173a546386c59bb"
},
"cost": {
"$numberInt": "0"
}
},
{
"id": {
"$oid": "5e1f160eb712215978c746d5"
},
"status": "new",
"_id": {
"$oid": "5e1f1661b712215978c746da"
},
"toy": {
"$oid": "5d233b1ab173a546386c59b5"
},
"cost": {
"$numberInt": "0"
}
}
],
"dateEntered": {
"$date": {
"$numberLong": "1579095632870"
}
},
"dateDue": {
"$date": {
"$numberLong": "1579651200000"
}
},
"__v": {
"$numberInt": "0"
}
}
]
I am trying to return a list of toys and their associated loans that have a status of 'new' or 'out'.
I can use the following $lookup aggregate to fetch all loans:
{
from: 'loans',
localField: '_id',
foreignField: 'items.toy',
as: 'loansSimple'
}
However I am trying to use a pipeline to load loans that have the two statuses I am interested in, but it always only returns zero documents:
{
from: 'loans',
let: {
'toyid': '$_id'
},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ['$items.toy', '$$toyid']},
{$eq: ['$items.status', 'new']} // changed from $in to $eq for simplicity
]
}
}
}
],
as: 'loans'
}
This always seems to return 0 documents, however I arrange it:
Have I made a mistake somewhere?
I'm using MongoDB Atlas, v4.2.2, MongoDB Compass v 1.20.4
You are trying to search $$toyid inside inner array, but Operator Expression $eq cannot resolve it.
Best solution: $let (returns filtered loans by criteria) + $filter (applies filter for inner array) operator helps us to get desired result.
// For every toy, attach the loans that contain at least one item which references
// the toy AND has the requested status.
db.toys.aggregate([
  {
    $lookup: {
      from: "loans",
      let: { "toyid": "$_id", "toystatus": "new" },
      pipeline: [
        {
          $match: {
            $expr: {
              // Keep the loan when the filtered item list is non-empty.
              $gt: [
                {
                  $size: {
                    $let: {
                      vars: {
                        // Items of this loan matching both the toy id and the status.
                        matchingItems: {
                          $filter: {
                            input: "$items",
                            as: "loanItem",
                            cond: {
                              $and: [
                                { $eq: ["$$loanItem.toy", "$$toyid"] },
                                { $eq: ["$$loanItem.status", "$$toystatus"] }
                              ]
                            }
                          }
                        }
                      },
                      in: "$$matchingItems"
                    }
                  }
                },
                0
              ]
            }
          }
        }
      ],
      as: "loans"
    }
  }
])
MongoPlayground
Alternative solution 1. Use $unwind to flatten items attribute. (We create extra field named tmp which stores items value, flatten it with $unwind operator, match as you were doing and then exclude from result)
// For every toy, attach the loans having an item that references the toy with status "new".
db.toys.aggregate([
  {
    $lookup: {
      from: "loans",
      let: { "toyid": "$_id" },
      pipeline: [
        // Copy the items array so the original "items" field stays intact.
        { $addFields: { tmp: "$items" } },
        // One document per copied item.
        { $unwind: "$tmp" },
        // Both conditions are now evaluated against the same single item.
        {
          $match: {
            $expr: {
              $and: [
                { $eq: ["$tmp.toy", "$$toyid"] },
                { $eq: ["$tmp.status", "new"] }
              ]
            }
          }
        },
        // Remove the helper field from the result.
        { $project: { tmp: 0 } }
      ],
      as: "loans"
    }
  }
])
MongoPlayground
Alternative solution 2. We use $reduce to create toy's array and with $in operator we check if toyid exists inside this array.
// For every toy, attach the loans whose items reference the toy's _id.
db.toys.aggregate([
  {
    $lookup: {
      from: "loans",
      let: { "toyid": "$_id" },
      pipeline: [
        // Build a flat helper array "toys" holding the toy id of every item.
        {
          $addFields: {
            toys: {
              $reduce: {
                input: "$items",
                initialValue: [],
                in: { $concatArrays: ["$$value", ["$$this.toy"]] }
              }
            }
          }
        },
        // Keep the loan when the toy's id occurs in that helper array.
        { $match: { $expr: { $in: ["$$toyid", "$toys"] } } },
        // Remove the helper field from the result.
        { $project: { toys: 0 } }
      ],
      as: "loans"
    }
  }
])
$expr receives aggregation expressions. In that context $items.toy is not evaluated for each element of the array as you would expect; it resolves to an array of all the toy ids, which never equals a single id. (Even if it were evaluated per element, it would still give you "bad" results, as you would get loans that have the required toy id and any other item with status new in their items array.)
So you have two options to work around this:
If you don't care about the other items in the lookup'd document you can add an $unwind stage at the start of the lookup pipeline like so:
{
from: 'loans',
let: {
'toyid': '$_id'
},
pipeline: [
{
$unwind: "$items"
},
{
$match: {
$expr: {
$and: [
{$eq: ['$items.toy', '$$toyid']},
{$eq: ['$items.status', 'new']} // changed from $in to $eq for simplicity
]
}
}
}
],
as: 'loans'
}
If you do care about them just iterate the array in one of the possible ways to get a 'correct' match, here is an example using $filter
{
from: 'loads',
let: {
'toyid': '$_id'
},
pipeline: [
{
$addFields: {
temp: {
$filter: {
input: "$items",
as: "item",
cond: {
$and: [
{$eq: ["$$item.toy", "$$toyid"]},
{$eq: ["$$item.status", "new"]}
]
}
}
}
}
}, {$match: {"temp.0": {exists: true}}}
],
as: 'loans'
}