I'm using Python with pymongo to query from the database.
I have 3 different collections:
1st:
# Projects collection
{
"_id": "A",
},
{
"_id": "B",
},
{
"_id": "C"
},
..
2nd:
# Episodes collection
{
"_id": "A/Episode01",
"project": "A",
"name": "Episode01"
},
{
"_id": "A/Episode02",
"project": "A",
"name": "Episode02"
},
{
"_id": "B/Episode01",
"project": "B",
"name": "Episode01"
},
..
3rd:
# Sequences collection
{
"_id": "A/Episode01/Sequence01",
"project": "A",
"episode": "Episode01",
"name": "Sequence01"
},
{
"_id": "A/Episode02/Sequence02",
"project": "A",
"episode": "Episode02",
"name": "Sequence02"
},
{
"_id": "B/Episode01/Sequence01",
"project": "B",
"episode": "Episode01",
"name": "Sequence01"
},
..
I want to use aggregate to query project A and get all of its corresponding episodes and sequences like this:
{
"_id": "A",
"episodes":
[
{
"_id": "A/Episode01",
"project": "A",
"name": "Episode01",
"sequences":
[
{
"_id": "A/Episode01/Sequence01",
"project": "A",
"episode": "Episode01",
"name": "Sequence01"
},
]
},
{
"_id": "A/Episode02",
"project": "A",
"name": "Episode02",
"sequences":
[
{
"_id": "A/Episode02/Sequence02",
"project": "A",
"episode": "Episode02",
"name": "Sequence02"
},
]
},
]
}
I can get as far as getting the proper episodes, but I'm not sure how to add an embed field for any matching sequences. Is it possible to do this all in a single pipeline query?
Right now my query is looking like this:
[
{"$match": {
"_id": "A"}
},
{"$lookup": {
"from": "episodes",
"localField": "_id",
"foreignField": "project",
"as": "episodes"}
},
{"$group": {
"_id": {
"_id": "$_id",
"episodes": "$episodes"}
}}
]
You can do like following
use $match to match the document
use uncorrelated queries to join two collection. But normal joining also possible as you have written. This is easier when we get some complex situations.
Mongo script is given below
[
{
"$match": {
"_id": "A"
}
},
{
$lookup: {
from: "Episodes",
let: {
id: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$project",
"$$id"
]
}
}
},
{
$lookup: {
from: "Sequences",
let: {
epi: "$name"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$episode",
"$$epi"
]
}
}
}
],
as: "sequences"
}
}
],
as: "episodes"
}
}
]
Working Mongo playground
Update 01
Using standard lookup
[
{
"$match": {
"_id": "A"
}
},
{
"$lookup": {
"from": "Episodes",
"localField": "_id",
"foreignField": "project",
"as": "episodes"
}
},
{
$unwind: "$episodes"
},
{
"$lookup": {
"from": "Sequences",
"localField": "episodes.name",
"foreignField": "episode",
"as": "episodes.sequences"
}
},
{
$group: {
_id: "$episodes._id",
episodes: {
$addToSet: "$episodes"
}
}
}
]
Working Mongo playground
Related
I am having a collection which contains the data like the following and want to have the desirable output which I have mentioned below.
db={
collectionA: [
{
"id": ObjectId("63b7c24c06ebe7a8fd11777b"),
"uniqueRefId": "UUID-2023-0001",
"products": [
{
"productIndex": 1,
"productCategory": ObjectId("63b7c24c06ebe7a8fd11777b"),
"productOwners": [
ObjectId("63b7c2fd06ebe7a8fd117781")
]
},
{
"productIndex": 2,
"productCategory": ObjectId("63b7c24c06ebe7a8fd11777b"),
"productOwners": [
ObjectId("63b7c2fd06ebe7a8fd117781"),
ObjectId("63b7c12706ebe7a8fd117778")
]
},
{
"productIndex": 3,
"productCategory": "",
"productOwners": ""
}
]
}
],
collectionB: [
{
"_id": ObjectId("63b7c2fd06ebe7a8fd117781"),
"fullname": "Jim Corbett",
"email": "jim.corbett#pp.com"
},
{
"_id": ObjectId("63b7c12706ebe7a8fd117778"),
"fullname": "Carry Minatti",
"email": "carry.minatty#pp.com"
},
]
}
Desirable Output = [
{
"id": ObjectId("507f1f77bcf86cd799439011"),
"uniqueRefId": "UUID-2023-0001",
"products": [
{
"productIndex": 1,
"productCategory": ObjectId('614g2f77bff86cd755439021'),
"productOwners": [
{
"_id": ObjectId("63ac1e59c0afb8b6f2d41acd"),
"fullname": "Jim Corbett",
"email": "jim.corbett#pp.com"
}
]
},
{
"productIndex": 2,
"productCategory": ObjectId('614g2f77bff86cd755439021'),
"productOwners": [
{
"_id": ObjectId("63ac1e59c0afb8b6f2d41acd"),
"fullname": "Jim Corbett",
"email": "jim.corbett#pp.com"
},
{
"_id": ObjectId("63ac1e59c0afb8b6f2d41ace"),
"fullname": "Carry Minatti",
"email": "carry.minatty#pp.com"
}
]
},
{
"productIndex": 3,
"productCategory": "",
"productOwners": ""
}
]
}
]
In the collectionA we are having other documents as well, its not just one document.
Similarly for collectionB we are having other documents too.
How we can get this desirable output?
I am expecting the mongodb query for getting this solution.
I have implemented the lookup like the following
db.collectionA.aggregate([
{
"$lookup": {
"from": "collectionB",
"localField": "products.productOwners",
"foreignField": "_id",
"as": "inventory_docs"
}
}
])
You can try this:
db.collectionA.aggregate([
{
"$unwind": "$products"
},
{
"$lookup": {
"from": "collectionB",
"localField": "products.productOwners",
"foreignField": "_id",
"as": "products.productOwners"
}
},
{
"$group": {
"_id": {
id: "$id",
uniqueRefId: "$uniqueRefId"
},
"products": {
"$push": "$products"
}
}
},
{
"$project": {
id: "$_id.id",
uniqueRefId: "$_id.uniqueRefId",
products: 1,
_id: 0
}
}
])
Playground link.
In this query, we do the following:
First we unwind the products array, using $unwind.
Then we calculate productOwners, using $lookup.
Then we group the unwinded elements, using $group.
Finally we, project the desired output using $project.
I need to join the faults array
{
"_id": "99812930-37CE-456F-A9D9-837E9E3F712A",
"faultsChanged": [
{
"_id": "7C628A46-7E80-4615-8B08-10C5E9A6B1D7",
"faults": [
"BF221A71-0217-42E7-B853-53112EDA9694",
"E4A54172-7E93-49C4-840B-8E6116116979"
],
"isDeleted": false,
"partition": "indego",
"sessionUuid": "A83CE9A1-7539-493F-8BA4-6FBE25B18B57",
"source": "1",
"timestamp": {
"$date": {
"$numberLong": "1630603342700"
}
},
"unmigratedNote": null,
"uuid": "7C628A46-7E80-4615-8B08-10C5E9A6B1D7"
}
]
}
So that it gets replaced by a document from another collection OOFaultEntry.
New to the aggregation pipeline, I have tried
{$lookup:{
from: "OOFaultEntry",
localField: 'faultsChanged.faults',
foreignField: "_id",
let:{faults:"$faultsChanged.faults"},
pipeline:[],
as:"faults"
}}
but this just created a key on the result and was not embedded inside of each respective faultChanged object
expected result
OOFaultEntry would be JOINED on _id . So ,it should look something like
{
"_id": "99812930-37CE-456F-A9D9-837E9E3F712A",
"faultsChanged": [
{
"_id": "7C628A46-7E80-4615-8B08-10C5E9A6B1D7",
"faults": [
{
"_id": "BF221A71-0217-42E7-B853-53112EDA9694",
"some_key": "value"
},
{
"_id": "E4A54172-7E93-49C4-840B-8E6116116979",
"some_key": "value"
}
],
"isDeleted": false,
"partition": "indego",
"sessionUuid": "A83CE9A1-7539-493F-8BA4-6FBE25B18B57",
"source": "1",
"timestamp": {
"$date": {
"$numberLong": "1630603342700"
}
},
"unmigratedNote": null,
"uuid": "7C628A46-7E80-4615-8B08-10C5E9A6B1D7"
}
]
}
One way would be $unwind the faultsChanged array first. Perform the $lookup and regroup the results.
db.Faults.aggregate([
{
"$unwind": "$faultsChanged"
},
{
"$lookup": {
"from": "00FaultEntry",
"localField": "faultsChanged.faults",
"foreignField": "_id",
"as": "faultsChanged.faults"
}
},
{
"$group": {
"_id": "$_id",
"faultsChanged": {
$push: "$faultsChanged"
}
}
}
])
Mongo Playground
I need a MongoDB query to return the aggregation result from a collection of events, users and confirmations.
db.events.aggregate([
{
"$match": {
"_id": "1"
}
},
{
"$lookup": {
"from": "confirmations",
"as": "confirmations",
"let": {
"eventId": "$_id"
},
"pipeline": [
{
"$match": {
"$expr": {
"$eq": [
"$eventId",
"$$eventId"
]
}
}
},
{
"$lookup": {
"from": "users",
"as": "user",
"let": {
"userId": "$confirmations.userId"
},
"pipeline": [
{
"$match": {
"$expr": {
"$eq": [
"$_id",
"$$userId"
]
}
}
},
]
},
},
]
}
}
])
Desired
[
{
"_id": "1",
"confirmations": [
{
"_id": "1",
"eventId": "1",
"user": {
"_id": "1",
"name": "X"
},
"userId": "1"
},
{
"_id": "2",
"eventId": "1",
"user": {
"_id": "2",
"name": "Y"
},
"userId": "2"
}
],
"title": "foo"
}
]
Everything works except the embedded user in confirmations array. I need the output to show the confirmations.user, not an empty array.
Playgound: https://mongoplayground.net/p/jp49FW59WCv
You made mistake in variable declaration of inner $lookup. Try this Solution:
db.events.aggregate([
{
"$match": {
"_id": "1"
}
},
{
$lookup: {
from: "confirmations",
let: { "eventId": "$_id" },
pipeline: [
{
$match: {
"$expr": {
$eq: ["$eventId", "$$eventId"]
}
}
},
{
$lookup: {
from: "users",
let: { "userId": "$userId" },
pipeline: [
{
$match: {
$expr: {
$eq: ["$_id", "$$userId"]
}
}
}
],
as: "user"
}
},
{ $unwind: "$user" }
],
as: "confirmations"
}
}
])
Also instead of $unwind of user inside inner $lookup you can use:
{
$addFields: {
user: { $arrayElemAt: ["$user", 0] }
}
}
since $unwind will not preserve empty results from previous stage by default.
I have a problem with how to lookup nested array, for example i have 4 collections.
User Collection
"user": [
{
"_id": "1234",
"name": "Tony",
"language": [
{
"_id": "111",
"language_id": "919",
"level": "Expert"
},
{
"_id": "111",
"language_id": "920",
"level": "Basic"
}
]
}
]
Language Collection
"language": [
{
"_id": "919",
"name": "English"
},
{
"_id": "920",
"name": "Chinese"
}
]
Job
"job": [
{
"_id": "10",
"title": "Programmer",
"location": "New York"
}
],
CvSubmit Collection
"cvsubmit": [
{
"_id": "11",
"id_user": "1234",
"id_job": "11"
}
]
And my query aggregation is:
db.cvsubmit.aggregate([
{
$lookup: {
from: "user",
localField: "id_user",
foreignField: "_id",
as: "id_user"
}
},
{
$lookup: {
from: "language",
localField: "id_user.language.language_id",
foreignField: "_id",
as: "id_user.language.language_id"
}
},
])
But the result is:
[
{
"_id": "11",
"id_job": "11",
"id_user": {
"language": {
"language_id": [
{
"_id": "919",
"name": "English"
},
{
"_id": "920",
"name": "Chinese"
}
]
}
}
}
]
I want the result like this, also showing all user data detail like name:
[
{
"_id": "11",
"id_job": "11",
"id_user": {
"_id": "1234",
"name": "Tony"
"language": [
{
"_id": "919",
"name": "English",
"Level": "Expert"
},
{
"_id": "920",
"name": "Chinese",
"level": "Basic"
}
]
}
}
]
Mongo Playground link https://mongoplayground.net/p/i0yCucjruey
Thanks before.
$lookup with user collection
$unwind deconstruct id_user array
$lookup with language collection and return in languages field
$map to iterate look of id_user.language array
$reduce to iterate loop of languages array returned from collection, check condition if language_id match then return name
db.cvsubmit.aggregate([
{
$lookup: {
from: "user",
localField: "id_user",
foreignField: "_id",
as: "id_user"
}
},
{ $unwind: "$id_user" },
{
$lookup: {
from: "language",
localField: "id_user.language.language_id",
foreignField: "_id",
as: "languages"
}
},
{
$addFields: {
languages: "$$REMOVE",
"id_user.language": {
$map: {
input: "$id_user.language",
as: "l",
in: {
_id: "$$l._id",
level: "$$l.level",
name: {
$reduce: {
input: "$languages",
initialValue: "",
in: {
$cond: [
{ $eq: ["$$this._id", "$$l.language_id"] },
"$$this.name",
"$$value"
]
}
}
}
}
}
}
}
}
])
Playground
You database structure is not accurate as per NoSQL, there should be max 2 collections, loot of join using $lookup and $unwind will cause performance issues.
I am using below query to get combined data from users and project collections:
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}
},
{
"$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}
},
{ "$unwind": "$user" },
{
"$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$project": { "projectName": 1 }}
],
"as": "projects"
}
}
])
and it results like below:
[
{
"_id": "5c0a29e597e71a0d28b910aa",
"projectId": [
"5c0a2a8897e71a0d28b910ac",
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29e597e71a0d28b910aa",
"firstName": "Amit"
},
"projects": [
{
"_id": "5c0a2a8897e71a0d28b910ac",
"projectName": "LN-PM"
},
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
},
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
}
]
Now i have another table "Worksheets" and want to include hours field in projects Array, which will be calculated from the worksheets table by specifying the projectId which is _id in the projects array. It will be find in worksheet table and hours will be incremented how many times this _id has in worksheets table. Below is my worksheet collection:
{
"_id" : ObjectId("5c0a4efa91b5021228681f7a"),
"projectId" : ObjectId("5c0a4083753a321c6c4ee024"),
"hours" : 8,
"userId" : ObjectId("5c0a29c697e71a0d28b910a9"),
"__v" : 0
}
{
"_id" : ObjectId("5c0a4f4191b5021228681f7c"),
"projectId" : ObjectId("5c0a2a8897e71a0d28b910ac"),
"hours" : 6,
"userId" : ObjectId("5c0a29e597e71a0d28b910aa"),
"__v" : 0
}
The result will look like below:
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery",
"hours":8
}
]
}
You can use below aggregation
$lookup 3.6 nested syntax allows you to join nested collection inside the $lookup pipeline. You can perform all the aggregation inside the nested $lookup pipline
db.collection.aggregate([
{ "$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}},
{ "$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}},
{ "$unwind": "$user" },
{ "$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$lookup": {
"from": "worksheets",
"let": { "projectId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$projectId", "$$projectId" ] }}},
{ "$group": {
"_id": "$projectId",
"totalHours": { "$sum": "$hours" }
}}
],
"as": "workHours"
}}
{ "$project": {
"projectName": 1,
"hours": { "$arrayElemAt": ["$workHours.totalHours", 0] }
}}
],
"as": "projects"
}}
])