MongoDB conditional $lookup with aggregation framework - mongodb

I'm trying to do a conditional lookup with the aggregation framework. In my local and foreign collections I have two fields: fieldA and fieldB. If fieldA != 0 my $lookup should be:
{ from: 'collectionA', localField: 'fieldA', foreignField: 'fieldA', as: 'agg' }
Otherwise, if fieldA = 0 my $lookup should be:
{ from: 'collectionA', localField: 'fieldB', foreignField: 'fieldB', as: 'agg' }
Is it possible to combine these conditions with a single $lookup?

It's not really possible out of the box (OOB), but you can work around this in multiple ways.
For example, use the pipeline form of $lookup and choose which field to compare with a $cond based on this condition:
// Conditional $lookup in a single stage: join on fieldB when the local
// document's fieldA equals 0, otherwise join on fieldA.
db.collection.aggregate([
  {
    $lookup: {
      from: 'collectionA',
      // Expose the local document's fields to the sub-pipeline as
      // $$fieldA / $$fieldB.
      let: {
        fieldA: "$fieldA", fieldB: "$fieldB"
      },
      pipeline: [
        {
          $match: {
            // $expr lets $match use aggregation expressions and `let` variables.
            $expr: {
              $eq: [
                // Local side: the value to join on, chosen by the local fieldA.
                {
                  $cond: [
                    {
                      $eq: [
                        '$$fieldA',
                        0,
                      ],
                    },
                    '$$fieldB',
                    '$$fieldA',
                  ],
                },
                // Foreign side: the matching field of the collectionA document.
                // The condition still tests the LOCAL fieldA so both sides
                // pick the same field name.
                {
                  $cond: [
                    {
                      $eq: [
                        '$$fieldA',
                        0,
                      ],
                    },
                    '$fieldB',
                    '$fieldA',
                  ],
                }
              ],
            },
          },
        },
      ],
      as: 'agg',
    },
  }
])
The issue with this approach is that indexes won't be utilized for the lookup for older Mongo versions, which in some cases can be crucial.
You can work around this for performance purposes like so:
// Index-friendly variant of the conditional lookup: split the input into two
// branches with $facet, run a plain localField/foreignField $lookup in each,
// then merge the branches back into a single stream of documents.
// NOTE(review): the $facet output is a single document, so the combined result
// is presumably bound by the BSON document size limit; confirm for large inputs.
db.collection.aggregate([
  {
    $facet: {
      // Branch 1: fieldA != 0, so join on fieldA.
      one: [
        {
          $match: {
            fieldA: { $ne: 0 },
          },
        },
        {
          $lookup: { from: 'collectionA', localField: 'fieldA', foreignField: 'fieldA', as: 'agg' },
        },
        {
          // Keep only documents whose lookup produced at least one element.
          $match: {
            'agg.0': { $exists: true },
          },
        },
      ],
      // Branch 2: fieldA == 0, so join on fieldB.
      two: [
        {
          $match: {
            fieldA: { $eq: 0 },
          },
        },
        {
          $lookup: { from: 'collectionA', localField: 'fieldB', foreignField: 'fieldB', as: 'agg' },
        },
        {
          // Keep only documents whose lookup produced at least one element.
          $match: {
            'agg.0': { $exists: true },
          },
        },
      ],
    },
  },
  {
    // Stage names are case-sensitive: this must be $addFields.
    $addFields: {
      combined: {
        $concatArrays: [
          '$one',
          '$two',
        ],
      },
    },
  },
  {
    // One output document per element of the merged array.
    $unwind: '$combined',
  },
  {
    // Promote each merged element back to the top level.
    $replaceRoot: {
      newRoot: "$combined"
    },
  },
]);
While there is some overhead here it will still work faster than an unindexed lookup.

Related

Mongo multi-level lookup pipeline variable access

I'm not sure this has been asked before, I tried looking for similar questions here on SO but unfortunately I don't find anything - perhaps for a lack of better googling skills or a lack of better wording on my part.
I have a multi-lookup query that looks like this:
// Aggregation run through AuctionModel: match one document by _id, join
// "lots" into it, and inside that join attempt a nested join of "bids".
const auction_id = "94a3cfb7b05c73fb5e746e21";
const record = await AuctionModel.aggregate([
{ $match: { _id: new Types.ObjectId(auction_id) } },
{
$lookup: {
from: "lots",
as: "lots",
// "$_id" is read from the document entering this $lookup (the one matched
// above), so LOTID carries that document's _id.
let: { LOTID: "$_id" },
pipeline: [
{
$match: { auction_id: new Types.ObjectId(auction_id) },
},
{
$lookup: {
from: "bids",
as: "bids",
pipeline: [
{
// No $expr here. Per the MongoDB docs quoted in the question, $match needs
// $expr to read "$$" variables.
$match: { lot_id: "$$LOTID" },
},
],
},
},
],
},
},
]);
I want to access that variable LOTID inside the inner pipeline.
Is this possible? I also tried this:
{
$lookup: {
from: "lots",
...
let: { LOTID1: "$_id" },
pipeline: [
...
{
$lookup: {
from: "bids",
...
let: { LOTID2: "$$LOTID1" },
pipeline: [
{
$match: { lot_id: "$$LOTID2" },
},
],
},
},
],
},
},
I also read here in the Mongo Docs:
https://www.mongodb.com/docs/manual/reference/operator/aggregation/lookup/#std-label-lookup-concise-correlated-subquery-let
A $match stage requires the use of an $expr operator to access the variables. The $expr operator allows the use of aggregation expressions inside of the $match syntax.
{
$lookup: {
from: "lots",
...
let: { LOTID1: "$_id" },
pipeline: [
...
{
$lookup: {
from: "bids",
...
let: { LOTID2: "$$LOTID1" },
pipeline: [
{
$match: { $expr: { $eq: ["$lot_id", "$$LOTID1"] } },
},
],
},
},
],
},
},
Also not working.
Is there any other way? Any assistance would be greatly appreciated!

Aggregate multiple lookups return no data

I have documents like this in DB. And I need to grab the data of each item based on the itemType from their own collection.
{ listId: 2, itemType: 'book', itemId: 5364 },
{ listId: 2, itemType: 'car', itemId: 354 },
{ listId: 2, itemType: 'laptop', itemId: 228 }
Based on MongoDB docs and some search, I figured out that I need to use let and $expr in lookup, to make some condition.
// For the items of list 2, run one $lookup per target collection (books, cars,
// laptops). Each lookup joins itemId to _id and is gated on the item's itemType.
ListItemsModel.aggregate([
{ $match: { listId: 2 } },
{ $lookup:
{
from: 'books',
localField: 'itemId',
foreignField: '_id',
// Make the list item's itemType available to the sub-pipeline as $$itemType.
let: { "itemType": "$itemType" },
pipeline: [
{ $project: { _id: 1, title: 1 }},
// Only keep joined documents when the list item is a book.
{ $match: { $expr: { $eq: ["$$itemType", "book"] } }}
],
// All three $lookup stages write to the same 'data' field.
as: 'data'
}
},
{ $lookup:
{
from: 'cars',
localField: 'itemId',
foreignField: '_id',
let: { "itemType": "$itemType" },
pipeline: [
{ $project: { _id: 1, title: 1 }},
// Only keep joined documents when the list item is a car.
{ $match: { $expr: { $eq: ["$$itemType", "car"] } }}
],
// Same output field as the previous stage.
as: 'data'
}
},
{ $lookup:
{
from: 'laptops',
localField: 'itemId',
foreignField: '_id',
let: { "itemType": "$itemType" },
pipeline: [
{ $project: { _id: 1, title: 1 }},
// Only keep joined documents when the list item is a laptop.
{ $match: { $expr: { $eq: ["$$itemType", "laptop"] } }}
],
// Same output field again; this is the last stage to assign 'data'.
as: 'data'
}
}
]);
The problem is, in the result all data fields are empty as data: [].
The syntax seems correct to me. What's wrong?
Any subsequent reassignment of field values will eliminate any previous value.
So, for your aggregation pipeline, you need to assign different values to each "$lookup" "as" field.
For example:
// ...
{ $lookup:
{
from: 'books',
// ...
as: 'booksData'
}
},
{ $lookup:
{
from: 'cars',
// ...
as: 'carsData'
}
},
{ $lookup:
{
from: 'laptops',
// ...
as: 'laptopsData'
}
},
// ...

Mongo query for lookup array of keys which is itself an item in a nested array

My first collection is shown below. I look up the document by email and match a particular job_ID inside the jobs array. I then want to pull in the matching document from the second collection by matching its _id with jobs.Process.profile_id.
{
"_id": {
"$oid": "6229d3cfdbfc81a8777e4821"
},
"jobs": [
{
"job_ID": {
"$oid": "62289ded8079821eb24760e0"
},
"Process": [
{
"profile_id": {
"$oid": "6285e571681188e83d434797"
}
},
{
"profile_id": {
"$oid": "6285e571681188e83d434799"
}
}
],
},
{
"job_ID": {
"$oid": "6228a252fb4554dd5c48202a"
},
"Process": [
{
"profile_id": {
"$oid": "62861067dc9771331e61df5b"
}
}
],
},
{
"job_ID": {
"$oid": "622af1c391b290d34701af9f"
},
"Process": [
""
],
}
],
"email": "********#gmail.com"
}
and my second collection is, I need to insert this document in my first collection by matching with jobs.Process.profile_id.
{
"_id": {
"$oid": "6285e571681188e83d434797"
},
"Name": "Lakshdwanan",
"Location":"California"
}
I have tried with query,
// Attempted pipeline: find the document by email, join user__profiles on the
// nested profile ids, then rebuild `jobings` from the job whose job_ID equals
// objInstance, merged with a matching profile.
aggregate([
{ $match: { email: email } },
{
$lookup: {
from: 'user__profiles',
localField: 'jobs.Process.profile_id',
foreignField: '_id',
as: 'jobings',
},
},
{
// Overwrites the `jobings` array produced by the $lookup above.
$addFields: {
jobings: {
$map: {
input: {
// Keep only the job whose job_ID equals objInstance.
$filter: {
input: '$jobs',
as: 'm',
cond: {
$eq: ['$$m.job_ID', objInstance],
},
},
},
as: 'm',
in: {
$mergeObjects: [
{
// First looked-up profile that satisfies the condition below.
$arrayElemAt: [
{
$filter: {
input: '$jobings',
cond: {
// NOTE(review): in the sample document Process is an array, so
// '$$m.Process.profile_id' presumably resolves to an array of ids rather
// than a single id. Confirm this comparison can ever be true.
$eq: ['$$this._id', '$$m.Process.profile_id'],
},
},
},
0,
],
},
'$$m',
],
},
},
},
},
},
{
// Output only the rebuilt `jobings` field.
$project: {
jobings: 1,
_id: 0,
},
},
]);
My output should only display second collection document based on the first collection document matching.
EDIT: If you want the data for a specific job only, it is better to $filter the jobs before the $lookup step. After the $lookup, just $unwind and format:
// Return the user__profiles documents referenced by one specific job of the
// document that has the given email.

// 1. Select the document by email.
const matchByEmail = { $match: { email: email } };

// 2. Reduce `jobs` to the entries whose job_ID equals objInstance.
const keepRequestedJob = {
  $project: {
    jobs: {
      $filter: {
        input: '$jobs',
        as: 'item',
        cond: { $eq: ['$$item.job_ID', objInstance] },
      },
    },
    _id: 0,
  },
};

// 3. Join the profiles referenced by the remaining job's Process entries.
const joinProfiles = {
  $lookup: {
    from: 'user__profiles',
    localField: 'jobs.Process.profile_id',
    foreignField: '_id',
    as: 'jobings',
  },
};

// 4. Keep only the joined profiles, under the key `res`.
const keepProfilesOnly = { $project: { res: '$jobings', _id: 0 } };

// 5. Emit one document per profile.
const onePerProfile = { $unwind: '$res' };

// 6. Promote each profile to the top level.
const promoteProfile = { $replaceRoot: { newRoot: '$res' } };

db.firstCol.aggregate([
  matchByEmail,
  keepRequestedJob,
  joinProfiles,
  keepProfilesOnly,
  onePerProfile,
  promoteProfile,
])
Playground
The jobs.Process.profile_id is the user__profiles _id, so no need to merge anything...The results are documents from user__profiles collection "as is" but they can be formatted as wanted..._id key name can be renamed profile_id easily.

Any strategies to improve MongoDB queries that are slow when using advanced lookups?

I've built 3 queries and I am trying to optimize one of them because in my opinion is the best way to do it. Unfortunately, I don't get the performance I expected.
Fastest query:
Simple lookups only, but you need to unset a lot of data in the last stage. Imagine having to do that for 20 or 30 fields you do not want. If you use $project you will mess up with the todos collection data, so unset is the only way to do it.
Execution time for my set of data is: 176ms (with my data set, of course)
// Variant 1: two localField/foreignField lookups, each flattened with $unwind,
// followed by an $unset of the fields that should not appear in the output.
db.todos
.aggregate([
{
// Join the todo's user.
$lookup: {
from: 'users',
localField: 'user_id',
foreignField: '_id',
as: 'user',
},
},
{
// Turn the one-element `user` array into an object; keep todos with no user.
$unwind: {
path: '$user',
preserveNullAndEmptyArrays: true,
},
},
{
// Join the user's data document into user.data.
$lookup: {
from: 'user_data',
localField: 'user.data_id',
foreignField: '_id',
as: 'user.data',
},
},
{
$unwind: {
path: '$user.data',
preserveNullAndEmptyArrays: true,
},
},
// Drop the joined fields that are not wanted in the result.
{ $unset: ['user._id', 'user.data_id', 'user.deleted', 'user.active', 'user.data._id'] },
])
.explain();
My Favorite:
Advanced nested lookups. I like it cause it is clean and clear but I don't think this one uses indexes probably because $expr is used even if it is done on _id.
Execution time for my set of data is: 713ms
// Variant 2: a pipeline-style $lookup on users that itself contains a
// pipeline-style $lookup on user_data; projections happen inside the pipelines.
db.todos
.aggregate([
{
$lookup: {
from: 'users',
// Expose the todo's user_id to the sub-pipeline as $$user_id.
let: { user_id: '$user_id' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$user_id'] } } },
{
// Nested join: the user's data document, reduced to `name`.
$lookup: {
from: 'user_data',
let: { data_id: '$data_id' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$data_id'] } } },
{ $project: { _id: 0, name: 1 } },
],
as: 'data',
},
},
// No preserveNullAndEmptyArrays here, unlike the outer $unwind below.
{ $unwind: '$data' },
{ $project: { _id: 0, email: 1, data: 1 } },
],
as: 'user',
},
},
{
// Turn the `user` array into an object; keep todos with no user.
$unwind: {
path: '$user',
preserveNullAndEmptyArrays: true,
},
},
])
.explain();
Worst one:
Advanced lookups(not nested). Slowest query.
Execution time for my set of data is: 777ms
// Variant 3: two pipeline-style lookups run one after the other (not nested),
// each followed by $unwind, with a final $unset of the helper field.
db.todos
.aggregate([
{
$lookup: {
from: 'users',
let: { user_id: '$user_id' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$user_id'] } } },
// data_id is kept here because the second lookup reads it.
{ $project: { _id: 0, email: 1, data_id: 1 } },
],
as: 'user',
},
},
{
$unwind: {
path: '$user',
preserveNullAndEmptyArrays: true,
},
},
{
$lookup: {
from: 'user_data',
// Reads the data_id projected by the first lookup.
let: { data_id: '$user.data_id' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$data_id'] } } },
{ $project: { _id: 0, name: 1 } },
],
as: 'user.data',
},
},
{
$unwind: {
path: '$user.data',
preserveNullAndEmptyArrays: true,
},
},
// data_id was only needed for the second join.
{ $unset: ['user.data_id'] },
])
.explain();
I was surprised (being a Mongo beginner) that the way I thought about building queries is not the most effective way. Is there a way to improve the second option or am I stuck with the first one?
A todo
{"_id":{"$oid":"612156ec810895cfe9f406bd"},"user_id":{"$oid":"61214827810895cfe9f406ac"},"title":"todo 1"}
A user
{"_id":{"$oid":"61216d36810895cfe9f40713"},"active":true,"deleted":false,"data_id":{"$oid":"61216d42810895cfe9f40716"},"email":"test2#test2.com"}
A user data
{"_id":{"$oid":"6121488f810895cfe9f406ad"},"name":{"first":"Fanny","last":"Lenny"}}
Thank you

how to reduce unnecessary unwind stages from aggregation pipeline

If I'm applying many $lookup stages in an aggregation pipeline and each lookup is followed by an $unwind (just to convert the result into an object), my first question is: does it affect query performance? And if yes, how can I do that in an optimised manner?
Note: all lookup's will return only one object
For Ex:
xyz.aggregate([
{ $lookup:{ ----}} //first lookup
{$unwind :{----}} //first unwind
{ $lookup:{ ----}} //second lookup
{$unwind :{----}} //second unwind
{ $lookup:{ ----}} //third lookup
{$unwind :{----}} //third unwind
{ $lookup:{ ----}} //fourth lookup
{$unwind :{----}} //fourth unwind
])
In reference to comments, here is advanced $lookup:
// Pipeline-style $lookup on `accounts`. The sub-pipeline matches the account,
// trims it to _id/user, and runs two further lookups (users, documents) keyed
// on the account's `user` field. The result is stored in `account`.
$lookup: {
from: 'accounts',
// Expose the local `account` field to the sub-pipeline.
let: { "localAccountField": "$account" },
pipeline: [
{
$match: {
$expr: {
$eq: ["$_id", "$$localAccountField"]
}
}
},
{
// Keep only what the nested lookups below need.
$project: {
_id: 1,
user: 1
}
},
{
// Nested join 1: the account's user, reshaped by the $project below.
$lookup: {
from: 'users',
let: { 'localUserField': "$user" },
pipeline: [
{
$match: {
$expr: {
$eq: ["$_id", "$$localUserField"]
}
}
},
{
$project: {
_id: 1,
username: "$uid",
phone:"$phoneNumber",
email: "$email.add",
// First and last name joined with a single space.
name: {
$concat: [
"$profile.name.first",
' ',
"$profile.name.last"
]
},
}
}
],
as: "users"
}
},
{
// Nested join 2: the same user's documents that pass the filters below.
$lookup: {
from: 'documents',
let: { 'localDocumentField': "$user" },
pipeline: [
{
// $expr compares against the `let` variable; the remaining conditions are
// ordinary query predicates on the documents collection.
$match: {
$expr: {
$eq: ["$user", "$$localDocumentField"]
},
status:"verified",
"properties.expirydate": { $exists: true, $ne: "" },
name: "idcard"
}
},
{
// Expose only the document number, under the key `cnic`.
$project: {
_id: 0,
cnic: "$properties.number"
}
}
],
as: "documents"
}
}
],
as: 'account'
}