Operation timeout for a MongoDB aggregation pipeline - mongodb

I have a MongoDB database on MongoDB Atlas.
It has "orders", "products", "itemTypes" and "brands" collections.
"orders" only keeps track of the ids of the products ordered.
"products" only keeps track of the brand id and the itemType id.
"itemTypes" keeps track of the item type name.
"brands" keeps track of the brand name.
If I aggregate orders + products + itemTypes it is ok:
// Pipeline over orders + products + itemTypes (the variant reported to work).
[{
// Emit one document per element of the `orders` array.
$unwind: {
path: '$orders'
}
}, {
// Join the products whose productId matches this order line.
$lookup: {
from: 'products',
localField: 'orders.productId',
foreignField: 'productId',
as: 'products'
}
}, {
// Join the item types matching the typeId of the joined products.
$lookup: {
from: 'itemTypes',
localField: 'products.typeId',
foreignField: 'typeId',
as: 'itemTypes'
}
}, {
// Copy the first joined value of each field onto the order line itself.
$set: {
'orders.price': {
$arrayElemAt: ['$products.price', 0]
},
'orders.brandId': {
$arrayElemAt: ['$products.brandId', 0]
},
'orders.typeId': {
$arrayElemAt: ['$products.typeId', 0]
},
'orders.typeName': {
$arrayElemAt: ['$itemTypes.name', 0]
}
}
}, {
// Regroup by the original _id: keep the first value of each top-level field
// and push the enriched order lines back into the `orders` array.
$group: {
_id: '$_id',
createdAt: {
$first: '$createdAt'
},
status: {
$first: '$status'
},
retailerId: {
$first: '$retailerId'
},
retailerName: {
$first: '$retailerName'
},
orderId: {
$first: '$orderId'
},
orders: {
$push: '$orders'
}
}
}]
If I aggregate orders + products + itemTypes + brands, both MongoDB Compass and the aggregation builder in the MongoDB Atlas web UI give an operation timeout error:
// Pipeline over orders + products + itemTypes + brands (the variant reported to time out).
[{
// Emit one document per element of the `orders` array.
$unwind: {
path: '$orders'
}
}, {
// Join the products whose productId matches this order line.
$lookup: {
from: 'products',
localField: 'orders.productId',
foreignField: 'productId',
as: 'products'
}
}, {
// Join the item types matching the typeId of the joined products.
$lookup: {
from: 'itemTypes',
localField: 'products.typeId',
foreignField: 'typeId',
as: 'itemTypes'
}
}, {
// Third join: this stage (and 'orders.brandName' below) is what this
// pipeline adds compared with the three-collection pipeline.
$lookup: {
from: 'brands',
localField: 'products.brandId',
foreignField: 'brandId',
as: 'brands'
}
}, {
// Copy the first joined value of each field onto the order line itself.
$set: {
'orders.price': {
$arrayElemAt: ['$products.price', 0]
},
'orders.brandId': {
$arrayElemAt: ['$products.brandId', 0]
},
'orders.typeId': {
$arrayElemAt: ['$products.typeId', 0]
},
'orders.typeName': {
$arrayElemAt: ['$itemTypes.name', 0]
},
'orders.brandName': {
$arrayElemAt: ['$brands.name', 0]
}
}
}, {
// Regroup by the original _id: keep the first value of each top-level field
// and push the enriched order lines back into the `orders` array.
$group: {
_id: '$_id',
createdAt: {
$first: '$createdAt'
},
status: {
$first: '$status'
},
retailerId: {
$first: '$retailerId'
},
retailerName: {
$first: '$retailerName'
},
orderId: {
$first: '$orderId'
},
orders: {
$push: '$orders'
}
}
}]
This is a demo of the aggregation that timed out:
https://mongoplayground.net/p/Jj6EhSl58MS
We have approximately 50k orders, 14k products, 200 brands, 89 item types.
Is there any way to optimise this aggregation so that it won't time out?
P.S. My ultimate goal is to visualise the popular brands and item types ordered, using a beautiful chart in MongoDB Charts.

If you are on Mongo Atlas, you can use Triggers to run the aggregation query in the background - either when the database is updated or as a scheduled trigger (https://docs.mongodb.com/realm/triggers/).
When the trigger runs, you can save the result of the aggregation pipeline in a new collection using the "$merge" operation.
/**
 * Atlas Trigger function: runs the aggregation in the background and writes
 * its output into the "orders.summary" collection through a final $merge stage.
 *
 * CLUSTER_NAME, DATABASE_NAME and YOUR_PIPELINE are placeholders to fill in:
 * the linked cluster's service name, the database name, and an ARRAY of the
 * aggregation stages to run before the $merge.
 *
 * @returns {Promise<Array>} settles once the pipeline, including $merge, has run.
 */
exports = function() {
  const mongodb = context.services.get(CLUSTER_NAME);
  const orders = mongodb.db(DATABASE_NAME).collection("orders");
  const pipeline = [
    // Spread the stages in place; wrapping the placeholder in `{ }` would
    // create a single bogus stage object instead of inserting the stages.
    ...YOUR_PIPELINE,
    { $merge: { into: "orders.summary", on: "_id", whenMatched: "replace", whenNotMatched: "insert" } }
  ];
  // Drain the cursor and return the promise so the trigger waits for the
  // aggregation (and therefore the $merge write) to complete.
  return orders.aggregate(pipeline).toArray();
};
This way, your charts will be very fast, since they only have to do a simple query from the new collection.

Do you have indexes on the fields you $lookup on in each collection:
products (productId) + itemTypes (typeId) + brands (brandId)?
Otherwise, the lookups can take a long time to complete.

Related

mongoDB, mongoose - aggregation an array of objects

I have 3 collections to aggregate.
1st is colors collection
{
{
_id: 1, <- mongoose objectId
name: red
},
{
_id: 2, <- mongoose objectId
name: green
}
}
2nd is products
{
{
_id: Id777, <- mongoose objectId
productName: test prod 777
},
{
_id: Id888, <- mongoose objectId
productName: test prod 888
}
}
and 3rd is the moves collection
{
....other fields here
items: [
{
_id: an mongoose id,
itemId: Id777 <- in products collection,
itemColor: 1 <- id in colors collection,
count: 7,
....other fields
},
{
_id: an mongoose id,
itemId: Id888 <- in products collection,
itemColor: 2 <- id in colors collection
count: 10
....other fields
}
]
}
I need to have an output like this:
{
////information from collection
items: [
{
itemId: test prod 777, itemColor: red, count: 7
},
{
itemId: test prod 888, itemColor: green, count: 10
}
]
}
My code is:
// Asker's attempt: joins products and colors onto each move document.
const moves = await ProductMoves.aggregate([
{ $match: query }, // this is my query
{
// No $unwind of `items` precedes this stage, so localField refers to the
// itemId values of the whole items array.
$lookup: {
from: 'products',
localField: 'items.itemId',
foreignField: '_id',
as: 'productName'
}
},
{
// One output document per joined product; documents without a match are kept.
$unwind: { path: "$productName" , preserveNullAndEmptyArrays: true }
},
{
// Same pattern for colors: joined against the itemColor values of the whole array.
$lookup: {
from: 'colors',
localField: 'items.itemColor',
foreignField: '_id',
as: 'cName'
}
},
{
// One output document per joined color; documents without a match are kept.
$unwind: { path: "$cName" , preserveNullAndEmptyArrays: true }
},
{
$addFields: {
// NOTE(review): reads `colorName`, whereas the sample colors documents
// shown earlier use `name` — confirm the field name.
mItems: {
prName: "$productName.productName",
prColor: "$cName.colorName"
},
// NOTE(review): inside $addFields a bare 0 is presumably assigned as a
// literal value rather than excluding the field — confirm.
productName: 0,
cName: 0
}
}
])
.sort({addedDate: -1})
.skip(+req.query.offset)
.limit(+req.query.limit)
But it returns only 1 element from the object array. I probably need something like a for loop, but I couldn't do it.
Thank you for your responses, and have a good day!
$unwind deconstructs the items array
$lookup with the products collection
$lookup with the colors collection
$addFields with $arrayElemAt to take the first element of each lookup result
$group by _id to reconstruct the items array, passing through the other fields you need
Sorting, skipping and limiting should be done with pipeline stages rather than with external methods chained onto aggregate(), like below:
$sort by addedDate in descending order
$skip and $limit the result
// Resolve the product and colour name of every item, then rebuild each
// document's items array.
const moves = await ProductMoves.aggregate([
  { $match: query }, // this is my query

  // Work on one item per document so each lookup yields that item's own match.
  { $unwind: "$items" },

  // Joined products land in `itemId`, joined colours in `itemColor`.
  { $lookup: { from: "products", localField: "items.itemId", foreignField: "_id", as: "itemId" } },
  { $lookup: { from: "colors", localField: "items.itemColor", foreignField: "_id", as: "itemColor" } },

  // Replace the ids inside the item with the first matched name.
  {
    $addFields: {
      "items.itemId": { $arrayElemAt: ["$itemId.productName", 0] },
      "items.itemColor": { $arrayElemAt: ["$itemColor.name", 0] },
    },
  },

  // Put the items back together per original document; add any other field
  // you want in the result here, the way "addedDate" is carried through.
  {
    $group: {
      _id: "$_id",
      items: { $push: "$items" },
      addedDate: { $first: "$addedDate" },
    },
  },

  // Ordering and paging expressed as pipeline stages.
  { $sort: { addedDate: -1 } },
  { $skip: +req.query.offset },
  { $limit: +req.query.limit },
]);
Playground

Mongoose lookup across 3 collections using foreign key

I have found a few questions that relate to this (here and here) but I have been unable to interpret the answers in a way that I can understand how to do what I need.
I have 3 collections: Organisations, Users, and Projects. Every project belongs to one user, and every user belongs to one organisation. From the user's id, I need to return all the projects that belong to the organisation that the logged-in user belongs to.
Returning the projects from the collection that belong to the user is easy, with this query:
const projects = await Project.find({ user: req.user.id }).sort({ createdAt: -1 })
Each user has an organisation id as a foreign key, and I think I need to do something with $lookup and perhaps $unwind mongo commands, but unlike with SQL queries I really struggle to understand what's going on so I can construct queries correctly.
EDIT: Using this query
// Asker's attempt. The call is not awaited here; the value printed right
// after this snippet is the Aggregate object holding the pipeline, not documents.
const orgProjects = User.aggregate(
[
{
$match: { _id: req.user.id }
},
{
// Keeps only `org_id`.
// NOTE(review): the next stage joins on `organisation`, which this
// projection does not keep — confirm which field name the schema uses.
$project: { _id: 0, org_id: 1 }
},
{
$lookup: {
from: "users",
localField: "organisation",
// NOTE(review): this is a JavaScript expression, not a field-name string
// like the other localField/foreignField values — confirm intent.
foreignField: Organisation._id,
as: "users_of_org"
}
},
{
// Projects whose `user` matches one of the joined users' _id.
$lookup: {
from: "projects",
localField: "users_of_org._id",
foreignField: "user",
as: "projects"
}
},
{
$unset: ["organisation", "users_of_org"]
},
{
// One output document per joined project...
$unwind: "$projects"
},
{
// ...and that project becomes the whole output document.
$replaceWith: "$projects"
}
])
Seems to almost work, returning the following:
Aggregate {
_pipeline: [
{ '$match': [Object] },
{ '$project': [Object] },
{ '$lookup': [Object] },
{ '$lookup': [Object] },
{ '$unset': [Array] },
{ '$unwind': '$projects' },
{ '$replaceWith': '$projects' }
],
_model: Model { User },
options: {}
}
assuming your documents have a schema like this, you could do an aggregation pipeline like below with 2 $lookup stages.
// Starting from one user, collect the projects of every user that shares
// the same org_id.
db.users.aggregate([
  // Pick the user and keep only their organisation id.
  { $match: { _id: "user1" } },
  { $project: { _id: 0, org_id: 1 } },

  // Self-join on users: everyone with that org_id.
  { $lookup: { from: "users", localField: "org_id", foreignField: "org_id", as: "users_of_org" } },

  // Projects whose user_id is one of those users' _id.
  { $lookup: { from: "projects", localField: "users_of_org._id", foreignField: "user_id", as: "projects" } },

  // Drop the helper fields, then promote each joined project to a
  // top-level result document.
  { $unset: ["org_id", "users_of_org"] },
  { $unwind: "$projects" },
  { $replaceWith: "$projects" },
]);

Mongodb $group and $lookup aggregation

I have two Collections Upload and Notification and I want to populate imageid of Notification with _id of Upload Collection. However $lookup returns an empty array
const user = req.user.id;
// Groups notifications by imageid, then tries to join the matching Upload document.
const aggregation = await Notification.aggregate(
[
{
$group: {
_id: { imageid: '$imageid' },
// `user` is the JavaScript value read from req.user.id above, not a '$' field path.
to_userid: { $first: user },
opened: { $first: '$opened' },
imageid: { $first: '$imageid' },
notification_date: { $first: '$notification_date' },
// Number of notifications in the group.
total: { $sum: 1 }
}
},
{
// NOTE(review): this join is the one reported to return an empty array.
// Presumably `from` must equal the stored collection name exactly, and
// `imageid` must have the same type as Upload's `_id` — verify both.
$lookup: {
from: "Upload",
localField: "imageid",
foreignField: "_id",
as: "images"
}
},
]
)

How to do lookup on an aggregated collection in mongodb that is being grouped?

For some reason, I can't retrieve the author name from another collection on my aggregate query.
// Asker's attempt: total sale price per author, with the author name expected in the output.
db.getCollection('books').aggregate([
{
// Active, not-borrowed books, excluding three authors by id.
$match: {
authorId: { $nin: [ObjectId('5b9a008575c50f1e6b02b27b'), ObjectId('5ba0fb3275c50f1e6b02b2f5'), ObjectId('5bc058b6ae9a2a4d6df330b1')]},
isBorrowed: { $in: [null, false] },
status: 'ACTIVE',
},
},
{
$lookup: {
from: "authors",
localField: "authorId", // key of author id in "books" collection
foreignField: "_id", // key of author id in "authors" collection
as: "bookAuthor",
}
},
{
// Only `_id` and `totalSalePrice` are produced here; `bookAuthor` is not
// listed, so it is not part of this stage's output.
$group: {
_id: {
author: '$authorId',
},
totalSalePrice: {
$sum: '$sale.amount',
},
},
},
{
$project: {
author: '$_id.author',
totalSalePrice: '$totalSalePrice',
authorName: '$bookAuthor.name', // I can't make this appear
_id: 0,
},
},
// Highest total first.
{ $sort: { totalSalePrice: -1 } },
])
Any advice on where I had it wrong? Thanks for the help.
Two things that are missing here: you need $unwind to convert bookAuthor from an array into single object and then you need to add that object to your $group stage (so that it will be available in next stages), try:
// Total sale price per author, with the author's document carried through
// the grouping so its name can be projected afterwards.
db.getCollection('books').aggregate([
  {
    $match: {
      authorId: {
        $nin: [
          ObjectId('5b9a008575c50f1e6b02b27b'),
          ObjectId('5ba0fb3275c50f1e6b02b2f5'),
          ObjectId('5bc058b6ae9a2a4d6df330b1'),
        ],
      },
      isBorrowed: { $in: [null, false] },
      status: 'ACTIVE',
    },
  },

  // this will be an array
  { $lookup: { from: "authors", localField: "authorId", foreignField: "_id", as: "bookAuthor" } },

  // Convert bookAuthor from an array into a single object.
  { $unwind: "$bookAuthor" },

  {
    $group: {
      _id: { author: '$authorId' },
      // Keep the author object so the stages after $group can still read it.
      bookAuthor: { $first: "$bookAuthor" },
      totalSalePrice: { $sum: '$sale.amount' },
    },
  },

  {
    $project: {
      author: '$_id.author',
      totalSalePrice: '$totalSalePrice',
      authorName: '$bookAuthor.name',
      _id: 0,
    },
  },

  // Highest total first.
  { $sort: { totalSalePrice: -1 } },
]);
Actually you have lost the bookAuthor field in the $group stage. You have to use $first accumulator to get it in the next $project stage.
{ "$group": {
"_id": { "author": "$authorId" },
"totalSalePrice": { "$sum": "$sale.amount" },
"authorName": { "$first": "$bookAuthor" }
}},
{ "$project": {
"author": "$_id.author",
"totalSalePrice": "$totalSalePrice",
"authorName": { "$arrayElemAt": ["$bookAuthor.name", 0] }
"_id": 0,
}}

Meteor Mongo Aggregate $lookup specify output field

I have two collections: Products and Stocks.
The relation between these two collections is one to one.
Products structure:
{
_id:
sku:
....
}
Stocks structure :
{
_id:
sku:
availability: []
....
}
My query:
// Asker's query: filter products, join Stocks by sku, then sort.
Products.aggregate([
{
$match: cAux
}, {
// The joined Stocks documents are stored whole under `availability`.
$lookup: {
from: "Stocks",
localField: "sku",
foreignField: "sku",
as: "availability"
}
}, {
$sort: PRODUCT_SORT
}
]);
The result from this "join" is
{
_id:
sku:
availability: {_id:, sku:, **availability**: []}
...
}
The join is okay, but I would like to have only the availability array field joined, not the whole Stock document. What's the best way to accomplish this? Any help would be appreciated.
Solution
// Join Stocks by sku, but expose only the stock's `availability` array on
// the product instead of the whole Stock document.
Products.aggregate([
  { $match: cAux },

  { $lookup: { from: "Stocks", localField: "sku", foreignField: "sku", as: "availability" } },

  {
    $project: {
      ...PRODUCT_FIELDS,
      // The lookup result is an array of Stock documents; take the
      // `availability` field of the first one.
      availability: { $arrayElemAt: ['$availability.availability', 0] },
    },
  },

  { $sort: PRODUCT_SORT },
]);