MongoDB: how to aggregate from multiple collections with same aggregation pipeline - mongodb

I'm trying to get aggregations with same aggregation pipeline including $match and $group operations from multiple collections.
For example,
with a users collection and collections of questions, answers and comments where every document has authorId and created_at field,
db = [
'users': [{ _id: 123 }, { _id: 456} ],
'questions': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T00:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T00:00:00Z') },
],
'answers': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-05T08:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-01T08:00:00Z') },
],
'comments': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T16:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T16:00:00Z') },
],
]
I want to get counts of documents from each collections with created_at between a given range and grouped by authorId.
A desired aggregation result may look like below. The _ids here are ObjectIds of documents in users collection.
\\ match: { createdAt: { $gt: ISODate('2022-09-03T00:00:00Z) } }
[
{ _id: ObjectId('123'), questionCount: 0, answerCount: 1, commentCount: 0 },
{ _id: ObjectId('456'), questionCount: 1, answerCount: 0, commentCount: 1 }
]
Currently, I am running aggregation below for each collection, combining the results in the backend service. (I am using Spring Data MongoDB Reactive.) This seems very inefficient.
db.collection.aggregate([
{ $match: {
created_at: { $gt: ISODate('2022-09-03T00:00:00Z') }
}},
{ $group : {
_id: '$authorId',
count: {$sum: 1}
}}
])
How can I get the desired result with one aggregation?
I thought $unionWith or $lookup may help but I'm stuck here.

You can try something like this, using $lookup, here we join users, with all the three collections one-by-one, and then calculate the count:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "comments"
}
},
{
"$project": {
"questionCount": {
"$size": "$questions"
},
"answersCount": {
"$size": "$answers"
},
"commentsCount": {
"$size": "$comments"
}
}
}
])
Playground link. In the above query, we use pipelined form of $lookup, to perform join on some custom logic. Learn more about $lookup here.
Another way is this, perform normal lookup and then filter out the elements:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"localField": "_id",
"foreignField": "authorId",
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"localField": "_id",
"foreignField": "authorId",
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"localField": "_id",
"foreignField": "authorId",
"as": "comments"
}
},
{
"$project": {
questionCount: {
"$size": {
"$filter": {
"input": "$questions",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
answerCount: {
"$size": {
"$filter": {
"input": "$answers",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
commentsCount: {
"$size": {
"$filter": {
"input": "$comments",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
}
}
}
])
Playground link.

Related

MongoDB multiple/nested aggregations

I have these collections:
users
{
_id: "userId1",
// ...
tracks: ["trackId1", "trackId2"],
};
tracks
{
_id: "trackId1",
// ...
creatorId: "userId1",
categoryId: "categoryId1"
}
categories
{
_id: "categoryId1",
// ...
tracks: ["trackId1", "trackId15", "trackId20"],
};
by using the following code, I am able to get a track by its ID and add the creator
tracks.aggregate([
{
$match: { _id: ObjectId(trackId) },
},
{
$lookup: {
let: { userId: { $toObjectId: "$creatorId" } },
from: "users",
pipeline: [{ $match: { $expr: { $eq: ["$_id", "$$userId"] } } }],
as: "creator",
},
},
{ $limit: 1 },
])
.toArray();
Response:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
// ...
"tracks": [
"trackId5",
"trackId10",
"trackId65"
]
}
}
but what I am struggling with is that I want the creator.tracks to aggregate also returning the tracks by their ID (e.g up to last 5), and also to get the last 5 tracks from the categoryId
expected result:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
"tracks": [
{
"_id": "trackId5",
// the rest object without the creator
},
{
"_id": "trackId10",
// the rest object without the creator
},
{
"_id": "trackId65",
// the rest object without the creator
},
]
},
// without trackId1 which is the one that is being viewed
"relatedTracks": [
{
"_id": "trackId15",
// the rest object without the creator
},
{
"_id": "trackId20",
// the rest object without the creator
},
]
}
I would appreciate any explanation/help to understand what is the best one to do it and still keep the good performance
Query
start from a track
join with users using the trackId get all the tracks of the creator
(creator-tracks)
join with categories using the categoryId to get all the tracks of the category (related tracks)
remove from related-tracks the tracks of the creator
take the last 5 from both using $slice (creator-tracks and related-tracks)
*i added 2 extra lookups to get all info of the tracks, its empty arrays because i dont have enough data(i have only trackId1), with all the data it will work
PlayMongo
db.tracks.aggregate([
{
"$match": {
"_id": "trackId1"
}
},
{
"$lookup": {
"from": "users",
"localField": "creatorId",
"foreignField": "_id",
"as": "creator-tracks"
}
},
{
"$set": {
"creator-tracks": {
"$arrayElemAt": [
"$creator-tracks.tracks",
0
]
}
}
},
{
"$lookup": {
"from": "categories",
"localField": "categoryId",
"foreignField": "_id",
"as": "related-tracks"
}
},
{
"$set": {
"related-tracks": {
"$arrayElemAt": [
"$related-tracks.tracks",
0
]
}
}
},
{
"$set": {
"related-tracks": {
"$filter": {
"input": "$related-tracks",
"cond": {
"$not": [
{
"$in": [
"$$this",
"$creator-tracks"
]
}
]
}
}
}
}
},
{
"$set": {
"creator-tracks": {
"$slice": [
{
"$filter": {
"input": "$creator-tracks",
"cond": {
"$ne": [
"$$this",
"$_id"
]
}
}
},
-5
]
}
}
},
{
"$set": {
"related-tracks": {
"$slice": [
"$related-tracks",
-5
]
}
}
},
{
"$lookup": {
"from": "tracks",
"localField": "creator-tracks",
"foreignField": "_id",
"as": "creator-tracks-all-info"
}
},
{
"$lookup": {
"from": "tracks",
"localField": "related-tracks",
"foreignField": "_id",
"as": "related-tracks-all-info"
}
}
])

how to filter the product and optimize mongodb query?

How to filter the product and optimize mongodb query,
We would like to get popular products base on some conditions i.e which products are orders, view and likes.
db.products.aggregate([
{
"$lookup": {
"from": "orders",
"localField": "_id",
"foreignField": "product_id",
"as": "orders"
}
},
{
"$addFields": {
"orderCount": {
"$size": {
"$cond": [
{
"$isArray": "$orders"
},
"$orders",
[]
]
}
}
}
},
{
"$addFields": {
"likeCount": {
"$size": {
"$cond": [
{
"$isArray": "$likes"
},
"$likes",
[]
]
}
}
}
},
{
"$addFields": {
"sumCount": {
"$sum": [
"$orderCount",
"$likeCount",
"$view"
]
}
}
},
{
$sort: {
"sumCount": -1
}
}
])
https://mongoplayground.net/p/fIG3-yHGuV6
Have to use multiple $addFields what would be best option to achieve the products that have the most orders, likes and views. please guide
thanks
I would suggest 2 corrections,
orders size does not need verification if it is an array or not condition because $lookup stage will always return in array
You can do both operations for orderCount and likeCount in a single $addFields stage
You final query would be,
db.products.aggregate([
{
"$lookup": {
"from": "orders",
"localField": "_id",
"foreignField": "product_id",
"as": "orders"
}
},
{
"$addFields": {
"orderCount": { "$size": "$orders" },
"likeCount": {
"$size": {
"$cond": [{ "$isArray": "$likes" }, "$likes", []]
}
}
}
},
{
"$addFields": {
"sumCount": {
"$sum": ["$orderCount", "$likeCount", "$view"]
}
}
},
{ "$sort": { "sumCount": -1 } }
])
Playground
You can also use projection to minimize the code
{
"$project": {
"likes": 1,
"orderCount": {
"$size": {
"$cond": {
"if": {
"$isArray": [
"$orders"
]
},
"then": "$orders",
"else": []
}
}
},
"likeCount": {
"$size": {
"$cond": {
"if": {
"$isArray": [
"$likes"
]
},
"then": "$likes",
"else": []
}
}
},
"views": {
"$ifNull": [
"$view",
0
]
}
}
},
https://mongoplayground.net/p/qUNftLP_-PN
check the mongoplayground.

How to generate object ids when $unwinding with aggregate in mongodb

I'm having the following query
db.getCollection('matches').aggregate([{
"$lookup": {
"from": "player",
"localField": "players.account_id",
"foreignField": "account_id",
"as": "players2"
}
}, {
"$addFields": {
"players": {
"$map": {
"input": "$players",
"in": {
"$mergeObjects": [
"$$this", {
"$arrayElemAt": [
"$players2", {
"$indexOfArray": [
"$players.account_id",
"$$this.account_id"
]
}
]
}
]
}
}
}
}
}, {
"$set": {
"players.match_id": "$match_id",
"players.radiant_win": "$radiant_win"
}
}, {
"$unwind": "$players"
}, {
"$replaceRoot": {
"newRoot": "$players"
}
}, {
"$project": {
"_id": 1,
"match_id": 1,
"account_id": 1,
"hero_id": 1,
"radiant_win": 1
}
}
])
which is supposed to match an inner array with another collection, merge the objects in the arrays by the matching and then unwrap ($unwind) the array into a new collection.
Unfortunately, I'm getting duplicate Object ids which is sort of a problem for when I want to export this collection.
How can I ensure unique Object_Ids for the aggregation?
Thanks in advance!

mongodb aggregation pipeline not returning proper result and slow

I have three collections users, products and orders , orders type has two possible values "Cash" and "Online". One users can have single/multiple products and products have none/single/multiple orders. I want to text search on users collection on name. Now I want to write a query which will return all matching users on text search highest text score first, it might be possible one user's name is returning top score but don't have any products and orders.
I have written a query but it's not returning users who has text score highest but don't have any products/orders. It's only returning users who has record present in all three collections. And also performance of this query is not great taking long time if a user has lot of products for example more than 3000 products. Any help appreciated.
db.users.aggregate(
[
{
"$match": {
"$text": {
"$search": "john"
}
}
},
{
"$addFields": {
"score": {
"$meta": "textScore"
}
}
},
{
"$sort": {
"Score": {
"$meta": "textScore"
}
}
},
{
"$skip": 0
},
{
"$limit": 6
},
{
"$lookup": {
"from": "products",
"localField": "userId",
"foreignField": "userId",
"as": "products"
}
},
{ $unwind: '$products' },
{
"$lookup": {
"from": "orders",
"let": {
"products": "$products"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$in": [
"$productId",
["$$products.productId"]
]
},
{
"$eq": [
"$orderType",
"Cash"
]
}
]
}
}
}
],
"as": "orders"
}
},
{ $unwind: 'orders' },
{
$group: {
_id: "$_id",
name: { $first: "$name" },
userId: { $first: "$userId" },
products: { $addToSet: "$products" },
orders: { $addToSet: "$orders" },
score: { $first: "$score" },
}
},
{ $sort: { "score": -1 } }
]
);
Issue:
Every lookup produces an array which holds the matched documents. When no documents are found, the array would be empty. Unwinding that empty array would break the pipeline immediately. That's the reason, we are not getting user records with no products/orders. We would need to preserve such arrays so that the pipeline execution can continue.
Improvements:
In orders lookup, the $eq can be used instead of $in, as we already
unwinded the products array and each document now contains only
single productId
Create an index on userId in products collection to make the query more efficient
Following is the updated query:
db.users.aggregate([
{
"$match": {
"$text": {
"$search": "john"
}
}
},
{
"$addFields": {
"score": {
"$meta": "textScore"
}
}
},
{
"$skip": 0
},
{
"$limit": 6
},
{
"$lookup": {
"from": "products",
"localField": "userId",
"foreignField": "userId",
"as": "products"
}
},
{
$unwind: {
"path":"$products",
"preserveNullAndEmptyArrays":true
}
},
{
"$lookup": {
"from": "orders",
"let": {
"products": "$products"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$eq": [
"$productId",
"$$products.productId"
]
},
{
"$eq": [
"$orderType",
"Cash"
]
}
]
}
}
}
],
"as": "orders"
}
},
{
$unwind: {
"path":"$orders"
"preserveNullAndEmptyArrays":true
}
},
{
$group: {
_id: "$_id",
name: {
$first: "$name"
},
userId: {
$first: "$userId"
},
products: {
$addToSet: "$products"
},
orders: {
$addToSet: "$orders"
},
score: {
$first: "$score"
}
}
},
{
$sort: {
"score": -1
}
}
]);
To get more information on unwind, please check https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

How to perform nested lookup in mongo query?

This is my aggregation query
db.user.aggregate([
{ $addFields: { user_id: { $toString: "$_id" } } },
{
$lookup: {
from: "walletData",
let: { id: "$user_id" },
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: ["$userId", "$$id"]
},
{
$gt: ["$lastBalance", 0]
}
]
}
}
}
],
as: "balance"
}
}
])
I get the desired output from this result but need to join one more collection in this query. How can i achieve that?
For example consider these collections:
user : {
"_id": ObjectId("xyz")
}
walletData:{
"userId": "xyz",
"lastBalance": 5
}
AnotherWalletdata:{
"userId": "xyz",
"lastBalance": 6
}
I got the result after joining first two tables how do i join the third table only if the balance of the second table(walletData) is greater than zero?
Expected Output :
{"id":"xyz",
"walletdataBal":5,
"AnotherWalletDataBal":6
}
You can join any number of collections by using only $lookup and $unwind one after another followed by Conditional Projection for whatever that's required at last. Below is the well-tested and working solution for the same :
db.user.aggregate([
{$lookup: {from: "walletData", localField: "_id", foreignField: "userId", as: "walletDataBal"}},
{$unwind: "$walletDataBal"},
{$lookup: {from: "anotherwalletData", localField: "_id", foreignField: "userId", as: "anotherWalletDataBal"}},
{$unwind: "$anotherWalletDataBal"},
{$project: {"id": "$_id", "_id": 0, walletDataBal: "$walletDataBal.lastBalance",
anotherWalletDataBal: {$cond:
{if: { $gt: [ "$walletDataBal.lastBalance", 0 ] },
then: "$anotherWalletDataBal.lastBalance",
else: "$$REMOVE" }}}
]).pretty();
You can add another $lookup stage to achieve the output
db.user.aggregate([
{ "$addFields": { "user_id": { "$toString": "$_id" } } },
{ "$lookup": {
"from": "walletData",
"let": { "id": "$user_id" },
"pipeline": [
{ "$match": {
"$expr": {
"$and": [
{ "$eq": ["$userId", "$$id"] },
{ "$gt": ["$lastBalance", 0] }
]
}
}}
],
"as": "balance"
}},
{ "$lookup": {
"from": "anotherWalletData",
"let": { "id": "$user_id" },
"pipeline": [
{ "$match": {
"$expr": {
"$and": [
{ "$eq": ["$userId", "$$id"] },
{ "$gt": ["$lastBalance", 0] }
]
}
}}
],
"as": "anotherWalletData"
}},
{ "$project": {
"walletdataBal": { "$arrayElemAt": ["$balance.lastBalance", 0] },
"anotherwalletdataBal": {
"$arrayElemAt": ["$anotherWalletData.lastBalance", 0]
}
}}
])
{ $unwind: "$balance" },
{ $lookup: {
from: "walletData",
localField: "balance.userId",
foreignField: "userId",
as:"anotherwalletData"
}}
])
I solved my query, I had to apply unwind after the above lookup.