MongoDB multiple/nested aggregations - mongodb

I have these collections:
users
{
_id: "userId1",
// ...
tracks: ["trackId1", "trackId2"],
};
tracks
{
_id: "trackId1",
// ...
creatorId: "userId1",
categoryId: "categoryId1"
}
categories
{
_id: "categoryId1",
// ...
tracks: ["trackId1", "trackId15", "trackId20"],
};
by using the following code, I am able to get a track by its ID and add the creator
tracks.aggregate([
{
$match: { _id: ObjectId(trackId) },
},
{
$lookup: {
let: { userId: { $toObjectId: "$creatorId" } },
from: "users",
pipeline: [{ $match: { $expr: { $eq: ["$_id", "$$userId"] } } }],
as: "creator",
},
},
{ $limit: 1 },
])
.toArray();
Response:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
// ...
"tracks": [
"trackId5",
"trackId10",
"trackId65"
]
}
}
but what I am struggling with is that I want the creator.tracks to aggregate also returning the tracks by their ID (e.g up to last 5), and also to get the last 5 tracks from the categoryId
expected result:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
"tracks": [
{
"_id": "trackId5",
// the rest object without the creator
},
{
"_id": "trackId10",
// the rest object without the creator
},
{
"_id": "trackId65",
// the rest object without the creator
},
]
},
// without trackId1 which is the one that is being viewed
"relatedTracks": [
{
"_id": "trackId15",
// the rest object without the creator
},
{
"_id": "trackId20",
// the rest object without the creator
},
]
}
I would appreciate any explanation/help to understand what is the best one to do it and still keep the good performance

Query
start from a track
join with users using the trackId get all the tracks of the creator
(creator-tracks)
join with categories using the categoryId to get all the tracks of the category (related tracks)
remove from related-tracks the tracks of the creator
take the last 5 from both using $slice (creator-tracks and related-tracks)
*i added 2 extra lookups to get all info of the tracks, its empty arrays because i dont have enough data(i have only trackId1), with all the data it will work
PlayMongo
db.tracks.aggregate([
{
"$match": {
"_id": "trackId1"
}
},
{
"$lookup": {
"from": "users",
"localField": "creatorId",
"foreignField": "_id",
"as": "creator-tracks"
}
},
{
"$set": {
"creator-tracks": {
"$arrayElemAt": [
"$creator-tracks.tracks",
0
]
}
}
},
{
"$lookup": {
"from": "categories",
"localField": "categoryId",
"foreignField": "_id",
"as": "related-tracks"
}
},
{
"$set": {
"related-tracks": {
"$arrayElemAt": [
"$related-tracks.tracks",
0
]
}
}
},
{
"$set": {
"related-tracks": {
"$filter": {
"input": "$related-tracks",
"cond": {
"$not": [
{
"$in": [
"$$this",
"$creator-tracks"
]
}
]
}
}
}
}
},
{
"$set": {
"creator-tracks": {
"$slice": [
{
"$filter": {
"input": "$creator-tracks",
"cond": {
"$ne": [
"$$this",
"$_id"
]
}
}
},
-5
]
}
}
},
{
"$set": {
"related-tracks": {
"$slice": [
"$related-tracks",
-5
]
}
}
},
{
"$lookup": {
"from": "tracks",
"localField": "creator-tracks",
"foreignField": "_id",
"as": "creator-tracks-all-info"
}
},
{
"$lookup": {
"from": "tracks",
"localField": "related-tracks",
"foreignField": "_id",
"as": "related-tracks-all-info"
}
}
])

Related

MongoDB: how to aggregate from multiple collections with same aggregation pipeline

I'm trying to get aggregations with same aggregation pipeline including $match and $group operations from multiple collections.
For example,
with a users collection and collections of questions, answers and comments where every document has authorId and created_at field,
db = [
'users': [{ _id: 123 }, { _id: 456} ],
'questions': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T00:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T00:00:00Z') },
],
'answers': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-05T08:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-01T08:00:00Z') },
],
'comments': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T16:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T16:00:00Z') },
],
]
I want to get counts of documents from each collections with created_at between a given range and grouped by authorId.
A desired aggregation result may look like below. The _ids here are ObjectIds of documents in users collection.
\\ match: { createdAt: { $gt: ISODate('2022-09-03T00:00:00Z) } }
[
{ _id: ObjectId('123'), questionCount: 0, answerCount: 1, commentCount: 0 },
{ _id: ObjectId('456'), questionCount: 1, answerCount: 0, commentCount: 1 }
]
Currently, I am running aggregation below for each collection, combining the results in the backend service. (I am using Spring Data MongoDB Reactive.) This seems very inefficient.
db.collection.aggregate([
{ $match: {
created_at: { $gt: ISODate('2022-09-03T00:00:00Z') }
}},
{ $group : {
_id: '$authorId',
count: {$sum: 1}
}}
])
How can I get the desired result with one aggregation?
I thought $unionWith or $lookup may help but I'm stuck here.
You can try something like this, using $lookup, here we join users, with all the three collections one-by-one, and then calculate the count:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "comments"
}
},
{
"$project": {
"questionCount": {
"$size": "$questions"
},
"answersCount": {
"$size": "$answers"
},
"commentsCount": {
"$size": "$comments"
}
}
}
])
Playground link. In the above query, we use pipelined form of $lookup, to perform join on some custom logic. Learn more about $lookup here.
Another way is this, perform normal lookup and then filter out the elements:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"localField": "_id",
"foreignField": "authorId",
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"localField": "_id",
"foreignField": "authorId",
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"localField": "_id",
"foreignField": "authorId",
"as": "comments"
}
},
{
"$project": {
questionCount: {
"$size": {
"$filter": {
"input": "$questions",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
answerCount: {
"$size": {
"$filter": {
"input": "$answers",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
commentsCount: {
"$size": {
"$filter": {
"input": "$comments",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
}
}
}
])
Playground link.

How to generate object ids when $unwinding with aggregate in mongodb

I'm having the following query
db.getCollection('matches').aggregate([{
"$lookup": {
"from": "player",
"localField": "players.account_id",
"foreignField": "account_id",
"as": "players2"
}
}, {
"$addFields": {
"players": {
"$map": {
"input": "$players",
"in": {
"$mergeObjects": [
"$$this", {
"$arrayElemAt": [
"$players2", {
"$indexOfArray": [
"$players.account_id",
"$$this.account_id"
]
}
]
}
]
}
}
}
}
}, {
"$set": {
"players.match_id": "$match_id",
"players.radiant_win": "$radiant_win"
}
}, {
"$unwind": "$players"
}, {
"$replaceRoot": {
"newRoot": "$players"
}
}, {
"$project": {
"_id": 1,
"match_id": 1,
"account_id": 1,
"hero_id": 1,
"radiant_win": 1
}
}
])
which is supposed to match an inner array with another collection, merge the objects in the arrays by the matching and then unwrap ($unwind) the array into a new collection.
Unfortunately, I'm getting duplicate Object ids which is sort of a problem for when I want to export this collection.
How can I ensure unique Object_Ids for the aggregation?
Thanks in advance!

MongoDB Aggregation: "Populate" (resolve) Ref in Nested Array

Background
I have a collection Items with documents such as
{
"_id": "5d9e3a5ced27230f68032e21",
... more fields
"foos": [
{
"_id": "5d9e3a5ced27230f68032e25",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93"
],
},
{
"id": "5d9e3a5ced27230f68032e24",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93",
"5d9e3a23ed27230f68032e1a"
]
}
]
}
with bars referring to another collection Bars.
Goal
I'd like to get a list of all documents (with all their fields) in Items, but with bars resolved to the document in Bars.
Small Catch
I want to be able to create a generic function to which I simply pass the path to resolve (e.g. foos.bars) and the collection from which to resolve (Bars) so that I can use it with different collections and arbitrary levels of nesting.
Initial Approach
I've found a rather complicated way to do this for my example, but before I generalize it, I'd like to know whether there really isn't a simpler way. Input is highly appreciated!
Here's what I've got:
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$group": {
"_id": {
"id": "$_id",
"foo_index": "$foos_index"
},
"foos": {
"$first": "$foos"
},
"bars": {
"$push": "$foos.bars"
}
}
},
{
"$addFields": {
"foos": {
"$mergeObjects": [
"$foos",
{
"bars": "$bars"
}
]
}
}
},
{
"$group": {
"_id": "$_id.id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]
Update: First Iteration
I've realized I don't need the "leaf" level unwound, so I've now got a simplified version (but for deeper nesting, I'll still need what I had before, right?):
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$group": {
"_id": "$_id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]
Maybe someone still finds something better, but until then, if someone comes across this, the following seems reasonable to me by now:
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$group": {
"_id": "$_id",
"savepoint": {
"$first": "$$ROOT"
},
"foos": {
"$push": "$foos"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$savepoint",
{
"foos": "$foos"
}
]
}
}
}
]

Merge two $lookup collections in MongoDB

Here i am try to get aggregated result from my challenge collection with challengeusers and challengeusers has the user_id and i used $lookup to join the users too.
When i use this query, i am getting following output.
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"chalenge_id": "5b7bf6fd87ec106308d7e3c1",
"user_id": "5b75623db457045e3bb12e0a",
"status": 1
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1
}
],
"users": [
{
"_id": "5b756144b457045e3bb12e08",
"first_name": "XYZ"
},
{
"_id": "5b75623db457045e3bb12e0a",
"first_name": "BAC"
},
{
"_id": "5b75643c0a97791bcc9ed64c",
"first_name": "YTA"
}
]
}
]
But i want the challengeusers and users to merge in a single object.
Most of all i want the status of challengeusers with user's info.
expected output:
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"user_id": "5b75623db457045e3bb12e0a",
"status": 1,
"first_name": "BAC"
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1,
"first_name": "YTA"
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1,
"first_name": "XYZ"
}
]
}
]
MongoDB Aggregate Query that i am using.
let challenges = await ChallengeModel.aggregate([
{ $match: criteria },
{ $lookup: {
from: 'challengeusers',
localField: '_id',
foreignField: 'challenge_id',
as: 'challnegedusers'
} },
{ $lookup: {
from: 'appusers',
localField: 'challnegedusers.user_id',
foreignField: '_id',
as: 'users'
} },
{ $sort: {created_at: -1}}
]);
You can try below aggregation in mongodb 3.6
ChallengeModel.aggregate([
{ "$match": criteria },
{ "$lookup": {
"from": "challengeusers",
"let": { "challengeusersId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$challenge_id", "$$challengeusersId" ] } } },
{ "$lookup": {
"from": "appusers",
"let": { "user_id": "$user_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$user_id" ] } } },
],
"as": "user"
}},
{ "$unwind": "$user" },
{ "$addFields": { "first_name": "$user.first_name" }},
{ "$project": { "user": 0 }}
],
"as": "challnegedusers"
}}
])
And with your approach you can try this
ChallengeModel.aggregate([
{ "$match": criteria },
{ "$lookup": {
"from": "challengeusers",
"localField": "_id",
"foreignField": "challenge_id",
"as": "challnegedusers"
}},
{ "$unwind": "challnegedusers" },
{ "$lookup": {
"from": "appusers",
"localField": "challnegedusers.user_id",
"foreignField": "_id",
"as": "challnegedusers.user"
}},
{ "$unwind": "challnegedusers.user" },
{ "$addFields": { "challnegedusers.first_name": "$challnegedusers.user.first_name" }},
{ "$sort": { "created_at": -1 }},
{ "$group": {
"_id": "$_id",
"start_date": { "$first": "$start_date" }
"end_date": { "$first": "$end_date" },
"challnegedusers": { "$push": "$challnegedusers" }
}}
])

Multiple sorting in aggregate with mongo

When I use a multiple sorting in aggregate method with mongo, results aren't sorting in the right way. This is my query :
db.MyCollection.aggregate(
{
"$unwind": "$objects"
},
{
"$lookup": {
"from": "CollectionA",
"localField": "objects.itemId",
"foreignField": "_id",
"as": "itemOne"
}
},
{
"$lookup": {
"from": "CollectionB",
"localField": "user_id",
"foreignField": "id",
"as": "users"
}
},
{
"$lookup": {
"from": "CollectionC",
"localField": "objects.itemName",
"foreignField": "name",
"as": "itemTwo"
}
},
{
"$addFields": {
"item": {
"$arrayElemAt": [
"$itemOne",
0
]
},
"user": {
"$arrayElemAt": [
"$users",
0
]
},
"itemP": {
"$arrayElemAt": [
"$itemTwo",
0
]
}
}
},
{
"$addFields": {
"itemName": {
"$ifNull": [
"$item.name",
"$objects.itemName"
]
},
"userName": {
"$concat": [
"$user.firstname",
" ",
"$user.lastname"
]
}
}
},
{
"$match": {
"client_id": 2
}
},
{
"$skip": 1
},
{
"$limit": 10
},
{
"$project": {
"date": "$objects.date",
"state": "$objects.state"
}
},
{
"$sort": {
"objects.state": 1,
"objects.date": 1,
}
}
)
To precise: "date" field is Date type and "state" field is number type.
If I use only one sort : result order is correct. But if I use 2 sorts, results are not order correctly. Have you got any ideas, why ?
As #Neil Lunn says :
They don't sort correctly because you renamed the fields in $project. So it should be { $sort: { state: 1, date: 1 } }