MongoDB Aggregation: "Populate" (resolve) Ref in Nested Array - mongodb

Background
I have a collection Items with documents such as
{
"_id": "5d9e3a5ced27230f68032e21",
... more fields
"foos": [
{
"_id": "5d9e3a5ced27230f68032e25",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93"
],
},
{
"_id": "5d9e3a5ced27230f68032e24",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93",
"5d9e3a23ed27230f68032e1a"
]
}
]
}
with bars referring to another collection Bars.
Goal
I'd like to get a list of all documents (with all their fields) in Items, but with bars resolved to the document in Bars.
Small Catch
I want to be able to create a generic function to which I simply pass the path to resolve (e.g. foos.bars) and the collection from which to resolve (Bars) so that I can use it with different collections and arbitrary levels of nesting.
Initial Approach
I've found a rather complicated way to do this for my example, but before I generalize it, I'd like to know whether there really isn't a simpler way. Input is highly appreciated!
Here's what I've got:
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$group": {
"_id": {
"id": "$_id",
"foo_index": "$foos_index"
},
"foos": {
"$first": "$foos"
},
"bars": {
"$push": "$foos.bars"
}
}
},
{
"$addFields": {
"foos": {
"$mergeObjects": [
"$foos",
{
"bars": "$bars"
}
]
}
}
},
{
"$group": {
"_id": "$_id.id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]
Update: First Iteration
I've realized I don't need the "leaf" level unwound, so I've now got a simplified version (but for deeper nesting, I'll still need what I had before, right?):
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$group": {
"_id": "$_id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]

Maybe someone still finds something better, but until then, if someone comes across this, the following seems reasonable to me by now:
[
  // Split every item into one document per element of `foos`, so that
  // `foos.bars` becomes a plain array of ids that $lookup can resolve.
  // No `includeArrayIndex` is requested: the index was never read, and because
  // the $group below snapshots `$$ROOT`, it would otherwise leak into the final
  // documents as a stray top-level `foos_index` field.
  {
    "$unwind": {
      "path": "$foos"
    }
  },
  // Replace the array of ids in `foos.bars` with the matching documents from Bars.
  {
    "$lookup": {
      "from": "Bars",
      "localField": "foos.bars",
      "foreignField": "_id",
      "as": "foos.bars"
    }
  },
  // Re-assemble one document per original item. `savepoint` keeps a full copy
  // of the (unwound) document so that every other top-level field survives the
  // $group; `foos` collects the resolved elements back into an array.
  {
    "$group": {
      "_id": "$_id",
      "savepoint": {
        "$first": "$$ROOT"
      },
      "foos": {
        "$push": "$foos"
      }
    }
  },
  // Restore the original document shape, overwriting the single unwound `foos`
  // element held in `savepoint` with the rebuilt `foos` array.
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          "$savepoint",
          {
            "foos": "$foos"
          }
        ]
      }
    }
  }
]

Related

MongoDB multiple/nested aggregations

I have these collections:
users
{
_id: "userId1",
// ...
tracks: ["trackId1", "trackId2"],
};
tracks
{
_id: "trackId1",
// ...
creatorId: "userId1",
categoryId: "categoryId1"
}
categories
{
_id: "categoryId1",
// ...
tracks: ["trackId1", "trackId15", "trackId20"],
};
by using the following code, I am able to get a track by its ID and add the creator
tracks.aggregate([
{
$match: { _id: ObjectId(trackId) },
},
{
$lookup: {
let: { userId: { $toObjectId: "$creatorId" } },
from: "users",
pipeline: [{ $match: { $expr: { $eq: ["$_id", "$$userId"] } } }],
as: "creator",
},
},
{ $limit: 1 },
])
.toArray();
Response:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
// ...
"tracks": [
"trackId5",
"trackId10",
"trackId65"
]
}
}
What I am struggling with is that I also want creator.tracks to be resolved to the actual track documents by their IDs (e.g. only the last 5), and I also want to get the last 5 tracks from the track's category (categoryId).
expected result:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
"tracks": [
{
"_id": "trackId5",
// the rest object without the creator
},
{
"_id": "trackId10",
// the rest object without the creator
},
{
"_id": "trackId65",
// the rest object without the creator
},
]
},
// without trackId1 which is the one that is being viewed
"relatedTracks": [
{
"_id": "trackId15",
// the rest object without the creator
},
{
"_id": "trackId20",
// the rest object without the creator
},
]
}
I would appreciate any explanation or help in understanding the best way to do this while still keeping good performance.
Query
start from a track
join with users using the creatorId to get all the tracks of the creator
(creator-tracks)
join with categories using the categoryId to get all the tracks of the category (related tracks)
remove from related-tracks the tracks of the creator
take the last 5 from both using $slice (creator-tracks and related-tracks)
*I added 2 extra lookups to get the full info of the tracks. They return empty arrays here because I don't have enough sample data (I only have trackId1); with all the data it will work.
PlayMongo
db.tracks.aggregate([
{
"$match": {
"_id": "trackId1"
}
},
{
"$lookup": {
"from": "users",
"localField": "creatorId",
"foreignField": "_id",
"as": "creator-tracks"
}
},
{
"$set": {
"creator-tracks": {
"$arrayElemAt": [
"$creator-tracks.tracks",
0
]
}
}
},
{
"$lookup": {
"from": "categories",
"localField": "categoryId",
"foreignField": "_id",
"as": "related-tracks"
}
},
{
"$set": {
"related-tracks": {
"$arrayElemAt": [
"$related-tracks.tracks",
0
]
}
}
},
{
"$set": {
"related-tracks": {
"$filter": {
"input": "$related-tracks",
"cond": {
"$not": [
{
"$in": [
"$$this",
"$creator-tracks"
]
}
]
}
}
}
}
},
{
"$set": {
"creator-tracks": {
"$slice": [
{
"$filter": {
"input": "$creator-tracks",
"cond": {
"$ne": [
"$$this",
"$_id"
]
}
}
},
-5
]
}
}
},
{
"$set": {
"related-tracks": {
"$slice": [
"$related-tracks",
-5
]
}
}
},
{
"$lookup": {
"from": "tracks",
"localField": "creator-tracks",
"foreignField": "_id",
"as": "creator-tracks-all-info"
}
},
{
"$lookup": {
"from": "tracks",
"localField": "related-tracks",
"foreignField": "_id",
"as": "related-tracks-all-info"
}
}
])

How to generate object ids when $unwinding with aggregate in mongodb

I'm having the following query
db.getCollection('matches').aggregate([{
"$lookup": {
"from": "player",
"localField": "players.account_id",
"foreignField": "account_id",
"as": "players2"
}
}, {
"$addFields": {
"players": {
"$map": {
"input": "$players",
"in": {
"$mergeObjects": [
"$$this", {
"$arrayElemAt": [
"$players2", {
"$indexOfArray": [
"$players.account_id",
"$$this.account_id"
]
}
]
}
]
}
}
}
}
}, {
"$set": {
"players.match_id": "$match_id",
"players.radiant_win": "$radiant_win"
}
}, {
"$unwind": "$players"
}, {
"$replaceRoot": {
"newRoot": "$players"
}
}, {
"$project": {
"_id": 1,
"match_id": 1,
"account_id": 1,
"hero_id": 1,
"radiant_win": 1
}
}
])
This is supposed to match an inner array against another collection, merge the matching objects from both arrays, and then unwind ($unwind) the array so that each element becomes its own document in a new collection.
Unfortunately, I'm getting duplicate ObjectIds (_id values), which is a problem when I want to export this collection.
How can I ensure unique ObjectIds for the aggregation?
Thanks in advance!

Dynamic from in $lookup

I am trying to see if I can change the from in the $lookup, or rearrange my query, to somehow retrieve from one of three potential collections. So far I have managed to set up the query like so:
const search = db.collection("search");
search.aggregate([
{
'$match': {
'id_int': 0
}
}, {
'$project': {
'_id': 0,
'collection': 1,
'id_int': 1
}
}, {
'$lookup': {
'from': 'arxiv',
'localField': 'id_int',
'foreignField': 'id_int',
'as': 'arxiv'
}
}
], function(err, cursor) ... )
The $match and then $project pipeline stages return a result with the following properties:
collection:"arxiv"
id_int:0
The collection value will always be one of three: arxiv, crossref or pmc_test. Therefore I'd like my $lookup from to use this property value programmatically, as opposed to having it hard-coded.
'$lookup': {
'from': 'arxiv' or 'crossref' or 'pmc_test', // Dynamic based on result
...
}
Thanks
Edit
id_int will get passed in and collection will not; that's why a query is made to the search collection.
Sadly this is not possible currently, there is an open feature request on it here so you can keep track of it if you wish.
Right now, though, you have two options.
Split your call into 2 queries and add that bit of logic to your code, which is what i personally recommend.
Use this aggregate which looks up all 3 collections:
// Looks up all three candidate collections and keeps only the result that
// belongs to the collection named in the search document.
//
// The lookups are plain sequential stages rather than a $facet: $facet emits a
// single document containing only the facet fields, so `collection` and
// `id_int` would no longer exist at the root when the later stages read them.
search.aggregate([
  {
    '$match': {
      'id_int': 0
    }
  },
  {
    '$project': {
      '_id': 0,
      'collection': 1,
      'id_int': 1
    }
  },
  // One $lookup per possible target; each adds an array field named after its
  // collection (empty when nothing matches).
  {
    "$lookup": {
      "from": "arxiv",
      "localField": "id_int",
      "foreignField": "id_int",
      "as": "arxiv"
    }
  },
  {
    "$lookup": {
      "from": "crossref",
      "localField": "id_int",
      "foreignField": "id_int",
      "as": "crossref"
    }
  },
  {
    "$lookup": {
      "from": "pmc_test",
      "localField": "id_int",
      "foreignField": "id_int",
      "as": "pmc_test"
    }
  },
  // Build the final document as a list of key/value pairs so that the key of
  // the lookup result can be taken dynamically from `collection`.
  {
    "$addFields": {
      "newRoot": [
        {
          "k": "$collection",
          "v": {
            "$cond": [
              {
                "$eq": [
                  "$collection",
                  "arxiv"
                ]
              },
              "$arxiv",
              {
                "$cond": [
                  {
                    "$eq": [
                      "$collection",
                      "crossref"
                    ]
                  },
                  "$crossref",
                  "$pmc_test"
                ]
              }
            ]
          }
        },
        {
          "k": "collection", "v": "$collection"
        },
        {
          "k": "id_int", "v": "$id_int"
        }
      ]
    }
  },
  // Turn the key/value list into the root document, which also drops the two
  // lookup arrays that were not selected.
  {
    "$replaceRoot": {
      "newRoot": {
        "$arrayToObject": "$newRoot"
      }
    }
  }
])
As you might have noticed, the pipeline isn't exactly sexy; if you don't care about the field name in the end result, you can drop most of it.

Merge two $lookup collections in MongoDB

Here I am trying to get an aggregated result from my challenge collection joined with challengeusers. challengeusers has the user_id, so I used a second $lookup to join the users too.
When I use this query, I get the following output.
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"chalenge_id": "5b7bf6fd87ec106308d7e3c1",
"user_id": "5b75623db457045e3bb12e0a",
"status": 1
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1
}
],
"users": [
{
"_id": "5b756144b457045e3bb12e08",
"first_name": "XYZ"
},
{
"_id": "5b75623db457045e3bb12e0a",
"first_name": "BAC"
},
{
"_id": "5b75643c0a97791bcc9ed64c",
"first_name": "YTA"
}
]
}
]
But I want the challengeusers and users to be merged into a single object.
Most of all, I want the status from challengeusers together with the user's info.
expected output:
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"user_id": "5b75623db457045e3bb12e0a",
"status": 1,
"first_name": "BAC"
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1,
"first_name": "YTA"
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1,
"first_name": "XYZ"
}
]
}
]
MongoDB Aggregate Query that i am using.
let challenges = await ChallengeModel.aggregate([
{ $match: criteria },
{ $lookup: {
from: 'challengeusers',
localField: '_id',
foreignField: 'challenge_id',
as: 'challnegedusers'
} },
{ $lookup: {
from: 'appusers',
localField: 'challnegedusers.user_id',
foreignField: '_id',
as: 'users'
} },
{ $sort: {created_at: -1}}
]);
You can try the aggregation below in MongoDB 3.6:
// Resolves each challenge's users with a nested $lookup (let/pipeline form),
// so every entry of `challnegedusers` carries the user's first_name directly.
ChallengeModel.aggregate([
{ "$match": criteria },
{ "$lookup": {
"from": "challengeusers",
// Expose the challenge's _id to the sub-pipeline as $$challengeusersId.
"let": { "challengeusersId": "$_id" },
"pipeline": [
// Keep only the challengeusers rows that belong to this challenge.
{ "$match": { "$expr": { "$eq": [ "$challenge_id", "$$challengeusersId" ] } } },
// Inner join: fetch the appusers document whose _id equals this row's user_id.
{ "$lookup": {
"from": "appusers",
"let": { "user_id": "$user_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$user_id" ] } } },
],
"as": "user"
}},
// $lookup always yields an array; flatten it to a single embedded object.
// Rows without a matching appuser are dropped by this $unwind.
{ "$unwind": "$user" },
// Copy the one wanted user field up, then discard the temporary `user` object.
{ "$addFields": { "first_name": "$user.first_name" }},
{ "$project": { "user": 0 }}
],
"as": "challnegedusers"
}}
])
And with your approach you can try this
// Same result as the nested-$lookup version, built with the classic
// $lookup / $unwind / $group approach: each entry of `challnegedusers`
// ends up carrying the matching user's first_name.
ChallengeModel.aggregate([
  { "$match": criteria },
  // Attach all challengeusers rows that reference this challenge.
  { "$lookup": {
    "from": "challengeusers",
    "localField": "_id",
    "foreignField": "challenge_id",
    "as": "challnegedusers"
  }},
  // One document per challenge user. The field path must carry the `$` prefix;
  // without it $unwind rejects the stage.
  { "$unwind": "$challnegedusers" },
  // Resolve the user of that single row into a temporary `user` sub-field.
  { "$lookup": {
    "from": "appusers",
    "localField": "challnegedusers.user_id",
    "foreignField": "_id",
    "as": "challnegedusers.user"
  }},
  // $lookup yields an array; flatten it to a single embedded object.
  { "$unwind": "$challnegedusers.user" },
  // Copy the wanted user field next to `status`, then drop the temporary
  // `user` object so the entries match the nested-$lookup version's shape.
  { "$addFields": { "challnegedusers.first_name": "$challnegedusers.user.first_name" }},
  { "$project": { "challnegedusers.user": 0 }},
  // Fold the rows back into one document per challenge. `created_at` is
  // carried along only so that the result can be sorted afterwards.
  { "$group": {
    "_id": "$_id",
    "start_date": { "$first": "$start_date" },
    "end_date": { "$first": "$end_date" },
    "created_at": { "$first": "$created_at" },
    "challnegedusers": { "$push": "$challnegedusers" }
  }},
  // Sort AFTER grouping: $group does not preserve the order of its input,
  // so a $sort placed before it has no effect on the final order.
  { "$sort": { "created_at": -1 }},
  // Remove the helper field again to keep the original output shape.
  { "$project": { "created_at": 0 }}
])

Multiple sorting in aggregate with mongo

When I sort on multiple fields in an aggregate with MongoDB, the results aren't sorted correctly. This is my query:
db.MyCollection.aggregate(
{
"$unwind": "$objects"
},
{
"$lookup": {
"from": "CollectionA",
"localField": "objects.itemId",
"foreignField": "_id",
"as": "itemOne"
}
},
{
"$lookup": {
"from": "CollectionB",
"localField": "user_id",
"foreignField": "id",
"as": "users"
}
},
{
"$lookup": {
"from": "CollectionC",
"localField": "objects.itemName",
"foreignField": "name",
"as": "itemTwo"
}
},
{
"$addFields": {
"item": {
"$arrayElemAt": [
"$itemOne",
0
]
},
"user": {
"$arrayElemAt": [
"$users",
0
]
},
"itemP": {
"$arrayElemAt": [
"$itemTwo",
0
]
}
}
},
{
"$addFields": {
"itemName": {
"$ifNull": [
"$item.name",
"$objects.itemName"
]
},
"userName": {
"$concat": [
"$user.firstname",
" ",
"$user.lastname"
]
}
}
},
{
"$match": {
"client_id": 2
}
},
{
"$skip": 1
},
{
"$limit": 10
},
{
"$project": {
"date": "$objects.date",
"state": "$objects.state"
}
},
{
"$sort": {
"objects.state": 1,
"objects.date": 1,
}
}
)
To be precise: the "date" field is of Date type and the "state" field is of number type.
If I use only one sort key, the result order is correct. But if I use two sort keys, the results are not ordered correctly. Do you have any idea why?
As @Neil Lunn says:
They don't sort correctly because you renamed the fields in $project. So it should be { $sort: { state: 1, date: 1 } }