When I use a multiple sorting in aggregate method with mongo, results aren't sorting in the right way. This is my query :
db.MyCollection.aggregate(
{
"$unwind": "$objects"
},
{
"$lookup": {
"from": "CollectionA",
"localField": "objects.itemId",
"foreignField": "_id",
"as": "itemOne"
}
},
{
"$lookup": {
"from": "CollectionB",
"localField": "user_id",
"foreignField": "id",
"as": "users"
}
},
{
"$lookup": {
"from": "CollectionC",
"localField": "objects.itemName",
"foreignField": "name",
"as": "itemTwo"
}
},
{
"$addFields": {
"item": {
"$arrayElemAt": [
"$itemOne",
0
]
},
"user": {
"$arrayElemAt": [
"$users",
0
]
},
"itemP": {
"$arrayElemAt": [
"$itemTwo",
0
]
}
}
},
{
"$addFields": {
"itemName": {
"$ifNull": [
"$item.name",
"$objects.itemName"
]
},
"userName": {
"$concat": [
"$user.firstname",
" ",
"$user.lastname"
]
}
}
},
{
"$match": {
"client_id": 2
}
},
{
"$skip": 1
},
{
"$limit": 10
},
{
"$project": {
"date": "$objects.date",
"state": "$objects.state"
}
},
{
"$sort": {
"objects.state": 1,
"objects.date": 1,
}
}
)
To precise: "date" field is Date type and "state" field is number type.
If I use only one sort : result order is correct. But if I use 2 sorts, results are not order correctly. Have you got any ideas, why ?
As #Neil Lunn says :
They don't sort correctly because you renamed the fields in $project. So it should be { $sort: { state: 1, date: 1 } }
Related
I'm trying to get aggregations with same aggregation pipeline including $match and $group operations from multiple collections.
For example,
with a users collection and collections of questions, answers and comments where every document has authorId and created_at field,
db = [
'users': [{ _id: 123 }, { _id: 456} ],
'questions': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T00:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T00:00:00Z') },
],
'answers': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-05T08:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-01T08:00:00Z') },
],
'comments': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T16:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T16:00:00Z') },
],
]
I want to get counts of documents from each collections with created_at between a given range and grouped by authorId.
A desired aggregation result may look like below. The _ids here are ObjectIds of documents in users collection.
\\ match: { createdAt: { $gt: ISODate('2022-09-03T00:00:00Z) } }
[
{ _id: ObjectId('123'), questionCount: 0, answerCount: 1, commentCount: 0 },
{ _id: ObjectId('456'), questionCount: 1, answerCount: 0, commentCount: 1 }
]
Currently, I am running aggregation below for each collection, combining the results in the backend service. (I am using Spring Data MongoDB Reactive.) This seems very inefficient.
db.collection.aggregate([
{ $match: {
created_at: { $gt: ISODate('2022-09-03T00:00:00Z') }
}},
{ $group : {
_id: '$authorId',
count: {$sum: 1}
}}
])
How can I get the desired result with one aggregation?
I thought $unionWith or $lookup may help but I'm stuck here.
You can try something like this, using $lookup, here we join users, with all the three collections one-by-one, and then calculate the count:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"let": {
id: "$_id"
},
"pipeline": [
{
"$match": {
$expr: {
"$and": [
{
"$gt": [
"$createdAt",
ISODate("2022-09-03T00:00:00Z")
]
},
{
"$eq": [
"$$id",
"$authorId"
]
}
]
}
}
}
],
"as": "comments"
}
},
{
"$project": {
"questionCount": {
"$size": "$questions"
},
"answersCount": {
"$size": "$answers"
},
"commentsCount": {
"$size": "$comments"
}
}
}
])
Playground link. In the above query, we use pipelined form of $lookup, to perform join on some custom logic. Learn more about $lookup here.
Another way is this, perform normal lookup and then filter out the elements:
db.users.aggregate([
{
"$lookup": {
"from": "questions",
"localField": "_id",
"foreignField": "authorId",
"as": "questions"
}
},
{
"$lookup": {
"from": "answers",
"localField": "_id",
"foreignField": "authorId",
"as": "answers"
}
},
{
"$lookup": {
"from": "comments",
"localField": "_id",
"foreignField": "authorId",
"as": "comments"
}
},
{
"$project": {
questionCount: {
"$size": {
"$filter": {
"input": "$questions",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
answerCount: {
"$size": {
"$filter": {
"input": "$answers",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
},
commentsCount: {
"$size": {
"$filter": {
"input": "$comments",
"as": "item",
"cond": {
"$gt": [
"$$item.createdAt",
ISODate("2022-09-03T00:00:00Z")
]
}
}
}
}
}
}
])
Playground link.
I have these collections:
users
{
_id: "userId1",
// ...
tracks: ["trackId1", "trackId2"],
};
tracks
{
_id: "trackId1",
// ...
creatorId: "userId1",
categoryId: "categoryId1"
}
categories
{
_id: "categoryId1",
// ...
tracks: ["trackId1", "trackId15", "trackId20"],
};
by using the following code, I am able to get a track by its ID and add the creator
tracks.aggregate([
{
$match: { _id: ObjectId(trackId) },
},
{
$lookup: {
let: { userId: { $toObjectId: "$creatorId" } },
from: "users",
pipeline: [{ $match: { $expr: { $eq: ["$_id", "$$userId"] } } }],
as: "creator",
},
},
{ $limit: 1 },
])
.toArray();
Response:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
// ...
"tracks": [
"trackId5",
"trackId10",
"trackId65"
]
}
}
but what I am struggling with is that I want the creator.tracks to aggregate also returning the tracks by their ID (e.g up to last 5), and also to get the last 5 tracks from the categoryId
expected result:
"track": {
"_id": "trackId1",
// ...
"categoryId": "categoryId1",
"creatorId": "userId1",
"creator": {
"_id": "userId1",
"tracks": [
{
"_id": "trackId5",
// the rest object without the creator
},
{
"_id": "trackId10",
// the rest object without the creator
},
{
"_id": "trackId65",
// the rest object without the creator
},
]
},
// without trackId1 which is the one that is being viewed
"relatedTracks": [
{
"_id": "trackId15",
// the rest object without the creator
},
{
"_id": "trackId20",
// the rest object without the creator
},
]
}
I would appreciate any explanation/help to understand what is the best one to do it and still keep the good performance
Query
start from a track
join with users using the trackId get all the tracks of the creator
(creator-tracks)
join with categories using the categoryId to get all the tracks of the category (related tracks)
remove from related-tracks the tracks of the creator
take the last 5 from both using $slice (creator-tracks and related-tracks)
*i added 2 extra lookups to get all info of the tracks, its empty arrays because i dont have enough data(i have only trackId1), with all the data it will work
PlayMongo
db.tracks.aggregate([
{
"$match": {
"_id": "trackId1"
}
},
{
"$lookup": {
"from": "users",
"localField": "creatorId",
"foreignField": "_id",
"as": "creator-tracks"
}
},
{
"$set": {
"creator-tracks": {
"$arrayElemAt": [
"$creator-tracks.tracks",
0
]
}
}
},
{
"$lookup": {
"from": "categories",
"localField": "categoryId",
"foreignField": "_id",
"as": "related-tracks"
}
},
{
"$set": {
"related-tracks": {
"$arrayElemAt": [
"$related-tracks.tracks",
0
]
}
}
},
{
"$set": {
"related-tracks": {
"$filter": {
"input": "$related-tracks",
"cond": {
"$not": [
{
"$in": [
"$$this",
"$creator-tracks"
]
}
]
}
}
}
}
},
{
"$set": {
"creator-tracks": {
"$slice": [
{
"$filter": {
"input": "$creator-tracks",
"cond": {
"$ne": [
"$$this",
"$_id"
]
}
}
},
-5
]
}
}
},
{
"$set": {
"related-tracks": {
"$slice": [
"$related-tracks",
-5
]
}
}
},
{
"$lookup": {
"from": "tracks",
"localField": "creator-tracks",
"foreignField": "_id",
"as": "creator-tracks-all-info"
}
},
{
"$lookup": {
"from": "tracks",
"localField": "related-tracks",
"foreignField": "_id",
"as": "related-tracks-all-info"
}
}
])
Background
I have a collection Items with documents such as
{
"_id": "5d9e3a5ced27230f68032e21",
... more fields
"foos": [
{
"_id": "5d9e3a5ced27230f68032e25",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93"
],
},
{
"id": "5d9e3a5ced27230f68032e24",
... more fields
"bars": [
"5d9dab461bbb4db66db41f93",
"5d9e3a23ed27230f68032e1a"
]
}
]
}
with bars referring to another collection Bars.
Goal
I'd like to get a list of all documents (with all their fields) in Items, but with bars resolved to the document in Bars.
Small Catch
I want to be able to create a generic function to which I simply pass the path to resolve (e.g. foos.bars) and the collection from which to resolve (Bars) so that I can use it with different collections and arbitrary levels of nesting.
Initial Approach
I've found a rather complicated way to do this for my example, but before I generalize it, I'd like to know whether there really isn't a simpler way. Input is highly appreciated!
Here's what I've got:
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$unwind": {
"path": "$foos.bars"
}
},
{
"$group": {
"_id": {
"id": "$_id",
"foo_index": "$foos_index"
},
"foos": {
"$first": "$foos"
},
"bars": {
"$push": "$foos.bars"
}
}
},
{
"$addFields": {
"foos": {
"$mergeObjects": [
"$foos",
{
"bars": "$bars"
}
]
}
}
},
{
"$group": {
"_id": "$_id.id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]
Update: First Iteration
I've realized I don't need the "leaf" level unwound, so I've now got a simplified version (but for deeper nesting, I'll still need what I had before, right?):
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$group": {
"_id": "$_id",
"foos": {
"$push": "$foos"
}
}
},
{
"$lookup": {
"from": "Items",
"localField": "_id",
"foreignField": "_id",
"as": "original_doc"
}
},
{
"$unwind": {
"path": "$original_doc"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$original_doc",
{
"foos": "$foos"
}
]
}
}
}
]
Maybe someone still finds something better, but until then, if someone comes across this, the following seems reasonable to me by now:
[
{
"$unwind": {
"path": "$foos",
"includeArrayIndex": "foos_index"
}
},
{
"$lookup": {
"from": "Bars",
"localField": "foos.bars",
"foreignField": "_id",
"as": "foos.bars"
}
},
{
"$group": {
"_id": "$_id",
"savepoint": {
"$first": "$$ROOT"
},
"foos": {
"$push": "$foos"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$savepoint",
{
"foos": "$foos"
}
]
}
}
}
]
I want all the values returned from "Items" and when there is a match I want "isActive" to be true.
exports.listUserItems = function (req, res) {
// Aggregate results
User.aggregate([{
"$match": {
"username": req.params.username
}
}, {
"$lookup": {
"from": "Items",
"localField": "itemIds",
"foreignField": "_id",
"as": "items"
}
}, {
"$unwind": {
"path": "$items"
}
}, {
"$project": {
"item": "$items.item_name",
"isActive": '1',
"_id": 0
}
}], (err, result) => res.json(result));
};
What is the best way to go about accomplishing this?
I was going to return all the items and users items seperately, then compare them and object.value them etc. etc... that seems like overkill. Can it be done on the model side?
I'm unable to post the document structure because stackoverflow doesn't let me, but you should get the idea.
Edit:
User Document
{
"username" : "anonuser",
"items" : [
ObjectId("5ba8345f1e56fe8e6caaaa07"),
ObjectId("5ba706d64e82292e72e9ae71")
]
}
Then I have an "Items" collection which has 3 documents like this.
{
"_id" : ObjectId("5ba706d64e82292e72e9ae71"),
"item_name" : "Salary"
}
My expected json api output is to be.
[{"_id":"5ba706d64e82292e72e9ae71","item_name":"Salary","isActive":true},{"_id":"5ba8345f1e56fe8e6caaaa07","item_name":"Fulltime","isActive":true},{"_id":"5ba9af6c1e56fe8e6cab521e","item_name":"Advisor","isActive":false}]
You can try below aggregation
Items.aggregate([
{ "$lookup": {
"from": "users",
"let": { "itemsId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$in": ["$$itemsId", "$items"] }}},
{ "$project": { "isActive": { "$literal": true }}}
],
"as": "items"
}},
{ "$addFields": {
"isActive": {
"$ifNull": [{ "$arrayElemAt": ["$items.isActive", 0] }, { "$literal": false }]
}
}},
{ "$project": { "items": 0 }}
])
Here i am try to get aggregated result from my challenge collection with challengeusers and challengeusers has the user_id and i used $lookup to join the users too.
When i use this query, i am getting following output.
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"chalenge_id": "5b7bf6fd87ec106308d7e3c1",
"user_id": "5b75623db457045e3bb12e0a",
"status": 1
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1
}
],
"users": [
{
"_id": "5b756144b457045e3bb12e08",
"first_name": "XYZ"
},
{
"_id": "5b75623db457045e3bb12e0a",
"first_name": "BAC"
},
{
"_id": "5b75643c0a97791bcc9ed64c",
"first_name": "YTA"
}
]
}
]
But i want the challengeusers and users to merge in a single object.
Most of all i want the status of challengeusers with user's info.
expected output:
"challenges": [
{
"_id": "5b7bf6fd87ec106308d7e3c1",
"start_date": "2018-08-09T12:40:21.470Z",
"end_date": "2018-08-05T12:40:21.470Z",
"challnegedusers": [
{
"user_id": "5b75623db457045e3bb12e0a",
"status": 1,
"first_name": "BAC"
},
{
"user_id": "5b75643c0a97791bcc9ed64c",
"status": 1,
"first_name": "YTA"
},
{
"user_id": "5b756144b457045e3bb12e08",
"status": 1,
"first_name": "XYZ"
}
]
}
]
MongoDB Aggregate Query that i am using.
let challenges = await ChallengeModel.aggregate([
{ $match: criteria },
{ $lookup: {
from: 'challengeusers',
localField: '_id',
foreignField: 'challenge_id',
as: 'challnegedusers'
} },
{ $lookup: {
from: 'appusers',
localField: 'challnegedusers.user_id',
foreignField: '_id',
as: 'users'
} },
{ $sort: {created_at: -1}}
]);
You can try below aggregation in mongodb 3.6
ChallengeModel.aggregate([
{ "$match": criteria },
{ "$lookup": {
"from": "challengeusers",
"let": { "challengeusersId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$challenge_id", "$$challengeusersId" ] } } },
{ "$lookup": {
"from": "appusers",
"let": { "user_id": "$user_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$user_id" ] } } },
],
"as": "user"
}},
{ "$unwind": "$user" },
{ "$addFields": { "first_name": "$user.first_name" }},
{ "$project": { "user": 0 }}
],
"as": "challnegedusers"
}}
])
And with your approach you can try this
ChallengeModel.aggregate([
{ "$match": criteria },
{ "$lookup": {
"from": "challengeusers",
"localField": "_id",
"foreignField": "challenge_id",
"as": "challnegedusers"
}},
{ "$unwind": "challnegedusers" },
{ "$lookup": {
"from": "appusers",
"localField": "challnegedusers.user_id",
"foreignField": "_id",
"as": "challnegedusers.user"
}},
{ "$unwind": "challnegedusers.user" },
{ "$addFields": { "challnegedusers.first_name": "$challnegedusers.user.first_name" }},
{ "$sort": { "created_at": -1 }},
{ "$group": {
"_id": "$_id",
"start_date": { "$first": "$start_date" }
"end_date": { "$first": "$end_date" },
"challnegedusers": { "$push": "$challnegedusers" }
}}
])