MongoDB: $near + $lookup in an aggregation query

I would like to know if this is even possible.
Currently my query is a $near query on the users collection plus a forEach loop of find queries on the userSports collection, where user._id = userId in userSports (it's like a join done in a foreach):
//geonear query
db.users.find({
  geopoint_mongo: {
    $near: {
      $geometry: {type: "Point", coordinates: [userGeopoint.lng, userGeopoint.lat]},
      $maxDistance: maxDistance * 1000
    }
  }
},
{_id: 1, geopoint: 1, url_avatar: 1, bool_coach: 1, bool_ambassador: 1, gender: 1, bool_handicap: 1}
).toArray(function (err, usersFound) {
  if (usersFound && usersFound.length > 0) {
    var count = 0;
    var total = usersFound.length;
    var users = [];
    // start the loop
    usersFound.forEach(function (user) {
      // find with condition query
      db.userSports.find({
        userId: user._id,
        sportId: {$in: sportIds}
      },
      {_id: 1, user_level: 1, sportId: 1}
      ).toArray(function (err, userSports) {
        count++;
        if (userSports && userSports.length > 0) {
          user.userSports = userSports;
          users.push(user);
        }
        if (count === total) {
          console.log('Done');
        }
      });
    });
  }
});
I have heard that you can use $lookup with aggregate to do the join, because my queries currently take too much time, but I wasn't able to find any example of this.
Failing that, is there a way to make this code run faster?

You can use the aggregation query below, with a $lookup stage, in version 3.4. Note that $near is not valid inside an aggregation $match stage, so the location filter uses $geoNear, which must be the first stage in the pipeline.
db.users.aggregate([
  {"$geoNear":{ // Find users near the location ($near cannot be used inside $match)
    "near":{"type":"Point","coordinates":[userGeopoint.lng, userGeopoint.lat]},
    "distanceField":"distance", // required by $geoNear
    "maxDistance":maxDistance*1000,
    "spherical":true
    // note: before MongoDB 4.2, $geoNear returns at most 100 docs by default; set "limit" if you need more
  }},
  {"$project":{ // Keep fields from users collection
    "geopoint":1,"url_avatar":1,"bool_coach":1,"bool_ambassador":1,"gender":1,"bool_handicap":1
  }},
  {"$lookup":{ // Join user with user sports collection
    "from":"userSports",
    "localField":"_id",
    "foreignField":"userId",
    "as":"sports"
  }},
  {"$unwind":"$sports"}, // $unwind + $match to get matching sports
  {"$match":{"sports.sportId":{"$in":sportIds}}},
  {"$project":{ // Keep only the needed fields ($addFields with a value of 1 would overwrite them with the literal 1)
    "geopoint":1,"url_avatar":1,"bool_coach":1,"bool_ambassador":1,"gender":1,"bool_handicap":1,
    "sports._id":1,"sports.user_level":1,"sports.sportId":1
  }},
  {"$group":{
    "_id":"$_id",
    "geopoint":{"$first":"$geopoint"}, // Keep fields from users collection
    "url_avatar":{"$first":"$url_avatar"},
    "bool_coach":{"$first":"$bool_coach"},
    "bool_ambassador":{"$first":"$bool_ambassador"},
    "gender":{"$first":"$gender"},
    "bool_handicap":{"$first":"$bool_handicap"},
    "sports":{"$push":"$sports"} // Collect matching sports
  }}
])
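Note that $geoNear requires a geospatial index on the collection. Assuming the field is geopoint_mongo as in the question (a setup sketch, not part of the original answer), that would be:
db.users.createIndex({ geopoint_mongo: "2dsphere" })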

For such an aggregation query we have to consider the aggregation pipeline. The code below will help you identify the syntax for this kind of query:
db.driversLocation.aggregate([
  {
    $geoNear: {
      // GeoJSON coordinates must be in [longitude, latitude] order
      near: {type: "Point", coordinates: [parseFloat(req.query.longitude), parseFloat(req.query.latitude)]},
      distanceField: "dist.calculated",
      maxDistance: 10000,
      spherical: true
    }
  },
  {
    $lookup: {
      from: "drivers",
      localField: "driverId",
      foreignField: "oid",
      as: "mas"
    }
  },
  {
    $match: {
      "mas.vehicle.bodyType": "lorry"
    }
  }
])

Related

Mongoose updateMany based on result of previous query

I have two collections, energyOffers and energyOfferLogs. When a user deactivates their account, I look for all the remaining active energyOffers where the entity of the user is in the assignees array and not in the declinedEntities array, and the offerValidTill date is still in the future.
const energyOffers = await EnergyOffer.find({
  'assignees.id': entityID,
  declinedEntities: {
    $ne: leadID
  },
  offerValidTill: { $gt: Date.now() }
}).session(session); // find() takes a filter object; the session belongs in the query options
Based on these energyOffers I need to update the corresponding energyOfferLogs. I can find those with { entityID: entityID, 'offer.offerID': offer._id }, but how can I look for all these offers in the same query?
If I loop through the energyOffers I will have to perform multiple updates, while my guess is that this can be done in one updateMany. I was looking into the $lookup aggregation operator (https://www.mongodb.com/docs/v6.0/reference/operator/aggregation/lookup/) but it seems that the EnergyOffer find query is too complex to express in it.
await EnergyOfferLog.updateMany({ ??? }, {
$set: {
'offer.action': 'declined',
'offer.action_date': Math.floor(Date.now()),
'offer.action_user': user.first_name,
'offer.action_user_id': userID
}
});
Get all offer ids from the first query, e.g.
let ids = energyOffers.map(o => o._id)
Use $in to match logs for all matching offers:
await EnergyOfferLog.updateMany({ entityID: entityID, 'offer.offerID': {$in: ids} }, {
$set: {
'offer.action': 'declined',
'offer.action_date': Math.floor(Date.now()),
'offer.action_user': user.first_name,
'offer.action_user_id': userID
}
});
If you want to do it with one query only, it is not complex. You can use $lookup with a pipeline for this:
1. Start with your $match query on the energyOffers collection
2. Use $lookup with a pipeline to get the matching energyOfferLogs
3. Clean the pipeline to contain only the energyOfferLogs docs
4. Perform the $set
5. Use $merge to save it back to the energyOfferLogs collection
db.energyOffers.aggregate([
  {$match: {
    "assignees.id": entityID,
    declinedEntities: {$ne: leadID},
    offerValidTill: {$gt: Date.now()}
  }},
  {$lookup: {
    from: "energyOfferLogs",
    let: {offerId: "$_id"},
    pipeline: [
      {$match: {
        $and: [
          {entityID: entityID},
          {$expr: {$eq: ["$offer.offerID", "$$offerId"]}}
        ]
      }}
    ],
    as: "energyOfferLogs"
  }},
  {$unwind: "$energyOfferLogs"},
  {$replaceRoot: {newRoot: "$energyOfferLogs"}},
  {$set: {
    "offer.action": "declined",
    "offer.action_date": Math.floor(Date.now()),
    "offer.action_user": user.first_name,
    "offer.action_user_id": userID
  }},
  {$merge: {into: "energyOfferLogs"}} // $merge takes the target collection name, not a field path
])
See how it works on the playground example
Answer was updated according to a remark by @Alex_Blex
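One note on the final stage: by default $merge matches on _id and merges the new fields into the existing log document, which is what this pipeline relies on. If you prefer to make that explicit, and to drop any joined document that no longer has a counterpart, a hedged variant of the last stage would be:
{$merge: {into: "energyOfferLogs", on: "_id", whenMatched: "merge", whenNotMatched: "discard"}}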

MongoDB aggregation: limit by percentage of number of results

I have a collection of orders as:
Orders:
{
  // some typical order fields
  cancelationMoment: {
    type: Date
  },
  consumer: {
    type: Schema.Types.ObjectId,
    required: 'consumer id required'
  }
}
I want to obtain the consumers who have the most cancelled orders (the top 10%).
My current implementation is:
db.orders.aggregate([
  {$match: {cancelationMoment: {$exists: true}}},
  {$group: {_id: "$consumer", ordersCanceled: {$sum: 1}}},
  {$sort: {ordersCanceled: -1}},
  {$limit: 2}
]);
But I want to obtain not just the top 2, but the top 10%. Is this possible?
Thanks.
You could save the collection's document count in a variable and then manipulate it as you need.
var total = db.orders.find({}).count();
db.orders.aggregate([
  {
    $limit: Math.ceil(total * 0.1) // $limit requires a positive integer
  }
]);
Update:
Using the $count stage would change the shape of the result, but you can inline the count directly into your aggregation stages:
db.orders.aggregate([
  {
    $limit: Math.ceil(db.orders.find({}).count() * 0.1)
  }
]);
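Putting it together with the original pipeline: since the 10% should arguably be taken over the number of distinct consumers with cancellations (an assumption about the intended semantics, not stated in the answer), a minimal sketch could be:
var consumersWithCancellations = db.orders.distinct("consumer", {cancelationMoment: {$exists: true}}).length;
db.orders.aggregate([
  {$match: {cancelationMoment: {$exists: true}}},
  {$group: {_id: "$consumer", ordersCanceled: {$sum: 1}}},
  {$sort: {ordersCanceled: -1}},
  {$limit: Math.max(1, Math.ceil(consumersWithCancellations * 0.1))} // top 10%, at least 1
]);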

Poor lookup aggregation performance

I have two collections
Posts:
{
  "_Id": "1",
  "_PostTypeId": "1",
  "_AcceptedAnswerId": "192",
  "_CreationDate": "2012-02-08T20:02:48.790",
  "_Score": "10",
  ...
  "_OwnerUserId": "6",
  ...
},
...
and users:
{
  "_Id": "1",
  "_Reputation": "101",
  "_CreationDate": "2012-02-08T19:45:13.447",
  "_DisplayName": "Geoff Dalgas",
  ...
  "_AccountId": "2"
},
...
and I want to find users who wrote between 5 and 15 posts.
This is what my query looks like:
db.posts.aggregate([
  {
    $lookup: {
      from: "users",
      localField: "_OwnerUserId",
      foreignField: "_AccountId",
      as: "X"
    }
  },
  {
    $group: {
      _id: "$X._AccountId",
      posts: { $sum: 1 }
    }
  },
  {
    $match: { posts: { $gte: 5, $lte: 15 } }
  },
  {
    $sort: { posts: -1 }
  },
  {
    $project: { posts: 1 }
  }
])
and it is terribly slow. For 6k users and 10k posts it takes over 40 seconds to get a response, while in a relational database I get a response in a split second.
Where's the problem? I'm just getting started with MongoDB and it's quite possible that I messed up this query.
From https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/:
foreignField — Specifies the field from the documents in the from collection. $lookup performs an equality match on the foreignField to the localField from the input documents. If a document in the from collection does not contain the foreignField, the $lookup treats the value as null for matching purposes.
This will be performed the same as any other query.
If you don't have an index on the field _AccountId, it will do a full collection scan for each one of the 10,000 posts. The bulk of the time will be spent in those scans.
db.users.createIndex({ _AccountId: 1 }) // createIndex takes a key document; ensureIndex("_AccountId", 1) was both deprecated and the wrong syntax
speeds up the process so it's doing 10,000 index hits instead of 10,000 collection scans.
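To confirm the index is actually being used, one option (a quick sketch; the _AccountId value mirrors the sample documents above) is:
db.users.find({ _AccountId: "6" }).explain("executionStats")
// the winningPlan should now show an IXSCAN on { _AccountId: 1 } rather than a COLLSCAN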
In addition to bauman.space's suggestion to put an index on the _AccountId field (which is critical), you should also do your $match stage as early as possible in the aggregation pipeline (i.e. as the first stage). Even though it won't use any indexes (unless you index the posts field), it will filter the result set before the $lookup (join) stage.
The reason your query is terribly slow is that for every post, it is doing a non-indexed lookup (a sequential read) across every user. That's around 60 million reads!
Check out the Pipeline Optimization section of the MongoDB Aggregation Docs.
Use $match before $lookup: $match filters the rows that need to be examined by $lookup, which is far more efficient.
As long as you're going to group by user _AccountId, you should do the $group first (by _OwnerUserId), then $lookup only after filtering to accounts having between 5 and 15 posts; this will reduce the number of lookups:
db.posts.aggregate([
  {
    $group: {
      _id: "$_OwnerUserId",
      postsCount: {
        $sum: 1
      },
      posts: {
        $push: "$$ROOT"
      } // if you need to keep the original posts data
    }
  },
  {
    $match: {
      postsCount: {
        $gte: 5,
        $lte: 15
      }
    }
  },
  {
    $lookup: {
      from: "users",
      localField: "_id",
      foreignField: "_AccountId",
      as: "X"
    }
  },
  {
    $unwind: "$X"
  },
  {
    $sort: {
      postsCount: -1
    }
  },
  {
    $project: {
      postsCount: 1,
      X: 1
    }
  }
])
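One caveat with this approach: pushing $$ROOT into the $group can make that stage exceed the aggregation pipeline's 100 MB per-stage memory limit on larger collections. If that happens, either drop the posts array from the $group or allow spilling to disk:
db.posts.aggregate([ /* stages as above */ ], { allowDiskUse: true })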

Query and Update Mongo Document

The query below increases score by one.
db.people.findAndModify({
  query: { name: "Andy" },
  update: { $inc: { score: 1 } }
})
But is it possible to do more than just increase the score? I would like to increase the score and also compute avg_field for the same document.
db.people.findAndModify({
  query: { name: "Andy" },
  update: { $inc: { score: 1 }, avg_field: {x divide by new score value} }
})
I might be able to use a function to compute all that, but that still will not help with writing the updated values. I would like to keep the operation atomic, hence trying to update in the same query.
Suggestions?
Maybe you could do it through aggregation, with the operators $add and $divide, as below. However, aggregation does not update the documents, so you should return the cursor from the aggregation and then update the documents one by one. Here is the sample code.
// increase score, then compute the avg_field, then return the cursor
var cur = db.people.aggregate([
  {$match: {name: "Andy"}},
  {"$project": {
    "_id": "$_id",
    "score": {$add: ['$score', 1]}, // add 1 to score
    "avg_field": {$divide: ['$v1', {$add: ['$score', 1]}]} // compute the new avg_field
  }}
]);
// Iterate through the results and update each person.
cur.forEach(function (doc) { // use the document passed to the callback; calling cur.next() here would skip documents
  db.people.update({_id: doc._id},
    {"$set": {avg_field: doc.avg_field, score: doc.score}});
});
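As a side note, this cursor-then-update approach is not atomic. Assuming MongoDB 4.2 or newer (an assumption beyond the original answer), update commands accept an aggregation pipeline, so the increment and the derived field can be written in a single atomic operation:
db.people.updateOne(
  { name: "Andy" },
  [
    { $set: { score: { $add: ["$score", 1] } } },
    { $set: { avg_field: { $divide: ["$v1", "$score"] } } } // this stage sees the already-incremented score; $v1 mirrors the field used above
  ]
)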

Mongoose Aggregate pagination and total number [duplicate]

I am interested in optimizing a "pagination" solution I'm working on with MongoDB. My problem is straightforward. I usually limit the number of documents returned using the limit() functionality. This forces me to issue a redundant query without limit() in order to also capture the total number of documents, so I can pass that to the client, letting them know they'll have to issue additional requests to retrieve the rest of the documents.
Is there a way to condense this into 1 query? Get the total number of documents but at the same time only retrieve a subset using limit()? Is there a different way to think about this problem than I am approaching it?
MongoDB 3.4 introduced the $facet aggregation stage, which processes multiple aggregation pipelines within a single stage on the same set of input documents.
Using $facet and $group you can find documents with $limit and also get the total count.
You can use the below aggregation in MongoDB 3.4:
db.collection.aggregate([
  { "$facet": {
    "totalData": [
      { "$match": { }},
      { "$skip": 10 },
      { "$limit": 10 }
    ],
    "totalCount": [
      { "$group": {
        "_id": null,
        "count": { "$sum": 1 }
      }}
    ]
  }}
])
You can also use the $count aggregation stage (introduced in MongoDB 3.4) for the count facet:
db.collection.aggregate([
  { "$facet": {
    "totalData": [
      { "$match": { }},
      { "$skip": 10 },
      { "$limit": 10 }
    ],
    "totalCount": [
      { "$count": "count" }
    ]
  }}
])
No, there is no other way: two queries - one for the count, one with the limit. Or you have to use a different database. Apache Solr, for instance, works the way you want: every query there is limited and returns totalCount.
MongoDB allows you to use cursor.count() even when you pass limit() or skip().
Let's say you have a db.collection with 10 items.
You can do:
async function getQuery() {
  let query = db.collection.find({}).skip(5).limit(5); // cursor over the last 5 items in the db; find() returns the cursor synchronously
  let countTotal = await query.count(); // returns 10 -- does not take skip or limit into consideration
  let countWithConstraints = await query.count(true); // returns 5 -- takes skip and limit into consideration
  return { query, countTotal, countWithConstraints };
}
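Note that cursor.count() was deprecated and later removed in the 4.x Node.js driver. Under newer drivers, a rough equivalent of the above (a sketch, assuming the same 10-item collection) is:
async function getQuery() {
  const filter = {};
  const docs = await db.collection.find(filter).skip(5).limit(5).toArray(); // last 5 items
  const countTotal = await db.collection.countDocuments(filter); // 10 -- ignores skip/limit
  return { docs, countTotal };
}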
Here's how to do this with MongoDB 3.4+ (with Mongoose) using $facet. This example returns a $count based on the documents after they have been matched.
const facetedPipeline = [
  { "$match": { "dateCreated": { $gte: new Date('2021-01-01') } } },
  { "$project": { 'exclude.some.field': 0 } }, // each pipeline stage must be its own object
  {
    "$facet": {
      "data": [
        { "$skip": 10 },
        { "$limit": 10 }
      ],
      "pagination": [
        { "$count": "total" }
      ]
    }
  }
];
const results = await Model.aggregate(facetedPipeline);
This pattern is useful for getting pagination information to return from a REST API.
Reference: MongoDB $facet
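For context, a faceted pipeline like this returns a single-element array, so unpacking it for a REST response might look like the following sketch (the response shape is illustrative, not part of the original answer):
const [{ data, pagination }] = results;
const response = {
  items: data,
  total: pagination[0] ? pagination[0].total : 0 // pagination is empty when no documents matched
};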
Times have changed, and I believe you can achieve what the OP is asking with aggregation using $sort, $group and $project. For my system, I needed to also grab some user info from my users collection. Hopefully this answers any questions around that as well. Below is the aggregation pipeline; the last three stages ($sort, $group and $project) handle getting the total count and providing pagination capabilities.
db.posts.aggregate([
  { $match: { public: true } }, // each stage needs its own closing brace
  { $lookup: {
      from: 'users',
      localField: 'userId',
      foreignField: 'userId',
      as: 'userInfo'
  } },
  { $project: {
      postId: 1,
      title: 1,
      description: 1,
      updated: 1,
      userInfo: {
        $let: {
          vars: {
            firstUser: {
              $arrayElemAt: ['$userInfo', 0]
            }
          },
          in: {
            username: '$$firstUser.username'
          }
        }
      }
  } },
  { $sort: { updated: -1 } },
  { $group: {
      _id: null,
      postCount: { $sum: 1 },
      posts: {
        $push: '$$ROOT'
      }
  } },
  { $project: {
      _id: 0,
      postCount: 1,
      posts: {
        $slice: [
          '$posts',
          currentPage ? (currentPage - 1) * RESULTS_PER_PAGE : 0,
          RESULTS_PER_PAGE
        ]
      }
  } }
])
There is a way in MongoDB 3.4: $facet.
You can do:
db.collection.aggregate([
  {
    $facet: {
      data: [{ $match: {} }],
      total: [{ $count: 'total' }] // each facet must be a pipeline, i.e. an array of stages
    }
  }
])
Then you will be able to run two aggregations at the same time.
By default, the count() method ignores the effects of cursor.skip() and cursor.limit() (MongoDB docs).
As the count method excludes the effects of limit and skip, you can use cursor.count() to get the total count:
const cursor = database.collection(collectionName).find(query).skip(offset).limit(limit); // find() returns the cursor synchronously
return {
  data: await cursor.toArray(),
  count: await cursor.count() // this gives the count of all matching documents, ignoring skip() and limit()
};
It all depends on the pagination experience you need as to whether or not you need to do two queries.
Do you need to list every single page, or even a range of pages? Does anyone even go to page 1051 - conceptually, what does that actually mean?
There has been lots of UX work on pagination patterns - Avoid the pains of pagination covers various types of pagination and their scenarios, and many don't need a count query to know if there's a next page. For example, if you display 10 items on a page and you limit the query to 13, you'll know if there's another page.
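A minimal sketch of that fetch-a-few-extra idea, here fetching just one extra document beyond the page size (the names are illustrative, not from the answer above):
const pageSize = 10;
const docs = await db.collection('items')
  .find(query)
  .skip(pageNo * pageSize)
  .limit(pageSize + 1) // one extra document, only to detect whether a next page exists
  .toArray();
const hasNextPage = docs.length > pageSize;
const page = docs.slice(0, pageSize);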
MongoDB introduced countDocuments(), a method for getting only the count of the documents matching a given query (the older collection.count() is deprecated), and it goes as follows:
const result = await db.collection('foo').countDocuments({name: 'bar'});
console.log('result:', result); // prints the matching doc count
Recipe for usage in pagination:
const query = {name: 'bar'};
const skip = (pageNo - 1) * pageSize; // assuming pageNo starts from 1
const limit = pageSize;
const [listResult, countResult] = await Promise.all([
  db.collection('foo')
    .find(query)
    .skip(skip)
    .limit(limit)
    .toArray(), // find() alone returns a cursor, not the documents
  db.collection('foo').countDocuments(query)
]);
return {
  totalCount: countResult,
  list: listResult
};
For more details on db.collection.countDocuments visit this page
It is possible to get the total result size without the effect of limit() using count() as answered here:
Limiting results in MongoDB but still getting the full count?
According to the documentation you can even control whether limit/pagination is taken into account when calling count():
https://docs.mongodb.com/manual/reference/method/cursor.count/#cursor.count
Edit: in contrast to what is written elsewhere - the docs clearly state that "The operation does not perform the query but instead counts the results that would be returned by the query". Which - from my understanding - means that only one query is executed.
Example:
> db.createCollection("test")
{ "ok" : 1 }
> db.test.insert([{name: "first"}, {name: "second"}, {name: "third"},
{name: "forth"}, {name: "fifth"}])
BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 5,
"nUpserted" : 0,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [ ]
})
> db.test.find()
{ "_id" : ObjectId("58ff00918f5e60ff211521c5"), "name" : "first" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c6"), "name" : "second" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c7"), "name" : "third" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c8"), "name" : "forth" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c9"), "name" : "fifth" }
> db.test.count()
5
> var result = db.test.find().limit(3)
> result
{ "_id" : ObjectId("58ff00918f5e60ff211521c5"), "name" : "first" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c6"), "name" : "second" }
{ "_id" : ObjectId("58ff00918f5e60ff211521c7"), "name" : "third" }
> result.count()
5 (total result size of the query without limit)
> result.count(1)
3 (result size with limit(3) taken into account)
Try as below:
cursor.count(false, function(err, total){ console.log("total", total) })
core.db.users.find(query, {}, {skip: 0, limit: 1}, function (err, cursor) {
  if (err)
    return callback(err);
  cursor.toArray(function (err, items) {
    if (err)
      return callback(err);
    cursor.count(false, function (err, total) {
      if (err)
        return callback(err);
      console.log("cursor", total);
      callback(null, {items: items, total: total});
    });
  });
});
A word of caution about using aggregation for pagination: it is better to use two queries if the API is used frequently by users to fetch data. In my experience the two-query approach was at least 50 times faster than getting the data via an aggregate on a production server with many users accessing the system online. Aggregation and $facet are better suited for dashboards, reports and cron jobs that are called less frequently.
We can do it using two queries:
const limit = parseInt(req.query.limit || 50, 10);
let page = parseInt(req.query.page || 0, 10);
if (page > 0) { page = page - 1; }
let doc = await req.db.collection('bookings').find().sort({ _id: -1 }).skip(page * limit).limit(limit).toArray(); // skip whole pages, not single documents
let count = await req.db.collection('bookings').find().count();
res.json({data: [...doc], count: count});
I took the two queries approach, and the following code has been taken straight out of a project I'm working on, using MongoDB Atlas and a full-text search index:
return new Promise(async (resolve, reject) => {
  try {
    const search = {
      $search: {
        index: 'assets',
        compound: {
          should: [{
            text: {
              query: args.phraseToSearch,
              path: [
                'title', 'note'
              ]
            }
          }]
        }
      }
    }
    const project = {
      $project: {
        _id: 0,
        id: '$_id',
        userId: 1,
        title: 1,
        note: 1,
        score: {
          $meta: 'searchScore'
        }
      }
    }
    const match = {
      $match: {
        userId: args.userId
      }
    }
    const skip = {
      $skip: args.skip
    }
    const limit = {
      $limit: args.first
    }
    const group = {
      $group: {
        _id: null,
        count: { $sum: 1 }
      }
    }
    const searchAllAssets = await Models.Assets.schema.aggregate([
      search, project, match, skip, limit
    ])
    const [ totalNumberOfAssets ] = await Models.Assets.schema.aggregate([
      search, project, match, group
    ])
    return resolve({
      searchAllAssets: searchAllAssets,
      totalNumberOfAssets: totalNumberOfAssets ? totalNumberOfAssets.count : 0 // $group emits no document when nothing matches
    })
  } catch (exception) {
    return reject(new Error(exception))
  }
})
I had the same problem and came across this question. The correct solution to this problem is posted here.
You can do this in one query: first run count() on the cursor, and within its callback run the limit() function.
In Node.js and Express.js, you will have to use it like this to be able to use the count function along with toArray's result.
var curFind = db.collection('tasks').find(query); // pass the query object itself; wrapping it as {query} would create a field literally named "query"
Then you can run two functions after it like this (one nested in the other)
curFind.count(function (e, count) {
  // Use count here
  curFind.skip(0).limit(10).toArray(function (err, result) {
    // Use result here and count here
  });
});