MongoDB aggregation taking more than 15 seconds

I have more than 100k records in my collection, and a new record is added every 5 seconds. I have an aggregate query to get roughly 720 records from the last year of data.
The aggregate query:
db.collectionName.aggregate([
    {"$match": {
        "Id": "****-id-****",
        "receivedDate": {
            "$gte": ISODate("2016-06-26T18:30:00.463Z"),
            "$lt": ISODate("2017-06-26T18:30:00.463Z")
        }
    }},
    {"$group": {
        "_id": {
            "$add": [
                {"$subtract": [
                    {"$subtract": ["$receivedDate", ISODate("1970-01-01T00:00:00.000Z")]},
                    {"$mod": [
                        {"$subtract": ["$receivedDate", ISODate("1970-01-01T00:00:00.000Z")]},
                        43200000
                    ]}
                ]},
                ISODate("1970-01-01T00:00:00.000Z")
            ]
        },
        "_rid": {"$first": "$_id"},
        "_data": {"$first": "$receivedData.data"},
        "count": {"$sum": 1}
    }},
    {"$sort": {"_id": -1}},
    {"$project": {
        "_id": "$_rid",
        "receivedDate": "$_id",
        "receivedData": {"data": "$_data"}
    }}
])
I am not sure why it's taking more than 15 seconds; when I try to get data for just 1 month it works fine.

It's a bit late to answer this question, but this might be helpful for others.
A compound index can help in this situation; compound indexes can support queries that match on multiple fields.
You can create a compound index on the Id and receivedDate fields:
db.collectionName.createIndex({ Id: -1, receivedDate: -1 });
The order of the fields listed in a compound index is important. The index will contain references to documents sorted first by the values of the Id field and, within each value of the Id field, sorted by values of the receivedDate field.
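To confirm the index is actually being used, you can check the query plan with explain in the mongo shell; a minimal sketch (look for an IXSCAN on the new index rather than a COLLSCAN in the winning plan):
db.collectionName.explain("executionStats").aggregate([
    {"$match": {
        "Id": "****-id-****",
        "receivedDate": {
            "$gte": ISODate("2016-06-26T18:30:00.463Z"),
            "$lt": ISODate("2017-06-26T18:30:00.463Z")
        }
    }}
    // ...remaining stages as in the original pipeline...
])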


Limit number of objects pushed to array in MongoDB aggregation

I've been trying to find a way to limit the number of objects I'm pushing to the arrays I'm creating while using aggregate on a MongoDB collection.
I have a collection of students - each has these relevant keys:
class number it takes this semester (only one value),
percentile in class (exists if is enrolled in class, null if not),
current score in class (> 0 if enrolled in class, else - 0),
total average (GPA),
max grade
I need to group all students who never failed, per class, in one array that contains those with a GPA higher than 80, and another array containing those without this GPA, sorted by their score in this specific class.
This is my query:
db.getCollection("students").aggregate([
{"$match": {
"class_number":
{"$in": [49, 50, 16]},
"grades.curr_class.percentile":
{"$exists": true},
"grades.min": {"$gte": 80},
}},
{"$sort": {"grades.curr_class.score": -1}},
{"$group": {"_id": "$class_number",
"studentsWithHighGPA":
{"$push":
{"$cond": [{"$gte": ["$grades.gpa", 80]},
{"id": "$_id"},
"$$REMOVE"]
}
},
"studentsWithoutHighGPA":
{"$push":
{"$cond": [{"$lt": ["$grades.gpa", 80]},
{"id": "$_id"},
"$$REMOVE"]
},
},
},
},
])
What I'm trying to do is limit the number of students in each of these arrays. I only want the top 16 in each array, but I'm not sure how to approach this.
Thanks in advance!
I've tried using limit in different variations, and slice too, but none seem to work.
Since MongoDB version 5.0, one option is to use $setWindowFields for this, and in particular its $rank option. This allows keeping only the relevant students and limiting their count even before the $group step:
$match only relevant students, as suggested by the OP
$set a groupId for the $setWindowFields stage (as it can currently partition by only one key)
$setWindowFields to define the rank of each student in their array
$match only students with the wanted rank
$group by class_number as suggested by the OP:
db.collection.aggregate([
    {$match: {
        class_number: {$in: [49, 50, 16]},
        "grades.curr_class.percentile": {$exists: true},
        "grades.min": {$gte: 80}
    }},
    {$set: {
        groupId: {$concat: [
            {$toString: "$class_number"},
            {$toString: {$toBool: {$gte: ["$grades.gpa", 80]}}}
        ]}
    }},
    {$setWindowFields: {
        partitionBy: "$groupId",
        sortBy: {"grades.curr_class.score": -1},
        output: {rank: {$rank: {}}}
    }},
    {$match: {rank: {$lte: rankLimit}}},
    {$group: {
        _id: "$class_number",
        studentsWithHighGPA: {$push: {
            $cond: [{$gte: ["$grades.gpa", 80]}, {id: "$_id"}, "$$REMOVE"]}},
        studentsWithoutHighGPA: {$push: {
            $cond: [{$lt: ["$grades.gpa", 80]}, {id: "$_id"}, "$$REMOVE"]}}
    }}
])
*Note: this solution limits the rank of the students, so there is an edge case of more than n students in an array (in case several students share the exact rank n). It can be solved by adding a $slice step, as sketched below.
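A minimal sketch of that extra step, assuming the limit of 16 from the question (appended after the $group stage):
// Cap both arrays at 16 entries even when several students tie at the last rank
{$set: {
    studentsWithHighGPA: {$slice: ["$studentsWithHighGPA", 16]},
    studentsWithoutHighGPA: {$slice: ["$studentsWithoutHighGPA", 16]}
}}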
Maybe MongoDB's $facet is a solution. You can specify different output pipelines in one aggregation call.
Something like this:
const pipeline = [
    {
        '$facet': {
            'studentsWithHighGPA': [
                { '$match': { 'grade': { '$gte': 80 } } },
                { '$sort': { 'grade': -1 } },
                { '$limit': 16 }
            ],
            'studentsWithoutHighGPA': [
                { '$match': { 'grade': { '$lt': 80 } } },
                { '$sort': { 'grade': -1 } },
                { '$limit': 16 }
            ]
        }
    }
];
coll.aggregate(pipeline)
This should end up with one document including two arrays.
studentsWithHighGPA (array)
0 (object)
1 (object)
...
studentsWithoutHighGPA (array)
0 (object)
1 (object)
See each facet as an aggregation pipeline of its own, so you can also include $group to group by classes or something else (see the sketch after the link below).
https://www.mongodb.com/docs/manual/reference/operator/aggregation/facet/
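For instance, a variant that keeps the per-class grouping from the question might look roughly like this; this is only a sketch, where the field names class_number, grades.gpa and grades.curr_class.score are taken from the question and the 16-item cap is applied per class with $slice:
const perClassPipeline = [
    { $match: { class_number: { $in: [49, 50, 16] } } },
    { $facet: {
        studentsWithHighGPA: [
            { $match: { "grades.gpa": { $gte: 80 } } },
            { $sort: { "grades.curr_class.score": -1 } },
            { $group: { _id: "$class_number", students: { $push: { id: "$_id" } } } },
            // keep only the top 16 per class (input was sorted before grouping)
            { $set: { students: { $slice: ["$students", 16] } } }
        ],
        studentsWithoutHighGPA: [
            { $match: { "grades.gpa": { $lt: 80 } } },
            { $sort: { "grades.curr_class.score": -1 } },
            { $group: { _id: "$class_number", students: { $push: { id: "$_id" } } } },
            { $set: { students: { $slice: ["$students", 16] } } }
        ]
    }}
];
coll.aggregate(perClassPipeline)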
I don't think there is a MongoDB-provided operator to apply a limit inside of a $group stage.
You could use $accumulator, but that requires server-side scripting to be enabled, and it may have a performance impact.
Limiting studentsWithHighGPA to 16 throughout the grouping might look something like:
"studentsWithHighGPA": {
"$accumulator": {
init: "function(){
return {combined:[]};
}",
accumulate: "function(state, id, score){
if (score >= 80) {
state.combined.push({_id:id, score:score})
};
return {combined:state.combined.slice(0,16)}
}",
accumulateArgs: [ "$_id", "$grades.gpa"],
merge: "function(A,B){
return {combined:
A.combined.concat(B.combined).sort(
function(SA,SB){
return (SB.score - SA.score)
})
}
}",
finalize: "function(s){
return s.combined.slice(0,16).map(function(A){
return {_id:A._id}
})
}",
lang: "js"
}
}
Note that the score is also carried through until the very end so that partial result sets from different shards can be combined properly.

How to get the best 5 results in the $group method in MongoDB?

On the production server I use MongoDB 4.4.
I have a query that works well:
db.step_tournaments_results.aggregate([
    { "$match": { "tournament_id": "6377f2f96174982ef89c48d2" } },
    { "$sort": { "total_points": -1, "time_spent": 1 } },
    { "$group": {
        "_id": "$club_name",
        "total_points": { "$sum": "$total_points" },
        "time_spent": { "$sum": "$time_spent" }
    }}
])
But the problem is in the $group operator: it sums all the points of every group into total_points, while I only need the best 5 of every group. How can I achieve that?
Query:
Like your query, $match and $sort.
On the $group, instead of summing, gather all members into one array.
(I collected $$ROOT, but if the documents have many fields you can collect only the 2 fields you need inside a {}.)
Take the first 5 of them.
Take the 2 sums you need from those first 5.
Remove the temporary fields.
*With MongoDB 6 you can do this in the $group itself, without needing to collect the members in an array (see the sketch after the pipeline below), and with MongoDB 5 you can also do it with window fields without a $group, but for MongoDB 4.4 I think this is a way to do it:
aggregate([
    {"$match": {"tournament_id": {"$eq": "6377f2f96174982ef89c48d2"}}},
    {"$sort": {"total_points": -1, "time_spent": 1}},
    {"$group": {"_id": "$club_name", "group-members": {"$push": "$$ROOT"}}},
    {"$set": {
        "first-five": {"$slice": ["$group-members", 5]},
        "group-members": "$$REMOVE"
    }},
    {"$set": {
        "total_points": {"$sum": "$first-five.total_points"},
        "time_spent": {"$sum": "$first-five.time_spent"},
        "first-five": "$$REMOVE"
    }}
])

Scala / MongoDB - removing duplicate

I have seen very similar questions with solutions to this problem, but I am unsure how I would incorporate them into my own query. I'm programming in Scala and using a MongoDB Aggregates "framework".
val getItems = Seq (
Aggregates.lookup(Store...)...
Aggregates.lookup(Store.STORE_NAME, "relationship.itemID", "uniqueID", "item"),
Aggregates.unwind("$item"),
// filter duplicates here ?
Aggregates.lookup(Store.STORE_NAME, "item.content", "ID", "content"),
Aggregates.unwind("$content"),
Aggregates.project(Projections.fields(Projections.include("store", "item", "content")))
)
The query returns duplicate objects, which is undesirable. I would like to remove these. How could I go about incorporating Aggregates.group and "$addToSet" to do this? Any other reasonable solution would be great too.
Note: I have to omit some details about the query, so the store lookup aggregate is not there. However, I want to remove the duplicates later in the query so it hopefully shouldn't matter.
Please let me know if I need to provide more information.
Thanks.
EDIT: 31/07/2019 13:47
I have tried the following:
val getItems = Seq (
Aggregates.lookup(Store...)...
Aggregates.lookup(Store.STORE_NAME, "relationship.itemID", "uniqueID", "item"),
Aggregates.unwind("$item"),
Aggregates.group("$item.itemID,
Accumulators.first("ID", "$ID"),
Accumulators.first("itemName", "$itemName"),
Accumulators.addToSet("item", "$item")
Aggregates.unwind("$items"),
Aggregates.lookup(Store.STORE_NAME, "item.content", "ID", "content"),
Aggregates.unwind("$content"),
Aggregates.project(Projections.fields(Projections.include("store", "items", "content")))
)
But my query now returns zero results instead of the duplicate result.
You can use $first to remove the duplicates.
Suppose I have the following data:
[
    {"_id": 1, "item": "ABC", "sizes": ["S", "M", "L"]},
    {"_id": 2, "item": "EFG", "sizes": []},
    {"_id": 3, "item": "IJK", "sizes": "M"},
    {"_id": 4, "item": "LMN"},
    {"_id": 5, "item": "XYZ", "sizes": null}
]
Now, let's aggregate it using $first and $unwind and see the difference:
First, let's aggregate it using $first:
db.collection.aggregate([
    { $sort: { item: 1 } },
    { $group: { _id: "$item", firstSize: { $first: "$sizes" } } }
])
Output
[
    {"_id": "XYZ", "firstSize": null},
    {"_id": "ABC", "firstSize": ["S", "M", "L"]},
    {"_id": "IJK", "firstSize": "M"},
    {"_id": "EFG", "firstSize": []},
    {"_id": "LMN", "firstSize": null}
]
Now, let's aggregate it using $unwind:
db.collection.aggregate([
    { $unwind: "$sizes" }
])
Output
[
    {"_id": 1, "item": "ABC", "sizes": "S"},
    {"_id": 1, "item": "ABC", "sizes": "M"},
    {"_id": 1, "item": "ABC", "sizes": "L"},
    {"_id": 3, "item": "IJK", "sizes": "M"}
]
You can see that $first removes the duplicates, whereas $unwind keeps the duplicates.
Using $unwind and $first together:
db.collection.aggregate([
    { $unwind: "$sizes" },
    { $group: { _id: "$item", firstSize: { $first: "$sizes" } } }
])
Output
[
{"_id": "IJK", "firstSize": "M"},
{"_id": "ABC","firstSize": "S"}
]
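Applied to the pipeline from the question, the same idea in mongo shell syntax might look roughly like this; this is only a sketch, where the collection name store is an assumption, the join fields come from the Scala snippet, and the deduplication groups on the item's uniqueID from the lookup:
db.store.aggregate([
    // ...the earlier lookup stages from the question...
    { $unwind: "$item" },
    // keep one document per joined item, discarding the duplicates
    { $group: { _id: "$item.uniqueID", doc: { $first: "$$ROOT" } } },
    { $replaceRoot: { newRoot: "$doc" } },
    { $lookup: { from: "store", localField: "item.content", foreignField: "ID", as: "content" } },
    { $unwind: "$content" },
    { $project: { store: 1, item: 1, content: 1 } }
])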
group then addToSet is an effective way to deal with your problem!
In the mongo shell it looks like this:
db.sales.aggregate([
    {
        $group: {
            _id: { day: { $dayOfYear: "$date" }, year: { $year: "$date" } },
            itemsSold: { $addToSet: "$item" }
        }
    }
])
In Scala you can do it like this:
Aggregates.group("$groupfield", Accumulators.addToSet("fieldName","$expression"))
If you have multiple fields to group by:
Aggregates.group(new BasicDBObject().append("fieldAname","$fieldA").append("fieldBname","$fieldB"), Accumulators.addToSet("fieldName","$expression"))
Then unwind.

mongo aggregate group and find one

I have a collection in MongoDB which stores documents like:
{"tag": "count1", "value": 100, "ts": 1544423706}
{"tag": "count2", "value": 1002, "ts": 1544423706}
{"tag": "count1", "value": 101, "ts": 1544423806}
{"tag": "count2", "value": 1003, "ts": 1544423806}
{"tag": "count1", "value": 102, "ts": 1544423906}
{"tag": "count2", "value": 1004, "ts": 1544423906}
My problem is: for each tag (e.g. "count1"), how can I get the first item whose "ts" is larger than 1544423800? As described, I want to find this result:
{"tag": "count1", "value": 101, "ts": 1544423806}
{"tag": "count2", "value": 1003, "ts": 1544423806}
Do I need to use aggregate to group by tag and then get the first item larger than the given "ts"? I am new to the aggregate function in MongoDB.
db.index.aggregate([{"$match": {"tag": {"$in":["count1","count2"]},"ts": {"$gt":1544423800}}
},
{"$group": {"_id": "$tag",
"tags": {"$push": "$$ROOT"}}
},
])
But the result is not one item for each tag, so I want to limit it to one item. What do I have to do?
Thank you, I have solved this with:
db.index.aggregate([
    {"$match": {"tag": {"$in": ["count1", "count2"]}, "ts": {"$gt": 1545730000}}},
    {"$group": {"_id": "$tag", "value": {"$first": "$value"}}}
])
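One caveat worth noting: $first returns whatever document happens to reach the $group stage first, so adding an explicit $sort on ts makes the result deterministic. A minimal sketch, assuming the same collection and fields:
db.index.aggregate([
    {"$match": {"tag": {"$in": ["count1", "count2"]}, "ts": {"$gt": 1545730000}}},
    // sort ascending so $first picks the earliest matching ts per tag
    {"$sort": {"ts": 1}},
    {"$group": {"_id": "$tag", "value": {"$first": "$value"}, "ts": {"$first": "$ts"}}}
])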

MongoDB - Select all documents by the count of an array field

In my current project I have a structure like this:
"squad": {
"members": [
{
"name": "xyz",
"empty": true
},
{
"name": "xyz",
"empty": true
},
{
"name": "xyz",
"empty": true
}
]
}
Now I want to query, in MongoDB, every squad which has at least, let's say, 3 empty member slots. I've googled and only found aggregate and $size, which seems to only count a whole array, not entries matching a field.
Any idea how to do it?
You can try this query:
db.getCollection('collectionName').aggregate([
    {$unwind: "$squad.members"},
    {$group: {_id: "$_id", count: {$sum: {$cond: [{$eq: ['$squad.members.empty', true]}, 1, 0]}}}},
    {$match: {count: {$gte: 3}}}
])
This query applies a conditional sum and then checks that the count is greater than or equal to 3.
This one will return all documents with more than 3 empty slots:
db.squad.aggregate([
    {$unwind: "$squad.members"},
    {$match: {"squad.members.empty": true}},
    {$group: {_id: "$_id", count: {$sum: 1}}},
    {$match: {count: {$gt: 3}}}
])