MongoDB: What index should I use?

I have a MongoDB highscore collection that contains documents such as:
{username:"Bob",score:10,category:"mostLikes"}
{username:"John",score:32,category:"mostLikes"}
{username:"Bob",score:2,category:"leastDeaths"}
The goal is to fetch the top 100 (sorted) scores for a specific category.
Important: Certain highscore categories are ascending (lower is better, e.g. leastDeaths) and others are descending (higher is better, e.g. mostLikes). This means that, depending on the category, I want either the 100 highest or the 100 lowest scores.
There are two main queries in my application:
db.highscore.find({category:category}, {}).limit(100).sort({ score: 1 /*or -1*/ });
db.highscore.find({username:username});
What index would you recommend?
Would keeping ascending categories and descending categories in separate collections result in better performance?
Note: I do not want one collection per category.

I did some tests locally with sample datasets, and I think the best option is to create the index category_1_score_1_username_1.
Creating an index on these fields gives you a covered query, so the documents are returned directly from the index.
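In shell form, that is (a minimal sketch; add options such as background: true if you need them):
db.usr.createIndex({ category: 1, score: 1, username: 1 });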
My analysis is below.
> db.usr.find();
{ "_id" : ObjectId("57bd20630744bd376277a795"), "username" : "Bob", "score" : 10, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a796"), "username" : "John", "score" : 32, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a797"), "username" : "Bob1", "score" : 2, "category" : "leastDeaths" }
{ "_id" : ObjectId("57bd20630744bd376277a798"), "username" : "John2", "score" : 132, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a799"), "username" : "Bob3", "score" : 20, "category" : "leastDeaths" }
{ "_id" : ObjectId("57bd20630744bd376277a79a"), "username" : "John4", "score" : 132, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a79b"), "username" : "Bob5", "score" : 22, "category" : "leastDeaths" }
{ "_id" : ObjectId("57bd20630744bd376277a79c"), "username" : "John6", "score" : 322, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a79d"), "username" : "Bob7", "score" : 232, "category" : "leastDeaths" }
{ "_id" : ObjectId("57bd20630744bd376277a79e"), "username" : "John8", "score" : 3112, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a79f"), "username" : "Bob4", "score" : 222, "category" : "leastDeaths" }
{ "_id" : ObjectId("57bd20630744bd376277a7a0"), "username" : "John22", "score" : 3210, "category" : "mostLikes" }
{ "_id" : ObjectId("57bd20630744bd376277a7a1"), "username" : "Bob33", "score" : 2111, "category" : "leastDeaths" }
Indexes:
> db.usr.getIndexes();
[
{
"v" : 1,
"key" : {
"category" : 1,
"score" : 1,
"username" : 1
},
"name" : "category_1_score_1_username_1",
"ns" : "test.usr"
}
]
>
Now you can modify your query slightly to turn it into a covered query.
db.usr.find({"category":"mostLikes"},{"_id":0,"score":-1,"category":1,"username":1}).sort({"score":1}).explain("executionStats");
Output of Execution Stats:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.usr",
"indexFilterSet" : false,
"parsedQuery" : {
"category" : {
"$eq" : "mostLikes"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"_id" : 0,
"score" : -1,
"category" : 1,
"username" : 1
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"category" : 1,
"score" : 1,
"username" : 1
},
"indexName" : "category_1_score_1_username_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"category" : [
"[\"mostLikes\", \"mostLikes\"]"
],
"score" : [
"[MinKey, MaxKey]"
],
"username" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 7,
"executionTimeMillis" : 0,
"totalKeysExamined" : 7,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 7,
"executionTimeMillisEstimate" : 0,
"works" : 8,
"advanced" : 7,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"_id" : 0,
"score" : -1,
"category" : 1,
"username" : 1
},
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 7,
"executionTimeMillisEstimate" : 0,
"works" : 8,
"advanced" : 7,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"category" : 1,
"score" : 1,
"username" : 1
},
"indexName" : "category_1_score_1_username_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"category" : [
"[\"mostLikes\", \"mostLikes\"]"
],
"score" : [
"[MinKey, MaxKey]"
],
"username" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 7,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "L4156409",
"port" : 27017,
"version" : "3.2.5",
"gitVersion" : "34e65e5383f7ea1726332cb175b73077ec4a1b02"
},
"ok" : 1
}
>
As you can see in the output, the number of documents examined is 0: the records are fetched directly from the index. This index is your best bet for the first query. Note that the same index also serves the descending sort ({score: -1}), because MongoDB can traverse an index in either direction, so you do not need separate collections for ascending and descending categories.
For the second query, a simple index on the username field will do.
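A minimal sketch, on your highscore collection:
db.highscore.createIndex({ username: 1 });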
Hope this helps.

Related

mongodb is not using an index for a find command

I have approximately 40M documents in a mongo collection. There is an index on the location.country field:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_"
},
//...
{
"v" : 2,
"key" : {
"location.country" : -1
},
"name" : "countriesIdx",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
//...
]
But queries don't use it:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.find({'location.country':'ghana'}).explain({verbosity: 'executionStats'})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "covid19.cases",
"indexFilterSet" : false,
"parsedQuery" : {
"location.country" : {
"$eq" : "ghana"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 195892,
"totalKeysExamined" : 0,
"totalDocsExamined" : 39264034,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 99032,
"works" : 39264036,
"advanced" : 0,
"needTime" : 39264035,
"needYield" : 0,
"saveState" : 39503,
"restoreState" : 39503,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 39264034
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "cluster-0-shard-00-01-vwhx6.mongodb.net",
"port" : 27017,
"version" : "4.4.8",
"gitVersion" : "83b8bb8b6b325d8d8d3dfd2ad9f744bdad7d6ca0"
},
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1629732226, 1),
"signature" : {
"hash" : BinData(0,"piKWDwLDv7FRcnwCe51PZDLR4UM="),
"keyId" : NumberLong("6958739380580122625")
}
},
"operationTime" : Timestamp(1629732226, 1)
}
Do I need to set up the index differently, or do something else, to get Mongo to use the index? I have tried hinting the index, but the query still does a COLLSCAN. While the examples above use mongosh, the behaviour is the same in my Node app using Mongoose.
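The hint attempt looked roughly like this (a reconstruction, not the exact command; 'countriesIdx' is the index name from getIndexes() above):
db.cases.find({ 'location.country': 'ghana' }).hint('countriesIdx').explain('executionStats')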

Improve slow query count mongodb

I'm trying to improve the performance of a count query (used to calculate pagination for a screen) on a collection of 1138633 documents. The query examines 391232 documents to return 364497, but it takes ~2 s to execute, which I think is too long.
My query looks like this:
db.myCollection.count({
"$or" : [
{
"field_1" : {
"$lte" : 1.0
}
},
{"field_1" : {
"$eq" : null
}
}
],
"field_2" : {
"$eq" : false
},
"field_3" : {
"$ne" : true
},
"field_4" : {
"$eq" : "fr-FR"
},
"field_5" : {
"$ne" : null
},
"field_6" : {
"$ne" : null
},
"field_7" : {
"$gte" : ISODate("2016-10-14T00:00:00.000Z")
}
})
field_1 is a number, field_2 and field_3 are booleans, field_5 is a string, and field_6 is an ObjectId that refers to a collection of 2 documents.
Here are my indexes (db.myCollection.getIndexes()):
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "db.myCollection"
},
{
"v" : 2,
"key" : {
"field_6" : 1,
"field_7" : -1
},
"name" : "field_6_1_field_7_-1",
"ns" : "db.myCollection",
"background" : true
},
{
"v" : 2,
"key" : {
"field_7" : 1
},
"name" : "field_7_1",
"background" : true,
"ns" : "db.myCollection"
},
{
"v" : 2,
"key" : {
"field_6" : 1
},
"name" : "field_6_1",
"ns" : "db.myCollection",
"background" : true
},
{
"v" : 2,
"key" : {
"field_1" : 1.0
},
"name" : "field_1_1",
"ns" : "db.myCollection"
}
]
I tried everything: forcing the index with hint, changing the order of the query (and the order of the compound index), but nothing worked.
Does anyone have an idea of what I could try to improve the execution time of this query? Do you need more details, such as the executionStats output?
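For reference, the executionStats can be gathered with an explain on the count (a sketch, reusing the same filter as the count above):
db.myCollection.explain("executionStats").count({ /* same filter as the count() above */ })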
Thanks.
EDIT: More detail. I calculated how many documents match each clause; here are my results:
field_6: 391232
field_1 $lte 1: 721005
field_1 $eq null: 417625
field_5: 819688
field_4: 1123301
field_2: 1138620
field_7: 1138630 (all documents)
field_3: 1138630 (all documents)
I reordered my query in the above order and got ~1.82 s (0.2 s saved).
I assume the problem is with the indexes, which are maybe wrong.
For the index details in explain, do you know which section I have to check? Here is what I found in the execution plan about my indexes:
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 391232,
"executionTimeMillisEstimate" : 427,
"works" : 391234,
"advanced" : 391232,
"needTime" : 1,
"needYield" : 0,
"saveState" : 3060,
"restoreState" : 3060,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"field_6" : 1,
"field_7" : -1
},
"indexName" : "field_6_1_field_7_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"field_6" : [],
"field_7" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"field_6" : [
"[MinKey, null)",
"(null, MaxKey]"
],
"field_7" : [
"[new Date(9223372036854775807), new Date(1491350400000)]"
]
},
"keysExamined" : 391233,
"seeks" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}

Why a covered count query is slow?

I'm running a MongoDB v3.2.12 sharded cluster. The shard key is _id, which is an md5 hash.
The problem is that a covered count query takes a long time.
The index used is about 5 GB on each MongoDB node. The total size of all indexes is 32 GB and fits comfortably in RAM, since each node has 128 GB of RAM.
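For reference, per-node index sizes can be read from collection stats (a sketch; indexSizes reports bytes per index on the node you run it on):
db.offer.stats().indexSizes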
The query is: db.offer.count({ "shopId": 275419, "missingSince": null})
The used index has been created as: db.offer.createIndex({shopId:1, missingSince:1, merchantId:1, _id:1}, {background:true})
As you can see, the index is not sparse, so even null values are present in the index.
Running db.currentOp() while the query is running shows that the query is using the right index. However, it has already been running for over 2814 seconds:
{
"desc" : "conn56062",
"threadId" : "140131556767488",
"connectionId" : 56062,
"client_s" : "x.x.x.x:39177",
"active" : true,
"opid" : "offerStoreIT02:1075309911",
"secs_running" : 2814,
"microsecs_running" : NumberLong("2814791918"),
"op" : "command",
"ns" : "offerStore.offer",
"query" : {
"query" : {
"count" : "offer",
"query" : {
"missingSince" : null,
"shopId" : 275419
}
},
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"planSummary" : "IXSCAN { shopId: 1.0, missingSince: 1.0, merchantId: 1.0, _id: 1.0 }",
"numYields" : 249244,
"locks" : {
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(498490)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(249245)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(249245)
}
}
}
}
Iterating a 5 GB in-memory index should never take this long. While the query is running, each MongoDB primary constantly reads 75-100 MB/s from disk; when the query is not running, only 5-10 MB/s are read. So my assumption is that MongoDB fetches the documents from SSD into memory in order to count them.
But why? The query should be covered by the index: all fields, including the shard key, are present in it, which should be sufficient to cover the query according to the MongoDB documentation:
https://docs.mongodb.com/manual/core/query-optimization/#covered-queries
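Whether the count is actually covered can be verified with an explain on it (a sketch; this is essentially what I do in the follow-up below):
db.offer.explain("executionStats").count({ "shopId": 275419, "missingSince": null })
// a covered count would show totalDocsExamined: 0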
Follow-up:
I reduced the problem to a minimal, unsharded setup. I inserted the following types of documents:
a) 3 documents without both fields shopId and missingSince
b) 5 documents with field shopId:1 without field missingSince
c) 7 documents with field shopId:1 and missingSince:null
d) 13 documents with field shopId:1 and missingSince:ISODate("2017-05-22T07:52:40.831Z")
I created the index {shopId:1, missingSince:1}.
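In shell form (a sketch):
db.offer.createIndex({ shopId: 1, missingSince: 1 })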
The execution plan of the query count({"shopId":1, "missingSince":null}) showed "totalDocsExamined" : 12, which means that 12 documents had to be fetched: the 5 documents of b) plus the 7 documents of c). All 12 of these documents should be represented in the index with shopId:1, missingSince:null, thus satisfying the query.
But why does MongoDB still need to fetch and examine these 12 documents?
Here is my test collection:
rs1:PRIMARY> db.offer.find()
{ "_id" : 1, "v" : 1 }
{ "_id" : 2, "v" : 1 }
{ "_id" : 3, "v" : 1 }
{ "_id" : 4, "shopId" : 1, "v" : 1 }
{ "_id" : 5, "shopId" : 1, "v" : 1 }
{ "_id" : 6, "shopId" : 1, "v" : 1 }
{ "_id" : 7, "shopId" : 1, "v" : 1 }
{ "_id" : 8, "shopId" : 1, "v" : 1 }
{ "_id" : 9, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 10, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 11, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 12, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 13, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 14, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 15, "shopId" : 1, "missingSince" : null, "v" : 1 }
{ "_id" : 16, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 17, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 18, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 19, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 20, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 21, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 22, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 23, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 24, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 25, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 26, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 27, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
{ "_id" : 28, "shopId" : 1, "missingSince" : ISODate("2017-05-22T07:52:40.831Z"), "v" : 1 }
Here is the output of explain():
rs1:PRIMARY> db.offer.explain(true).count({"shopId":1, "missingSince":null})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.offer",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"missingSince" : {
"$eq" : null
}
},
{
"shopId" : {
"$eq" : 1
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"missingSince" : {
"$eq" : null
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"shopId" : 1,
"missingSince" : 1
},
"indexName" : "shopId_1_missingSince_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"shopId" : [
"[1.0, 1.0]"
],
"missingSince" : [
"[null, null]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 0,
"totalKeysExamined" : 12,
"totalDocsExamined" : 12,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 13,
"advanced" : 0,
"needTime" : 12,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 12,
"nSkipped" : 0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"missingSince" : {
"$eq" : null
}
},
"nReturned" : 12,
"executionTimeMillisEstimate" : 0,
"works" : 13,
"advanced" : 12,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 12,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 12,
"executionTimeMillisEstimate" : 0,
"works" : 13,
"advanced" : 12,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"shopId" : 1,
"missingSince" : 1
},
"indexName" : "shopId_1_missingSince_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"shopId" : [
"[1.0, 1.0]"
],
"missingSince" : [
"[null, null]"
]
},
"keysExamined" : 12,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "Kays MacBook Pro",
"port" : 27017,
"version" : "3.2.6",
"gitVersion" : "05552b562c7a0b3143a729aaa0838e558dc49b25"
},
"ok" : 1
}
Since nobody could find a valid reason for this issue, I opened a MongoDB bug report yesterday: https://jira.mongodb.org/browse/SERVER-29326
MongoDB engineers confirmed that it is a bug. Unfortunately, it is not mentioned in MongoDB's documentation, which would have saved us many hours of tracking down the issue and let us deploy a different schema design from the beginning.
Try this index:
If your existing index, db.offer.createIndex({shopId:1, missingSince:1, merchantId:1, _id:1}, {background:true}),
is used for another purpose, try creating the following index on the offer collection:
db.offer.createIndex({missingSince:1, shopId:1}, {background:true})
This will optimize the query itself and thus the count.

MongoDB, can query fields slow down a query even if they form a partition?

Assuming I have only males and females in my user collection, is the following:
User.find({ gender: { $in: ['male','female'] }})
slower than this one:
User.find()
I feel like it would be, but I don't really know how MongoDB works internally. Both requests return the entire collection. I'm building a filter feature, and I'd like to simplify my API code by assuming that every call is filtered somehow.
It is a good question, as it touches on basic query-planning capabilities.
Comparing the explain results, we can see that using $in triggers a collection scan with a filter on the specified query parameter, which is more expensive than a plain document dump when querying without parameters.
db.User.find({ gender: { $in: ['male','female'] }}).explain("executionStats")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.User",
"indexFilterSet" : false,
"parsedQuery" : {
"gender" : {
"$in" : [
"female",
"male"
]
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"gender" : {
"$in" : [
"female",
"male"
]
}
},
"direction" : "forward"
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 24,
"executionTimeMillis" : 0,
"totalKeysExamined" : 0,
"totalDocsExamined" : 24,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"gender" : {
"$in" : [
"female",
"male"
]
}
},
"nReturned" : 24,
"executionTimeMillisEstimate" : 0,
"works" : 26,
"advanced" : 24,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 24
}
},
"serverInfo" : {
"host" : "greg",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}
db.User.find().explain("executionStats")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.User",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : []
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : []
},
"direction" : "forward"
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 24,
"executionTimeMillis" : 0,
"totalKeysExamined" : 0,
"totalDocsExamined" : 24,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : []
},
"nReturned" : 24,
"executionTimeMillisEstimate" : 0,
"works" : 26,
"advanced" : 24,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 24
}
},
"serverInfo" : {
"host" : "greg",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}
When querying without a condition, MongoDB returns all the documents without checking them. But if you add a condition, it compiles the condition into BSON and matches it against the data in the database, which is slower. If you create an index on gender, however, you will not see any difference in time between the two queries.
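For completeness, such an index is created like this (a minimal sketch):
db.User.createIndex({ gender: 1 })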

Mongodb $and and $sort on different fields and getting last matching record

I have a collection with around 5 million (50 lakh) records. Below is one sample document:
{
"_id" : NumberLong(4253223),
"locId" : 59,
"startIpNum" : NumberLong("3287940726"),
"endIpNum" : NumberLong("3287940761"),
"maxmind_location" : {
"locId" : 59,
"country" : "DK",
"region" : "",
"city" : "",
"postalCode" : "",
"latitude" : "56.0000",
"longitude" : "10.0000",
"metroCode" : "",
"areaCode" : "\n"
}
}
Below is the query I am trying to perform. I want to find the last record matching the condition:
find({
$and: [
{startIpNum: { $lte: 459950297 }},
{endIpNum: { $gte: 459950297 }}
]
}).sort({_id : -1}).limit(1)
I have separate ascending indexes on startIpNum and endIpNum. I have replaced _id with an incremental id value, like in MySQL.
When I run the query without the sort and with limit 1, it returns a result in 0 ms. As soon as I add the sort (which I need, since I want the last matching record), the query hangs forever.
I have also tried the query below with a compound index on {startIpNum: 1, endIpNum: 1, _id: -1} and the sort on _id, but it takes around 700 ms.
find({
startIpNum : { $lte: 459950297 },
endIpNum : { $gte: 459950297 }
}).sort({
startIpNum :1,
endIpNum :1 ,
_id : -1
}).limit(1).explain({ verbose : true});
How can I achieve the sort in my first approach?
Here is the explain output. It is still examining 370061 index keys for:
db.maxmind.find({startIpNum : { $lte: 459950297 }, endIpNum : { $gte: 459950297 } }).sort({startIpNum :1, endIpNum :1 , _id : -1 }).limit(1).hint("startIpNum_1_endIpNum_1__id_-1").explain( { verbose: true } );
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "yogeshTest.maxmind",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"startIpNum" : {
"$lte" : 459950297
}
},
{
"endIpNum" : {
"$gte" : 459950297
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 0,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"startIpNum" : 1,
"endIpNum" : 1,
"_id" : -1
},
"indexName" : "startIpNum_1_endIpNum_1__id_-1",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"startIpNum" : [
"[-inf.0, 459950297.0]"
],
"endIpNum" : [
"[459950297.0, inf.0]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 433,
"totalKeysExamined" : 370061,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 430,
"works" : 370062,
"advanced" : 1,
"needTime" : 370060,
"needFetch" : 0,
"saveState" : 2891,
"restoreState" : 2891,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 420,
"works" : 370061,
"advanced" : 1,
"needTime" : 370060,
"needFetch" : 0,
"saveState" : 2891,
"restoreState" : 2891,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 410,
"works" : 370061,
"advanced" : 1,
"needTime" : 370060,
"needFetch" : 0,
"saveState" : 2891,
"restoreState" : 2891,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"startIpNum" : 1,
"endIpNum" : 1,
"_id" : -1
},
"indexName" : "startIpNum_1_endIpNum_1__id_-1",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"startIpNum" : [
"[-inf.0, 459950297.0]"
],
"endIpNum" : [
"[459950297.0, inf.0]"
],
"_id" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 370061,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0
}
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "cus360-H81M-S",
"port" : 27017,
"version" : "3.0.3",
"gitVersion" : "b40106b36eecd1b4407eb1ad1af6bc60593c6105"
},
"ok" : 1
}
Before you post the output of db.collection.getIndexes() and the explain of your query, let us try the following. What I suspect is that your {startIpNum: 1, endIpNum: 1, _id: -1} index does not win as a query plan.
So what you can try is forcing MongoDB to use that index by hinting:
find({
startIpNum : { $lte: 459950297 },
endIpNum : { $gte: 459950297 }
}).sort({
startIpNum :1,
endIpNum :1 ,
_id : -1
}).limit(1).hint({startIpNum :1 , endIpNum : 1 , _id : -1 })
Currently, it seems your query fetches all the matching documents, loads them into memory, and sorts them there. With the hint, using your index, it will pick up your documents in the right order from the start.
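For completeness, the index being hinted corresponds to this creation command (a sketch, matching the key pattern above):
db.maxmind.createIndex({ startIpNum: 1, endIpNum: 1, _id: -1 })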