Using mongo server v3.6.16.
I have a mongo collection with about 18m records. Records are being added at about 100k a day. I have a query that runs fairly often on the collection that depends on two values - user_id and server_time_stamp. I have a compound index set up for those two fields.
The index is regularly getting stale - and queries are taking minutes to complete and causing the server to burn all the CPU it can grab. As soon as I regenerate the index, queries happen quickly. But then a day or two later, the index is stale again. (ed. the index is failing more quickly now - within 30 mins.) I have no idea why the index is going stale - what can I look for?
Edit
Here are the index fields:
{
"uid" : 1,
"server_time_stamp" : -1
}
and index options:
{
"v" : 2,
"name" : "server_time_stamp_1_uid_1",
"ns" : "sefaria.user_history"
}
This appears to be a Heisenbug: when I use explain, the query performs well. Here is one of the pathological queries, from the slow query log, taking 445 seconds:
sefaria.user_history command: find { find: "user_history", filter: { server_time_stamp: { $gt: 1577918252 }, uid: 80588 }, sort: { _id: 1 }, lsid: { id: UUID("4936fb55-8514-4442-b852-306686985126") }, $db: "sefaria", $readPreference: { mode: "primaryPreferred" } } planSummary: IXSCAN { _id: 1 } keysExamined:17286277 docsExamined:17286277 cursorExhausted:1 numYields:142780 nreturned:79 reslen:35375 locks:{ Global: { acquireCount: { r: 285562 } }, Database: { acquireCount: { r: 142781 } }, Collection: { acquireCount: { r: 142781 } } } protocol:op_msg 445101ms
Here's the results of explain for a performant query, right after regenerating the index:
{
"queryPlanner" : {
"plannerVersion" : NumberInt(1),
"namespace" : "sefaria.user_history",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"uid" : {
"$eq" : 80588.0
}
},
{
"server_time_stamp" : {
"$gt" : 1577918252.0
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid" : NumberInt(1),
"server_time_stamp" : NumberInt(-1)
},
"indexName" : "server_time_stamp_1_uid_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"uid" : [
],
"server_time_stamp" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[80588.0, 80588.0]"
],
"server_time_stamp" : [
"[inf.0, 1577918252.0)"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"server_time_stamp" : {
"$gt" : 1577918252.0
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid" : NumberInt(1),
"book" : NumberInt(1),
"last_place" : NumberInt(1)
},
"indexName" : "uid_1_book_1_last_place_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"uid" : [
],
"book" : [
],
"last_place" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[80588.0, 80588.0]"
],
"book" : [
"[MinKey, MaxKey]"
],
"last_place" : [
"[MinKey, MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"server_time_stamp" : {
"$gt" : 1577918252.0
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid" : NumberInt(1)
},
"indexName" : "uid",
"isMultiKey" : false,
"multiKeyPaths" : {
"uid" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[80588.0, 80588.0]"
]
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : NumberInt(97),
"executionTimeMillis" : NumberInt(1),
"totalKeysExamined" : NumberInt(97),
"totalDocsExamined" : NumberInt(97),
"executionStages" : {
"stage" : "FETCH",
"nReturned" : NumberInt(97),
"executionTimeMillisEstimate" : NumberInt(0),
"works" : NumberInt(99),
"advanced" : NumberInt(97),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(3),
"restoreState" : NumberInt(3),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"docsExamined" : NumberInt(97),
"alreadyHasObj" : NumberInt(0),
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : NumberInt(97),
"executionTimeMillisEstimate" : NumberInt(0),
"works" : NumberInt(98),
"advanced" : NumberInt(97),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(3),
"restoreState" : NumberInt(3),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"keyPattern" : {
"uid" : NumberInt(1),
"server_time_stamp" : NumberInt(-1)
},
"indexName" : "server_time_stamp_1_uid_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"uid" : [
],
"server_time_stamp" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[80588.0, 80588.0]"
],
"server_time_stamp" : [
"[inf.0, 1577918252.0)"
]
},
"keysExamined" : NumberInt(97),
"seeks" : NumberInt(1),
"dupsTested" : NumberInt(0),
"dupsDropped" : NumberInt(0),
"seenInvalidated" : NumberInt(0)
}
}
},
"serverInfo" : {
"host" : "mongo-deployment-5cf4f4fff6-dz84r",
"port" : NumberInt(27017),
"version" : "3.6.15",
"gitVersion" : "18934fb5c814e87895c5e38ae1515dd6cb4c00f7"
},
"ok" : 1.0
}
The issue was that a query which normally runs well and uses the index suddenly stops using it, resulting in very poor performance. This can be seen in the query plan and the log, respectively.
The explain's output:
The query plan's "executionStats" says "totalKeysExamined" : NumberInt(97). The query filter is using an index defined on the collection ("stage" : "IXSCAN"), and the compound index "server_time_stamp_1_uid_1" is used. Also, the query's sort is using an index (the index on _id). As such, the query and the indexes are working as they are meant to, and "executionTimeMillis" : NumberInt(1) says that it is a performant query.
Details from the log:
{ ...
find: "user_history", filter: { server_time_stamp: { $gt: 1577918252 }, uid: 80588 }, sort: { _id: 1 }
planSummary: IXSCAN { _id: 1 } keysExamined:17286277 docsExamined:17286277 numYields:142780 nreturned:79
... }
From the log, note that the index "server_time_stamp_1_uid_1" is not used.
Discussion:
The data and the indexes for frequently used queries (together called the working set) are kept in memory (RAM plus the file system cache). If the working set is not in memory, the system has to load it into memory during the operation, which results in slower performance; reading from a disk drive is much slower than reading from memory. Note that SSD drives are much faster than HDD drives, and when there is no option to increase memory, moving to SSDs can be an option.
Also, if the query uses an index that is large and cannot fit in memory, the index has to be read from the disk drive, which slows the operation down. More memory is one solution; when that is not possible, the solution can be redesigning (or re-modeling) the data and its indexes.
But the problem in this case was not available memory; there is enough of it.
The following info gives an idea of how much memory the working set for a given query might use:
db.collection.stats().indexSizes, size, count and avgObjSize.
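For example, a quick sketch of reading those numbers in the mongo shell (using the user_history collection from the question; the fields come from the standard collection stats output):

var stats = db.user_history.stats()
stats.indexSizes   // size of each index, in bytes
stats.size         // total size of the documents, in bytes
stats.count        // number of documents in the collection
stats.avgObjSize   // average document size, in bytes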
Solution:
The query log with slow performance shows that the index "server_time_stamp_1_uid_1" is not used: planSummary: IXSCAN { _id: 1 }.
One way to make sure the query always uses the index is to supply a hint on the query. The hint needs to reference the index "server_time_stamp_1_uid_1". This way the situation seen in the log will not happen.
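As a sketch, using the filter and sort from the slow-query log above, hinting the index by name would look like this:

db.user_history.find(
    { server_time_stamp: { $gt: 1577918252 }, uid: 80588 }
).sort({ _id: 1 }).hint("server_time_stamp_1_uid_1")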
Another way is to keep the index active in memory. This can be achieved by running a query on the indexed fields only (a covered query: the query filter and returned fields are indexed fields only). Running this dummy query often, or just before the actual query, makes sure the index is available in memory.
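A minimal sketch of such a dummy query, assuming the compound index above (the uid value is illustrative; excluding _id in the projection keeps the query covered):

db.user_history.find(
    { uid: 80588, server_time_stamp: { $gt: 0 } },
    { _id: 0, uid: 1, server_time_stamp: 1 }
).itcount()   // itcount() forces the cursor to be fully iterated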
In this case, as @Laizer mentioned, supplying the hint to the query helped resolve the issue.
This behavior is due to no single index being able to both be selective and service the sort.
The log line for the slow operation shows the operation using the _id index. The query planner likely made this selection to avoid having to sort results in memory (note the lack of hasSortStage: 1). As a consequence, however, it required scanning considerably more documents (docsExamined:17286277), which made the operation take considerably longer.
Memory contention likely also played a part. Depending on load, the overhead from sorting results in memory may have contributed to pushing the index out of RAM and the _id index being selected.
A few comments:
As Babu noted, the explain posted above does not include a sort. Including the sort would likely show that stage consuming more time than the IXSCAN.
The name of the index (server_time_stamp_1_uid_1) suggests that server_time_stamp is placed first in the index, followed by uid. Equality matches should be prioritized; i.e. uid should be placed before the server_time_stamp range.
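If the index was indeed created with server_time_stamp first, a sketch of recreating it with the equality field leading:

db.user_history.createIndex({ uid: 1, server_time_stamp: -1 })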
Some options to consider:
Create the index { "uid" : 1, "_id" : 1, "server_time_stamp" : 1 }. See the MongoDB documentation on using indexes to sort query results for guidance. Results may be mixed, though, given that both _id and server_time_stamp are likely to have high cardinality, which means you may still be trading off scanning documents for avoiding a sort.
Assuming that the _id values are auto-generated, consider sorting by server_time_stamp rather than _id. This will allow you to bound AND sort using server_time_stamp_1_uid_1. The server_time_stamp is a timestamp, so it will also be relatively unique.
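For illustration, the reworked query might look like this sketch; both the range bound and the sort can then be serviced by server_time_stamp_1_uid_1:

db.user_history.find(
    { uid: 80588, server_time_stamp: { $gt: 1577918252 } }
).sort({ server_time_stamp: -1 })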
sefaria.user_history command: find { find: "user_history", filter: { server_time_stamp: { $gt: 1577918252 }, uid: 80588 }, sort: { _id: 1 }, lsid: { id: UUID("4936fb55-8514-4442-b852-306686985126") }, $db: "sefaria", $readPreference: { mode: "primaryPreferred" } } planSummary: IXSCAN { _id: 1 } keysExamined:17286277 docsExamined:17286277 cursorExhausted:1 numYields:142780 nreturned:79 reslen:35375 locks:{ Global: { acquireCount: { r: 285562 } }, Database: { acquireCount: { r: 142781 } }, Collection: { acquireCount: { r: 142781 } } } protocol:op_msg 445101ms
Looking at the query plan, the query uses the _id index. Is that because you have a sort on the _id field? I looked at the other plan you attached.
"executionSuccess" : true,
"nReturned" : NumberInt(97),
"executionTimeMillis" : NumberInt(1),
"totalKeysExamined" : NumberInt(97),
"totalDocsExamined" : NumberInt(97),
The ratio of documents returned to documents examined is 1:1.
Also, the query is using the compound index:
"indexName" : "server_time_stamp_1_uid_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"uid" : [
],
"server_time_stamp" : [
]
},
I think something is missing in both queries. Maybe the sort is not included in the good plan. Can you please check?
I believe that the issue here was memory. The instance was operating near the limit of physical memory. I can't say for sure, but I believe that the relevant index was being removed from memory, and that the poor query performance was a result of that. Regenerating the index forced it back into memory (presumably, something else got kicked out of memory).
I've put the instance on a node with much more memory, and so far it seems to be performing well.
I have a txt file with mongoDB queries, like this:
db.telephone.find({'brand' : 'Apple'});
db.telephone.find({'brand' : 'Samsung'});
...to a total of about 1500 rows. I am executing this file like this:
mongo myDatabase C:\path\mongoDB.txt
Now I need to measure how long it takes to execute all of these queries. I don't really care about the output; I only care about the time it takes (as part of an experiment).
I thought that if I created a collection times and inserted the current time into it like this db.times.insert({time: Date()}); at the beginning and end of the query file, it would do what I need, but it seemingly does not work, as both resulting times are the same in the end (and I believe that executing all these queries took more than 1 second for sure).
Is this because I don't print the output, so the queries don't really get executed? Or why does this not work? And is there a better way to measure the time it takes to execute these queries from a file? Thank you.
You can assign start and end times in the file itself. The following is an example:
var start_time = new Date().valueOf();              // timestamp in milliseconds, before the queries
db.telephone.find({'brand' : 'Apple'}).itcount();   // itcount() forces the cursor to be fully iterated;
db.telephone.find({'brand' : 'Samsung'}).itcount(); // in a script, find() alone only creates the cursor
var end_time = new Date().valueOf();                // timestamp in milliseconds, after the queries
print(end_time - start_time);                       // elapsed time in milliseconds
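The file is then run the same way as before (mongo myDatabase C:\path\mongoDB.txt), and the printed number is the elapsed wall-clock time in milliseconds, including shell overhead.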
How can we precisely measure the execution time?
To analyze a query, we can use explain(). It returns the complete statistics of the query. The following is an example:
db.telephone.find({'brand' : 'Apple'}).explain("executionStats")
Output:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "check.telephone",
"indexFilterSet" : false,
"parsedQuery" : {
"brand" : {
"$eq" : "Apple"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"brand" : {
"$eq" : "Apple"
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 35,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"brand" : {
"$eq" : "Apple"
}
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 1,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 1
}
},
"serverInfo" : {
"host" : "theMechanic",
"port" : 27017,
"version" : "4.0.11",
"gitVersion" : "417d1a712e9f040d54beca8e4943edce218e9a8c"
},
"ok" : 1
}
Note: executionStats.executionTimeMillis holds the actual query execution time.
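If you want the server-side execution time rather than wall-clock time, one option is to accumulate executionTimeMillis per query in the script. A sketch (note this times the explain run of each query, not the original run):

var total = 0;
total += db.telephone.find({'brand' : 'Apple'}).explain("executionStats").executionStats.executionTimeMillis;
total += db.telephone.find({'brand' : 'Samsung'}).explain("executionStats").executionStats.executionTimeMillis;
print(total);   // total server-reported execution time in milliseconds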
I've got a Mongo collection with 1691721 items in it, containing essentially location information. I'm attempting to do a regex search on it, and it's pretty slow - but I don't understand why, as I thought I had appropriate indexes in place.
A typical document
{
"_id" : ObjectId("58c08029ef4468c8157455fa"),
"ng" : [
394235,
806529
],
"postcode" : "AB101AB"
}
Indexes
I've created a text index on the postcode field, which you can see here in the full list of indexes:
db.locations.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "Traders.locations"
},
{
"v" : 2,
"key" : {
"_fts" : "text",
"_ftsx" : 1
},
"name" : "postcode_text",
"ns" : "Traders.locations",
"weights" : {
"postcode" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 3
}
]
Query
At this point in time, all I care about is the postcode field. So I've tried writing a query to obtain the last value:
db.locations.find({ postcode: { $regex: /^ZE29XN$/ } }, { postcode: 1, _id: 0 })
Now this takes a while to run - roughly 700ms - which is a lot longer than I was expecting. As far as I was concerned this is a covered query: I've got a text index on the single field I care about. However, if I explain the above query, it shows it's using a COLLSCAN, and I don't understand why:
db.locations.find({ postcode: { $regex: /^ZE29XN$/ } }, { postcode: 1, _id: 0 }).explain("allPlansExecution")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "Traders.locations",
"indexFilterSet" : false,
"parsedQuery" : {
"postcode" : {
"$regex" : "^ZE29XN$"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"postcode" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"postcode" : {
"$regex" : "^ZE29XN$"
}
},
"direction" : "forward"
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 732,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1691721,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 1,
"executionTimeMillisEstimate" : 697,
"works" : 1691723,
"advanced" : 1,
"needTime" : 1691721,
"needYield" : 0,
"saveState" : 13223,
"restoreState" : 13223,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"postcode" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"postcode" : {
"$regex" : "^ZE29XN$"
}
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 676,
"works" : 1691723,
"advanced" : 1,
"needTime" : 1691721,
"needYield" : 0,
"saveState" : 13223,
"restoreState" : 13223,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 1691721
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "DESKTOP",
"port" : 27017,
"version" : "3.4.2",
"gitVersion" : "3f76e40c105fc223b3e5aac3e20dcd026b83b38b"
},
"ok" : 1
}
My Question
Why is the text index I've created not being used, and ultimately how can I make my query faster?
I should note that I'm open to alternatives to using $regex; however, I do need to be able to allow for a "starts with" - so ZE.*, ZE2.* or ZE29XN should all be searchable at speed.
Something potentially useful to note: I wondered whether, once I eventually get my index working, marking it as unique: true might help speed things up. However, running that produced a duplicate key error (despite the fact that I can't find one when I run an aggregation - I can dig into this if need be, but I'm not sure it's related).
A quick summary of text search operators in MongoDB:
$regex: provides regular expression capabilities for pattern matching strings in queries. The $regex operator does support partial matches, but it will only make efficient use of an index if the search string is anchored (i.e. uses a leading ^).
$text: performs a text search on the content of the fields indexed with a text index (use of the $text operator is a necessary precondition for MongoDB to use a text index). These searches are typically 'fast' (subjective term but when you have one working you'll see what that means) but they do not support partial matches so you won't be able to 'text search' for partial postcodes.
With that in mind, it looks like you are attempting to use partial matching (via $regex) against a text index. This will not work because a text index is only engaged by the $text operator.
Your stated requirements are:
You want partial string matches
You want index coverage
You can meet these requirements by (1) using $regex and (2) indexing the postcode field with a normal index, not a text index. This is subject to one (important!) caveat: your search strings must be anchored. So this requirement - "need to be able to allow for a 'starts with' - so ZE.* or ZE2.* or ZE29XN" - should be fine. But a search such as .*29XN will not be able to use the index.
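Concretely, a sketch of that approach with the locations collection from the question; with _id excluded from the projection, these anchored queries can also be covered by the index:

db.locations.createIndex({ postcode: 1 })

db.locations.find({ postcode: { $regex: /^ZE2/ } }, { postcode: 1, _id: 0 })
db.locations.find({ postcode: { $regex: /^ZE29XN$/ } }, { postcode: 1, _id: 0 })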
I am facing a problem creating a covered query. I am using the latest version of Mongo 3. Here is my sample data; I have inserted 10006 such documents into MongoDB.
db.orders.insert({ _id: 1, cust_id: "abc1", ord_date: ISODate("2012-11-02T17:04:11.102Z"), status: "A", amount: 50 })
db.orders.insert({ _id: 2, cust_id: "xyz1", ord_date: ISODate("2013-10-01T17:04:11.102Z"), status: "A", amount: 100 })
db.orders.insert({ _id: 3, cust_id: "xyz1", ord_date: ISODate("2013-10-12T17:04:11.102Z"), status: "D", amount: 25 })
db.orders.insert({ _id: 4, cust_id: "xyz1", ord_date: ISODate("2013-10-11T17:04:11.102Z"), status: "D", amount: 125 })
db.orders.insert({ _id: 5, cust_id: "abc1", ord_date: ISODate("2013-11-12T17:04:11.102Z"), status: "A", amount: 25 })
For a covered query, all the fields in the query must be part of an index, so I have created indexes on the status, ord_date, cust_id and amount fields:
db.orders.createIndex({status: 1})
db.orders.createIndex({amount: 1})
db.orders.createIndex({ord_date: 1})
db.orders.createIndex({cust_id: 1})
I have executed the following query:
db.orders.find(
{status : "A"},{ord_date : 1, cust_id : 1}
).sort({ amount: -1 }).explain()
But this explain returns executionStats.totalDocsExamined = 200 instead of executionStats.totalDocsExamined = 0, which means documents are being scanned when the query executes. In Mongo 3, we can check whether an index covered a query using executionStats.totalDocsExamined instead of indexOnly.
Can anyone please suggest what I am doing wrong with the covered query?
Here is my output after index suggestion by Markus:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "local.orders",
"indexFilterSet" : false,
"parsedQuery" : {
"status" : {
"$eq" : "A"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"_id" : 1,
"ord_date" : 1,
"cust_id" : 1
},
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"amount" : -1
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"status" : {
"$eq" : "A"
}
},
"direction" : "forward"
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 10004,
"executionTimeMillis" : 70,
"totalKeysExamined" : 0,
"totalDocsExamined" : 10018,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 10004,
"executionTimeMillisEstimate" : 70,
"works" : 20026,
"advanced" : 10004,
"needTime" : 10021,
"needFetch" : 0,
"saveState" : 157,
"restoreState" : 157,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"_id" : 1,
"ord_date" : 1,
"cust_id" : 1
},
"inputStage" : {
"stage" : "SORT",
"nReturned" : 10004,
"executionTimeMillisEstimate" : 70,
"works" : 20026,
"advanced" : 10004,
"needTime" : 10020,
"needFetch" : 0,
"saveState" : 157,
"restoreState" : 157,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"amount" : -1
},
"memUsage" : 960384,
"memLimit" : 33554432,
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"status" : {
"$eq" : "A"
}
},
"nReturned" : 10004,
"executionTimeMillisEstimate" : 10,
"works" : 10020,
"advanced" : 10004,
"needTime" : 15,
"needFetch" : 0,
"saveState" : 157,
"restoreState" : 157,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 10018
}
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "pcd32",
"port" : 27017,
"version" : "3.0.7",
"gitVersion" : "6ce7cbe8c6b899552dadd907604559806aa2esd5"
}
}
While there is index intersection in MongoDB, it can be quite tricky to utilize. However, sticking to a rule of thumb is a rather safe bet:
When creating queries in MongoDB, assume that only one index can be used at a time.
This is especially true for covered queries, as detailed in the docs:
An index covers a query when both of the following apply:
all the fields in the query are part of an index, and
all the fields returned in the results are in the same index.
Having a compound index doesn't have drawbacks when carefully crafted, as queries using only a leftmost prefix of that index can use it, too.
So in order to make your query covered, you need to have all the keys you want returned in your index. Since you did not limit the fields returned (the "projection" in MongoDB terms), I assume you need the _id field to be returned as well. Furthermore, your index should put the equality match first and reflect your sorting order. So your index should look like:
db.orders.createIndex({status: 1, amount: -1, ord_date: 1, cust_id: 1, _id: 1})
for your query. Order matters, so in order to make best use of the newly created index, other queries should adhere to the same order of fields.
If you also need the _id field, then the below compound index should give you a covered query:
db.orders.createIndex({status: 1, amount: -1, ord_date: 1, cust_id: 1, _id: 1})
If you don't need the _id field, then use _id: 0 in the find() so that _id is not retrieved, and you can remove it from the index as well.
Note that in a covered query, the ordering of the index fields relative to the actual query being executed (equality matches, then sort fields, then the remaining projected fields) is important for the index to be used in the execution of the query.
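To check that the query is now covered, re-run explain with _id excluded from the projection; for a covered query you should see totalDocsExamined: 0 and an IXSCAN with no FETCH stage:

db.orders.find(
    { status: "A" },
    { _id: 0, ord_date: 1, cust_id: 1 }
).sort({ amount: -1 }).explain("executionStats")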
I have the following structure in the database:
{
"_id" : {
"user" : 14197,
"date" : ISODate("2014-10-24T00:00:00.000Z")
},
...
}
I have a performance problem when I try to select data by user and date range. Mongo doesn't use an index and runs a full scan over the collection.
db.timeuse.daily.find({ "_id.user": 289006, "_id.date" : {$gt: ISODate("2014-10-23T00:00:00Z"), $lte: ISODate("2014-10-30T00:00:00Z")}}).explain()
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 6,
"nscannedObjects" : 66967,
"nscanned" : 66967,
"nscannedObjectsAllPlans" : 66967,
"nscannedAllPlans" : 66967,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 523,
"nChunkSkips" : 0,
"millis" : 1392,
"server" : "mongo-shard0003:27018",
"filterSet" : false,
"stats" : {
"type" : "COLLSCAN",
"works" : 66969,
"yields" : 523,
"unyields" : 523,
"invalidates" : 16,
"advanced" : 6,
"needTime" : 66962,
"needFetch" : 0,
"isEOF" : 1,
"docsTested" : 66967,
"children" : [ ]
},
"millis" : 1392
}
So far I have found only one way - using $in.
db.timeuse.daily.find({"_id": { $in: [
{"user": 289006, "date": ISODate("2014-10-23T00:00:00Z")},
{"user": 289006, "date": ISODate("2014-10-24T00:00:00Z")}
]}}).explain()
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"_id" : [
[
{
"user" : 289006,
"date" : ISODate("2014-10-23T00:00:00Z")
},
{
"user" : 289006,
"date" : ISODate("2014-10-23T00:00:00Z")
}
],
[
{
"user" : 289006,
"date" : ISODate("2014-10-24T00:00:00Z")
},
{
"user" : 289006,
"date" : ISODate("2014-10-24T00:00:00Z")
}
]
]
},
Is there a more elegant way to run this kind of query?
TL;DR: Don't put your data in the _id field and use a compound index: db.timeuse.daily.ensureIndex( { "user" : 1, "date": 1 } ).
Explanation:
You're abusing the _id key convention, or more precisely, the fact that MongoDB can index entire objects. What you want to achieve requires either two separate indexes that can be combined (that feature is called index intersection; by now it is available in MongoDB, but it has limitations) or a special index over the set of keys, which in MongoDB is called a compound index.
The _id field is indexed by default, but it's indexed as a whole, i.e. the _id index will only support equality queries on the entire object, rather than on parts of the object. That also explains why the $in query works.
In general, that data structure with the default index will behave oddly. Consider this:
> db.sort.insert({"_id" : {"name" : "foo", value : 4343} });
> db.sort.insert({"_id" : {"name" : "foo", value : 4343, bla : "fooffo"} });
> db.sort.find();
{ "_id" : { "name" : "foo", "value" : 4343 } }
{ "_id" : { "name" : "foo", "value" : 4343, "bla" : "fooffo" } }
> db.sort.find({"_id" : {"name" : "foo", value : 4343} });
{ "_id" : { "name" : "foo", "value" : 4343 } }
// no second result here...
Imagine MongoDB basically hashed the entire object and simply looked up the object's hash - such an index can't support range queries based on some part of the object.
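A sketch of the suggested remodeling, with user and date promoted to top-level fields and the compound index from the TL;DR; explain should then report a BtreeCursor on user_1_date_1 instead of a BasicCursor:

db.timeuse.daily.ensureIndex({ "user" : 1, "date" : 1 })
db.timeuse.daily.find({
    user: 289006,
    date: { $gt: ISODate("2014-10-23T00:00:00Z"), $lte: ISODate("2014-10-30T00:00:00Z") }
}).explain()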