MongoDB index use on find all without hint - mongodb

I've got a question on how to write an index properly to avoid resorting to a hint.
Sample "Test" Collection Schema
{
_id: ObjectId(<whatever>),
a: <whatever>,
b: <whatever>,
c: <whatever>,
d: <whatever>,
e: {
f: <whatever>,
g: <whatever>
}
}
Index on "Test"
db.test.ensureIndex( { "a": NumberInt(1), "c": NumberInt(1), "_id": NumberInt(1), "d": NumberInt(1) },
{ name: "a_1_c_1__id_1_d_1", background: true } );
Query without hint and query with hint...
> db.test.find({},{d:1}).explain();
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 752,
"nscannedObjects" : 752,
"nscanned" : 752,
"nscannedObjectsAllPlans" : 752,
"nscannedAllPlans" : 752,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 4,
"nChunkSkips" : 0,
"millis" : 5,
"indexBounds" : {
},
"server" : <whatever>
}
> db.test.find({},{d:1}).hint("a_1_c_1__id_1_d_1").explain();
{
"cursor" : "BtreeCursor a_1_c_1__id_1_d_1",
"isMultiKey" : false,
"n" : 752,
"nscannedObjects" : 752,
"nscanned" : 752,
"nscannedObjectsAllPlans" : 752,
"nscannedAllPlans" : 752,
"scanAndOrder" : false,
"indexOnly" : true,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"c" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"_id" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"d" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : <whatever>
}
I'd (obviously) like the query to use the covered index but I don't know how to get there without using the hint. Is it possible? I'd prefer to manipulate the index vs. changing the query but changing the query is an option, if need be.

Turns out this is a known issue. Apologies for the post.
https://jira.mongodb.org/browse/SERVER-2109

Related

query wouldn't use suitable index

I have a query
db.Product.find({
CategoryPath: /^399-305-352(-\d+)*$/,
"Availability.Status": {
$lt: 4
},
$or: [{
_id: {
$lt: 331000000
}
}, {
_id: {
$gt: 852000000,
$lt: 853000000
}
}, {
_id: {
$gt: 972000000,
$lt: 973000000
}
}]
}).sort({
"Availability.Status": 1,
Popularity: -1
});
with explain I find it uses index Availability.Status_1_Popularity_-1:
{
"cursor" : "BtreeCursor Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 913,
"nscannedObjects" : 470239,
"nscanned" : 470239,
"nscannedObjectsAllPlans" : 1387264,
"nscannedAllPlans" : 1387264,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 10838,
"nChunkSkips" : 0,
"millis" : 10117,
"indexBounds" : {
"Availability.Status" : [
[
-Infinity,
4
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
},
"server" : "dal05mgo13.sl.dx:27017",
"filterSet" : false
}
This is pretty slow for me. I actually have another index, CategoryPath_1_Availability.Status_1, which I think is a better choice, but when I force MongoDB to use it with hint, I get an error:
{
"$err" : "Runner error: Overflow sort stage buffered data usage of 33581891 bytes exceeds internal limit of 33554432 bytes",
"code" : 17144
}
Now what I don't understand is, with the conditions specified in find, there are only 913 results selected, even without index, sort shouldn't have used up 32MB memory to sort the 913 records. Can anyone tell me what's happening?
I'm using MongoDB 2.6.10 x86_64
EDIT: my colleague just created a new index, Availability.Status_1_Popularity_-1_CategoryPath_1, which now wins over the other plans. I still don't understand why, though. Here is the detailed explain info:
{
"cursor" : "BtreeCursor Availability.Status_1_Popularity_-1_CategoryPath_1",
"isMultiKey" : true,
"n" : 913,
"nscannedObjects" : 1325,
"nscanned" : 1930,
"nscannedObjectsAllPlans" : 7729,
"nscannedAllPlans" : 8334,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 64,
"nChunkSkips" : 0,
"millis" : 45,
"indexBounds" : {
"Availability.Status" : [
[
-Infinity,
4
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"CategoryPath" : [
[
"399-305-352",
"399-305-353"
],
[
/^399-305-352(-\d+)*$/,
/^399-305-352(-\d+)*$/
]
]
},
"allPlans" : [
{
"cursor" : "BtreeCursor Availability.Status_1_Popularity_-1_CategoryPath_1",
"isMultiKey" : true,
"n" : 913,
"nscannedObjects" : 1325,
"nscanned" : 1930,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"Availability.Status" : [
[
-Infinity,
4
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"CategoryPath" : [
[
"399-305-352",
"399-305-353"
],
[
/^399-305-352(-\d+)*$/,
/^399-305-352(-\d+)*$/
]
]
}
},
{
"cursor" : "BtreeCursor Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 306,
"nscanned" : 306,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"Availability.Status" : [
[
-Infinity,
4
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"clauses" : [
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 305,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
852000000,
853000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
972000000,
973000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
-Infinity,
331000000
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 0,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : true,
"nChunkSkips" : 0
},
{
"clauses" : [
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 7,
"nscannedObjects" : 7,
"nscanned" : 7,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
972000000,
973000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_1_Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 297,
"nscannedObjects" : 297,
"nscanned" : 297,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
852000000,
853000000
]
],
"Availability.Status" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
-Infinity,
331000000
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 0,
"nscannedObjects" : 304,
"nscanned" : 304,
"scanAndOrder" : true,
"nChunkSkips" : 0
},
{
"clauses" : [
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 305,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
852000000,
853000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_1_Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
972000000,
973000000
]
],
"Availability.Status" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
-Infinity,
331000000
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 0,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : true,
"nChunkSkips" : 0
},
{
"clauses" : [
{
"cursor" : "BtreeCursor _id_1_Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 305,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
852000000,
853000000
]
],
"Availability.Status" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor _id_1_Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
972000000,
973000000
]
],
"Availability.Status" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
-Infinity,
331000000
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 0,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : true,
"nChunkSkips" : 0
},
{
"clauses" : [
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 305,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
852000000,
853000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
972000000,
973000000
]
]
}
},
{
"cursor" : "BtreeCursor _id_1_Availability.Status_1_Popularity_-1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"_id" : [
[
-Infinity,
331000000
]
],
"Availability.Status" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"Popularity" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 0,
"nscannedObjects" : 305,
"nscanned" : 305,
"scanAndOrder" : true,
"nChunkSkips" : 0
}
],
"server" : "dal05mgo12.sl.dx:27017",
"filterSet" : false,
"stats" : {
"type" : "FETCH",
"works" : 1931,
"yields" : 64,
"unyields" : 64,
"invalidates" : 0,
"advanced" : 913,
"needTime" : 1017,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 913,
"children" : [
{
"type" : "IXSCAN",
"works" : 1931,
"yields" : 64,
"unyields" : 64,
"invalidates" : 0,
"advanced" : 1325,
"needTime" : 605,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ Availability.Status: 1.0, Popularity: -1.0, CategoryPath: 1.0 }",
"isMultiKey" : 1,
"boundsVerbose" : "field #0['Availability.Status']: [-inf.0, 4.0), field #1['Popularity']: [MaxKey, MinKey], field #2['CategoryPath']: [\"399-305-352\", \"399-305-353\"), [/^399-305-352(-\\d+)*$/, /^399-305-352(-\\d+)*$/]",
"yieldMovedCursor" : 0,
"dupsTested" : 1325,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 1930,
"children" : [ ]
}
]
}
}
That's because the order of the fields in the index matters.
CategoryPath_1_Availability.Status_1 translates to the following indexing order: index by CategoryPath, then by Availability.Status.
But you want the index to start with Availability.Status, and mongo checks to see if there is any index that starts with Availability.Status (ascending), and there isn't (before your friend created the new index). Order matters :)
As a general rule: let's say you have the following fields in a collection: a, b and c. Let's set an index on a and b (a ascending, b ascending). You will be able to use this index in a sort only if the sort starts with a (ascending), or a (ascending) and b (ascending). Hope you understand :)

Mongodb Compound index and sorting

I have this collection :
db.place.find() :
{
_id : "p1",
alterNames : ["abcd","abcD"],
population : 122
}
{
_id : "p2",
alterNames : ["qsdf","qsDF"],
population : 100
}
I want to find documents having alterNames starting with "ab" and sort them by population.
I created this index : {alterNames : 1, population : -1}
My query :
db.place.find({alterNames : /^ab/}).sort({population : -1}).limit(10).explain()
I was expecting to see "n" = "nScannedObjects" = 10
What I got :
"n" = 10
"nScannedObjects" = 4765
Did I miss a thing?
Edit :
Here is the full explain :
db.place.find({alterNames : /^pari/ }).sort({population : -1}).limit(10).explain()
"clauses" : [
{
"cursor" : "BtreeCursor alterNames_1_population_-1",
"isMultiKey" : true,
"n" : 10,
"nscannedObjects" : 4765,
"nscanned" : 4883,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"alterNames" : [
[
"pari",
"parj"
],
[
/^pari/,
/^pari/
]
],
"population" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor ",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"alterNames" : [
[
"pari",
"parj"
],
[
/^pari/,
/^pari/
]
],
"population" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 10,
"nscannedObjects" : 4765,
"nscanned" : 4883,
"nscannedObjectsAllPlans" : 4765,
"nscannedAllPlans" : 4883,
"scanAndOrder" : false,
"nYields" : 890,
"nChunkSkips" : 0,
"millis" : 396,
"server" : "localhost:27017",
"filterSet" : false
Your notation is confusing. I'm assuming your collection consists of documents that look like the two documents in the places array.
> db.test.find()
{ "_id" : "p1", "alterNames" : [ "abcd", "abcD" ], "population" : 122 }
{ "_id" : "p2", "alterNames" : [ "qsdf", "qsDF" ], "population" : 100 }
For a left-anchored regex like /^ab/, MongoDB converts the query to one that's actually a range query and can efficiently use the index
{ "alterNames" : /^ab/ } => { "alterNames" : { "$gte" : "ab", "$lt" : "ac" } }
Each value that matches the range, for example "abcd", has an index of population values below it for documents with an alterNames (multikey) value of "abcd". To return the matching documents in population-order, MongoDB has to externally sort the documents returned from each matching bucket. I believe that's the source of your higher nscannedObjects. If you check the explain (which would have been nice to include in its entirety), you should find scanAndOrder : true.

Why is this $elemMatch query not using my index?

My query:
{
"unique_contact_method.enrichments": {
"$not": {
"$elemMatch": {
"created_by.name": "fullcontact"
}
}
}
}
My Index:
{
v: 1,
name: "unique_contact_method.enrichments.created_by.name_1",
key: {
"unique_contact_method.enrichments.created_by.name": 1
},
ns: "app27434806.unique_contact_methods",
background: true,
safe: true
}
The .explain() result:
Why no index?
The use of the $not operator here is what makes index usage impossible. There is one statement in the documentation that "implies" this, if not completely clearly:
"Remember that the $not operator only affects other operators and cannot check fields and documents independently. So, use the $not operator for logical disjunctions and the $ne operator to test the contents of fields directly."
The essential phrase there is "cannot check fields", which means it does not actually "test" the value of the field as can be done with an index. A simple document explains this the best:
{
"_id" : ObjectId("53f3e414deee3a78e47e57e2"),
"created" : [ { "name" : "Bill" }, { "name" : "Ted" } ]
}
Where of course an index is created on "created.name".
Now consider the following query and explain output:
db.doctest.find({ "created": { "$elemMatch": { "name": "Bill" } } }).explain()
{
"cursor" : "BtreeCursor created.name_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"created.name" : [
[
"Bill",
"Bill"
]
]
},
"server" : "ubuntu:27017",
"filterSet" : false
}
That simply selects the index and shows the index bounds as expected.
Now look at this with $not, and I'm going to "force" the index with .hint():
db.doctest.find({ "created": { "$not": { "$elemMatch": { "name": "Bill" } } } }).hint({ "created.name": 1 }).explain()
{
"cursor" : "BtreeCursor created.name_1",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 1,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"created.name" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "ubuntu:27017",
"filterSet" : false
}
The important part to look at here is "indexBounds". This explains why without the hint the index would not be used, as simply put there are no "bounds" to select by. The $not operation basically says:
"Look at every value tested by the condition and if it is true then consider it false or essentially the reverse"
The end evaluation here is that "Ted" is not "Bill" therefore the condition is true, but there is no way to "look for that" using an index.
So the consideration here is how do you do the same thing and use an index? The passage from the documentation tells you that in order to consider the "field" you need to use the $ne operator instead:
db.doctest.find({ "created": { "$elemMatch": { "name": { "$ne": "Bill" } } } }).explain()
{
"cursor" : "BtreeCursor created.name_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"created.name" : [
[
{
"$minElement" : 1
},
"Bill"
],
[
"Bill",
{
"$maxElement" : 1
}
]
]
},
"server" : "ubuntu:27017",
"filterSet" : false
}
Now the "indexBounds" shows you that the index is used to essentially "filter out" the values that were supplied. So the index is used to pull any other value than "Bill".
The conclusion here is that $not has its logical uses, but in many cases what you actually want is $ne instead. Where $not must be applied, take into consideration that an index for the field values will not be used to make the comparison.
Occasionally I find that an index is used automatically in a query even though the $not operator is involved. That reminded me of
this question, which also confused me for a long time. I experimented with this new clue and found something different, and I think I have finally found the answer. Everyone is welcome to comment here if they find anything else.
Run on mongo shell, V2.6.4
Initialize data as below:
> db.a.drop();
false
> db.a.insert({_id:1, a:[1,2,3], b:[{x:1, y:2}, {x:4, y:4}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.insert({_id:2, a:[4,2,3], b:[{x:1, y:2}, {x:4, y:4}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.ensureIndex({a:1}, {name:"a"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
> db.a.ensureIndex({"b.x":1}, {name:"bx"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 2,
"numIndexesAfter" : 3,
"ok" : 1
}
> db.a.ensureIndex({c:1}, {name:"c"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 3,
"numIndexesAfter" : 4,
"ok" : 1
}
> db.a.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"a" : 1
},
"name" : "a",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"b.x" : 1
},
"name" : "bx",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"c" : 1
},
"name" : "c",
"ns" : "test.a"
}
]
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
This block simply shows that the index is used automatically even though $not is part of the query.
> db.a.find({c:{$not:{$gte:1}}}).explain();
{
"cursor" : "BtreeCursor c",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"c" : [
[
{
"$minElement" : 1
},
1
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
This is the style that the original question mentioned. Index has been used automatically.
> db.a.find({b:{$elemMatch:{x:{$gte:1}}}}).explain();
{
"cursor" : "BtreeCursor bx", // attention on this line
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 9,
"indexBounds" : {
"b.x" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
The index is not used when the $not operator precedes $elemMatch. This is the core of the question.
> db.a.find({b:{$not:{$elemMatch:{x:{$gte:1}}}}}).explain();
{
"cursor" : "BasicCursor", // attention on this line
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"server" : "Duke-PC:27017",
"filterSet" : false
}
This block: an attempt to explain the mechanics of an index on an array field.
There are only two documents in total, but nscanned is 6. This tells us something about how the index is structured for an array type: there is an index node for every element of the array, not for the array itself. I imagine the index structure on field a like this:
BTree: Node(value:1, entry:[entry({_id:1})]), Node(value:2, entry:[entry({_id:1}), entry({_id:2})]), ...
Of course, this is only my imagination for explanation. :)
> db.a.find({a:{$gte:1}}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 6, // attention on this line
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 6,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
When the $not operator is used, the relevant index is adopted automatically. The "indexBounds" field tells us how $not handles the query.
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 1, // attention on this field
"nscanned" : 2, // attention on this field
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : { // attention on this field
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
Insert a new document with same field name a but not array.
> db.a.insert({a:1});
WriteResult({ "nInserted" : 1 })
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
{ "_id" : ObjectId("541e4fcbb65042180c128280"), "a" : 1 }
Please read this block comparing with just above content.
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true, // This tells engine there are repeated array elements on index.
"n" : 1,
"nscannedObjects" : 2, // The third document should only access the index to fetch data
// since it has enough information.
// But here the engine still reads from the collection. My understanding is that the engine
// cannot distinguish whether this index field is an array element or not,
// so it has to access the collection to find more information.
"nscanned" : 3,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 25,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
Conclusion:
$elemMatch is very special:
$elemMatch explicitly states that the field "b" is an array.
According to the definition of this operator, true can be returned as soon as any matching element is found. But false can be returned only after all elements of the array have been scanned without finding a matching one.
But the index structure on an array (think of my imagined layout above) cannot support this kind of operation, because the engine cannot determine, from the index alone, which index nodes belong to a particular array. This is the most important point in explaining this question.
Other operators, such as $gte, $lt, ..., do not have this limitation in their query definition, because a single match is enough to decide whether the condition is met, and that can be satisfied by the index directly.
Finally, there is a way to solve the original question, but not perfectly because the whole element must be provided.
Index on the array field, not the element.
> db.a.ensureIndex({b:1});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 4,
"numIndexesAfter" : 5,
"ok" : 1
}
> db.a.find({b:{$ne:{x:2, y:3}}}).explain();
{
"cursor" : "BtreeCursor b_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 33,
"indexBounds" : {
"b" : [
[
{
"$minElement" : 1
},
{
"x" : 2,
"y" : 3
}
],
[
{
"x" : 2,
"y" : 3
},
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}

why is mongodb hitting this index

Given that i have an index in my collection asd
> db.system.indexes.find().pretty()
{ "v" : 1, "key" : { "_id" : 1 }, "ns" : "asd.test", "name" : "_id_" },
{
"v" : 1,
"key" : {
"a" : 1,
"b" : 1,
"c" : 1
},
"ns" : "asd.test",
"name" : "a_1_b_1_c_1"
}
As far as I know, in theory the order of the parameters queried is important in order to hit an index...
That is why I'm wondering how and why I'm actually hitting the index with this query
> db.asd.find({c:{$gt: 5000},a:{$gt:5000}}).explain()
{
"cursor" : "BtreeCursor a_1_b_1_c_1",
"isMultiKey" : false,
"n" : 90183,
"nscannedObjects" : 90183,
"nscanned" : 94885,
"nscannedObjectsAllPlans" : 90288,
"nscannedAllPlans" : 94990,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 272,
"indexBounds" : {
"a" : [
[
5000,
1.7976931348623157e+308
]
],
"b" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"c" : [
[
5000,
1.7976931348623157e+308
]
]
}
}
Order in which you pass fields in your query does not affect index selection process. If it did, it'd be a very fragile system.
Order of fields in the index definition, on the other hand, is very important. Maybe you confuse these two cases.

MongoDB indexOnly false

I have created an index for the category field, then execute a find for {category: 'Example'} with the field selection like {_id: 0, category: 1}. Running explain on this query shows indexOnly to be false. It's really slow. What am I missing here?
EDIT
Explain:
{
"cursor" : "BtreeCursor title",
"isMultiKey" : false,
"n" : 2642,
"nscannedObjects" : 2642,
"nscanned" : 2642,
"nscannedObjectsAllPlans" : 2642,
"nscannedAllPlans" : 2642,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"indexBounds" : {
"category" : [
[
"TV",
"TV"
]
],
"title" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "DeathDesk:27017"
}
EDIT 2
getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "test.media",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"category" : 1
},
"ns" : "test.media",
"name" : "category",
"dropDups" : false,
"background" : false
}
]