Yet another mongodb indexOnly - mongodb

I've spent the better part of this morning re-reading MongoDB docs, blogs and other answers on the stack here, and I'm still missing something which I hope is painfully obvious to others.
EDIT: I've changed the schema of the document to not have sub-documents (metadata.*) and am still having problems with the query not being covered by the index. I've dropped the existing indexes and re-indexed with new ones:
So now I've got:
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.daily"
},
{
"v" : 1,
"key" : {
"host" : 1,
"cid" : 1,
"title" : 1,
"urls" : 1,
"global" : -1,
"current" : -1,
"total" : -1
},
"name" : "byHostTotals",
"ns" : "test.daily"
},
{
"v" : 1,
"key" : {
"host" : 1,
"cid" : 1,
"title" : 1,
"urls" : 1,
"total" : -1,
"global" : -1,
"current" : -1
},
"name" : "byHostCurrents",
"ns" : "test.daily"
}
]
And given this query:
db.daily.find({'host': 'example.com'}, {'_id': 0, 'cid': 1, 'title': 1, 'current': 1}).hint("byHostCurrents").sort({'current': -1}).limit(10).explain()
is not showing up as being covered by the index named "byHostCurrents":
{
"clauses" : [
{
"cursor" : "BtreeCursor byHostCurrents",
"isMultiKey" : true,
"n" : 10,
"nscannedObjects" : 1090,
"nscanned" : 1111,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"host" : [
[
"example.com",
"example.com"
]
],
"cid" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"title" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"total" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"global" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"current" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor ",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"host" : [
[
"usatoday.com",
"usatoday.com"
]
],
"cid" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"title" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"total" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"global" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"current" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 10,
"nscannedObjects" : 1090,
"nscanned" : 1111,
"nscannedObjectsAllPlans" : 1090,
"nscannedAllPlans" : 1111,
"scanAndOrder" : false,
"nYields" : 8,
"nChunkSkips" : 0,
"millis" : 9,
"server" : "ubuntu:27017",
"filterSet" : false
}
MongoDB version is: 2.6.3.

So here's the skinny...
When the Query:
db.daily.find({'host': 'example.com'}, {'_id': 0, 'cid': 1, 'title': 1, 'current': 1}).hint("byHostCurrents").sort({'current': -1}).limit(10);
If I didn't have the .sort() in there it would use the index; however, since I'm using a sort, the order of the indexed fields now becomes important.
For the above query to use an index, I'd need to make a new index like this:
db.daily.ensureIndex({'current': -1, 'host': 1, 'cid': 1, 'title': 1});
Now with this index in place, we'll get indexOnly: true since we're looking down the total currents in reverse order (descending) and we'll only have to scan as many entries in the index as is needed to meet the 'host' = 'example.com' and limit requirements.
So in total I had to have 4 additional indexes to support my queries:
one to find the content ids with the most current people on it (the above index)
one to find the content ids that have had the most people on it (like the above but total: -1 rather than current: -1)
one to find content by host sorted by current (see index below) and,
one to find content by host sorted by total (like the one below)
db.daily.ensureIndex({'host': 1, 'current': -1, 'cid': 1, 'title': 1});
So the MongoDB docs are not very clear in their explanation of these things, especially when it comes to the sort issue. What they fail to mention is that if you are going to use a sort, the sort has to include the index fields that come after your equality-query fields (in index order), or else include all of the prefix fields.
For example given my original index from my question:
db.daily.ensureIndex({"host" : 1, "cid" : 1, "title" : 1, "urls" : 1, "global" : -1, "current" : -1, "total" : -1});
If I wanted a query to be covered by the index then I'd have to change from this:
db.daily.find({'host': 'example.com'}, {'_id': 0, 'cid': 1, 'title': 1, 'current': 1}).hint("byHostCurrents").sort({'current': -1}).limit(10);
To This:
db.daily.find({'host': 'example.com'}, {'_id': 0, 'cid': 1, 'title': 1, 'current': 1}).hint("byHostCurrents").sort({'cid':1, 'title':1, 'urls': 1, 'global': 1, 'current': -1}).limit(10);
which is not what I wanted.
Hope this helps someone in the future.

Related

MongoDB picking wrong index

The following document is stored in a collection:
"ldr": {
"d": NumberInt(318),
"w": NumberInt(46),
"m": NumberInt(10),
"pts": [
{
"lid": ObjectId("47cc67093475061e3d95369d"),
"dPts": NumberLong(110),
"wPts": NumberLong(110),
"mPts": NumberLong(220),
"aPts": NumberLong(3340)
},
{
"lid": ObjectId("56316279be4f0eda62ebfee0"),
"dPts": NumberInt(0),
"wPts": NumberInt(0),
"mPts": NumberInt(0),
"aPts": NumberInt(0)
}
]
}
I have 4 indexes on a collection:
ldr.pts.lid_1_ldr.d_1_ldr.pts.dPts_-1
ldr.pts.lid_1_ldr.w_1_ldr.pts.wPts_-1
ldr.pts.lid_1_ldr.m_1_ldr.pts.mPts_-1
ldr.pts.lid_1_ldr.pts.aPts_-1
I use the following query:
db.my_collection.find({"ldr.pts.lid":ObjectId("47cc67093475061e3d95369d"), "ldr.w": NumberInt(46)},{"ldr":1}).sort({"ldr.pts.wPts":-1}).explain()
Note: I have run this query with the {ldr:1} left out with the same result.
I would expect the query above to use the following index:
ldr.pts.lid_1_ldr.w_1_ldr.pts.wPts_-1
However, this is the result of the explain:
{
"cursor" : "BtreeCursor ldr.pts.lid_1_ldr.d_1_ldr.pts.dPts_-1",
"isMultiKey" : true,
"n" : 3,
"nscannedObjects" : 4,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 16,
"nscannedAllPlans" : 16,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"ldr.pts.lid" : [
[
ObjectId("47cc67093475061e3d95369d"),
ObjectId("47cc67093475061e3d95369d")
]
],
"ldr.d" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"ldr.pts.dPts" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
},
"server" : "Beast-PC:27017",
"filterSet" : false
}
As you can see the first index is being picked.
I've tried using a hint and supplying the correct index but that still results in indexOnly being false and in scanAndOrder being true.
Any ideas?
Sorting on a field within an array isn't likely to produce what you're expecting as your descending sort on ldr.pts.wPts will sort based on the max of all the wPts values from each document's pts array, rather than just the wPts value from the matching pts array element.
That's at the root of why your query can't use an index for the sorting.

why is mongodb hitting this index

Given that I have an index in my collection asd
> db.system.indexes.find().pretty()
{ "v" : 1, "key" : { "_id" : 1 }, "ns" : "asd.test", "name" : "_id_" },
{
"v" : 1,
"key" : {
"a" : 1,
"b" : 1,
"c" : 1
},
"ns" : "asd.test",
"name" : "a_1_b_1_c_1"
}
As far as I know, in theory the order of the parameters queried is important in order to hit an index...
That is why I'm wondering how and why I'm actually hitting the index with this query
> db.asd.find({c:{$gt: 5000},a:{$gt:5000}}).explain()
{
"cursor" : "BtreeCursor a_1_b_1_c_1",
"isMultiKey" : false,
"n" : 90183,
"nscannedObjects" : 90183,
"nscanned" : 94885,
"nscannedObjectsAllPlans" : 90288,
"nscannedAllPlans" : 94990,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 272,
"indexBounds" : {
"a" : [
[
5000,
1.7976931348623157e+308
]
],
"b" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"c" : [
[
5000,
1.7976931348623157e+308
]
]
}
}
Order in which you pass fields in your query does not affect index selection process. If it did, it'd be a very fragile system.
Order of fields in the index definition, on the other hand, is very important. Maybe you confuse these two cases.

Why does MongoDB's "$and" operator sometimes use a different plan vs. specifying the criteria inline?

It seems to me that the following two queries should have exactly the same "explain" output:
Query 1:
{
$and: [
{ $or: [
{ Foo: "123" },
{ Bar: "456" }
] },
{ Baz: { $in: ["abc", "def"] } }
]
}
Query 2:
{
$or: [
{ Foo: "123" },
{ Bar: "456" }
],
Baz: { $in: ["abc", "def"] }
}
Note that I have indexes on { Foo: -1, Baz: -1 } and { Bar: -1, Baz: -1 }, so this is optimized for the $or operator. And in fact, in the version for Query 2, in the explain output, I see two clauses, both with appropriate index bounds, one for (Foo, Baz) and one for (Bar, Baz). MongoDB is doing exactly what it's supposed to.
But in the first version (Query 1), there are no clauses anymore. It gives me a BasicCursor with no index bounds specified.
What's the difference between these two queries? Why does Mongo seem to be able to optimize #2 but not #1?
Right now I'm testing these queries using MongoVue, so I have control over the JSON, but ultimately I'm going to be using the C# driver, and I'm pretty sure it will always emit the syntax in #1 and not #2, so it's important to find out what's going on...
This seems to be a bug of some kind in mongodb. What version are you using?
According to that bug report the issue is resolved in 2.5.3.
Until we move to the later versions (I am at 2.4.6) we will have to be careful with the $and operator.
I am going to try it in 2.6 as well.
UPDATE:
Indeed, it is fixed in 2.6.3, which I am now running.
> db.test.find()
{ "_id" : 1, "Fields" : { "K1" : 123, "K2" : 456 } }
{ "_id" : 2, "Fields" : { "K1" : 456, "K2" : 123 } }
> db.test.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.test"
},
{
"v" : 1,
"key" : {
"Fields.K1" : 1
},
"name" : "Fields.K1_1",
"ns" : "test.test"
},
{
"v" : 1,
"key" : {
"Fields.K2" : 1
},
"name" : "Fields.K2_1",
"ns" : "test.test"
}
]
> db.test.find({"$and" : [{ "Fields.K1" : 123, "Fields.K2" : 456}]}).explain()
{
"cursor" : "BtreeCursor Fields.K1_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"Fields.K1" : [
[
123,
123
]
]
},
"server" : "benihime:27017",
"filterSet" : false
}
> db.test.find({ "Fields.K1" : 123, "Fields.K2" : 456}).explain()
{
"cursor" : "BtreeCursor Fields.K1_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"Fields.K1" : [
[
123,
123
]
]
},
"server" : "benihime:27017",
"filterSet" : false
}

MongoDB indexOnly false

I have created an index for the category field, then execute a find for {category: 'Example'} with the field selection like {_id: 0, category: 1}. Running explain on this query shows indexOnly to be false. It's really slow. What am I missing here?
EDIT
Explain:
{
"cursor" : "BtreeCursor title",
"isMultiKey" : false,
"n" : 2642,
"nscannedObjects" : 2642,
"nscanned" : 2642,
"nscannedObjectsAllPlans" : 2642,
"nscannedAllPlans" : 2642,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"indexBounds" : {
"category" : [
[
"TV",
"TV"
]
],
"title" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "DeathDesk:27017"
}
EDIT 2
getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "test.media",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"category" : 1
},
"ns" : "test.media",
"name" : "category",
"dropDups" : false,
"background" : false
}
]

Mongo Index not being used

I created an index around several items for a particular query I am doing:
{
"v" : 1,
"key" : {
"MODIFIED" : -1,
"state" : 1,
"fail" : 1,
"generated" : 1
},
"ns" : "foo.bar",
"name" : "MODIFIED_-1_state_1_fail_1_generated"
}
However, when I execute my query, it doesn't appear to be using my index. Could you please provide some insight into what I'm doing wrong?
Thank you!
db.foo.find( {
"$or": [
{
"MODIFIED": {
"$gt": {
"sec": 1321419600,
"usec": 0
}
}
},
{
"$or": [
{"state": "ca"},
{"state": "ok"}
]
}
],
"$and": [
{"fail": {"$ne": 1}},
{"generated": {"$exists": false}}
]
}).explain();
{
"cursor" : "BasicCursor",
"nscanned" : 464215,
"nscannedObjects" : 464215,
"n" : 0,
"millis" : 7549,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
There's a good reason your index cannot be used for your query, and I also think there are some issues with the query itself. The reason it's not hitting an index is the nested $or operator, but I think your actual problem is a lack of familiarity with all the operators available to you in MongoDB:
First of all, your nested $or to check if the state is either "ca" or "ok" is not necessary and (since it's the main reason you're not hitting your index) can be replaced with state:{$in:["ca", "ok"]}, which does the exact same thing. Now your query is:
db.foo.find( {
"$or": [
{
"MODIFIED": {
"$gt": {
"sec": 1321419600,
"usec": 0
}
}
},
{
state:{$in:["ca", "ok"]}
}
],
"$and": [
{"fail": {"$ne": 1}},
{"generated": {"$exists": false}}
]
}).explain();
And it will hit your index. Your second issue is that a top-level $and clause is not necessary. Note that AND(OR(A, B), AND(C, D)) = AND(OR(A, B), C, D). This query does the same :
db.foo.find( {
"$or": [
{
"MODIFIED": {
"$gt": {
"sec": 1321419600,
"usec": 0
}
}
},
{
state:{$in:["ca", "ok"]}
}
],
"fail": {"$ne": 1},
"generated": {"$exists": false}
}).explain();
Which still hits the index :
{
"clauses" : [
{
"cursor" : "BtreeCursor MODIFIED_-1_state_1_fail_1_generated_1 multi",
"nscanned" : 0,
"nscannedObjects" : 0,
"n" : 0,
"millis" : 1,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"MODIFIED" : [
[
{
"$maxElement" : 1
},
{
"sec" : 1321419600,
"usec" : 0
}
]
],
"state" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"fail" : [
[
{
"$minElement" : 1
},
1
],
[
1,
{
"$maxElement" : 1
}
]
],
"generated" : [
[
null,
null
]
]
}
},
{
"cursor" : "BasicCursor",
"nscanned" : 0,
"nscannedObjects" : 0,
"n" : 0,
"millis" : 1,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
],
"nscanned" : 0,
"nscannedObjects" : 0,
"n" : 0,
"millis" : 1
}
Hope that helps! By the way it's slightly more conventional to start the first key in your compound index with order 1 and the second with -1. Note that the -1 is only used to determine the direction relative to the previous field.