Improve querying for field existence in MongoDB - mongodb

I'm in the process of evaluating MongoDB for our customers. Per the requirements, we need to associate a variable set of name-value pairs with some entity ent.
db.ent.insert({'a':5775, 'b':'b1'})
db.ent.insert({'c':'its a c', 'b':'b2'})
db.ent.insert({'a':7557, 'c':'its a c'})
After this I need to query ent intensively for the presence of fields:
db.ent.find({'a':{$exists:true}})
db.ent.find({'c':{$exists:false}})
Per MongoDB docs:
$exists is not very efficient even with an index, and esp. with {$exists:true} since it will effectively have to scan all indexed values.
Can the experts here suggest a more efficient way (even if it means shifting the paradigm) to deal quickly with varying name-value pairs?

You can redesign your schema like this:
{
pairs:[
{k: "a", v: 5775},
{k: "b", v: "b1"},
]
}
Then index the key:
db.ent.ensureIndex({"pairs.k" : 1})
After this you will be able to search by exact match:
db.ent.find({'pairs.k':"a"})
If you go with a sparse index and your current schema, as proposed by @WesFreeman, you will need to create an index on each key you want to search. That can affect write performance, or will not be acceptable if your keys are not static.

Simply redesign your schema such that it's an indexable query. Your use case is in fact analogous to the first example application given in MongoDB: The Definitive Guide.
If you want/need the convenience of result.a just store the keys somewhere indexable.
instead of the existing:
db.ent.insert({a:5775, b:'b1'})
do
db.ent.insert({a:5775, b:'b1', index: ['a', 'b']})
That's then an indexable query:
db.ent.find({index: "a"}).explain()
{
"cursor" : "BtreeCursor index_1",
"nscanned" : 1,
"nscannedObjects" : 1,
"n" : 1,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : true,
"indexOnly" : false,
"indexBounds" : {
"index" : [
[
"a",
"a"
]
]
}
}
or if you're ever likely to query also by value:
db.ent.insert({
a:5775,
b:'b1',
index: [
{name: 'a', value: 5775},
{name: 'b', value: 'b1'}
]
})
That's also an indexable query:
db.ent.find({"index.name": "a"}).explain()
{
"cursor" : "BtreeCursor index.name_",
"nscanned" : 1,
"nscannedObjects" : 1,
"n" : 1,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : true,
"indexOnly" : false,
"indexBounds" : {
"index.name" : [
[
"a",
"a"
]
]
}
}

I think a sparse index is the answer to this, although you'll need an index for each field. http://www.mongodb.org/display/DOCS/Indexes#Indexes-SparseIndexes
Sparse indexes should help with $exists:true queries.
Even still, if your field is not really sparse (meaning it's mostly set), it's not going to help you that much.
Update: I guess I'm wrong. It looks like there's still an open issue ( https://jira.mongodb.org/browse/SERVER-4187 ) where $exists doesn't use sparse indexes. However, you can do something like this with find and sort, which looks like it properly uses the sparse index:
db.ent.find({}).sort({a:1});
Here's a full demonstration of the difference, using your example values:
> db.ent.insert({'a':5775, 'b':'b1'})
> db.ent.insert({'c':'its a c', 'b':'b2'})
> db.ent.insert({'a':7557, 'c':'its a c'})
> db.ent.ensureIndex({a:1},{sparse:true});
Note that find({}).sort({a:1}) uses the index (BtreeCursor):
> db.ent.find({}).sort({a:1}).explain();
{
"cursor" : "BtreeCursor a_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
}
}
And find({a:{$exists:true}}) does a full scan:
> db.ent.find({a:{$exists:true}}).explain();
{
"cursor" : "BasicCursor",
"nscanned" : 3,
"nscannedObjects" : 3,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
Looks like you can also use .hint({a:1}) to force it to use the index.
> db.ent.find().hint({a:1}).explain();
{
"cursor" : "BtreeCursor a_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
}
}

How about setting the non-existent fields to null? Then you can query them with {field: {$ne: null}}.
db.ent.insert({'a':5775, 'b':'b1', 'c': null})
db.ent.insert({'a': null, 'b':'b2', 'c':'its a c'})
db.ent.insert({'a':7557, 'b': null, 'c':'its a c'})
db.ent.ensureIndex({"a" : 1})
db.ent.ensureIndex({"b" : 1})
db.ent.ensureIndex({"c" : 1})
db.ent.find({'a':{$ne: null}}).explain()
Here's the output:
{
"cursor" : "BtreeCursor a_1 multi",
"isMultiKey" : false,
"n" : 4,
"nscannedObjects" : 4,
"nscanned" : 5,
"nscannedObjectsAllPlans" : 4,
"nscannedAllPlans" : 5,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
null
],
[
null,
{
"$maxElement" : 1
}
]
]
},
"server" : "my-laptop"
}

Related

Query with $in and $nin doesn't use index

When matching an attribute against both $in and $nin, Mongo doesn't use the index correctly.
If only $in is used, then index takes advantage of that:
db.assets.find({
tags: {
$in: ['blah']
}
}).explain()
{
"cursor" : "BtreeCursor tags_1",
"isMultiKey" : true,
"n" : 6,
"nscannedObjects" : 6,
"nscanned" : 6,
"nscannedObjectsAllPlans" : 6,
"nscannedAllPlans" : 6,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"tags" : [
[ "blah", "blah" ]
]
}
}
However, if $nin is involved, instead of finding documents that match $in and then filtering out those that don't pass the $nin condition, it scans all documents.
db.assets.find({
tags: {
$in: ['blah'],
$nin: ['cat']
}
}).explain()
{
"cursor" : "BtreeCursor tags_1",
"isMultiKey" : true,
"n" : 75760,
"nscannedObjects" : 79974,
"nscanned" : 1197016,
"nscannedObjectsAllPlans" : 79974,
"nscannedAllPlans" : 1197130,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 9351,
"nChunkSkips" : 0,
"millis" : 2331,
"indexBounds" : {
"tags" : [
[ {"$minElement" : 1}, "cat" ],
[ "cat", {"$maxElement" : 1} ]
]
}
}
Is there a way to trick Mongo to do the right thing?

MongoDB - can not get a covered query

So I have an empty database 'tests' and a collection named 'test'.
First I ensured that my index was set correctly.
db.test.ensureIndex({t:1})
db.test.getIndices()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "tests.test"
},
{
"v" : 1,
"key" : {
"t" : 1
},
"name" : "t_1",
"ns" : "tests.test"
}
]
After that I inserted some test records.
db.test.insert({t:1234})
db.test.insert({t:5678})
When I query the DB with following command and let Mongo explain the results I get the following output:
db.test.find({t:1234},{_id:0}).explain()
{
"cursor" : "BtreeCursor t_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"t" : [
[
1234,
1234
]
]
},
"server" : "XXXXXX:27017",
"filterSet" : false
}
Can anyone please explain to me why indexOnly is false?
Thanks in advance.
To be a covered index query you need to only retrieve those fields that are in the index:
> db.test.find({ t: 1234 },{ _id: 0, t: 1}).explain()
{
"cursor" : "BtreeCursor t_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 0,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : true,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"t" : [
[
1234,
1234
]
]
},
"server" : "ubuntu:27017",
"filterSet" : false
}
Essentially this means that only the index is used in order to retrieve the data, without the need to go back to the actual document and retrieve further information. This can be as many fields as you need (within reason), but they do need to be included within the index and be the only fields that are returned.
Hmm the reason has not been clearly explained (confusing me actually) so here is my effort.
Essentially in order for MongoDB to know that said index covers the query it has to know what fields you want.
If you just say you don't want _id how can it know that * - _id = t without looking?
Here * represents all fields, like it does in SQL.
Answer is it cannot. That is why you need to provide the full field/select/projection/whatever word they use for it definition so that MongoDB can know that your return fits the index.

MongoDB: Compound geospatial & ascending index issues

I have a compound index consisting of a simple ascending index and a geospatial index:
{ v: 1, key: { PlayerSortMask: 1, RandomGeoIdentifier: "2dsphere" }, ns: "JellyDev.Players", name: "Sort Mask + Random Geo ID", min: 0, max: 1 }
Now I have the following 2 problems:
1.
When I try to use the prefix index (querying only on the 1st index), I get a basic cursor used, and not the index I've created:
Query used:
{ "PlayerSortMask" : 2 }
Explain returned:
{ "cursor" : "BasicCursor", "isMultiKey" : false, "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 1, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "allPlans" : [{ "cursor" : "BasicCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "indexBounds" : { } }], "server" : "widmore:10010" }
2.
Not sure if this is a problem or not, but when I query using both fields, using $eq & $near, I get the following explain:
{ "cursor" : "S2NearCursor", "isMultiKey" : true, "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 6, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "nscanned" : NumberLong(6), "matchTested" : NumberLong(1), "geoMatchTested" : NumberLong(1), "numShells" : NumberLong(3), "keyGeoSkip" : NumberLong(5), "returnSkip" : NumberLong(0), "btreeDups" : NumberLong(0), "inAnnulusTested" : NumberLong(1), "allPlans" : [{ "cursor" : "S2NearCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "indexBounds" : { } }], "server" : "widmore:10010" }
And this is the query used to fetch the result:
{ "PlayerSortMask" : 2, "RandomGeoIdentifier" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [0.88434365572610107, 0.90583264916475525] } } } }
Now it says it uses the S2NearCursor, but it's obviously not the index I've created - as it has the name Sort Mask + Random Geo ID.
Any help would be greatly appreciated.
For problem 1 there's a known issue in MongoDB with compound geo indexes.
https://jira.mongodb.org/browse/SERVER-9257
The problem is fixed in 2.5.4 which is a beta release.
You can work around this for now by creating an additional simple index on PlayerSortMask.
For problem 2, S2NearCursor means an index is being used. I think the explain "loses" the name and this is a known issue, but I can't remember the bug number.

Multikey sparse index on empty array in MongoDB

What behavior is expected in this case? Will mongo treat empty array as null/undefined and include it into sparse index or if array is empty, document won't be indexed?
Empty arrays are not treated the same as null in MongoDB. As you can see in the following code from the Mongo shell, a sparse index finds the empty array as an empty array as opposed to as null.
> c = db.docs
test.docs
> c.insert({a : []})
> c.ensureIndex({a : 1}, {sparse: true})
> c.find({a : []}).count()
1
> c.find({a : null}).count()
0
As with most interesting questions about MongoDB, using explain will provide a wealth of info. For example, you can see that the test actually uses the index and that the boundaries of the index are null and [], demonstrating their unique treatment.
> c.find({a : null}).explain()
{
"cursor" : "BtreeCursor a_1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
null,
null
]
]
},
"server" : "new-host.home:27017"
}
> c.find({a : []}).explain()
{
"cursor" : "BtreeCursor a_1 multi",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
null,
null
],
[
[ ],
[ ]
]
]
},
"server" : "new-host.home:27017"
}

MongoDB: why doesn't sorting by multiple keys use an index?

Question:
I have a very large collection that is indexed by field ts: (timestamp)
> db.events.ensureIndex({'ts': -1})
I want to get last 5 entries. What surprises me is that the query doesn't use the index and is thus very slow:
> db.events.find().sort({'ts': -1, '_id': -1}).limit(5)
However, sorting just by ts or the other field uses index as it should:
> db.events.find().sort({'ts': -1}).limit(5)
> db.events.find().sort({'_id': -1}).limit(5)
Is this a bug in MongoDB, is this indeed a documented feature or am I doing something wrong?
Additional info:
> db.events.find().sort({'ts': -1, '_id': -1}).limit(5).explain()
{
"cursor" : "BasicCursor",
"nscanned" : 795609,
"nscannedObjects" : 795609,
"n" : 5,
"scanAndOrder" : true,
"millis" : 22866,
"nYields" : 73,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
> db.events.find().sort({'ts': -1}).limit(5).explain()
{
"cursor" : "BtreeCursor ts_-1",
"nscanned" : 5,
"nscannedObjects" : 5,
"n" : 5,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"ts" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}
It's worth having a read of the Indexing Strategies section of the Indexing Advice & FAQ wiki page.
There are a few considerations that you may be missing:
MongoDB only uses one index per query
the sort column used must be the last column in the index
So, for your example you should add a compound index on ts and _id:
db.events.ensureIndex({'ts':-1, '_id':-1});
.. and confirm with explain() that the sort is now using the expected index:
> db.events.find().sort({'ts': -1, '_id':-1}).limit(5).explain()
{
"cursor" : "BtreeCursor ts_-1__id_-1",
"nscanned" : 5,
"nscannedObjects" : 5,
"n" : 5,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"ts" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
],
"_id" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}