Index on multi-field object in mongoDB - mongodb

I have a collection generated by a map-reduce call, and the key is composed of two fields, so my _id is set as follows:
{
"_id": {
"ts": ISODate("2014-04-22T13: 46: 00.0Z"),
"own": "LP2"
}
//... my fields
}
An index is automatically constructed on "_id" :
{
"v": NumberInt(1),
"key": {
"_id": NumberInt(1)
},
"ns": "DB.COLLECTION_NAME",
"name": "_id_"
}
But when I do my queries I often want to sort on "ts" field.
I know that I can construct an index on it, but I was wondering if the index on "_id" was already supporting it, like a compound index. That could save time on insertion.
Thanks in advance for your answers.

I don't believe it will. The way to test this is to insert some test data and try a few test queries. When I ran db.collectionname.find({}).sort({"_id.ts":-1}).explain() it was clear that it was not using an index for the sort:
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 13,
"nscannedObjects" : 13,
"nscanned" : 13,
"nscannedObjectsAllPlans" : 13,
"nscannedAllPlans" : 13,
"scanAndOrder" : true,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"server" : "local:27017",
"filterSet" : false
}
Note that scanAndOrder is true.
However, after adding a new index directly on _id.ts, I saw better results:
db.collectionname.ensureIndex({"_id.ts":1})
db.collectionname.find({}).sort({"_id.ts":-1}).explain()
{
"cursor" : "BtreeCursor _id.ts_1 reverse",
"isMultiKey" : false,
"n" : 13,
"nscannedObjects" : 13,
"nscanned" : 13,
"nscannedObjectsAllPlans" : 13,
"nscannedAllPlans" : 13,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"_id.ts" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
},
"server" : "local:27017",
"filterSet" : false
}
Note that scanAndOrder is now false.

Related

Query with $in and $nin doesn't use index

When matching an attribute against both $in and $nin, Mongo doesn't use the index correctly.
If only $in is used, then the query takes advantage of the index:
db.assets.find({
tags: {
$in: ['blah']
}
}).explain()
{
"cursor" : "BtreeCursor tags_1",
"isMultiKey" : true,
"n" : 6,
"nscannedObjects" : 6,
"nscanned" : 6,
"nscannedObjectsAllPlans" : 6,
"nscannedAllPlans" : 6,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"tags" : [
[ "blah", "blah" ]
]
}
}
However, if $nin is involved, instead of finding documents that match $in and then filtering out those that don't pass the $nin condition, it scans all documents.
db.assets.find({
tags: {
$in: ['blah'],
$nin: ['cat']
}
}).explain()
{
"cursor" : "BtreeCursor tags_1",
"isMultiKey" : true,
"n" : 75760,
"nscannedObjects" : 79974,
"nscanned" : 1197016,
"nscannedObjectsAllPlans" : 79974,
"nscannedAllPlans" : 1197130,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 9351,
"nChunkSkips" : 0,
"millis" : 2331,
"indexBounds" : {
"tags" : [
[ {"$minElement" : 1}, "cat" ],
[ "cat", {"$maxElement" : 1} ]
]
}
}
Is there a way to trick Mongo into doing the right thing?

MongoDB index intersection

Hey I want to evaluate the performance of index intersection but I'm not able to get an intersection between two indices.
I've inserted some dummy records into my DB, following this manual page:
http://docs.mongodb.org/manual/core/index-intersection/
Insert code:
for(var i=0;i<1000;i++){
for(var j=0;j<100;j++){
db.t.insert({item:"abc"+i,qty:j})
}
}
Indices:
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "db.t"
},
{
"v" : 1,
"key" : {
"qty" : 1
},
"name" : "qty_1",
"ns" : "db.t"
},
{
"v" : 1,
"key" : {
"item" : 1
},
"name" : "item_1",
"ns" : "db.t"
}
]
Query:
db.t.find({item:"abc123",qty:{$gt:15}}).explain()
Result of explain:
{
"cursor" : "BtreeCursor item_1",
"isMultiKey" : false,
"n" : 84,
"nscannedObjects" : 100,
"nscanned" : 100,
"nscannedObjectsAllPlans" : 201,
"nscannedAllPlans" : 305,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 2,
"nChunkSkips" : 0,
"millis" : 1,
"indexBounds" : {
"item" : [
[
"abc123",
"abc123"
]
]
},
"server" : "brews18:27017",
"filterSet" : false
}
My question is why mongo is only using item as an index and does not use an intersection.
Thanks in advance
Well, it actually does consider an intersection, even though it does not end up using one in this case. To really see what is happening you need to look at the "verbose" form of explain, by adding true:
db.t.find({item:"abc123",qty:{$gt:15}}).explain(true)
{
"cursor" : "BtreeCursor item_1",
"isMultiKey" : false,
"n" : 84,
"nscannedObjects" : 100,
"nscanned" : 100,
"nscannedObjectsAllPlans" : 201,
"nscannedAllPlans" : 304,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 2,
"nChunkSkips" : 0,
"millis" : 2,
"indexBounds" : {
"item" : [
[
"abc123",
"abc123"
]
]
},
"allPlans" : [
{
"cursor" : "BtreeCursor item_1",
"isMultiKey" : false,
"n" : 84,
"nscannedObjects" : 100,
"nscanned" : 100,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"item" : [
[
"abc123",
"abc123"
]
]
}
},
{
"cursor" : "BtreeCursor qty_1",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 101,
"nscanned" : 102,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"qty" : [
[
15,
Infinity
]
]
}
},
{
"cursor" : "Complex Plan",
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 102,
"nChunkSkips" : 0
}
],
Cut short, but the last part is what you are looking for. As explained in the manual, the appearance of "Complex Plan" means an intersection is being used.
{
"cursor" : "Complex Plan",
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 102,
"nChunkSkips" : 0
}
What is happening here is that while the intersection plan is being "looked at", the optimizer does not choose it as the most "optimal" plan in this case. In other words, the optimizer is saying that the plan using just the one selected index is the one that will complete in the most responsive fashion.
So while the "intersection" was considered, it was not the "best fit" and the single index was chosen.

MongoDB: Compound geospatial & ascending index issues

I have a compound index consisting of a simple ascending index and a geospatial index:
{ v: 1, key: { PlayerSortMask: 1, RandomGeoIdentifier: "2dsphere" }, ns: "JellyDev.Players", name: "Sort Mask + Random Geo ID", min: 0, max: 1 }
Now I have the following 2 problems:
1.
When I try to use the prefix index (querying only on the 1st index), I get a basic cursor used, and not the index I've created:
Query used:
{ "PlayerSortMask" : 2 }
Explain returned:
{ "cursor" : "BasicCursor", "isMultiKey" : false, "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 1, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "allPlans" : [{ "cursor" : "BasicCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "indexBounds" : { } }], "server" : "widmore:10010" }
2.
Not sure if this is a problem or not, but when I query using both fields, using $eq & $near, I get the following explain:
{ "cursor" : "S2NearCursor", "isMultiKey" : true, "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 6, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "nscanned" : NumberLong(6), "matchTested" : NumberLong(1), "geoMatchTested" : NumberLong(1), "numShells" : NumberLong(3), "keyGeoSkip" : NumberLong(5), "returnSkip" : NumberLong(0), "btreeDups" : NumberLong(0), "inAnnulusTested" : NumberLong(1), "allPlans" : [{ "cursor" : "S2NearCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "indexBounds" : { } }], "server" : "widmore:10010" }
And this is the query used to fetch the result:
{ "PlayerSortMask" : 2, "RandomGeoIdentifier" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [0.88434365572610107, 0.90583264916475525] } } } }
Now it says it uses the S2NearCursor, but it's obviously not the index I've created - as it has the name Sort Mask + Random Geo ID.
Any help would be greatly appreciated.
For problem 1 there's a known issue in MongoDB with compound geo indexes.
https://jira.mongodb.org/browse/SERVER-9257
The problem is fixed in 2.5.4 which is a beta release.
You can work around this for now by creating an additional simple index on PlayerSortMask.
For problem 2, S2NearCursor means an index is being used. I think the explain "loses" the name and this is a known issue, but I can't remember the bug number.

Mongodb indexing

I have a query
db.messages.find({'headers.Date':{'$gt': new Date(2001,3,1)}},{'headers.From':1, _id:0}).sort({'headers.From':1})
I have created an index on headers.From. Which part of the query will use this index: the find part or the sort part?
Explain output is
{
"cursor" : "BtreeCursor headers.From_1",
"isMultiKey" : false,
"n" : 83057,
"nscannedObjects" : 120477,
"nscanned" : 120477,
"nscannedObjectsAllPlans" : 120581,
"nscannedAllPlans" : 120581,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 250,
"indexBounds" : {
"headers.From" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "Andrews-iMac.local:27017"
}
Any help is appreciated !!!
The index is being used for the sort part, not for the query, as your query doesn't use the headers.From field and your sort does.

Improve querying fields exist in MongoDB

I'm in the process of evaluating MongoDB for our customers. Per the requirements, we need to associate a variable set of name-value pairs with some entity ent.
db.ent.insert({'a':5775, 'b':'b1'})
db.ent.insert({'c':'its a c', 'b':'b2'})
db.ent.insert({'a':7557, 'c':'its a c'})
After this I need to intensively query ent for the presence of fields:
db.ent.find({'a':{$exists:true}})
db.ent.find({'c':{$exists:false}})
Per MongoDB docs:
$exists is not very efficient even with an index, and esp. with {$exists:true} since it will effectively have to scan all indexed values.
Can the experts here suggest a more efficient way (even if it means a paradigm shift) to deal quickly with varying name-value pairs?
You can redesign your schema like this:
{
pairs:[
{k: "a", v: 5775},
{k: "b", v: "b1"},
]
}
Then you index your key:
db.people.ensureIndex({"pairs.k" : 1})
After this you will be able to search by exact match:
db.ent.find({'pairs.k':"a"})
If you go with a sparse index and your current schema, as proposed by @WesFreeman, you will need to create an index on each key you want to search. This can hurt write performance, or may not be acceptable if your keys are not static.
Simply redesign your schema such that it's an indexable query. Your use case is in fact analogous to the first example application given in MongoDB: The Definitive Guide.
If you want/need the convenience of result.a just store the keys somewhere indexable.
instead of the existing:
db.ent.insert({a:5775, b:'b1'})
do
db.ent.insert({a:5775, b:'b1', index: ['a', 'b']})
That's then an indexable query:
db.end.find({index: "a"}).explain()
{
"cursor" : "BtreeCursor index_1",
"nscanned" : 1,
"nscannedObjects" : 1,
"n" : 1,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : true,
"indexOnly" : false,
"indexBounds" : {
"index" : [
[
"a",
"a"
]
]
}
}
or if you're ever likely to query also by value:
db.ent.insert({
a:5775,
b:'b1',
index: [
{name: 'a', value: 5775},
{name: 'b', value: 'b1'}
]
})
That's also an indexable query:
db.end.find({"index.name": "a"}).explain()
{
"cursor" : "BtreeCursor index.name_",
"nscanned" : 1,
"nscannedObjects" : 1,
"n" : 1,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : true,
"indexOnly" : false,
"indexBounds" : {
"index.name" : [
[
"a",
"a"
]
]
}
}
I think a sparse index is the answer to this, although you'll need an index for each field. http://www.mongodb.org/display/DOCS/Indexes#Indexes-SparseIndexes
Sparse indexes should help with $exists:true queries.
Even still, if your field is not really sparse (meaning it's mostly set), it's not going to help you that much.
Update: I guess I'm wrong. Looks like there's still an open issue ( https://jira.mongodb.org/browse/SERVER-4187 ) that $exists doesn't use sparse indexes. However, you can do something like this with find and sort, which looks like it properly uses the sparse index:
db.ent.find({}).sort({a:1});
Here's a full demonstration of the difference, using your example values:
> db.ent.insert({'a':5775, 'b':'b1'})
> db.ent.insert({'c':'its a c', 'b':'b2'})
> db.ent.insert({'a':7557, 'c':'its a c'})
> db.ent.ensureIndex({a:1},{sparse:true});
Note that find({}).sort({a:1}) uses the index (BtreeCursor):
> db.ent.find({}).sort({a:1}).explain();
{
"cursor" : "BtreeCursor a_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
}
}
And find({a:{$exists:true}}) does a full scan:
> db.ent.find({a:{$exists:true}}).explain();
{
"cursor" : "BasicCursor",
"nscanned" : 3,
"nscannedObjects" : 3,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
Looks like you can also use .hint({a:1}) to force it to use the index.
> db.ent.find().hint({a:1}).explain();
{
"cursor" : "BtreeCursor a_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
}
}
How about setting the non-existent fields to null? Then you can query them with {field: {$ne: null}}.
db.ent.insert({'a':5775, 'b':'b1', 'c': null})
db.ent.insert({'a': null, 'b':'b2', 'c':'its a c'})
db.ent.insert({'a':7557, 'b': null, 'c':'its a c'})
db.ent.ensureIndex({"a" : 1})
db.ent.ensureIndex({"b" : 1})
db.ent.ensureIndex({"c" : 1})
db.ent.find({'a':{$ne: null}}).explain()
Here's the output:
{
"cursor" : "BtreeCursor a_1 multi",
"isMultiKey" : false,
"n" : 4,
"nscannedObjects" : 4,
"nscanned" : 5,
"nscannedObjectsAllPlans" : 4,
"nscannedAllPlans" : 5,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
null
],
[
null,
{
"$maxElement" : 1
}
]
]
},
"server" : "my-laptop"
}