Sort on $geoWithin geospatial query in MongoDB

I'm trying to retrieve a bunch of Polygons stored inside my db and sort them by radius, so I wrote a query with a simple $geoWithin.
Without sorting, the query looks like this:
db.areas.find({
    "geometry" : {
        "$geoWithin" : {
            "$geometry" : {
                "type" : "Polygon",
                "coordinates" : [ [ /** omissis: array of points **/ ] ]
            }
        }
    }
}).limit(10).explain();
And the explain result is the following:
{
"cursor" : "S2Cursor",
"isMultiKey" : true,
"n" : 10,
"nscannedObjects" : 10,
"nscanned" : 367,
"nscannedObjectsAllPlans" : 10,
"nscannedAllPlans" : 367,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 2,
"indexBounds" : {
},
"nscanned" : 367,
"matchTested" : NumberLong(10),
"geoTested" : NumberLong(10),
"cellsInCover" : NumberLong(27),
"server" : "*omissis*"
}
(Even though it's fast, the cursor shows as S2Cursor, which makes me think my compound index has not been used.)
But as soon as I add a sort, simply with .sort({ radius: -1 }), the query becomes extremely slow:
{
"cursor" : "S2Cursor",
"isMultiKey" : true,
"n" : 10,
"nscannedObjects" : 58429,
"nscanned" : 705337,
"nscannedObjectsAllPlans" : 58429,
"nscannedAllPlans" : 705337,
"scanAndOrder" : true,
"indexOnly" : false,
"nYields" : 3,
"nChunkSkips" : 0,
"millis" : 3186,
"indexBounds" : {
},
"nscanned" : 705337,
"matchTested" : NumberLong(58432),
"geoTested" : NumberLong(58432),
"cellsInCover" : NumberLong(27),
"server" : "*omissis*"
}
with MongoDB scanning all the documents. Naturally I tried adding a compound index, like { radius: -1, geometry : '2dsphere' } or { geometry : '2dsphere', radius: -1 }, but nothing helped; it is still very slow.
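For reference, those two attempts correspond to shell commands like the following (a sketch using exactly the index specs mentioned above):
db.areas.ensureIndex({ radius: -1, geometry: '2dsphere' })
db.areas.ensureIndex({ geometry: '2dsphere', radius: -1 })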
I'd like to know whether I'm using the compound index the wrong way, whether the S2Cursor tells me something I should change in my indexing strategy, and, overall, what I am doing wrong.
(PS: I'm using MongoDB 2.4.5+, so the problem is NOT caused by second field ascending in compound index when using 2dsphere index as reported here https://jira.mongodb.org/browse/SERVER-9647)

First of all, S2Cursor means that the query uses a geographic index.
There can be multiple reasons why the sort operation is slow. Sort operations require memory, and your server may have very little of it; you should consider executing the sort in application code rather than on the server side.
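A minimal sketch of that client-side approach in the shell, assuming each document stores a numeric radius field (polygon stands for the $geometry document from the question):
// Fetch the matching documents, then sort them in application code.
var results = db.areas.find({
    geometry: { $geoWithin: { $geometry: polygon } }
}).toArray();
results.sort(function (a, b) { return b.radius - a.radius; });  // radius descending
var topTen = results.slice(0, 10);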

Related

MongoDB - Index scan low performance

I'm very new to MongoDB and I'm running some performance tests to understand whether my structure is fine.
I have a collection with 5 fields (3 dates, one int, and one pointer to another ObjectId).
In this collection I've created an index on two fields:
_p_monitor_ref Asc (this is the pointer)
collected Desc (this is one of the date fields)
The index name is: _p_monitor_ref_1_collected_-1
I created this index in the beginning and populated the collection with some records. After that, I duplicated the records many times with this script.
// Re-insert every fetched document with a fresh _id via an unordered bulk op.
var bulk = db.measurements.initializeUnorderedBulkOp();
db.measurements.find().limit(1483570).forEach(function(document) {
    document._id = new ObjectId();   // new _id avoids duplicate-key errors
    bulk.insert(document);
});
bulk.execute();
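As a quick sanity check after the script runs, the document count should roughly double:
db.measurements.count()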
Now the collection has 3 million documents.
Next, I run explain to see whether the query uses the index and how long it takes to execute. This is the query:
db.measurements.find({ "_p_monitor_ref": "Monitors$iKNoB6Ga5P" }).sort({collected: -1}).explain()
As you can see, I use _p_monitor_ref to find all documents by pointer, and then I sort by collected: -1 (this is the index).
This is the first result when I run it. MongoDB uses the index (BtreeCursor _p_monitor_ref_1_collected_-1), but the execution time is very high, "millis" : 120286:
{
"cursor" : "BtreeCursor _p_monitor_ref_1_collected_-1",
"isMultiKey" : false,
"n" : 126862,
"nscannedObjects" : 126862,
"nscanned" : 126862,
"nscannedObjectsAllPlans" : 126862,
"nscannedAllPlans" : 126862,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 23569,
"nChunkSkips" : 0,
"millis" : 120286,
"indexBounds" : {
"_p_monitor_ref" : [
[
"Monitors$iKNoB6Ga5P",
"Monitors$iKNoB6Ga5P"
]
],
"collected" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
},
"server" : "my-pc",
"filterSet" : false
}
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 2967141,
"nscannedObjects" : 2967141,
"nscanned" : 2967141,
"nscannedObjectsAllPlans" : 2967141,
"nscannedAllPlans" : 2967141,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 27780,
"nChunkSkips" : 0,
"millis" : 11501,
"server" : "my-pc",
"filterSet" : false
}
Now, if I execute the explain again, this is the result, and the time is "millis" : 201:
{
"cursor" : "BtreeCursor _p_monitor_ref_1_collected_-1",
"isMultiKey" : false,
"n" : 126862,
"nscannedObjects" : 126862,
"nscanned" : 126862,
"nscannedObjectsAllPlans" : 126862,
"nscannedAllPlans" : 126862,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 991,
"nChunkSkips" : 0,
"millis" : 201,
"indexBounds" : {
"_p_monitor_ref" : [
[
"Monitors$iKNoB6Ga5P",
"Monitors$iKNoB6Ga5P"
]
],
"collected" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
},
"server" : "my-pc",
"filterSet" : false
}
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 2967141,
"nscannedObjects" : 2967141,
"nscanned" : 2967141,
"nscannedObjectsAllPlans" : 2967141,
"nscannedAllPlans" : 2967141,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 23180,
"nChunkSkips" : 0,
"millis" : 651,
"server" : "my-pc",
"filterSet" : false
}
Why do I get these two very different results? Maybe the second execution takes the data from some kind of cache...
Also, the collection currently has 3 million records... what happens when it grows to 10/20/30 million?
I don't know if I'm doing something wrong. Admittedly, I'm executing this on my laptop (I don't have an SSD).
The reason you see a smaller execution time on the second attempt is that the first attempt forced Mongo to load the data into memory, and the data was still available in memory when the second attempt executed.
As your collection grows, the index will grow as well, so it may become too big to fit in free memory blocks and the MongoDB engine will have to load/unload parts of it; performance will then vary.
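A quick way to watch for this from the shell is to compare the index footprint against your available RAM (a rough check, not proof that the index is actually resident in memory):
// Bytes consumed by all indexes on the collection:
db.measurements.totalIndexSize()
// Per-index breakdown, useful to watch _p_monitor_ref_1_collected_-1 grow:
db.measurements.stats().indexSizes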

MongoDB: degraded query performance

I have a users collection in MongoDB with over 2.5 million records, amounting to 30 GB. I have about 4 to 6 GB of indexes. It's a sharded environment with two shards, each consisting of a replica set. The servers are dedicated to Mongo with no other load. Total RAM is over 10 GB, which should be more than enough for the kind of queries I am performing (shown below).
My concern is that despite having indexes on the appropriate fields, the time to retrieve a result is huge (2 minutes to a whopping 30 minutes), which is not acceptable. I am a newbie to MongoDB and really confused as to why this is happening.
Sample schema is:
user:
{
_id: UUID (indexed by default),
name: string,
dob: ISODate,
addr: string,
createdAt: ISODate (indexed),
.
.
.,
transaction:[
{
firstTransaction: ISODate(indexed),
lastTransaction: ISODate(indexed),
amount: float,
product: string (indexed),
.
.
.
},...
],
other sub documents...
}
The subdocument array length varies from 0 to 50 or so.
The queries I performed are:
1) db.user.find().min({createdAt:ISODate("2014-12-01")}).max({createdAt:ISODate("2014-12-31")}).explain()
This query was slow at first, but then lightning fast (I guess because of warm-up).
2) db.user.find({transaction:{$elemMatch:{product:'mobile'}}}).explain()
This query took over 30 minutes, and warming up didn't help; the performance was the same every time. It returned over half of the collection.
3) db.user.find({transaction:{$elemMatch:{product:'mobile'}}, firstTransaction:{$in:[ISODate("2015-01-01"),ISODate("2015-01-02")]}}).explain()
This is the main query that I want to be performant, but to my bad luck it takes more than 30 minutes to run. I tried many versions of it, such as this:
db.user.find({transaction:{$elemMatch:{product:'mobile'}}}).min({transaction:{$elemMatch:{firstTransaction:ISODate("2015-01-01")}}}).max({transaction:{$elemMatch:{firstTransaction:ISODate("2015-01-02")}}}).explain()
This query gave me the error:
planner returned error: unable to find relevant index for max/min query
and, with hint():
planner returned error: hint provided does not work with min query
I used the min/max functions because of the uncertainty of range queries in MongoDB with the $lt and $gt operators, which sometimes ignore one of the bounds and end up scanning more documents than needed.
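For reference, the conventional way to express such a range with predictable bounds is $gte (inclusive) plus $lt (exclusive); a sketch against the createdAt index from the question:
// Matches documents created in December 2014: lower bound inclusive,
// upper bound exclusive.
db.user.find({
    createdAt: { $gte: ISODate("2014-12-01"), $lt: ISODate("2015-01-01") }
}).explain()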
I used indexes such as:
db.user.ensureIndex({createdAt: 1})
db.user.ensureIndex({"transaction.firstTransaction":1})
db.user.ensureIndex({"transaction.lastTransaction":1})
db.user.ensureIndex({"transaction.product":1})
I tried to use a compound index for the third query, which is:
db.user.ensureIndex({"transaction.firstTransaction":1, "transaction.product":1})
But this seems to give me no result. The query gets stuck and never returns. I mean it. NEVER. Like it's deadlocked. I don't know why. So I dropped this index and got the result after waiting for over half an hour (really frustrating).
Please help me out, as I am really desperate to find a solution and out of ideas.
This output might help. Following is the output for this query:
db.user.find({transaction:{$elemMatch:{product:"mobile", firstTransaction:{$gte:ISODate("2015-01-01"), $lt:ISODate("2015-01-02")}}}}).hint("transaction.firstTransaction_1_transaction.product_1").explain()
output:
{
"clusteredType" : "ParallelSort",
"shards" : {
"test0/mrs00.test.com:27017,mrs01.test.com:27017" : [
{
"cursor" : "BtreeCursor transaction.product_1_transaction.firstTransaction_1",
"isMultiKey" : true,
"n" : 622,
"nscannedObjects" : 350931,
"nscanned" : 352000,
"nscannedObjectsAllPlans" : 350931,
"nscannedAllPlans" : 352000,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 119503,
"nChunkSkips" : 0,
"millis" : 375693,
"indexBounds" : {
"transaction.product" : [
[
"mobile",
"mobile"
]
],
"transaction.firstTransaction" : [
[
true,
ISODate("2015-01-02T00:00:00Z")
]
]
},
"server" : "ip-12-0-0-31:27017",
"filterSet" : false
}
],
"test1/mrs10.test.com:27017,mrs11.test.com:27017" : [
{
"cursor" : "BtreeCursor transaction.product_1_transaction.firstTransaction_1",
"isMultiKey" : true,
"n" : 547,
"nscannedObjects" : 350984,
"nscanned" : 352028,
"nscannedObjectsAllPlans" : 350984,
"nscannedAllPlans" : 352028,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 132669,
"nChunkSkips" : 0,
"millis" : 891898,
"indexBounds" : {
"transaction.product" : [
[
"mobile",
"mobile"
]
],
"transaction.firstTransaction" : [
[
true,
ISODate("2015-01-02T00:00:00Z")
]
]
},
"server" : "ip-12-0-0-34:27017",
"filterSet" : false
}
]
},
"cursor" : "BtreeCursor transaction.product_1_transaction.firstTransaction_1",
"n" : 1169,
"nChunkSkips" : 0,
"nYields" : 252172,
"nscanned" : 704028,
"nscannedAllPlans" : 704028,
"nscannedObjects" : 701915,
"nscannedObjectsAllPlans" : 701915,
"millisShardTotal" : 1267591,
"millisShardAvg" : 633795,
"numQueries" : 2,
"numShards" : 2,
"millis" : 891910
}
Query:
db.user.find({transaction:{$elemMatch:{product:'mobile'}}}).explain()
Output:
{
"clusteredType" : "ParallelSort",
"shards" : {
"test0/mrs00.test.com:27017,mrs01.test.com:27017" : [
{
"cursor" : "BtreeCursor transaction.product_1",
"isMultiKey" : true,
"n" : 553072,
"nscannedObjects" : 553072,
"nscanned" : 553072,
"nscannedObjectsAllPlans" : 553072,
"nscannedAllPlans" : 553072,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 164888,
"nChunkSkips" : 0,
"millis" : 337909,
"indexBounds" : {
"transaction.product" : [
[
"mobile",
"mobile"
]
]
},
"server" : "ip-12-0-0-31:27017",
"filterSet" : false
}
],
"test1/mrs10.test.com:27017,mrs11.test.com:27017" : [
{
"cursor" : "BtreeCursor transaction.product_1",
"isMultiKey" : true,
"n" : 554176,
"nscannedObjects" : 554176,
"nscanned" : 554176,
"nscannedObjectsAllPlans" : 554176,
"nscannedAllPlans" : 554176,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 107496,
"nChunkSkips" : 0,
"millis" : 327928,
"indexBounds" : {
"transaction.product" : [
[
"mobile",
"mobile"
]
]
},
"server" : "ip-12-0-0-34:27017",
"filterSet" : false
}
]
},
"cursor" : "BtreeCursor transaction.product_1",
"n" : 1107248,
"nChunkSkips" : 0,
"nYields" : 272384,
"nscanned" : 1107248,
"nscannedAllPlans" : 1107248,
"nscannedObjects" : 1107248,
"nscannedObjectsAllPlans" : 1107248,
"millisShardTotal" : 665837,
"millisShardAvg" : 332918,
"numQueries" : 2,
"numShards" : 2,
"millis" : 337952
}
Please let me know if I have missed any of the details.
Thanks.
1st: Your queries are overly complicated, using $elemMatch far too often.
2nd: If you can include your shard key in the query, it will drastically improve speed.
I'm going to optimize your queries for you:
db.user.find({
createdAt: {
$gte: ISODate("2014-12-01"),
$lte: ISODate("2014-12-31")
}
}).explain()
db.user.find({
'transaction.product':'mobile'
}).explain()
db.user.find({
'transaction.product':'mobile',
firstTransaction:{
$in:[
ISODate("2015-01-01"),
ISODate("2015-01-02")
]
}
}).explain()
The bottom line is this: including your shard key each time is a time saver.
It might even save time to loop through your shard keys and issue the same query multiple times.
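As an illustration only, since the question never names the shard key, suppose it were _id; the pattern would look like this:
// Hypothetical: assumes _id is the shard key, which the question does not state.
// Including it lets mongos target specific shards instead of broadcasting.
db.user.find({
    _id: { $in: knownUserIds },   // knownUserIds: your list of shard-key values
    'transaction.product': 'mobile'
}).explain()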
The reason for the performance degradation was the large working set. For some queries (mainly range queries) the working set exceeded the physical memory limit and page faults occurred, which degraded performance.
One solution was to apply additional filters to the query to limit the result set, and to perform equality checks instead of a range (iterating over the range).
Those tweaks worked for me. Hope they help others too.
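A sketch of that equality-iteration idea, assuming firstTransaction values fall on day boundaries (the real granularity isn't shown in the question):
// Replace one range scan with a series of equality matches, one per day.
var days = [ISODate("2015-01-01"), ISODate("2015-01-02")];
days.forEach(function (day) {
    db.user.find({
        transaction: { $elemMatch: { product: "mobile", firstTransaction: day } }
    }).forEach(printjson);
});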

MongoDB - can not get a covered query

So I have an empty database 'tests' and a collection named 'test'.
First I ensured that my index was set correctly.
db.test.ensureIndex({t:1})
db.test.getIndices()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "tests.test"
},
{
"v" : 1,
"key" : {
"t" : 1
},
"name" : "t_1",
"ns" : "tests.test"
}
]
After that I inserted some test records.
db.test.insert({t:1234})
db.test.insert({t:5678})
When I query the DB with the following command and let Mongo explain the result, I get this output:
db.test.find({t:1234},{_id:0}).explain()
{
"cursor" : "BtreeCursor t_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"t" : [
[
1234,
1234
]
]
},
"server" : "XXXXXX:27017",
"filterSet" : false
}
Can anyone please explain to me why indexOnly is false?
Thanks in advance.
For a covered index query, you need to retrieve only those fields that are in the index:
> db.test.find({ t: 1234 },{ _id: 0, t: 1}).explain()
{
"cursor" : "BtreeCursor t_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 0,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : true,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"t" : [
[
1234,
1234
]
]
},
"server" : "ubuntu:27017",
"filterSet" : false
}
Essentially this means that only the index is used to retrieve the data, without the need to go back to the actual document for further information. The projection can include as many fields as you need (within reason), but they must all be part of the index and must be the only fields returned.
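For contrast, a sketch of what breaks coverage: projecting a field that is not in the index (here a hypothetical field x) forces MongoDB to fetch the document, so indexOnly drops back to false:
// "x" is hypothetical -- any non-indexed field in the projection has the same effect.
db.test.find({ t: 1234 }, { _id: 0, t: 1, x: 1 }).explain()
// -> "indexOnly" : false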
Hmm, the reason has not been clearly explained (it actually confused me), so here is my effort.
Essentially, in order for MongoDB to know that said index covers the query, it has to know which fields you want.
If you just say you don't want _id, how can it know that * - _id = t without looking?
Here * represents all fields, like it does in SQL.
The answer is that it cannot. That is why you need to provide the full field/select/projection (whatever word they use for it) definition, so that MongoDB can know that your result fits the index.

Why is MongoDB $geoWithin slow when index is hinted?

I have been struggling with this for some time. I have a collection of over 100,000 documents. Each document has a geoLocation field that uses the GeoJSON format. I added a 2dsphere index to the geoLocation field.
If I run this simple query, it takes almost 1 second to complete:
db.guestBookPost.find({"geoLocation" : { "$geoWithin" : {$centerSphere:[[-118.3688331113197 , 34.1620417429723], .00068621014493]}}, $hint:"geoLocation_2dsphere"}).limit(10)
The explain shows:
{
"cursor" : "S2Cursor",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 100211,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 100211,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 910,
"indexBounds" : {
},
"nscanned" : 100211,
"matchTested" : NumberLong(100211),
"geoTested" : NumberLong(0),
"cellsInCover" : NumberLong(8),
"server" : "ip-10-245-26-151:27017"
}
It doesn't look like the $geoWithin query is using the hinted index. The cursor type is S2Cursor, which seems incorrect. Am I doing anything wrong? This is MongoDB 2.4.3
Thanks,
Les
S2Cursor is the cursor for the 2dsphere index, so your hinted index is in fact being used. You can take a look at this:
http://blog.mongodb.org/post/50984169045/new-geo-features-in-mongodb-2-4
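One side note, offered as an assumption about intent rather than a definitive fix: in the shell, a hint is normally applied as a cursor method rather than as a $hint key inside the filter document:
// Same query, with hint() as a cursor method (coordinates from the question).
db.guestBookPost.find({
    geoLocation: {
        $geoWithin: {
            $centerSphere: [ [ -118.3688331113197, 34.1620417429723 ], 0.00068621014493 ]
        }
    }
}).hint("geoLocation_2dsphere").limit(10)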

MongoDB - Query on nested field with index

I am trying to figure out how I must structure queries such that they will hit my index.
I have documents structured like so:
{ "attributes" : { "make" : "Subaru", "color" : "Red" } }
With an index of: db.stuff.ensureIndex({"attributes.make":1})
What I've found is that querying using dot notation hits the index while querying with a document does not.
Example:
db.stuff.find({"attributes.make":"Subaru"}).explain()
{
"cursor" : "BtreeCursor attributes.make_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 2,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"attributes.make" : [
[
"Subaru",
"Subaru"
]
]
}
}
vs
db.stuff.find({attributes:{make:"Subaru"}}).explain()
{
"cursor" : "BasicCursor",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 0,
"millis" : 1,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
Is there a way to get the document-style query to hit the index? The reason I ask is that when constructing queries from my persistent objects, it's much easier to serialize them out as documents than as something using dot notation.
I'll also add that we're using a home-grown data-mapper layer built with Jackson. Would using something like Morphia help with constructing these queries properly?
Did some more digging, and this thread explains what's going on with the sub-document query. My problem above was that to make the sub-document based query act like the dot notation, I needed to use $elemMatch.
db.stuff.find({"attributes":{"$elemMatch" : {"make":"Subaru"}}}).explain()
{
"cursor" : "BtreeCursor attributes.make_1",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 0,
"millis" : 2,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"attributes.make" : [
[
"Subaru",
"Subaru"
]
]
}
}
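If reshaping the mapper output is an option, another route is to flatten nested query documents into dot notation before sending them. This is a sketch with a hypothetical flatten helper (plain nested documents only; operator objects and dates would need special-casing), not part of any mapper API:
// Hypothetical helper: turns { attributes: { make: "Subaru" } }
// into { "attributes.make": "Subaru" } so the index can be used.
function flatten(obj, prefix, out) {
    prefix = prefix || "";
    out = out || {};
    Object.keys(obj).forEach(function (k) {
        var key = prefix ? prefix + "." + k : k;
        var v = obj[k];
        if (v !== null && typeof v === "object" && !Array.isArray(v)) {
            flatten(v, key, out);   // recurse into plain sub-documents
        } else {
            out[key] = v;
        }
    });
    return out;
}
db.stuff.find(flatten({ attributes: { make: "Subaru" } })).explain()
// -> "cursor" : "BtreeCursor attributes.make_1"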