MongoDB - Query not working in 2.6

I'm currently migrating our database from Mongo 2.4.9 to 2.6.4.
I have an odd situation where a query that returns good results in 2.4 returns no documents in 2.6.
The query in question:
var dbSearch = {
    created: { $gte: new Date(1409815194808) },
    geolocation: {
        $geoWithin: {
            $center: [ [ 4.895167900000001, 52.3702157 ], 0.1125 ]
        }
    }
};
The following (relevant) indexes exist on that collection:
{ "v" : 1, "key" : { "created" : -1 }, "name" : "createdIndex", "ns" : "prod.search", "background" : true }
{ "v" : 1, "key" : { "geolocation" : "2d", "created" : -1 }, "name" : "geolocationCreatedIndex", "ns" : "prod.search" }
Running this query against Mongo 2.6 produces the following query-log entry:
{ created: { $gte: new Date(1409815194808) }, geolocation: { $geoWithin: { $center: [ [ 4.895167900000001, 52.3702157 ], 0.1125 ] } } }
planSummary: IXSCAN { created: -1 } ntoreturn:0 ntoskip:0 keyUpdates:0 numYields:0 locks(micros) r:8196 nreturned:0 reslen:20 8ms
I'm firing this query at the database from Node.js using the node-mongodb-native module.
Note that when I remove either one of the search fields (created or geolocation), the query produces the right results on both 2.4 and 2.6. The combination (as posted above) gives no results on 2.6.
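For reference, the query is fired from Node.js roughly like this (a sketch using the callback-style driver API of that era; the connection string and collection name are assumptions based on the "prod.search" namespace below):
var MongoClient = require('mongodb').MongoClient;

// Hypothetical connection details; 'prod' and 'search' follow the
// index namespace "prod.search" shown in the index listing below.
MongoClient.connect('mongodb://localhost:27017/prod', function (err, db) {
    if (err) throw err;
    db.collection('search').find(dbSearch).toArray(function (err, docs) {
        if (err) throw err;
        console.log('found', docs.length, 'documents');
        db.close();
    });
});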
Edit with the extra requested information:
Explain on mongo 2.4 query:
> db.search.find({created: { $gte: new Date(1409815194808) }, geolocation: {$geoWithin: {$center: [ [ 4.895167900000001, 52.3702157 ], 0.1125 ] } } }).explain()
{
    "cursor" : "GeoBrowse-circle",
    "isMultiKey" : false,
    "n" : 321,
    "nscannedObjects" : 321,
    "nscanned" : 321,
    "nscannedObjectsAllPlans" : 321,
    "nscannedAllPlans" : 321,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 0,
    "nChunkSkips" : 0,
    "millis" : 69,
    "indexBounds" : {
        "geolocation" : [ ]
    },
    "lookedAt" : NumberLong(8940),
    "matchesPerfd" : NumberLong(8538),
    "objectsLoaded" : NumberLong(8538),
    "pointsLoaded" : NumberLong(0),
    "pointsSavedForYield" : NumberLong(0),
    "pointsChangedOnYield" : NumberLong(0),
    "pointsRemovedOnYield" : NumberLong(0),
    "server" : "ubmongo24.local:27017"
}
Explain on mongo 2.6 query:
> db.search.find({created: { $gte: new Date(1409815194808) }, geolocation: {$geoWithin: {$center: [ [ 4.895167900000001, 52.3702157 ], 0.1125 ] } } }).explain();
{
    "cursor" : "BtreeCursor createdIndex",
    "isMultiKey" : false,
    "n" : 0,
    "nscannedObjects" : 1403,
    "nscanned" : 1403,
    "nscannedObjectsAllPlans" : 2808,
    "nscannedAllPlans" : 2808,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 21,
    "nChunkSkips" : 0,
    "millis" : 8,
    "indexBounds" : {
        "created" : [
            [
                ISODate("0NaN-NaN-NaNTNaN:NaN:NaNZ"),
                ISODate("2014-09-04T07:19:54.808Z")
            ]
        ]
    },
    "server" : "ubmongo26.local:27017",
    "filterSet" : false
}

Based on the explain output, the issue appears to be the index chosen by the query optimizer (which was extensively overhauled in 2.6 - mostly for the better, but it does mean there are new edge cases). It is using the single-field createdIndex rather than the compound index used in 2.4.
Try hinting the geolocationCreatedIndex index in 2.6 (.hint({ "geolocation" : "2d", "created" : -1 })) and see if that fixes your issue - by default it is choosing the createdIndex and hence not using the geo index at all.
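For example, from the shell (a sketch; the collection name follows the "prod.search" namespace in the index listing above):
db.search.find({
    created: { $gte: new Date(1409815194808) },
    geolocation: {
        $geoWithin: {
            $center: [ [ 4.895167900000001, 52.3702157 ], 0.1125 ]
        }
    }
}).hint({ geolocation: "2d", created: -1 }).explain()
// If the hint is picked up, the plan should report the compound
// geolocationCreatedIndex instead of "BtreeCursor createdIndex".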

Related

Mongodb geonear and aggregate very slow

My current Mongo version is 2.4.9 and the collection has around 2.8 million documents. My query takes extremely long to finish when using $geoNear.
Example document from my collection:
"loc" : {
"type" : "Point",
"coordinates" : [
100.46589473,
5.35149077
]
},
"email" : "abc#123.com"
The loc index:
{
    "v" : 1,
    "key" : {
        "loc" : "2dsphere"
    },
    "ns" : "test.collect",
    "name" : "loc_2dsphere",
    "background" : true
}
In testing, this query took around 10 to 15 minutes to finish:
db.getCollection('collect').aggregate([
    { '$match':
        { 'loc':
            { '$geoNear':
                { '$geometry':
                    { 'type': 'Point', 'coordinates': [101.6862, 3.0829], '$maxDistance': 10000 }
                }
            }
        }
    },
    { '$group': { '_id': 'email', 'email': { '$last': '$email' }, 'loc': { '$last': '$loc' } } }
])
Below is the explain result:
{
    "serverPipeline" : [
        {
            "query" : {
                "loc" : {
                    "$geoNear" : {
                        "$geometry" : {
                            "type" : "Point",
                            "coordinates" : [
                                101.6862,
                                3.0829
                            ],
                            "$maxDistance" : 10000
                        }
                    }
                }
            },
            "projection" : {
                "email" : 1,
                "loc" : 1,
                "_id" : 0
            },
            "cursor" : {
                "cursor" : "S2NearCursor",
                "isMultiKey" : true,
                "n" : 111953,
                "nscannedObjects" : 111953,
                "nscanned" : 96677867,
                "nscannedObjectsAllPlans" : 111953,
                "nscannedAllPlans" : 96677867,
                "scanAndOrder" : false,
                "indexOnly" : false,
                "nYields" : 183,
                "nChunkSkips" : 0,
                "millis" : 895678,
                "indexBounds" : {},
                "nscanned" : NumberLong(96677867),
                "matchTested" : NumberLong(3472481),
                "geoMatchTested" : NumberLong(3472481),
                "numShells" : NumberLong(53),
                "keyGeoSkip" : NumberLong(93205386),
                "returnSkip" : NumberLong(20148837),
                "btreeDups" : NumberLong(0),
                "inAnnulusTested" : NumberLong(3472481),
                "allPlans" : [
                    {
                        "cursor" : "S2NearCursor",
                        "n" : 111953,
                        "nscannedObjects" : 111953,
                        "nscanned" : 96677867,
                        "indexBounds" : {}
                    }
                ],
                "server" : "xxx:xxx"
            }
        },
        {
            "$group" : {
                "_id" : {
                    "$const" : "email"
                },
                "email" : {
                    "$last" : "$email"
                },
                "loc" : {
                    "$last" : "$loc"
                }
            }
        }
    ],
    "ok" : 1
}
Is my query inappropriate? Is there anything else I can do to improve the speed?
Try using the $geoNear aggregation pipeline stage directly:
https://docs.mongodb.com/v2.4/reference/operator/aggregation/geoNear/
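A sketch of what that could look like here (it assumes a GeoJSON point is accepted for near with spherical: true, as in the geoNear command; distanceField is required by the stage and the name "dist" is just an illustration). Note that the question's $group uses '_id': 'email', a constant string that lumps everything into one group; '$email' is used below on the assumption that grouping per email was intended:
db.getCollection('collect').aggregate([
    { $geoNear: {
        near: { type: "Point", coordinates: [101.6862, 3.0829] },
        distanceField: "dist",  // required by $geoNear; field name is illustrative
        maxDistance: 10000,     // metres, because near is a GeoJSON point
        spherical: true         // required with a 2dsphere index
        // note: $geoNear caps its output (100 docs by default);
        // set num to raise that if you need more results
    }},
    { $group: { _id: "$email", email: { $last: "$email" }, loc: { $last: "$loc" } } }
])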
To add to venkat's answer (using $geoNear in the aggregation pipeline directly, instead of using it under a $match):
Try reducing the $maxDistance number if you don't need all of the result.
$geoNear returns documents in a sorted order by distance. If you don't need the results in a sorted order, you can use $geoWithin instead: https://docs.mongodb.com/v2.4/reference/operator/query/geoWithin/
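For instance, a rough $geoWithin equivalent of the query above ($centerSphere takes its radius in radians, i.e. the distance divided by the Earth's radius of roughly 6378137 metres):
db.getCollection('collect').find({
    loc: {
        $geoWithin: {
            // ~10 km expressed in radians
            $centerSphere: [ [101.6862, 3.0829], 10000 / 6378137 ]
        }
    }
})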
Also, MongoDB 2.4 was released in March 2013 and is no longer supported. If possible, I would recommend upgrading to the latest version (currently 3.2.10). There are many bug fixes and performance improvements in the newer versions.
$geoNear with the aggregation framework works much faster than the find query below:
"$near": {
"$geometry": {
"type": "Point",
"coordinates": [lng,lat],
},
},
At some point my MongoDB even hit "no socket available" errors with the latter, but the former worked great (<20 ms) [tested also from Golang + the mgo driver].

Increase Aggregation Framework Performance in MongoDb

I have 200k records in my collection. My data model looks as follows:
{
    "_id" : ObjectId("51750ec159dcef125863b7c4"),
    "DateAdded" : ISODate("2013-04-22T00:00:00.000Z"),
    "DateRemoved" : ISODate("2013-12-22T00:00:00.000Z"),
    "DealerID" : ObjectId("51750bd559dcef07ec964a41"),
    "ExStockID" : "8324482",
    "Make" : "Mazda",
    "Model" : "3",
    "Price" : 11479,
    "Year" : 2012,
    "Variant" : "1.6d (115) TS 5dr",
    "Turnover" : 150
}
I have several indexes on the collection; the one used for the aggregation framework is:
{
    "DealerID" : 1,
    "DateRemoved" : -1,
    "Price" : 1,
    "Turnover" : 1
}
The aggregation query being used:
db.stats.aggregate([
    {
        "$match": {
            "DealerID": {
                "$in": [
                    ObjectId("523325ac59dcef1b90a3d446"),
                    ....
                    // here is specified more than 150 ObjectIds
                ]
            },
            "DateRemoved": {
                "$gte": ISODate("2013-12-01T00:00:00Z"),
                "$lt": ISODate("2014-01-01T00:00:00Z")
            }
        }
    },
    { "$project" : { "Price": 1, "Turnover": 1 } },
    {
        "$group": {
            "_id": null,
            "Price": { "$avg": "$Price" },
            "Turnover": { "$avg": "$Turnover" }
        }
    }
]);
The execution time for this query is between 30 and 200 seconds.
How can I optimize this?
You can try to run explain on the aggregation pipeline, but as I don't have your full dataset, I can't try it out properly:
p = [
    {
        "$match": {
            "DealerID": {
                "$in": [
                    ObjectId("51750bd559dcef07ec964a41"),
                    ObjectId("51750bd559dcef07ec964a44"),
                ]
            },
            "DateRemoved": {
                "$gte": ISODate("2013-12-01T00:00:00Z"),
                "$lt": ISODate("2014-01-01T00:00:00Z")
            }
        }
    },
    { "$project" : { "Price": 1, "Turnover": 1 } },
    {
        "$group": {
            "_id": null,
            "Price": { "$avg": "$Price" },
            "Turnover": { "$avg": "$Turnover" }
        }
    }
];
db.s.runCommand('aggregate', { pipeline: p, explain: true } );
I would suggest removing the fields that are not part of the $match (Price and Turnover). Also, I think you should switch the order of DealerID and DateRemoved, as you want to do one range search and, from that range, then include all the dealers. Doing it the other way around means that you can really only use the index for the 150 single items, and then you need to do a range search.
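A sketch of the suggested reordering (the index name is hypothetical, and whether Price and Turnover belong in the index depends on whether you want the covered query discussed below):
// Range field first, then the $in field, per the reasoning above.
db.stats.ensureIndex(
    { "DateRemoved" : -1, "DealerID" : 1, "Price" : 1, "Turnover" : 1 },
    { name : "DateRemovedDealerIDPriceTurnover" }  // hypothetical name
);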
Using @Derick's answer I found the index that was preventing the covered query. As far as I can see, the query optimizer uses the first index that covers just the query itself, so I changed the order of the indexes. Here is the outcome before and after.
Before:
{
    "serverPipeline" : [
        {
            "query" : {...},
            "projection" : { "Price" : 1, "Turnover" : 1, "_id" : 0 },
            "cursor" : {
                "cursor" : "BtreeCursor DealerIDDateRemoved multi",
                "isMultiKey" : false,
                "n" : 11036,
                "nscannedObjects" : 11008,
                "nscanned" : 11307,
                "nscannedObjectsAllPlans" : 11201,
                "nscannedAllPlans" : 11713,
                "scanAndOrder" : false,
                "indexOnly" : false,
                "nYields" : 0,
                "nChunkSkips" : 0,
                "millis" : 58,
                "indexBounds" : {...},
                "allPlans" : [...],
                "oldPlan" : {...},
                "server" : "..."
            }
        },
        {
            "$group" : {...}
        }
    ],
    "ok" : 1
}
After these changes the indexOnly param now shows true, which means the query is now covered by the index:
{
    "serverPipeline" : [
        {
            "query" : {...},
            "projection" : { "Price" : 1, "Turnover" : 1, "_id" : 0 },
            "cursor" : {
                "cursor" : "BtreeCursor DealerIDDateRemovedPriceTurnover multi",
                "isMultiKey" : false,
                "n" : 11036,
                "nscannedObjects" : 0,
                "nscanned" : 11307,
                "nscannedObjectsAllPlans" : 285,
                "nscannedAllPlans" : 11713,
                "scanAndOrder" : false,
                "indexOnly" : true,
                "nYields" : 0,
                "nChunkSkips" : 0,
                "millis" : 58,
                "indexBounds" : {...},
                "allPlans" : [...],
                "server" : "..."
            }
        },
        {
            "$group" : {...}
        }
    ],
    "ok" : 1
}
Now the query runs in approximately 0.085-0.300 seconds. For additional information about covered queries, see Create Indexes that Support Covered Queries.

MongoDB embedded secondary compound index covered slow query

I have the following compound index on embedded fields:
db.people.ensureIndex({"sources_names.source_id":1,"sources_names.value":1})
Here is part of db.people.getIndexes():
{
    "v" : 1,
    "key" : {
        "sources_names.source_id" : 1,
        "sources_names.value" : 1
    },
    "ns" : "diglibtest.people",
    "name" : "sources_names.source_id_1_sources_names.value_1"
}
So I ran the following query, which I expected to be covered by the index:
db.people.find({ "sources_names.source_id": ObjectId('5166d57f7a8f348676000001'), "sources_names.value": "Ulrike Weiland" }, {"sources_names.source_id":1, "sources_names.value":1, "_id":0} ).pretty()
{
    "sources_names" : [
        {
            "value" : "Ulrike Weiland",
            "source_id" : ObjectId("5166d57f7a8f348676000001")
        }
    ]
}
It took about 5 seconds, so I ran explain:
db.people.find({ "sources_names.source_id": ObjectId('5166d57f7a8f348676000001'), "sources_names.value": "Ulrike Weiland" }, {"sources_names.source_id":1, "sources_names.value":1, "_id":0 }).explain()
{
    "cursor" : "BtreeCursor sources_names.source_id_1_sources_names.value_1",
    "isMultiKey" : true,
    "n" : 1,
    "nscannedObjects" : 1260353,
    "nscanned" : 1260353,
    "nscannedObjectsAllPlans" : 1260353,
    "nscannedAllPlans" : 1260353,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 4,
    "nChunkSkips" : 0,
    "millis" : 4308,
    "indexBounds" : {
        "sources_names.source_id" : [
            [
                ObjectId("5166d57f7a8f348676000001"),
                ObjectId("5166d57f7a8f348676000001")
            ]
        ],
        "sources_names.value" : [
            [
                {
                    "$minElement" : 1
                },
                {
                    "$maxElement" : 1
                }
            ]
        ]
    },
    "server" : "dash-pc.local:27017"
}
But why does this supposedly index-covered query scan through the whole database? How should I create the index to boost performance?
Thanks!
You are using a multikey index (i.e. sources_names.source_id) in multiple places; from the docs ( http://docs.mongodb.org/manual/tutorial/create-indexes-to-support-queries/#create-indexes-that-support-covered-queries ):
An index cannot cover a query if:
any of the indexed fields in any of the documents in the collection includes an array.
If an indexed field is an array, the index becomes a multi-key index and cannot
support a covered query.
You can tell this is a multikey index from the explain:
"isMultiKey" : true,
Basically the dotted field is classed as multikey because sources_names is an array, and as such the index contains arrays.
As for improving the speed: I have not looked into this deeply, but your problem is here:
"sources_names.value" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
This shows the index is not being used optimally to narrow down sources_names.value.
Edit
I thought the answer I just gave was a bit weird, since this should not be a multikey index, so I actually went off and tested it:
> db.gh.ensureIndex({'d.id':1,'d.g':1})
> db.gh.find({'d.id':5, 'd.g':'d'})
{ "_id" : ObjectId("516826e5f44947064473a00a"), "d" : { "id" : 5, "g" : "d" } }
> db.gh.find({'d.id':5, 'd.g':'d'}).explain()
{
    "cursor" : "BtreeCursor d.id_1_d.g_1",
    "isMultiKey" : false,
    "n" : 1,
    "nscannedObjects" : 1,
    "nscanned" : 1,
    "nscannedObjectsAllPlans" : 1,
    "nscannedAllPlans" : 1,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 0,
    "nChunkSkips" : 0,
    "millis" : 0,
    "indexBounds" : {
        "d.id" : [
            [
                5,
                5
            ]
        ],
        "d.g" : [
            [
                "d",
                "d"
            ]
        ]
    },
    "server" : "ubuntu:27017"
}
It seems my original thoughts were right: this shouldn't be a multikey index. I think you have some dirty data in value, and it is causing you problems.
I would go through your database and make sure that your records are correctly entered.
You most likely have something like:
{
    "sources_names" : [
        {
            "value" : ["Ulrike Weiland", 1],
            "source_id" : ObjectId("5166d57f7a8f348676000001")
        }
    ]
}
somewhere in your data.
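One rough way to hunt for such records (a sketch; $where does a full collection scan, so it is slow, but it is thorough):
db.people.find({
    $where: function () {
        // flag documents where any sources_names.value is not a plain string
        return (this.sources_names || []).some(function (s) {
            return typeof s.value !== "string";
        });
    }
}).limit(10)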

MongoDB two seemingly identical queries - one uses an index the other doesn't

I noticed in Mongo logs that some queries were taking longer than expected.
Fri Jan 4 08:53:39 [conn587] query mydb.User query: { query: { someField: "eu", lastRecord.importantValue: { $ne: nan.0 }, lastRecord.otherValue: { $gte: 1000 } }, orderby: { lastRecord.importantValue: -1 } } ntoreturn:50 ntoskip:0 nscanned:40681 scanAndOrder:1 keyUpdates:0 numYields: 1 locks(micros) r:2649788 nreturned:50 reslen:334041 1575ms
Considering that I had built an index on {someField : 1, "lastRecord.otherValue" : 1, "lastRecord.importantValue" : -1} I went to investigate.
While doing that, I noticed that two queries that look identical to me - just phrased differently syntactically, and returning identical values - are executed differently by MongoDB: one uses the index as expected, while the other doesn't.
And my web application invokes the version that doesn't use indexes.
I'm obviously misunderstanding something fundamental here.
Index used fine:
> db.User.find({}, {_id:1}).sort({"lastRecord.importantValue" : -1}).limit(5).explain()
{
    "cursor" : "BtreeCursor lastRecord.importantValue_-1",
    "isMultiKey" : false,
    "n" : 5,
    "nscannedObjects" : 5,
    "nscanned" : 5,
    "nscannedObjectsAllPlans" : 5,
    "nscannedAllPlans" : 5,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 0,
    "nChunkSkips" : 0,
    "millis" : 0,
    "indexBounds" : {
        "lastRecord.importantValue" : [
            [
                {
                    "$maxElement" : 1
                },
                {
                    "$minElement" : 1
                }
            ]
        ]
    },
    "server" : "whatever"
}
Index not used:
> db.User.find({$query: {}, $orderby : {"lastRecord.importantValue": -1}}, {_id:1}).limit(5).explain()
{
    "cursor" : "BasicCursor",
    "isMultiKey" : false,
    "n" : 0,
    "nscannedObjects" : 67281,
    "nscanned" : 67281,
    "nscannedObjectsAllPlans" : 67281,
    "nscannedAllPlans" : 67281,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 0,
    "nChunkSkips" : 0,
    "millis" : 133,
    "indexBounds" : {},
    "server" : "whatever"
}
Hint doesn't help:
> db.User.find({$query: {}, $orderby : {"lastRecord.importantValue": -1}, $hint : {"lastRecord.importantValue" : -1}}, {_id:1}).limit(5).explain()
{
    "cursor" : "BasicCursor",
    // snip
}
However, the returned values are the same (as expected):
> db.User.find({}, {_id:1}).sort({"lastRecord.importantValue" : -1}).limit(5)
{ "_id" : NumberLong(500280899) }
{ "_id" : NumberLong(500335132) }
{ "_id" : NumberLong(500378261) }
{ "_id" : NumberLong(500072584) }
{ "_id" : NumberLong(500071366) }
> db.User.find({$query: {}, $orderby : {"lastRecord.importantValue": -1}}, {_id:1}).limit(5)
{ "_id" : NumberLong(500280899) }
{ "_id" : NumberLong(500335132) }
{ "_id" : NumberLong(500378261) }
{ "_id" : NumberLong(500072584) }
{ "_id" : NumberLong(500071366) }
The index is present (this one I created to test the simpler queries; the compound index is also still there):
> db.User.getIndexes()
[
    // snip other indexes
    {
        "v" : 1,
        "key" : {
            "lastRecord.importantValue" : -1
        },
        "ns" : "mydb.User",
        "name" : "lastRecord.importantValue_-1"
    }
]
Morphia code, just FYI (I'm not sure I can get the exact command it generates):
ds.find(User.class).filter("someField =", v1)
    .filter("lastRecord.importantValue !=", Double.NaN)
    .filter("lastRecord.otherValue >=", v2)
    .order("-lastRecord.importantValue")
    .limit(50);
Any ideas?
Edit 6-Jan:
Just noticed another instance of this in the logs:
TOKEN ip-10-212-234-60 Sun Jan 6 09:20:54 [conn249] query mydb.User query: { query: { someField: "eu", lastRecord.otherValue: { $gte: 1000 } }, orderby: { lastRecord.importantValue: -1 } } cursorid:9069510232503855502 ntoreturn:50 ntoskip:0 nscanned:2042 keyUpdates:0 locks(micros) r:118923 nreturned:50 reslen:344959 118ms
Note I have since removed the $ne from the query. So it executes in 118 ms and (if I interpret it right) scans only 2042 rows.
However, if I do the following from the console on the same server:
> db.User.find({$query: { someField: "eu", "lastRecord.otherValue": { $gte: 1000 } }, $orderby: { "lastRecord.importantValue": -1 } }).explain()
{
    "cursor" : "BasicCursor",
    "isMultiKey" : false,
    "n" : 0,
    "nscannedObjects" : 70308,
    "nscanned" : 70308,
    "nscannedObjectsAllPlans" : 70308,
    "nscannedAllPlans" : 70308,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 1,
    "nChunkSkips" : 0,
    "millis" : 847,
    "indexBounds" : {},
    "server" : "ip-whatever:27017"
}
So could it really be just a bug in explain?
Edit - further update 6-Jan:
On the other hand, on my local system (same indexes, including { someField : 1, "lastRecord.otherValue" : 1, "lastRecord.importantValue" : -1 }) I managed to obtain the following under load:
Sun Jan 06 17:43:56 [conn33] query mydb.User query: { query: { someField: "eu", lastRecord.otherValue: { $gte: 1000 } }, orderby: { lastRecord.importantValue: -1 } }
cursorid:76077040284571 ntoreturn:50 ntoskip:0 nscanned:183248 keyUpdates:0 numYields: 2318 locks(micros) r:285016301 nreturned:50 reslen:341500 148567ms
148567ms :(
In fact the problem is mixing up the two syntaxes.
According to the documentation ( http://docs.mongodb.org/manual/reference/operator/query/ ), the two query forms should not be mixed. When you use .explain() as in:
db.User.find({}, {_id:1}).sort({"lastRecord.importantValue" : -1}).limit(5).explain()
that is fine, but when you use this:
db.User.find({$query: {}, $orderby : {"lastRecord.importantValue": -1}}, {_id:1}).limit(5).explain()
you are applying .explain() to the wrapper syntax: with $query you should use $explain: 1 inside the wrapper document, not .explain() afterwards.
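For example (a sketch; with the wrapper syntax the explain request goes inside the outer document):
db.User.find({
    $query: {},
    $orderby: { "lastRecord.importantValue": -1 },
    $explain: 1   // request explain output via the wrapper, not .explain()
}, { _id: 1 }).limit(5)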
See also this question: MongoDB $query operator ignores index?
It is about much the same issue.

Nested queries Date range

I have a project where I embed date ranges in a document.
Something like the following:
{ "availabilities" : [
{ "start_date" : ISODate("2012-06-28T00:00:00Z"), "end_date" : ISODate("2012-10-03T00:00:00Z") },
{ "start_date" : ISODate("2012-10-08T00:00:00Z"), "end_date" : ISODate("2012-10-28T00:00:00Z") }]
}
What I need to do is find all the documents that are available during a certain period.
I use a query like this one:
db.faces.find({"availabilities" : {"$elemMatch" : {"$and" : [{"start_date" : {"$lte" : ISODate('2012-10-01 00:00:00 UTC')}}, {"end_date" : {"$gte": ISODate('2012-10-07 00:00:00 UTC')}}]}}})
But it won't use my indexes:
{
    "v" : 1,
    "key" : {
        "availabilities.start_date" : 1,
        "availabilities.end_date" : 1
    },
    "ns" : "faces_development.faces",
    "name" : "availabilities.start_date_1_availabilities.end_date_1"
}
When I do an explain on the query, the output for indexBounds is quite strange and I don't understand it:
{
    "cursor" : "BtreeCursor availabilities.start_date_1_availabilities.end_date_1",
    "isMultiKey" : true,
    "n" : 71725,
    "nscannedObjects" : 143019,
    "nscanned" : 143019,
    "nscannedObjectsAllPlans" : 143221,
    "nscannedAllPlans" : 143221,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 2,
    "nChunkSkips" : 0,
    "millis" : 1608,
    "indexBounds" : {
        "availabilities.start_date" : [
            [
                true,
                ISODate("2012-10-01T00:00:00Z")
            ]
        ],
        "availabilities.end_date" : [
            [
                {
                    "$minElement" : 1
                },
                {
                    "$maxElement" : 1
                }
            ]
        ]
    },
    "server" : "foobar.local:27017"
}
Current version of mongoDB: MongoDB shell version: 2.2.0
What must I do to make the query use the indexes properly?
I've tried to find related questions and bugs on MongoDB without great success.
This will scan less of the index in 2.3: https://jira.mongodb.org/browse/SERVER-3104
Meanwhile, I suggest moving each availability into its own document, instead of having many in one array, for more efficient querying.
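A sketch of that restructuring (collection and field names here are hypothetical; with one availability per document, both bounds apply to the same non-array document, so the compound index can constrain both fields):
// One availability per document, pointing back to its parent "face".
db.availabilities.insert({
    face_id: 123,   // hypothetical back-reference to the parent document
    start_date: ISODate("2012-06-28T00:00:00Z"),
    end_date: ISODate("2012-10-03T00:00:00Z")
})
db.availabilities.ensureIndex({ start_date: 1, end_date: 1 })

db.availabilities.find({
    start_date: { $lte: ISODate("2012-10-01T00:00:00Z") },
    end_date: { $gte: ISODate("2012-10-07T00:00:00Z") }
})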