Mongo compound index with geo location not working - mongodb

I am having a hard time with something that is supposed to be trivial.
I have the following profile document structure:
{
pid:"profileId",
loc : {
"lat" : 32.082156661684621,
"lon" : 34.813229013156551,
"locTime" : NumberLong(0)
}
age:29
}
A common use-case in my app is to retrieve nearby profiles filtered by age.
{ "loc" : { "$near" : [ 32.08290052711715 , 34.80888522811172] , "$maxDistance" : 179.98560115190784}, "age" : { "$gte" : 0 , "$lte" : 33}}
So I have created the following compound index:
{ 'loc':2d , age:1}
And no matter what I do, I can't make the query run with the created index (I also tried with hint).
This is the generated explain output for the query:
{
"cursor" : "GeoSearchCursor" ,
"isMultiKey" : false ,
"n" : 4 ,
"nscannedObjects" : 4 ,
"nscanned" : 4 ,
"nscannedObjectsAllPlans" : 4 ,
"nscannedAllPlans" : 4 ,
"scanAndOrder" : false ,
"indexOnly" : false ,
"nYields" : 0 ,
"nChunkSkips" : 0 ,
"millis" : 0 ,
"indexBounds" : { } ,
"allPlans" : [ { "cursor" : "GeoSearchCursor" , "n" : 4 , "nscannedObjects" : 4 , "nscanned" : 4 , "indexBounds" : { }
}
I am using mongodb version 2.4.4.
What am I doing wrong? Your answer is highly appreciated.

The explain output says "cursor" : "GeoSearchCursor". This indicates your query used a geospatial index.
See the following for details:
http://docs.mongodb.org/manual/reference/method/cursor.explain/
2d indexes support a compound index with only one additional field, as a suffix of the 2d index field.
http://docs.mongodb.org/manual/applications/geospatial-indexes
As @stennie mentioned in the comment on your question, the problem might be the ordering of the coordinates. They should be ordered longitude, latitude. If that doesn't work, try storing loc as an array with longitude as the first element and latitude as the second.
Here is a worked example:
I created three profile objects with location as array and the locTime separate from loc.
> db.profile.find()
{ "_id" : ObjectId("52cd54f1c43bb3a468b9fd0d"), "loc" : [ -6, 50 ], "age" : 29, "pid" : "001", "locTime" : NumberLong(0) }
{ "_id" : ObjectId("52cd5507c43bb3a468b9fd0f"), "loc" : [ -6, 53 ], "age" : 30, "pid" : "002", "locTime" : NumberLong(1) }
{ "_id" : ObjectId("52cd5515c43bb3a468b9fd10"), "loc" : [ -1, 51 ], "age" : 31, "pid" : "003", "loctime" : NumberLong(2) }
Finding using large distance and age
> db.profile.find({ "loc" : { "$near" : [ -1, 50] , "$maxDistance" : 5}, "age" : { "$gte" : 0 , "$lte" : 33}})
{ "_id" : ObjectId("52cd5515c43bb3a468b9fd10"), "loc" : [ -1, 51 ], "age" : 31, "pid" : "003", "loctime" : NumberLong(2) }
{ "_id" : ObjectId("52cd54f1c43bb3a468b9fd0d"), "loc" : [ -6, 50 ], "age" : 29, "pid" : "001", "locTime" : NumberLong(0) }
The explain shows the index is being used:
> db.profile.find({ "loc" : { "$near" : [ -1, 50] , "$maxDistance" : 5}, "age" : { "$gte" : 0 , "$lte" : 33}}).explain()
{
"cursor" : "GeoSearchCursor",
"isMultiKey" : false,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
},
}
Narrow the distance with the same age range
> db.profile.find({ "loc" : { "$near" : [ -1, 50] , "$maxDistance" : 1}, "age" : { "$gte" : 0 , "$lte" : 33}})
Here is the explain, again the index is used:
> db.profile.find({ "loc" : { "$near" : [ -1, 50] , "$maxDistance" : 1}, "age" : { "$gte" : 0 , "$lte" : 33}}).explain()
{
"cursor" : "GeoSearchCursor",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
},
}
Here are the indexes:
> db.profile.getIndices()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "test.profile",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"loc" : "2d",
"age" : 1
},
"ns" : "test.profile",
"name" : "loc_2d_age_1"
}
]

Related

Mongo Geospatial Index on Large Database - Not using index

I have a MongoDB database with over 150 million records, and for some reason, even with the correct index, I get very poor performance with basic geospatial queries:
db.regions.find({
loc: { $near: {
$geometry: {
type: "Point" ,
coordinates: [ 15.8775 , 49.2177 ]
},
$maxDistance: 1000,
$minDistance: 1
} } }).limit(1).explain();
The explain shows that the index is not being used:
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 4102,
"nscanned" : 4102,
"nscannedObjectsAllPlans" : 4102,
"nscannedAllPlans" : 4102,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 2001,
"nChunkSkips" : 0,
"millis" : 18252,
"indexBounds" : {
},
"server" : "N/A:27017",
"filterSet" : false
}
However, the indexes are definitely there, including one on a 2dsphere field:
> db.regions.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "hive.regions"
},
{
"v" : 1,
"key" : {
"checkin_id" : 1
},
"name" : "checkin_id_1",
"ns" : "hive.regions"
},
{
"v" : 1,
"key" : {
"bid" : 1
},
"name" : "bid_1",
"ns" : "hive.regions"
},
{
"v" : 1,
"key" : {
"loc" : "2dsphere"
},
"name" : "loc_2dsphere",
"ns" : "hive.regions",
"2dsphereIndexVersion" : 2
}
]
Quick Query for a basic sort:
> db.regions.find().sort({"checkin_id":1}).limit(1).pretty();
{
"_id" : ObjectId("56645ce6e5bfa89d1f8b4567"),
"checkin_id" : 51548290,
"created_at" : ISODate("2013-10-29T04:15:43Z"),
"loc" : {
"type" : "Point",
"coordinates" : [
-117.236,
33.1557
]
},
"suburb" : "",
"state_district" : "",
"county" : "United States of America",
"state" : "California",
"vid" : 0,
"user_id" : 133661,
"bid" : 9288,
"item_id" : 0
}
I see with this query (using explain), I get the correct indexBounds results:
> db.regions.find().sort({"checkin_id":1}).limit(1).explain();
{
"cursor" : "BtreeCursor checkin_id_1",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"checkin_id" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "XXXX:27017",
"filterSet" : false
}
Any idea what I am missing here? Why isn't it using any indexes?

Mongodb Compound index and sorting

I have this collection :
db.place.find() :
{
_id : "p1",
alterNames : ["abcd","abcD"],
population : 122
}
{
_id : "p2",
alterNames : ["qsdf","qsDF"],
population : 100
}
I want to find documents having alterNames starting with "ab" and sort them by population.
I created this index : {alterNames : 1, population : -1}
My query :
db.place.find({alterNames : /^ab/}).sort({population : -1}).limit(10).explain()
I was expecting to see "n" = "nScannedObjects" = 10
What I got :
"n" = 10
"nScannedObjects" = 4765
Did I miss something?
Edit :
Here is the full explain :
db.place.find({alterNames : /^pari/ }).sort({population : -1}).limit(10).explain()
"clauses" : [
{
"cursor" : "BtreeCursor alterNames_1_population_-1",
"isMultiKey" : true,
"n" : 10,
"nscannedObjects" : 4765,
"nscanned" : 4883,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"alterNames" : [
[
"pari",
"parj"
],
[
/^pari/,
/^pari/
]
],
"population" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
},
{
"cursor" : "BtreeCursor ",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"scanAndOrder" : true,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"alterNames" : [
[
"pari",
"parj"
],
[
/^pari/,
/^pari/
]
],
"population" : [
[
{
"$maxElement" : 1
},
{
"$minElement" : 1
}
]
]
}
}
],
"cursor" : "QueryOptimizerCursor",
"n" : 10,
"nscannedObjects" : 4765,
"nscanned" : 4883,
"nscannedObjectsAllPlans" : 4765,
"nscannedAllPlans" : 4883,
"scanAndOrder" : false,
"nYields" : 890,
"nChunkSkips" : 0,
"millis" : 396,
"server" : "localhost:27017",
"filterSet" : false
Your notation is confusing. I'm assuming your collection consists of documents that look like the two documents in the places array.
> db.test.find()
{ "_id" : "p1", "alterNames" : [ "abcd", "abcD" ], "population" : 122 }
{ "_id" : "p2", "alterNames" : [ "qsdf", "qsDF" ], "population" : 100 }
For a left-anchored regex like /^ab/, MongoDB converts the query to one that's actually a range query and can efficiently use the index
{ "alterNames" : /^ab/ } => { "alterNames" : { "$gte" : "ab", "$lt" : "ac" } }
Each value that matches the range, for example "abcd", has an index of population values below it for documents with an alterNames (multikey) value of "abcd". To return the matching documents in population-order, MongoDB has to externally sort the documents returned from each matching bucket. I believe that's the source of your higher nscannedObjects. If you check the explain (which would have been nice to include in its entirety), you should find scanAndOrder : true.

MongoDB Slow Query

I'm running a MongoDB query and it's taking too long. I'm querying the collection "play_sessions" for the data of 9 users, as seen in (1). My documents contain data for a gameplay session for a user, as seen in (2). I have an index on "user_id" and this index is being used, as seen in the .explain() output in (3). My indexes in the .stats() output are shown in (4).
The MongoDB version is 2.6.1. There are approximately 4 million entries in "play_sessions" and 43,000 distinct users.
This example query takes around 2 min and the actual query of 800 users takes a lot longer. I'd like to know why this query is slow and what I can do to speed it up.
(1) The query:
db.play_sessions.find({user_id: {$in: users}}, {play_data:-1}
(2) Example document:
{
"_id" : 1903200,
"score" : 1,
"user_id" : 60538,
"time" : ISODate("2014-02-12T03:49:59.919Z"),
"level" : 1,
"user_attempt_no" : 2,
"game_id" : 181,
"play_data" : [
**Some JSON in here**
],
"time_sec" : 7.989
}
(3) .explain() output
{
"cursor" : "BtreeCursor user_id_1",
"isMultiKey" : false,
"n" : 13724,
"nscannedObjects" : 13724,
"nscanned" : 13732,
"nscannedObjectsAllPlans" : 14128,
"nscannedAllPlans" : 14140,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 4463,
"nChunkSkips" : 0,
"millis" : 123631,
"indexBounds" : {
"user_id" : [
[
41930,
41930
],
...,
[
67112,
67112
]
]
},
"server" : "...",
"filterSet" : false
}
(4) .stats() output for the collection:
{
"ns" : "XXX.play_sessions",
"count" : 3957328,
"size" : 318453446112,
"avgObjSize" : 80471,
"storageSize" : 319917328096,
"numExtents" : 169,
"nindexes" : 10,
"lastExtentSize" : 2146426864,
"paddingFactor" : 1,
"systemFlags" : 1,
"userFlags" : 1,
"totalIndexSize" : 1962280880,
"indexSizes" : {
"_id_" : 184205280,
"game_id_1" : 167681584,
"user_id_1" : 113997968,
"user_id_1_game_id_1_level_1_time_1" : 288972544,
"game_id_1_level_1" : 141027824,
"game_id_1_level_1_user_id_1_time_1" : 301645344,
"user_id_1_game_id_1_level_1" : 228674544,
"game_id_1_level_1_user_id_1" : 245549808,
"user_id_1_user_attempt_no_1" : 135958704,
"user_id_1_time_1" : 154567280
},
"ok" : 1
}

why is mongodb hitting this index

Given that I have the following index on my collection in the database asd:
> db.system.indexes.find().pretty()
{ "v" : 1, "key" : { "_id" : 1 }, "ns" : "asd.test", "name" : "_id_" },
{
"v" : 1,
"key" : {
"a" : 1,
"b" : 1,
"c" : 1
},
"ns" : "asd.test",
"name" : "a_1_b_1_c_1"
}
As far as I know, in theory the order of the parameters queried is important in order to hit an index.
That is why I'm wondering how and why I'm actually hitting the index with this query:
> db.asd.find({c:{$gt: 5000},a:{$gt:5000}}).explain()
{
"cursor" : "BtreeCursor a_1_b_1_c_1",
"isMultiKey" : false,
"n" : 90183,
"nscannedObjects" : 90183,
"nscanned" : 94885,
"nscannedObjectsAllPlans" : 90288,
"nscannedAllPlans" : 94990,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 272,
"indexBounds" : {
"a" : [
[
5000,
1.7976931348623157e+308
]
],
"b" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
],
"c" : [
[
5000,
1.7976931348623157e+308
]
]
}
}
The order in which you pass fields in your query does not affect the index selection process. If it did, it'd be a very fragile system.
The order of fields in the index definition, on the other hand, is very important. Maybe you are confusing these two cases.

MongoDB indexOnly false

I have created an index for the category field, then execute a find for {category: 'Example'} with the field selection like {_id: 0, category: 1}. Running explain on this query shows indexOnly to be false. It's really slow. What am I missing here?
EDIT
Explain:
{
"cursor" : "BtreeCursor title",
"isMultiKey" : false,
"n" : 2642,
"nscannedObjects" : 2642,
"nscanned" : 2642,
"nscannedObjectsAllPlans" : 2642,
"nscannedAllPlans" : 2642,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"indexBounds" : {
"category" : [
[
"TV",
"TV"
]
],
"title" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "DeathDesk:27017"
}
EDIT 2
getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "test.media",
"name" : "_id_"
},
{
"v" : 1,
"key" : {
"category" : 1
},
"ns" : "test.media",
"name" : "category",
"dropDups" : false,
"background" : false
}
]