MongoDB indexing results on array fields - mongodb

I have a collection as { student_id :1, teachers : [ "....",...]}
steps done in sequence as : 1) find by {teachers : "gore"}
2) set the index as { student_id : 1 }
3) find by {teachers : "gore"}
4) set the index as { teachers : 1 }
5) find by {teachers : "gore"}
and the results(time taken) are not that much effective by indexing teachers(array) Please someone explain what is happening? I may be doing something wrong here please correct me. The results are as :
d.find({teachers : "gore"}).explain()
{ "cursor" : "BasicCursor", "nscanned" : 999999, "nscannedObjects" : 999999, "n" : 447055, "millis" : 1623, "nYields" : 0, "nChunkSkips" : 0, "isMultiKey" : false, "indexOnly" : false, "indexBounds" : {
} }
d.ensureIndex({student_id : 1})
d.find({teachers : "gore"}).explain() { "cursor" : "BasicCursor", "nscanned" : 999999, "nscannedObjects" : 999999, "n" : 447055, "millis" : 1300, "nYields" : 0, "nChunkSkips" : 0, "isMultiKey" : false, "indexOnly" : false, "indexBounds" : {
} }
d.ensureIndex({teachers : 1})
d.find({teachers : "gore"}).explain() { "cursor" : "BtreeCursor teachers_1", "nscanned" : 447055, "nscannedObjects" : 447055, "n" : 447055, "millis" : 1501, "nYields" : 0, "nChunkSkips" : 0, "isMultiKey" : true, "indexOnly" : false, "indexBounds" : { "teachers" : [ [ "gore", "gore" ] ] } }

Do you have the same data inserted over and over? The fact that it is showing a BtreeCursor is a positive, but the number of nscannedObjects is too large. Do you have the same data inserted over and over again? Is it possible that you have 447055 "gore" values? If so, thats why its taking such a long time.

Related

mongodb $nearSphere performance issue with huge data(over 200w+)

mongodb version is 2.6
the total records in the collection is over 200w
details as below
collection table structure as below:
{
"postid":NumberLong(97040),
"accountid":NumberLong(348670),
"location":{
"type":"Point",
"coordinates":[
112.56531,
32.425657
]
},
"type":NumberLong(1),
"countspreads":NumberLong(6),
"countavailablespreads":NumberLong(6),
"timestamp":NumberLong(1428131578)
}
collection index is 2dsphere:
{
"v" : 1,
"key" : {
"location" : "2dsphere"
},
"name" : "location_2dsphere",
"ns" : "fly.postspreads",
"2dsphereIndexVersion" : 2
},
** query command ** as below
db.example.find({"location":{"$nearSphere":{"$geometry":{"type":"Point","coordinates":[113.547821,22.18648]},"$maxDistance":50000, "$minDistance":0}}}).explain()
result
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 145255,
"nscannedObjects" : 1290016,
"nscanned" : 1290016,
"nscannedObjectsAllPlans" : 1290016,
"nscannedAllPlans" : 1290016,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 4087,
"indexBounds" : {
},
"server" : "DB-SH-01:27017",
"filterSet" : false
}
the value of $maxDistance is too large for now, from the above result, we will find that the command was scanned over 100w+ records and cost 4087ms.
if we reduce the value of $maxDistance to 500, new result as below:
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 21445,
"nscannedObjects" : 102965,
"nscanned" : 102965,
"nscannedObjectsAllPlans" : 102965,
"nscannedAllPlans" : 102965,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 634,
"indexBounds" : {
},
"server" : "DB-SH-01:27017",
"filterSet" : false
}
now the command was scanned over 10w+ records and cost 634ms, the query speed is too slow too. even if i reduce the value of $maxDistance to 0.0001, the scanned records over 8w+, and the time is about 600ms too.
the query time is unacceptable, but i didn't find where was wrong.

mongodb slow query with $near and other condition

I have a mongodb collection named rooms, and it has a 2d index for field location. I've queried like this:
db.rooms.find( { "location" : { "$near" : { "latitude" : 37.3356135, "longitude" : 127.12383030000001 } }, "status": "open", "updated" : { "$gt" : ISODate("2014-06-03T15:34:22.213Z") }}).explain()
The result:
{
"cursor" : "GeoSearchCursor",
"isMultiKey" : false,
"n" : 7,
"nscannedObjects" : 143247,
"nscanned" : 143247,
"nscannedObjectsAllPlans" : 143247,
"nscannedAllPlans" : 143247,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 1457,
"indexBounds" : {
},
"server" : "ip-10-162-39-56:27017",
"filterSet" : false
}
Sometimes it takes more than 2000ms. But if I remove the $gt condition for updated field, the query is fast, about 5~30ms.
> db.rooms.find( { "location" : { "$near" : { "latitude" : 37.3356135, "longitude" : 127.12383030000001 } }, "status": "open"}).explain()
{
"cursor" : "GeoSearchCursor",
"isMultiKey" : false,
"n" : 100,
"nscannedObjects" : 1635,
"nscanned" : 2400,
"nscannedObjectsAllPlans" : 1635,
"nscannedAllPlans" : 2400,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 22,
"indexBounds" : {
},
"server" : "ip-10-162-39-56:27017",
"filterSet" : false
}
I've tried compound index for {location:"2d", updated: -1} but it didn't work. How can I make this query faster?

MongoDB: Compound geospatial & ascending index issues

I have a compund index consisting of a simple ascending index and a geospatial index:
{ v: 1, key: { PlayerSortMask: 1, RandomGeoIdentifier: "2dsphere" }, ns: "JellyDev.Players", name: "Sort Mask + Random Geo ID", min: 0, max: 1 }
Now I have the following 2 problems:
1.
When I try to use the prefix index (querying only on the 1st index), I get a basic cursor used, and not the index I've created:
Query used:
{ "PlayerSortMask" : 2 }
Explain returned:
{ "cursor" : "BasicCursor", "isMultiKey" : false, "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 1, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "allPlans" : [{ "cursor" : "BasicCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 1, "indexBounds" : { } }], "server" : "widmore:10010" }
2.
Not sure if this is a problem or not, but when I query using both fields, using $eq & $near, I get the following explain:
{ "cursor" : "S2NearCursor", "isMultiKey" : true, "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "nscannedObjectsAllPlans" : 1, "nscannedAllPlans" : 6, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 0, "indexBounds" : { }, "nscanned" : NumberLong(6), "matchTested" : NumberLong(1), "geoMatchTested" : NumberLong(1), "numShells" : NumberLong(3), "keyGeoSkip" : NumberLong(5), "returnSkip" : NumberLong(0), "btreeDups" : NumberLong(0), "inAnnulusTested" : NumberLong(1), "allPlans" : [{ "cursor" : "S2NearCursor", "n" : 1, "nscannedObjects" : 1, "nscanned" : 6, "indexBounds" : { } }], "server" : "widmore:10010" }
And this is the query used to fetch the result:
{ "PlayerSortMask" : 2, "RandomGeoIdentifier" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [0.88434365572610107, 0.90583264916475525] } } } }
Now it says it uses the S2NearCursor, but it's obviously not the index I've created - as it has the name Sort Mask + Random Geo ID.
Any help would be greatly appreciated.
For problem 1 there's a known issue in MongoDB with compound geo indexes.
https://jira.mongodb.org/browse/SERVER-9257
The problem is fixed in 2.5.4 which is a beta release.
You can workaround this for now by creating an additional simple index on PlayerSortMask.
For problem 2, S2NearCursor means an index is being used. I think the explain "loses" the name and this is a known issue, but I can't remember the bug number.

Mongodb indexing

I have a query
db.messages.find({'headers.Date':{'$gt': new Date(2001,3,1)}},{'headers.From':1, _id:0}).sort({'headers.From':1})
I have set headers.From as index. Now which part of query will use this index ? i.e find part of query or sort part of query?
Explain output is
{
"cursor" : "BtreeCursor headers.From_1",
"isMultiKey" : false,
"n" : 83057,
"nscannedObjects" : 120477,
"nscanned" : 120477,
"nscannedObjectsAllPlans" : 120581,
"nscannedAllPlans" : 120581,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 250,
"indexBounds" : {
"headers.From" : [
[
{
"$minElement" : 1
},
{
"$maxElement" : 1
}
]
]
},
"server" : "Andrews-iMac.local:27017"
}
Any help is appreciated !!!
The index is being used for the sort part, not for the query, as your query doesn't use the headers.From field and your sort does.

slow Mongodb $near search with additional criteria

I have a collection, the data look like this:
{
"_id" : ObjectId("4e627655677c27cf24000000"),
"gps" : {
"lng" : 116.343079,
"lat" : 40.034283
},
"lat" : 1351672296
}
And I build a compound index:
{
"v" : 1,
"key" : {
"gps" : "2d",
"lat" : 1
},
"ns" : "test.user",
"name" : "gps__lat_1"
}
A pure $near query like below can be very fast ( < 20ms ):
>db.user.find({"gps":{"$near":{"lng":116.343079,"lat":40.034283}}}).explain()
{
"cursor" : "GeoSearchCursor",
"nscanned" : 100,
"nscannedObjects" : 100,
"n" : 100,
"millis" : 23,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
But the query with "lat" criteria is very slow ( 900ms+ ):
>db.user.find({"gps":{"$near":{"lng":116.343079,"lat":40.034283}},"lat":{"$gt":1351413167}}).explain()
{
"cursor" : "GeoSearchCursor",
"nscanned" : 3,
"nscannedObjects" : 3,
"n" : 3,
"millis" : 665,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
}
}
Can anybody explain this? Great thx!
I updated my Mongodb up to 2.2.0, the problem disappeared.
127.0.0.1/test> db.user.find({gps:{$near:[116,40]},lat:{$gt:1351722342}}).explain()
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
},
"server" : "zhangshenjiamatoMacBook-Air.local:27017"
}
From the explain above, it doesn't look like the geoIndex is being used at all - also, it looks like the above query didn't return any results!
If your query is using the 2d index, the explain should contain:
"cursor" : "GeoSearchCursor"
Can you check if upgrading to 2.2.0 really solved your issue? :)
Sundar