MongoDB geospatial index on $center - mongodb

Collection Schema
{
"_id" : ObjectId("5d3562bf1b48d90ea4b06a74"),
"name" : "19",
"location" : {
"type" : "Point",
"coordinates" : [
50.0480208,
30.5239127
]
}
}
Indexes
> db.places.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.places"
},
{
"v" : 2,
"key" : {
"location" : "2dsphere"
},
"name" : "location_2dsphere",
"ns" : "test.places",
"2dsphereIndexVersion" : 3
}
There are 2 million documents stored in the collection.
First, I ran a query like this:
db.places.find({ location: {$geoWithin: { $center: [[60.0478308, 40.5237227], 10] } }})
But it takes 2 seconds, so I examined the query with explain().
> db.places.find({ location: {$geoWithin: { $center: [[60.0478308, 40.5237227], 10] } }}).explain('executionStats')
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.places",
"indexFilterSet" : false,
"parsedQuery" : {
"location" : {
"$geoWithin" : {
"$center" : [
[
60.0478308,
40.5237227
],
10
]
}
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"location" : {
"$geoWithin" : {
"$center" : [
[
60.0478308,
40.5237227
],
10
]
}
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1414213,
"executionTimeMillis" : 2093,
"totalKeysExamined" : 0,
"totalDocsExamined" : 2000000,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"location" : {
"$geoWithin" : {
"$center" : [
[
60.0478308,
40.5237227
],
10
]
}
}
},
"nReturned" : 1414213,
"executionTimeMillisEstimate" : 1893,
"works" : 2000002,
"advanced" : 1414213,
"needTime" : 585788,
"needYield" : 0,
"saveState" : 15681,
"restoreState" : 15681,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 2000000
}
},
"serverInfo" : {
"host" : "Johnui-iMac",
"port" : 27017,
"version" : "4.0.3",
"gitVersion" : "7ea530946fa7880364d88c8d8b6026bbc9ffa48c"
},
"ok" : 1
}
As you can see, the query stage is COLLSCAN.
I had already created an index on the location field, but it doesn't seem to be used.
So I created more indexes:
"v" : 2,
"key" : {
"location.coordinates" : 1
},
"name" : "location.coordinates_1",
"ns" : "test.places"
},
{
"v" : 2,
"key" : {
"location" : 1
},
"name" : "location_1",
"ns" : "test.places"
}
But that doesn't work either.
Is there an issue with my index configuration?

You seem to have created a 2dsphere index on your location field, but the MongoDB docs on $center specify that:
Only the 2d geospatial index supports $center.
Therefore, I suggest you create a 2d index on the location field, and the scan will be performed using this index.

Related

mongodb is not using an index for a find command

I have approximately 40M documents in a mongo collection. There is an index on the location.country field:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_"
},
//...
{
"v" : 2,
"key" : {
"location.country" : -1
},
"name" : "countriesIdx",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
//...
]
But queries don't use it:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.find({'location.country':'ghana'}).explain({verbosity: 'executionStats'})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "covid19.cases",
"indexFilterSet" : false,
"parsedQuery" : {
"location.country" : {
"$eq" : "ghana"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 195892,
"totalKeysExamined" : 0,
"totalDocsExamined" : 39264034,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 99032,
"works" : 39264036,
"advanced" : 0,
"needTime" : 39264035,
"needYield" : 0,
"saveState" : 39503,
"restoreState" : 39503,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 39264034
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "cluster-0-shard-00-01-vwhx6.mongodb.net",
"port" : 27017,
"version" : "4.4.8",
"gitVersion" : "83b8bb8b6b325d8d8d3dfd2ad9f744bdad7d6ca0"
},
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1629732226, 1),
"signature" : {
"hash" : BinData(0,"piKWDwLDv7FRcnwCe51PZDLR4UM="),
"keyId" : NumberLong("6958739380580122625")
}
},
"operationTime" : Timestamp(1629732226, 1)
}
Do I need to set up the index differently or do something else to get mongo to use the index? I have tried to hint that it should, but it still does a COLLSCAN. While the examples I've shown above are using mongosh, the behaviour is the same in my node app using mongoose.

MongoDB misuses GeoSpatialIndexes within CompoundIndexes when sorting

I go to the Mongo command line (version 4.4.1) and do
db.places.dropIndexes()
db.places.createIndex( { location : "2dsphere" , category1 : 1 } )
db.places.createIndex( { location : "2dsphere" , category2 : 1 } )
db.places.find({ location: { $near: { $geometry: { type: "Point", coordinates: [ 0, 0 ] } } } }).sort({ category2 : 1 }).explain()
The result shows that the index (location, category2) is not used, even though there is a perfect match for the query (find by location and sort by category2).
The winning plan shows the category1 index is used:
"indexName" : "location_2dsphere_category1_1",
db.places.find({ location: { $near: { $geometry: { type: "Point", coordinates: [ 0, 0 ] } } } }).sort({ category2 : 1 }).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "dev.places",
"indexFilterSet" : false,
"parsedQuery" : {
"location" : {
"$near" : {
"$geometry" : {
"type" : "Point",
"coordinates" : [
0,
0
]
}
}
}
},
"queryHash" : "8766F2A3",
"planCacheKey" : "9B5661A5",
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",
"indexVersion" : 2,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"location" : [ ],
"category1" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"location" : [
"[-9223372036854775807, -6917529027641081857]",
"[-6917529027641081855, -4611686018427387905]",
"[1, 2305843009213693951]",
"[2305843009213693953, 4611686018427387903]",
"[4611686018427387905, 6917529027641081855]",
"[6917529027641081857, 9223372036854775807]"
],
"category1" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category2" : 1
},
"indexName" : "location_2dsphere_category2_1",
"indexVersion" : 2,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"location" : "2dsphere",
"category2" : 1
},
"indexName" : "location_2dsphere_category2_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"location" : [ ],
"category2" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"location" : [
"[-9223372036854775807, -6917529027641081857]",
"[-6917529027641081855, -4611686018427387905]",
"[1, 2305843009213693951]",
"[2305843009213693953, 4611686018427387903]",
"[4611686018427387905, 6917529027641081855]",
"[6917529027641081857, 9223372036854775807]"
],
"category2" : [
"[MinKey, MaxKey]"
]
}
}
}
}
}
]
},
"serverInfo" : {
"host" : "XXXX",
"port" : 27017,
"version" : "4.4.1",
"gitVersion" : "ad91a93a5a31e175f5cbf8c69561e788bbc55ce1"
},
"ok" : 1
}
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",
See https://www.alexbevi.com/blog/2020/05/16/optimizing-mongodb-compound-indexes-the-equality-sort-range-esr-rule/.
$near is a range query and the index must reference the field being sorted on before fields that have range conditions on them to satisfy both range and sort requirements.
Neither of your indexes can be used to sort your query. MongoDB picks the first one since the usable part of them is just the geo index.
Since you haven't specified a distance with $near you are probably getting all documents that have the location field set via the index scan.

Improve slow query count mongodb

I'm trying to improve the performance of a count query (to calculate pagination to display on a screen) on a collection of 1138633 documents. The query examines 391232 documents and returns 364497, but it takes ~2 seconds to execute, which I think is too long.
My query looks like this:
db.myCollection.count({
"$or" : [
{
"field_1" : {
"$lte" : 1.0
}
},
{"field_1" : {
"$eq" : null
}
}
],
"field_2" : {
"$eq" : false
},
"field_3" : {
"$ne" : true
},
"field_4" : {
"$eq" : "fr-FR"
},
"field_5" : {
"$ne" : null
},
"field_6" : {
"$ne" : null
},
"field_7" : {
"$gte" : ISODate("2016-10-14T00:00:00.000Z")
}
})
field_1 is a number, field_2 and field_3 are booleans, field_5 is a string, and field_6 is an ObjectId that refers to a collection of 2 documents.
Here are my indexes (db.myCollection.getIndexes()):
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "db.myCollection"
},
{
"v" : 2,
"key" : {
"field_6" : 1,
"field_7" : -1
},
"name" : "field_6_1_field_7_-1",
"ns" : "db.myCollection",
"background" : true
},
{
"v" : 2,
"key" : {
"field_7" : 1
},
"name" : "field_7_1",
"background" : true,
"ns" : "db.myCollection"
},
{
"v" : 2,
"key" : {
"field_6" : 1
},
"name" : "field_6_1",
"ns" : "db.myCollection",
"background" : true
},
{
"v" : 2,
"key" : {
"field_1" : 1.0
},
"name" : "field_1_1",
"ns" : "db.myCollection"
}
]
I tried everything, such as forcing an index with hint and changing the order of the query (and the order of the compound index), but nothing works.
Does anyone have an idea of what I can try to improve the execution time of this query? Do you need more details, such as the executionStats output?
Thanks.
EDIT: More detail: I calculated how many documents match each clause, and here are my results:
field 6 : 391232
field 1 lte 1 :721005
field 1 eq null : 417625
field 5 : 819688
field 4: 1123301
field 2 : 1138620
field 7: 1138630 (all document)
field 3: 1138630 (all document)
I reordered my query in the above order and I get ~1.82 s (0.2 s gained xD).
I assume the problem is that my indexes may be wrong.
Which section of the explain output should I check for index details? Here is what I found in the execution plan about my indexes:
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 391232,
"executionTimeMillisEstimate" : 427,
"works" : 391234,
"advanced" : 391232,
"needTime" : 1,
"needYield" : 0,
"saveState" : 3060,
"restoreState" : 3060,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"field_6" : 1,
"field_7" : -1
},
"indexName" : "field_6_1_field_7_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"field_6" : [],
"field_7" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"field_6" : [
"[MinKey, null)",
"(null, MaxKey]"
],
"field_7" : [
"[new Date(9223372036854775807), new Date(1491350400000)]"
]
},
"keysExamined" : 391233,
"seeks" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}

$in slower when using indexed column

I am trying to optimise my query and have found that using $in on a non-indexed column appears to be faster than using it on an indexed column.
For example:
I have added an index on myCollection: {"entryVals.col1" : 1}.
To confirm:
db.myCollection.getIndexes()
returns:
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "myDb.myCollection"
},
{
"v" : 2,
"key" : {
"entryVals.col1" : 1
},
"name" : "entryVals.col1_1",
"ns" : "myDb.myCollection"
} ]
I then run a count with a query (printing the time taken) on both the indexed and non-indexed columns.
Count on indexed column
var a = new Date().getTime();
db.myCollection.count({"entryVals.col1": {$in:["a","b","c","d"]}});
new Date().getTime() - a;
returns
96 (time in ms)
Count on non-indexed column
var a = new Date().getTime();
db.myCollection.count({"entryVals.col2": {$in:["a","b","c","d"]}});
new Date().getTime() - a;
returns
60 (time in ms)
Please bear in mind that I ran the queries several times and took an average (there were few to no anomalies).
Is anyone able to help enlighten me as to why the query on the column that is indexed is slower please?
Thanks in advance.
Explains
Count on indexed column
db.myCollection.explain().count({"entryVals.col1": {$in:["a","b","c","d"]}})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myCollection",
"indexFilterSet" : false,
"parsedQuery" : {
"entryVals.col1" : {
"$in" : [
"a",
"b",
"c",
"d"
]
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"entryVals.col1" : 1
},
"indexName" : "entryVals.col1_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"entryVals.col1" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"entryVals.col1" : [
"[\"a\", \"a\"]",
"[\"b\", \"b\"]",
"[\"c\", \"c\"]",
"[\"d\", \"d\"]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "obfuscated",
"port" : obfuscated,
"version" : "3.4.6-1.7",
"gitVersion" : "obfuscated"
},
"ok" : 1
}
Count on non-indexed column
db.myCollection.explain().count({"entryVals.col2": {$in:["a","b","c","d"]}})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myCollection",
"indexFilterSet" : false,
"parsedQuery" : {
"entryVals.col2" : {
"$in" : [
"a",
"b",
"c",
"d"
]
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"entryVals.col2" : {
"$in" : [
"a",
"b",
"c",
"d"
]
}
},
"direction" : "forward"
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "obfuscated",
"port" : obfuscated,
"version" : "3.4.6-1.7",
"gitVersion" : "obfuscated"
},
"ok" : 1
}

MongoDB sort winningPlan overrides hint

I created a collection with three fields as described below. After that, I created an index on the second field and executed a search using the sort and hint operations.
Why does MongoDB set SORT as the winningPlan, even though I use a hint on the previously created index?
I believe it would be better to filter the data by the criteria first and then sort the result, right?
Collection
> db.values.find()
{ "_id" : ObjectId("5763ffebe5a81f569b1005e5"), "field1" : "A", "field2" : "B", "field3" : "C" }
Indexes
> db.values.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "peftest.values"
},
{
"v" : 1,
"key" : {
"field2" : 1
},
"name" : "field2_1",
"ns" : "peftest.values"
}
]
Query and Explain
> db.values.find({field2:"B"}).sort({field1:1}).hint({field2:1}).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "peftest.values",
"indexFilterSet" : false,
"parsedQuery" : {
"field2" : {
"$eq" : "B"
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"field1" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"field2" : 1
},
"indexName" : "field2_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"field2" : [
"[\"B\", \"B\"]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "apstrd14501d.intraservice.corp",
"port" : 27017,
"version" : "3.2.4",
"gitVersion" : "e2ee9ffcf9f5a94fad76802e28cc978718bb7a30"
},
"ok" : 1
}
I think the plan is what you expect, but you are looking at it from the wrong perspective :)
The input stage of the sort is an index scan, so the query plan uses the index first and then passes the resulting data to the sort.