MongoDB misuses GeoSpatialIndexes within CompoundIndexes when sorting - mongodb

I go to the Mongo command line (version 4.4.1) and do
db.places.dropIndexes()
db.places.createIndex( { location : "2dsphere" , category1 : 1 } )
db.places.createIndex( { location : "2dsphere" , category2 : 1 } )
db.places.find({ location: { $near: { $geometry: { type: "Point", coordinates: [ 0, 0 ] } } } }).sort({ category2 : 1 }).explain()
The result shows that the index (location, category2) is not used, event though there is a perfect match for the query (find by location and sort by category2).
The winning plan shows the category1 index is used:
"indexName" : "location_2dsphere_category1_1",
db.places.find({ location: { $near: { $geometry: { type: "Point", coordinates: [ 0, 0 ] } } } }).sort({ category2 : 1 }).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "dev.places",
"indexFilterSet" : false,
"parsedQuery" : {
"location" : {
"$near" : {
"$geometry" : {
"type" : "Point",
"coordinates" : [
0,
0
]
}
}
}
},
"queryHash" : "8766F2A3",
"planCacheKey" : "9B5661A5",
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",
"indexVersion" : 2,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"location" : [ ],
"category1" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"location" : [
"[-9223372036854775807, -6917529027641081857]",
"[-6917529027641081855, -4611686018427387905]",
"[1, 2305843009213693951]",
"[2305843009213693953, 4611686018427387903]",
"[4611686018427387905, 6917529027641081855]",
"[6917529027641081857, 9223372036854775807]"
],
"category1" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category2" : 1
},
"indexName" : "location_2dsphere_category2_1",
"indexVersion" : 2,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"location" : "2dsphere",
"category2" : 1
},
"indexName" : "location_2dsphere_category2_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"location" : [ ],
"category2" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"location" : [
"[-9223372036854775807, -6917529027641081857]",
"[-6917529027641081855, -4611686018427387905]",
"[1, 2305843009213693951]",
"[2305843009213693953, 4611686018427387903]",
"[4611686018427387905, 6917529027641081855]",
"[6917529027641081857, 9223372036854775807]"
],
"category2" : [
"[MinKey, MaxKey]"
]
}
}
}
}
}
]
},
"serverInfo" : {
"host" : "XXXX",
"port" : 27017,
"version" : "4.4.1",
"gitVersion" : "ad91a93a5a31e175f5cbf8c69561e788bbc55ce1"
},
"ok" : 1
}
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"category2" : 1
},
"memLimit" : 104857600,
"type" : "simple",
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"location" : "2dsphere",
"category1" : 1
},
"indexName" : "location_2dsphere_category1_1",

See https://www.alexbevi.com/blog/2020/05/16/optimizing-mongodb-compound-indexes-the-equality-sort-range-esr-rule/.
$near is a range query and the index must reference the field being sorted on before fields that have range conditions on them to satisfy both range and sort requirements.
Neither of your indexes can be used to sort your query. MongoDB picks the first one since the usable part of them is just the geo index.
Since you haven't specified a distance with $near you are probably getting all documents that have the location field set via the index scan.

Related

MongoDB Query plan not using compound index

I am trying MongoDB with a dataset about the company profile margin for learning purpose. Here is the sample document
{
"parent_comp" : 1
"child_comp" : 101
"profit" : NumberLong(70320020)
}
I have created two indexes i.e one on child_comp field and the other one is a compound index with parent_comp, child_comp, and last_outage_timestamp.
For the below query, I executed the explain command to see the query plan.
MongoDB Enterprise > db.data.find({ "$and" : [{ "parent_comp" : 951, "child_comp" : 9351, "profit" : { "$gte" : { "$numberLong" : "500000000" } } }, { "profit" : { "$lte" : { "$numberLong" : "1000000000" } } }] }).sort({"profit" : 1}).limit(3).explain();
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.data",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"queryHash" : "B570EF0C",
"planCacheKey" : "187EF74B",
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"profit" : 1
},
"indexName" : "profit_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"profit" : 1
},
"limitAmount" : 3,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"child_comp" : 1
},
"indexName" : "child_comp_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"child_comp" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"child_comp" : [
"[9351.0, 9351.0]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent_comp" : 1,
"child_comp" : 1,
"profit" : 1
},
"indexName" : "parent_comp_1_child_comp_1_profit_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent_comp" : [ ],
"child_comp" : [ ],
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent_comp" : [
"[951.0, 951.0]"
],
"child_comp" : [
"[9351.0, 9351.0]"
],
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
}
]
},
"serverInfo" : {
"host" : "localhost",
"port" : 27017,
"version" : "4.2.8",
"gitVersion" : "43d25888249164d76d5e04dd6cf38f6111e21f5f"
},
"ok" : 1
}
As you can see winning plan used single index instead of compound index. So could you please let me know why compound index was not used.
Your query is sorting on profit, and the compound index does not include the field you are sorting on hence using the compound index would necessitate an additional sort stage.
The trade-offs and reasoning is further explained in the docs.
See also https://www.alexbevi.com/blog/2020/05/16/optimizing-mongodb-compound-indexes-the-equality-sort-range-esr-rule/.

Explain why results from mongo are being returned in reverse ObjectId order?

I have a list of news article items which I am tagging for entities, and topic tags.
my query
db["fmetadata"].find({'$and': [{'$text': {'$search': 'apple trump'}}, {'$or':
[{'entities': {'$elemMatch': {'$regex': 'apple|trump'}}}, {'tags': {'$elemMatch': {'$regex': 'apple|trump'}}}]}]}).explain()
query plan
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "dfabric.fmetadata",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
{
"$text" : {
"$search" : "apple trump",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "title_text_tags_text_entities_text",
"parsedTextQuery" : {
"terms" : [
"appl",
"trump"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
}
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "fabric-dev",
"port" : 27017,
"version" : "4.0.2",
"gitVersion" : "fc1573ba18aee42f97a3bb13b67af7d837826b47"
},
"ok" : 1
}
I see that
["queryPlanner"]["winningPlan"]["inputStage"]["inputStage"]["inputStages"]
"stage": "IXSCAN"
"direction": "backward"
Can this please be explained why?
I was developing a pagination cursor using >lastId, and limit technique. But since, results are being returned backwards, I have to use < lastId which seems counterintuitive.
If I don't sort my results in the natural order, can it be guaranteed that it will always be backwards/reverse?
Edit: as mentioned in the comment below
My objective here is to get the intuition as to why the index was scanned backwards- is it the way I formulated my query? or something else entirely? The ordering- forwards or backwards doesn't matter as much as the consistency of it remaining always so does- either always forwards or vice versa
I came across this question on stackoverflow, and I believe the accepted answer, with the comments below satisfactorily gives me the intuition I was looking for.
How does MongoDB sort records when no sort order is specified?

Mongo date range index with filters

We have the below query
db.Comment.find(
{
$and: [
{ reportCount: { $gt: 0 } },
{ assignee: { $exists: false } },
{ creationDate: { $gt: new Date(1507831097809) } },
{ creationDate: { $lt: new Date(1508522297966) } },
{ siteId: 'MAIN' },
{ parent: { $exists: false } },
{ status: 'ACTIVE' }
]
})
.sort({ creationDate: 1 })
And we have an index
{
"v" : 2,
"key" : {
"creationDate" : 1,
"reportCount" : 1,
"label" : 1
}
}
Here are explain results:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myNameSpace",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"siteId" : 1.0,
"updatedDate" : 1.0,
"label" : 1.0
},
"indexName" : "siteId_1_updatedDate_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"siteId" : [],
"updatedDate" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"siteId" : [
"[\"MAIN\", \"MAIN\"]"
],
"updatedDate" : [
"[MinKey, MaxKey]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
}
]
},
"inputStage" : {
"stage" : "AND_SORTED",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 19,
"executionTimeMillis" : 8,
"totalKeysExamined" : 533,
"totalDocsExamined" : 56,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"nReturned" : 19,
"executionTimeMillisEstimate" : 0,
"works" : 534,
"advanced" : 19,
"needTime" : 513,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 56,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 56,
"executionTimeMillisEstimate" : 0,
"works" : 533,
"advanced" : 56,
"needTime" : 476,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 533,
"seeks" : 477,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"ok" : 1.0
}
The query is still taking 700-800 ms to return the data. How can I change the index to make the query run faster? Don't consider "keysExamined" : 533, "seeks" : 477, This data. This is just test data.
Looks like its using an index but only the first field in the index? Also multuKey is false?
A few key points from the explain plan output:
The query addresses the following attributes: siteId, status, creationDate, reportCount, assignee, parent
The winning plan has two stages:
IX_SCAN uses creationDate_1_reportCount_1_label_1, this uses indexed lookups on creationDate and reportCount to identify 56 documents which are then forwarded to the FETCH stage
FETCH receives 56 documents from the IX_SCAN stage and then interrogates these documents to apply the siteId, status, assignee and parent filters. This interrogation causes 37 documents to be discarded resulting in 19 document to be returned.
So, your index covers just 2 of the 6 attributes in your query and the remaining 4 attributes in your query are applied by examining the documents not the index. If you want this query to be fully index covered then create the following index:
db.collection.createIndex(
{siteId: 1, status: 1, creationDate: 1, reportCount: 1, assignee: 1, parent: 1}
)
If you re run with this index in place then you should find that (a) MongoDB chooses this index and (b) the number of documents forwarded by the IX_SCAN stage is the same as the number of documents returned by your find call.
I say "should find" because there are other aspects here which might result in MongoDB choosing a different index e.g. use of $nor and the sort stage (creationDate: 1). I would recommend tweaking the index and running with explain 'on' after each tweak and looking for these key items in the executionStats sub document:
"nReturned"
"totalKeysExamined"
"totalDocsExamined"
A simple rule of thumb is this: the closer totalKeysExamined is to nReturned and the closer totalDocsExamined is to zero ... the better your index coverage.
There is also the question of the cost of an index (in terms of impact on write times and index storage) so I'd suggest considering your non functional requirements - can your desired elapsed times be achieved without full index coverage? If not, then you should proceed with empirical testing but be prepared to tweak your choice in reponse to what the explain() output tells you.

MongoDB use index with $nin seems not to work in combination with $regex

It seems that my index in my MongoDB is not correct.
I have created 3 indexes. These:
{
_id: 1
}
{
isbn: 1
}
{
_id: 1,
isbn: 1
}
When doing a query with isbn or _id its working perfect. Even with isbn and _id. For example:
db.getCollection('books').find({
isbn: {
$regex: '^978048627.*'
},
_id: 'vGXejKQH5kw8Kfutk'
}
needs around 3ms.
But lets now say I want to search for an ISBN and need to exclude some _ids - I do this:
db.getCollection('books').find({
isbn: {
$regex: '^97804862731.*'
},
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Now its not working as it should. The query took more then 10 seconds!
When I do a isbn search without $regex but with $nin its works perfect - again around 3ms for the query. Example:
db.getCollection('books').find({
isbn: '9780486273136',
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Am I doing something wrong ? And why the index is not working correctly as it should ?
Here is the .explain() output when querying the 10 seconds query:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}
Solution
My solution - I do not know why - but is to use $and and $ne instead of $nin.
My query looks like this now:
db.getCollection('books').find({isbn:{$regex: '^97804862731.*'}, $and: [
{
_id: {
$ne: 'vGXejKQH5kw8Kfutk'
}
},
{
_id: {
$ne: 'another-id'
}
}
]})
and just takes around 3ms
Maybe someone can explain how this can happen ?
The explain() of this query
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}

MongoDB $or query against itself much faster

I have a mongo instance with 16m documents in a collection. I was writing a query to search for one of the (indexed) fields and I'm getting some weird results, which I cannot explain.
If I execute a query directly like:
find({ "$and" : [ { "ipAddr" : { "$regex" : "^01:172"}} , { "active" : true}]}).limit(100).sort({ "_id" : 1})
or even adding a pointless $or to the query:
find({ "$and" : [ { "$or" : [ { "ipAddr" : { "$regex" : "^01:172"}}]} , { "active" : true}]}).limit(100).sort({ "_id" : 1})
It returns
Fetched 3 record(s) in 71673ms
However, if I use an $or against itself like:
find({ "$and" : [ { "$or" : [ { "ipAddr" : { "$regex" : "^01:172"}} , { "ipAddr" : { "$regex" : "^01:172"}}]} , { "active" : true}]}).limit(100).sort({ "_id" : 1})
It returns:
Fetched 3 record(s) in 4ms
So a big performance difference. From inspecting the explain() on the queries, I could not determine why such a large performance difference exists. Can anyone shed light on what I'm missing or what mongo is doing differently between these?
Explain() on single $or which takes >60000ms
find({ "$and" : [ { "$or" : [ { "ipAddr" : { "$regex" : "^01:172"}}]} , { "active" : true}]}).limit(100).sort({ "_id" : 1}).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "CLS-TEST.Leases",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"active" : {
"$eq" : true
}
},
{
"ipAddr" : /^01:172/
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"active" : {
"$eq" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"ipAddr" : 1
},
"indexName" : "ipAddr_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"ipAddr" : [
"[\"01:172\", \"01:173\")",
"[/^01:172/, /^01:172/]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ipAddr" : /^01:172/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"sessionId" : 1,
"updateTime" : 1
},
"indexName" : "active_1_sessionId_1_updateTime_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"sessionId" : [
"[MinKey, MaxKey]"
],
"updateTime" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ipAddr" : /^01:172/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"clientId" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_clientId_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"clientId" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"ipAddr" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_ipAddr_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"ipAddr" : [
"[\"01:172\", \"01:173\")",
"[/^01:172/, /^01:172/]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ipAddr" : /^01:172/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"macAddress" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_macAddress_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"macAddress" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ipAddr" : /^01:172/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"remoteId" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_remoteId_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"remoteId" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"active" : {
"$eq" : true
}
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"serverInfo" : {
"host" : "",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}
Explain() on $or against itself which takes <50ms
find({ "$and" : [ { "$or" : [ { "ipAddr" : { "$regex" : "^01:172"}} , { "ipAddr" : { "$regex" : "^01:172"}}]} , { "active" : true}]}).limit(100).sort({ "_id" : 1}).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "CLS-TEST.Leases",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
{
"active" : {
"$eq" : true
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"active" : {
"$eq" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"ipAddr" : 1
},
"indexName" : "ipAddr_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"ipAddr" : [
"[\"01:172\", \"01:173\")",
"[/^01:172/, /^01:172/]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"sessionId" : 1,
"updateTime" : 1
},
"indexName" : "active_1_sessionId_1_updateTime_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"sessionId" : [
"[MinKey, MaxKey]"
],
"updateTime" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"clientId" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_clientId_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"clientId" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"ipAddr" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_ipAddr_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"ipAddr" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"macAddress" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_macAddress_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"macAddress" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1,
"remoteId" : 1,
"startTime" : -1,
"_id" : -1
},
"indexName" : "active_1_remoteId_1_startTime_-1__id_-1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[true, true]"
],
"remoteId" : [
"[MinKey, MaxKey]"
],
"startTime" : [
"[MaxKey, MinKey]"
],
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"ipAddr" : /^01:172/
},
{
"ipAddr" : /^01:172/
}
]
},
{
"active" : {
"$eq" : true
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"serverInfo" : {
"host" : "",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}
You might note that none of the index selections include the combination of "active" and "ipAddr", which would be the useful index to define here.
In short, the "slower" query is using just the index for "ipAddr" and therefore it's taking more work to then "filter" out the { "active": true } entries.
Clearly when the other index selection(s) are using the "active" key with those bounds there are less results being passed to the subsequent filter on the regex pattern. You seem to have quite a few indexes here, and none of them are really optimal for the query.
I'll give you props for at least running the "explain" output on either query, but if you look closely then you should see that the "slow" query "incorrectly" chooses the "ipAddr" index thinking it to be optimal. It might not be, but it's a reasonable assumption for the optimiser to make considering an "anchored" regex in use.
The $or forces "index intersection", and it's smart enough to not do that when there is only "one" argument in the $or. "Two" arguments makes that happen, and the optimiser takes another "guess" by looking for an index preceeding with your other query condition ( the "active" value ).
That makes sense, since this is now running "two" queries from which it wil "intersect" the results, and therefore any conditions outside of the $or statement would be the logical choice to optimally choose an index for.
Since the returned results from these is likely smaller, it's therefore faster to "filter" out the regex match than to look at all the regex results and filter out the "active" values.
Therefore the "best" index to define for this query would be:
.createIndex({ "active": 1, "ipAddr": 1 })
And then the results from either query would be consistent, providing of course that the optimiser does not get confused by other indexes present and chooses that one. To force the index selection, use .hint()