Why is MongoDB treating queries differently when served from same index? - mongodb

I have a collection where my documents look like:
{
"_id" : ObjectId("591dbe4a77d4ede22d765250"),
"name" : [
{
"de" : true,
"text" : "Brunhilde"
},
{
"sk" : true,
"text" : "Šimon"
}
]
}
I have defined an index as:
> db.names.createIndex({ 'name.de': 1, 'name.text': 1 }, { name: 'name_de', partialFilterExpression: { 'name.de': { $exists: true } }, collation: { locale: 'de' } });
When I do a query like:
> db.names.find({ 'name.de': true, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
The explain plan looks like:
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"name.text" : {
"$eq" : "Rüdi"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[true, true]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
It does an IXSCAN followed by a FETCH stage with a filter. I've already created a question about the filter here.
What is more interesting is what happens when I change just the matching part of the query to:
> db.names.find({ 'name.de': { $exists: true }, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
i.e. the expression 'name.de': { $exists: true } should still be a subset of the partialFilterExpression. As stated in the documentation:
To use the partial index, a query must contain the filter expression (or a modified filter expression that specifies a subset of the filter expression) as part of its query condition.
But the explain plan looks like this:
...
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"name.de" : {
"$exists" : true
}
},
{
"name.text" : {
"$eq" : "Rüdi"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[MinKey, MaxKey]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
...
As you can see, the index is used, but all of the filtering happens in the FETCH stage.
The question is: why is the filtering done in the FETCH stage, and what is so different between these two queries that MongoDB treats them differently?
Additionally, a sort query with $exists such as:
> db.names.find({ 'name.de': { $exists: true } }).sort({ 'name.text': 1 }).collation({ locale: "de" })
behaves the same way; all of the filtering and sorting is done after the IXSCAN stage:
...
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"name.text" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"name.de" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
}
...
It even produces incorrect results, since the index is not used for sorting.

Related

MongoDB Query plan not using compound index

I am trying out MongoDB with a dataset about company profile margins for learning purposes. Here is a sample document:
{
"parent_comp" : 1
"child_comp" : 101
"profit" : NumberLong(70320020)
}
I have created two indexes, i.e. one on the child_comp field and a compound index on parent_comp, child_comp, and last_outage_timestamp.
For the below query, I executed the explain command to see the query plan.
MongoDB Enterprise > db.data.find({ "$and" : [{ "parent_comp" : 951, "child_comp" : 9351, "profit" : { "$gte" : { "$numberLong" : "500000000" } } }, { "profit" : { "$lte" : { "$numberLong" : "1000000000" } } }] }).sort({"profit" : 1}).limit(3).explain();
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.data",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"queryHash" : "B570EF0C",
"planCacheKey" : "187EF74B",
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"profit" : 1
},
"indexName" : "profit_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"profit" : 1
},
"limitAmount" : 3,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"child_comp" : 1
},
"indexName" : "child_comp_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"child_comp" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"child_comp" : [
"[9351.0, 9351.0]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent_comp" : 1,
"child_comp" : 1,
"profit" : 1
},
"indexName" : "parent_comp_1_child_comp_1_profit_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent_comp" : [ ],
"child_comp" : [ ],
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent_comp" : [
"[951.0, 951.0]"
],
"child_comp" : [
"[9351.0, 9351.0]"
],
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
}
]
},
"serverInfo" : {
"host" : "localhost",
"port" : 27017,
"version" : "4.2.8",
"gitVersion" : "43d25888249164d76d5e04dd6cf38f6111e21f5f"
},
"ok" : 1
}
As you can see, the winning plan used a single-field index instead of the compound index. Could you please let me know why the compound index was not used?
Your query is sorting on profit, and the compound index does not include the field you are sorting on; hence, using the compound index would necessitate an additional sort stage.
The trade-offs and reasoning are further explained in the docs.
See also https://www.alexbevi.com/blog/2020/05/16/optimizing-mongodb-compound-indexes-the-equality-sort-range-esr-rule/.

Explain why results from mongo are being returned in reverse ObjectId order?

I have a list of news article items which I am tagging for entities, and topic tags.
my query
db["fmetadata"].find({'$and': [{'$text': {'$search': 'apple trump'}}, {'$or':
[{'entities': {'$elemMatch': {'$regex': 'apple|trump'}}}, {'tags': {'$elemMatch': {'$regex': 'apple|trump'}}}]}]}).explain()
query plan
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "dfabric.fmetadata",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
{
"$text" : {
"$search" : "apple trump",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "title_text_tags_text_entities_text",
"parsedTextQuery" : {
"terms" : [
"appl",
"trump"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
}
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "fabric-dev",
"port" : 27017,
"version" : "4.0.2",
"gitVersion" : "fc1573ba18aee42f97a3bb13b67af7d837826b47"
},
"ok" : 1
}
I see that
["queryPlanner"]["winningPlan"]["inputStage"]["inputStage"]["inputStages"]
"stage": "IXSCAN"
"direction": "backward"
Can someone please explain why?
I was developing a pagination cursor using the > lastId and limit technique. But since results are being returned backwards, I have to use < lastId, which seems counterintuitive.
If I don't sort my results in the natural order, can it be guaranteed that it will always be backwards/reverse?
Edit: as mentioned in the comment below
My objective here is to get an intuition as to why the index was scanned backwards: is it the way I formulated my query, or something else entirely? The ordering itself (forwards or backwards) doesn't matter as much as its consistency, i.e. whether it will always remain forwards or always remain backwards.
I came across this question on Stack Overflow, and I believe the accepted answer, together with the comments below it, satisfactorily gives me the intuition I was looking for.
How does MongoDB sort records when no sort order is specified?

Mongo date range index with filters

We have the below query
db.Comment.find(
{
$and: [
{ reportCount: { $gt: 0 } },
{ assignee: { $exists: false } },
{ creationDate: { $gt: new Date(1507831097809) } },
{ creationDate: { $lt: new Date(1508522297966) } },
{ siteId: 'MAIN' },
{ parent: { $exists: false } },
{ status: 'ACTIVE' }
]
})
.sort({ creationDate: 1 })
And we have an index
{
"v" : 2,
"key" : {
"creationDate" : 1,
"reportCount" : 1,
"label" : 1
}
}
Here are explain results:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myNameSpace",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"siteId" : 1.0,
"updatedDate" : 1.0,
"label" : 1.0
},
"indexName" : "siteId_1_updatedDate_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"siteId" : [],
"updatedDate" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"siteId" : [
"[\"MAIN\", \"MAIN\"]"
],
"updatedDate" : [
"[MinKey, MaxKey]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
}
]
},
"inputStage" : {
"stage" : "AND_SORTED",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 19,
"executionTimeMillis" : 8,
"totalKeysExamined" : 533,
"totalDocsExamined" : 56,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"nReturned" : 19,
"executionTimeMillisEstimate" : 0,
"works" : 534,
"advanced" : 19,
"needTime" : 513,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 56,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 56,
"executionTimeMillisEstimate" : 0,
"works" : 533,
"advanced" : 56,
"needTime" : 476,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 533,
"seeks" : 477,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"ok" : 1.0
}
The query is still taking 700-800 ms to return the data. How can I change the index to make the query run faster? Please disregard the figures "keysExamined" : 533 and "seeks" : 477; this is just test data.
It looks like it's using an index, but only the first field in the index? Also, isMultiKey is false?
A few key points from the explain plan output:
The query addresses the following attributes: siteId, status, creationDate, reportCount, assignee, parent
The winning plan has two stages:
IXSCAN uses creationDate_1_reportCount_1_label_1; it uses indexed lookups on creationDate and reportCount to identify 56 documents, which are then forwarded to the FETCH stage
FETCH receives 56 documents from the IXSCAN stage and then interrogates these documents to apply the siteId, status, assignee and parent filters. This interrogation causes 37 documents to be discarded, resulting in 19 documents being returned.
So, your index covers just 2 of the 6 attributes in your query and the remaining 4 attributes in your query are applied by examining the documents not the index. If you want this query to be fully index covered then create the following index:
db.collection.createIndex(
{siteId: 1, status: 1, creationDate: 1, reportCount: 1, assignee: 1, parent: 1}
)
If you re-run with this index in place then you should find that (a) MongoDB chooses this index and (b) the number of documents forwarded by the IXSCAN stage is the same as the number of documents returned by your find call.
I say "should find" because there are other aspects here which might result in MongoDB choosing a different index e.g. use of $nor and the sort stage (creationDate: 1). I would recommend tweaking the index and running with explain 'on' after each tweak and looking for these key items in the executionStats sub document:
"nReturned"
"totalKeysExamined"
"totalDocsExamined"
A simple rule of thumb is this: the closer totalKeysExamined is to nReturned and the closer totalDocsExamined is to zero ... the better your index coverage.
There is also the question of the cost of an index (in terms of impact on write times and index storage), so I'd suggest considering your non-functional requirements - can your desired elapsed times be achieved without full index coverage? If not, then you should proceed with empirical testing, but be prepared to tweak your choice in response to what the explain() output tells you.

MongoDB use index with $nin seems not to work in combination with $regex

It seems that my index in my MongoDB is not correct.
I have created 3 indexes. These:
{
_id: 1
}
{
isbn: 1
}
{
_id: 1,
isbn: 1
}
When doing a query with isbn or _id, it works perfectly, even with both isbn and _id. For example:
db.getCollection('books').find({
isbn: {
$regex: '^978048627.*'
},
_id: 'vGXejKQH5kw8Kfutk'
}
needs around 3ms.
But let's now say I want to search for an ISBN and need to exclude some _ids - I do this:
db.getCollection('books').find({
isbn: {
$regex: '^97804862731.*'
},
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Now it's not working as it should. The query took more than 10 seconds!
When I do an isbn search without $regex but with $nin, it works perfectly - again around 3ms for the query. Example:
db.getCollection('books').find({
isbn: '9780486273136',
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Am I doing something wrong? And why is the index not working as it should?
Here is the .explain() output when querying the 10 seconds query:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}
Solution
My solution - though I do not know why it works - is to use $and and $ne instead of $nin.
My query looks like this now:
db.getCollection('books').find({isbn:{$regex: '^97804862731.*'}, $and: [
{
_id: {
$ne: 'vGXejKQH5kw8Kfutk'
}
},
{
_id: {
$ne: 'another-id'
}
}
]})
and it takes just around 3ms.
Maybe someone can explain why this happens?
The explain() of this query
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}

mongodb find $text search query is changed

So I am running into some weird behavior in my mongo environment when using the $text operator. I am getting results back when I am expecting none. I would expect a search for the string ABCD to match, but there are no entries that contain ABCDE. A wildcard text index is defined ('$**': 'text').
As you can see in the explain output, the parsed text query modifies what I have in $search. Any ideas about what might be going on?
> db.catalogentries.find({$text: {$search: 'abcde'}}).pretty().explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.catalogentries",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "abcde",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
},
"winningPlan" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "$**_text",
"parsedTextQuery" : {
"terms" : [
"abcd"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "TEXT_OR",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "$**_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "mongo01",
"port" : 27017,
"version" : "3.2.4",
"gitVersion" : "e2ee9ffcf9f5a94fad76802e28cc978718bb7a30"
},
"ok" : 1
}
It should also be noted that if I search for ACBDF, ABCDD, or ABC, I get no results, as expected.
If your search term isn't an actual word, then you should add $language: 'none' to your query to disable word stemming that can lead to unexpected results like this.
So change your query to:
db.catalogentries.find({$text: {$search: 'abcde', $language: 'none'}})