Background
I have a collection with the following schema:
{ appId, createdAt, lastSeen, lastHeard, deleted, ...otherFields }
and the following indexes:
{ appId: 1, deleted: 1, lastSeen: 1 }
{ appId: 1, deleted: 1, lastHeard: 1 }
{ appId: 1, createdAt: 1, deleted: 1 }
{ appId: 1, deleted: 1, createdAt: 1, lastSeen: 1, lastHeard: 1 }
In my application I have an aggregation:
db.getCollection('client_users').aggregate([
{
$match: {
deleted: false,
appId: 'appid',
$or: [
{ createdAt: { $gt: new Date('2020-10-19T17:00:00.000Z') } },
{ lastSeen: { $gt: new Date('2020-10-19T17:00:00.000Z') } },
{ lastHeard: { $gt: new Date('2020-10-19T17:00:00.000Z') } },
]
}
},
{
$group: {
_id: '$geoLocation.city',
count: {
$sum: 1
}
}
},
{
$sort: {
count: -1
}
}
]);
My intention was to use the first three indexes from the above for this aggregation, as I understand that the $or query is parsed into 3 separate queries. However, from the explain output, the winning plan uses the fourth index ({ appId: 1, deleted: 1, createdAt: 1, lastSeen: 1, lastHeard: 1 }) for the last 2 clauses:
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"appId" : 1,
"createdAt" : 1,
"deleted" : 1
},
"indexName" : "appId_1_createdAt_1_deleted_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"appId" : [],
"createdAt" : [],
"deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"appId" : [
"[\"appid\", \"appid\"]"
],
"createdAt" : [
"(new Date(1603126800000), new Date(9223372036854775807)]"
],
"deleted" : [
"[false, false]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"appId" : 1,
"deleted" : 1,
"createdAt" : 1,
"lastSeen" : 1,
"lastHeard" : 1
},
"indexName" : "appId_1_deleted_1_createdAt_1_lastSeen_1_lastHeard_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"appId" : [],
"deleted" : [],
"createdAt" : [],
"lastSeen" : [],
"lastHeard" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"appId" : [
"[\"appid\", \"appid\"]"
],
"deleted" : [
"[false, false]"
],
"createdAt" : [
"[MinKey, MaxKey]"
],
"lastSeen" : [
"[MinKey, MaxKey]"
],
"lastHeard" : [
"(new Date(1603126800000), new Date(9223372036854775807)]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"appId" : 1,
"deleted" : 1,
"createdAt" : 1,
"lastSeen" : 1,
"lastHeard" : 1
},
"indexName" : "appId_1_deleted_1_createdAt_1_lastSeen_1_lastHeard_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"appId" : [],
"deleted" : [],
"createdAt" : [],
"lastSeen" : [],
"lastHeard" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"appId" : [
"[\"appid\", \"appid\"]"
],
"deleted" : [
"[false, false]"
],
"createdAt" : [
"[MinKey, MaxKey]"
],
"lastSeen" : [
"(new Date(1603126800000), new Date(9223372036854775807)]"
],
"lastHeard" : [
"[MinKey, MaxKey]"
]
}
}
]
}
},
Which is not what I want. What's stranger is that when I try it with only 1 of the clauses, as in this $match stage:
$match: {
deleted: false,
appId: 'appid',
lastSeen: {$gt: new Date('2020-10-19T17:00:00.000Z') },
}
It uses the correct index ({ appId: 1, deleted: 1, lastSeen: 1 }). I know this from the explain output and from timing the actual aggregation. Specifically, running it with no hint, or with hint: appId_1_deleted_1_lastSeen_1, takes about a third of the time it takes with hint: appId_1_deleted_1_createdAt_1_lastSeen_1_lastHeard_1. This makes me very confused about how MongoDB chooses the index.
Can someone explain to me what could have been the reason for this behavior? Is there a way for me to force mongodb to use the indexes I want in this case? Thanks.
I figured it out. It was precisely because of the $or query. MongoDB chooses the query plan by letting the candidate plans run a small race against each other. The more inefficient plan luckily won because the first $or clause took care of everything (remember it was $or, so a single clause matching is enough). I fixed this by dropping the fourth index.
Related
I'm running into a unique situation where one query seems to do an in-memory sort. Query 1 is the one that does the in-memory sort, while Query 2 is doing a merge sort correctly.
There are a few parts to the query, so I want to know which part is causing the query sort to be done in memory?
I do have a workaround, but I would like to know the reason behind this. They both have 2 input stages, so I'm not sure what is the cause.
Schema:
schema = {
date: Date, // date that can change
createTime: Date, // create time of document
value: Number
}
Index:
schema.index({value: 1, createTime: -1, date: 1});
Query 1: I have $or at the top level to avoid using incorrect index: MongoDB query to slow when using $or operator
db.getCollection('dates').find({
$or: [
{value: {$in: [1, 2]}, date: null},
{value: {$in: [1, 2]}, date: {$gt: ISODate("2020-06-16T23:59:59.999Z")}}
]
}).sort({createTime:-1}).explain()
Query 1 plan: As you can see it does a sort in-memory. I'm not sure exactly why this is occurring.
{
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"createTime" : -1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "FETCH",
"filter" : {
"date" : {
"$eq" : null
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"value" : 1,
"createTime" : -1,
"date" : 1
},
"indexName" : "value_1_createTime_-1_date_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"value" : [],
"createTime" : [],
"date" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"value" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
],
"createTime" : [
"[MaxKey, MinKey]"
],
"date" : [
"[undefined, undefined]",
"[null, null]"
]
}
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"value" : 1,
"createTime" : -1,
"date" : 1
},
"indexName" : "value_1_createTime_-1_date_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"value" : [],
"createTime" : [],
"date" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"value" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
],
"createTime" : [
"[MaxKey, MinKey]"
],
"date" : [
"(new Date(1592351999999), new Date(9223372036854775807)]"
]
}
}
]
}
}
}
}
}
Query 2:
db.getCollection('dates').find({
value: {$in: [1, 2]},
date: {$not: {$lte: ISODate("2020-06-16T23:59:59.999Z")}}
}).sort({createTime:-1}).explain()
Query 2 plan: The workaround query I used, which does a merge sort successfully.
{
"stage" : "FETCH",
"inputStage" : {
"stage" : "SORT_MERGE",
"sortPattern" : {
"createTime" : -1.0
},
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"value" : 1,
"createTime" : -1,
"date" : 1
},
"indexName" : "value_1_createTime_-1_date_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"value" : [],
"createTime" : [],
"date" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"value" : [
"[1.0, 1.0]"
],
"createTime" : [
"[MaxKey, MinKey]"
],
"date" : [
"[MinKey, true]",
"(new Date(1592351999999), MaxKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"value" : 1,
"createTime" : -1,
"date" : 1
},
"indexName" : "value_1_createTime_-1_date_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"value" : [],
"createTime" : [],
"date" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"value" : [
"[2.0, 2.0]"
],
"createTime" : [
"[MaxKey, MinKey]"
],
"date" : [
"[MinKey, true]",
"(new Date(1592351999999), MaxKey]"
]
}
}
]
}
}
Each of the branches of $or could use an index, but then you still have two result sets and if you apply sort on top the database has to sort the results in memory. Seems reasonable that having sort over an $or operator would produce an in-memory sort.
I have a collection where my documents looks like:
{
"_id" : ObjectId("591dbe4a77d4ede22d765250"),
"name" : [
{
"de" : true,
"text" : "Brunhilde"
},
{
"sk" : true,
"text" : "Šimon"
}
]
}
I have defined an index as:
> db.names.createIndex({ 'name.de': 1, 'name.text': 1 }, { name: 'name_de', partialFilterExpression: { 'name.de': { $exists: true } }, collation: { locale: 'de' } });
When I do a query like:
> db.names.find({ 'name.de': true, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
The explain plan looks like:
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"name.text" : {
"$eq" : "Rüdi"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[true, true]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
It does IXSCAN followed by FETCH stage with filter. I've already created an question about the filter here.
The more interesting is what will happen when I just change the matching part of the query to:
> db.names.find({ 'name.de': { $exists: true }, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
i.e. the expression 'name.de': { $exists: true } should still be a subset of the partialFilterExpression. As stated in the documentation:
To use the partial index, a query must contain the filter expression (or a modified filter expression that specifies a subset of the filter expression) as part of its query condition.
But the explain plan looks like this:
...
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"name.de" : {
"$exists" : true
}
},
{
"name.text" : {
"$eq" : "Rüdi"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[MinKey, MaxKey]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
...
As you can see index is used, but the whole filtering is happening in FETCH stage.
Question is: why is the filtering done in the FETCH stage, and what is so different between these 2 queries that MongoDB treats them differently?
Additionally, a sort query with $exists, such as:
> db.names.find({ 'name.de': { $exists: true } }).sort({ 'name.text': 1 }).collation({ locale: "de" })
behaves the same way: all of the filtering and sorting is done after the IXSCAN stage:
...
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"name.text" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"name.de" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
}
...
It even produces incorrect results, since the index is not used for sorting.
I have the following query.
db.getCollection('logs').find({'uid.$id': {
'$in': [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]}, levelno: { '$gte': 10 }
}).sort({_id: 1})
This should return 1847 documents. However, when executing it, I only get 1000 documents, which is the cursor's batchSize and then the cursor closes (setting its cursorId to 0), as if all documents were returned.
If I take out the sorting, then I get all 1847 documents.
So my question is, why does it silently fail when using sorting with the $in operator?
EDIT
Using explain gives the following output
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "session.logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"levelno" : {
"$gte" : 10
}
},
{
"uid.$id" : {
"$in" : [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid.$id" : 1,
"levelno" : 1,
"_id" : 1
},
"indexName" : "uid.$id_1_levelno_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"uid.$id" : [
"[ObjectId('580e3397812de36b86d68c04'), ObjectId('580e3397812de36b86d68c04')]",
"[ObjectId('580e339a812de36b86d68c08'), ObjectId('580e339a812de36b86d68c08')]",
"[ObjectId('580e339a812de36b86d68c09'), ObjectId('580e339a812de36b86d68c09')]",
"[ObjectId('580e33a9812de36b86d68c0a'), ObjectId('580e33a9812de36b86d68c0a')]",
"[ObjectId('580e33a9812de36b86d68c0b'), ObjectId('580e33a9812de36b86d68c0b')]",
"[ObjectId('580e33bd812de36b86d68c11'), ObjectId('580e33bd812de36b86d68c11')]",
"[ObjectId('580e33c0812de36b86d68c13'), ObjectId('580e33c0812de36b86d68c13')]"
],
"levelno" : [
"[10.0, inf.0]"
],
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"levelno" : 1,
"_id" : 1,
"uid.$id" : 1
},
"indexName" : "levelno_1__id_1_uid.$id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"levelno" : [
"[10.0, inf.0]"
],
"_id" : [
"[MinKey, MaxKey]"
],
"uid.$id" : [
"[ObjectId('580e3397812de36b86d68c04'), ObjectId('580e3397812de36b86d68c04')]",
"[ObjectId('580e339a812de36b86d68c08'), ObjectId('580e339a812de36b86d68c08')]",
"[ObjectId('580e339a812de36b86d68c09'), ObjectId('580e339a812de36b86d68c09')]",
"[ObjectId('580e33a9812de36b86d68c0a'), ObjectId('580e33a9812de36b86d68c0a')]",
"[ObjectId('580e33a9812de36b86d68c0b'), ObjectId('580e33a9812de36b86d68c0b')]",
"[ObjectId('580e33bd812de36b86d68c11'), ObjectId('580e33bd812de36b86d68c11')]",
"[ObjectId('580e33c0812de36b86d68c13'), ObjectId('580e33c0812de36b86d68c13')]"
]
}
}
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"levelno" : {
"$gte" : 10
}
},
{
"uid.$id" : {
"$in" : [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"ok" : 1
}
What's happening is that this sorted query must be performed in-memory as it's not supported by an index, and this limits the results to 32 MB. This behavior is documented here, with a JIRA about addressing this here.
Furthermore, you can't define an index to support this query as you're sorting on a field that isn't part of the query, and neither of these cases apply:
If the sort keys correspond to the index keys or an index prefix,
MongoDB can use the index to sort the query results. A prefix of a
compound index is a subset that consists of one or more keys at the
start of the index key pattern.
...
An index can support sort operations on a non-prefix subset of the
index key pattern. To do so, the query must include equality
conditions on all the prefix keys that precede the sort keys.
You should be able to work around the limitation by using the aggregation framework which can be instructed to use temporary files for its pipeline stage outputs if required via the allowDiskUse: true option:
db.getCollection('logs').aggregate([
{$match: {'uid.$id': {
'$in': [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]}, levelno: { '$gte': 10 }
}},
{$sort: {_id: 1}}
], { allowDiskUse: true })
You can use objsLeftInBatch() method to determine how many object are left in batch and iterate over it.
You can override the size and limit of the cursor batch size using cursor.batchSize(size) and cursor.limit(limit)
I have an aggregation on a collection with about 1.6M records. This query is a simple example of other, more complex ones, but in my opinion it illustrates the poor choice of index.
db.getCollection('cbAlters').runCommand("aggregate", {pipeline: [
{
$match: { cre_carteraId: "31" }
},
{
$group: { _id: { ca_tramomora: "$cre_tramoMora" },
count: { $sum: 1 } }
}
]})
That query took about 5 sec. The collection has 25 indexes configured for different queries. The one used, according to the query explain, is:
{
"v" : 1,
"key" : {
"cre_carteraId" : 1,
"cre_periodo" : 1,
"cre_tramoMora" : 1,
"cre_inactivo" : 1
},
"name" : "cartPerTramInact",
"ns" : "basedatos.cbAlters"
},
I created an index adjusted to this particular query:
{
"v" : 1,
"key" : {
"cre_carteraId" : 1,
"cre_tramoMora" : 1
},
"name" : "cartPerTramTest",
"ns" : "basedatos.cbAlters"
}
The query optimizer rejects this index and suggests that I use the initial index. The output of my query explain looks like this:
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"cre_carteraId" : "31"
},
"fields" : {
"cre_tramoMora" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "basedatos.cbAlters",
"indexFilterSet" : false,
"parsedQuery" : {
"cre_carteraId" : {
"$eq" : "31"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"cre_tramoMora" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"cre_carteraId" : 1,
"cre_periodo" : 1,
"cre_tramoMora" : 1,
"cre_inactivo" : 1
},
"indexName" : "cartPerTramInact",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"cre_carteraId" : [
"[\"31\", \"31\"]"
],
"cre_periodo" : [
"[MinKey, MaxKey]"
],
"cre_tramoMora" : [
"[MinKey, MaxKey]"
],
"cre_inactivo" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "PROJECTION",
"transformBy" : {
"cre_tramoMora" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"cre_carteraId" : 1,
"cre_tramoMora" : 1
},
"indexName" : "cartPerTramTest",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"cre_carteraId" : [
"[\"31\", \"31\"]"
],
"cre_tramoMora" : [
"[MinKey, MaxKey]"
]
}
}
}
]
}
}
},
{
"$group" : {
"_id" : {
"ca_tramomora" : "$cre_tramoMora"
},
"count" : {
"$sum" : {
"$const" : 1.0
}
}
}
}
],
"ok" : 1.0
}
Then, why does the optimizer prefer the less suitable index? Should indexFilterSet (results filtered by index) be true for this aggregation?
How can I improve this index, or is something wrong with the query?
I do not have much experience with MongoDB; I appreciate any help.
As long as you have the index cartPerTramInact, the optimizer won't use your cartPerTramTest index, because the leading fields are the same and in the same order.
This applies to other indexes too. When there are indexes that have the same keys in the same order (like a.b.c.d, a.b.d, a.b) and your query uses fields a.b, it will favour a.b.c.d. In any case you don't need the index a.b, because you already have two indexes that cover a.b (a.b.c.d and a.b.d).
Index a.b.d is used only when you query with the fields a.b.d, BUT if a.b is already very selective, it's probably faster to select with index a.b.c.d using only the a.b part and then do a "full table scan" to find that d.
There is a hint option for aggregations that can help with the index...
See https://www.mongodb.com/docs/upcoming/reference/method/db.collection.aggregate/#mongodb-method-db.collection.aggregate
I am executing an query using $elemMatch and it seems like it is not using the index I added for that.
Here is my document:
{
"_id" : "123466",
"something" : [
{
"someID" : ObjectId("5701b4c3c6b126083332e66f"),
"tags":
[
{
"tagKey": "ErrorCode",
"tagValue": "7001"
},
{
"tagKey": "ErrorDescription",
"tagValue": "nullPointer"
}
],
"removeOnDelivery" : true,
"entryTime" : ISODate("2016-04-04T00:26:43.167Z")
}
]
}
Here are the indexes I am using (I intended to use only first index but I added additional indexes to investigate why none of them are working).
db.test.createIndex( { "something.tags:" : 1 }, { sparse : true, background : true } )
db.test.createIndex( { "something.tags.tagKey:" : 1 }, { sparse : true, background : true } )
db.test.createIndex( { "something.tags.tagValue:" : 1 }, { sparse : true, background : true } )
db.test.createIndex( { "something.tags.tagKey:" : 1, "something.tags.tagValue:" : 1 }, { sparse : true, background : true } )
Here is my query and response:
db.test.find({"something.tags": { $elemMatch: { "tagKey" : "ErrorCode", "tagValue" : "7001" } } } ).explain()
{
"cursor": "BasicCursor",
"isMultiKey": false,
"n": 2,
"nscannedObjects": 2,
"nscanned": 2,
"nscannedObjectsAllPlans": 2,
"nscannedAllPlans": 2,
"scanAndOrder": false,
"indexOnly": false,
"nYields": 0,
"nChunkSkips": 0,
"millis": 0,
"server": "some_server",
"filterSet": false,
"stats": {
"type": "COLLSCAN",
"works": 4,
"yields": 0,
"unyields": 0,
"invalidates": 0,
"advanced": 2,
"needTime": 1,
"needFetch": 0,
"isEOF": 1,
"docsTested": 2,
"children": []
}
}
I don't know if this was a typing mistake. Your createIndex calls have a : at the end of the field names. Just correcting that may get the results you want.
However, it is not guaranteed that the winning plan will always be the one using an index. If a COLLSCAN is cheaper, which may be the case for collections with a small number of documents, Mongo may choose the COLLSCAN.
If you want to force index usage, you may use .hint("index_name").
I tried with the proper field name, without the : in it, and the query used the index. Your results may be different depending on the collection statistics and server version, as @Neil Lunn mentioned in the comments.
db.test.createIndex( { "something.tags.tagKey" : 1 }, { sparse : true, background : true } )
And Explain results,
db.test.find({"something.tags": { $elemMatch: { "tagKey" : "ErrorCode"} } } ).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_db.test",
"indexFilterSet" : false,
"parsedQuery" : {
"something.tags" : {
"$elemMatch" : {
"tagKey" : {
"$eq" : "ErrorCode"
}
}
}
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"something.tags" : {
"$elemMatch" : {
"tagKey" : {
"$eq" : "ErrorCode"
}
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"something.tags.tagKey" : 1
},
"indexName" : "something.tags.tagKey_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : true,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"something.tags.tagKey" : [
"[\"ErrorCode\", \"ErrorCode\"]"
]
}
}
},
"rejectedPlans" : [ ]
},
"ok" : 1
}