Mongo db query time more than expected - mongodb

I am running standalone mongodb server with version 3.4. I am using following query on my collection which contains around 1.8 million document out of which around 1 million document are in "ARCHIVED" status.
db.tender_listing.find({ "tender_id" : { "$gt" : "d"} , "workflow_status" : { "$in" : [ "ARCHIVED"]}}).limit(4000).sort({tender_id:1}).hint({workflow_status:1, tender_id:1}).explain('executionStats')
Each query stage has executionTimeMillisEstimate of not more than 100ms but the total executionTimeMillis is 30992.
For what operation query is taking this much extra time? Also how can I optimise same?
Following is the output
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "ofbTenders.tender_listing",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"workflow_status" : {
"$eq" : "ARCHIVED"
}
},
{
"tender_id" : {
"$gt" : "d"
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 4000,
"executionTimeMillis" : 30992,
"totalKeysExamined" : 4000,
"totalDocsExamined" : 4000,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 90,
"works" : 6129,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 80,
"works" : 6128,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 4000,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 10,
"works" : 4000,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
},
"keysExamined" : 4000,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "ofb59-Latitude-3450",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}

I am not sure but you can try below option.
1) instead of using $in for workflow you can give directly "workflow_status" : "ARCHIVED"
2) change order of fields in find first workflow status and then tender_id.
3) run query execution plan without hint. Let MongoDB decide which index to use.

Related

MongoDB sort and limit performance issue

I'm facing with a strange issue. The explain of the query shows usage of index with a fast execution time in all stages but the last stage of LIMIT breaks it all. I have execution time of 60s and more!
The DB is used for marketing tool and we collect data on the campaign activity. We have 100k+ records in DB per each hour and I want to select all the 100k in batches of 5k (I tried to reduce it to 1k also) in order to make statistics aggregation.
Just for tests, I will add an example of the query explanation.
If I reduce the created_at date range to 10 minutes and I set the limit to 1000, it works fast.
If I set the created_at date range to 30 minutes and I set the limit to 1000, it stock again.
I have 8 CPU and 64 memory
Storage is with 6000 IOPS
The table include 900 million records in total
I have the following indexes:
created_at: 1
_id: 1
{created_at: 1, _id: 1} - not used in query
Slow query for 30 minutes
> db.logs.explain('allPlansExecution').aggregate([{"$match":{"created_at":{"$gte":ISODate('2021-06-02T20:00:00.000+00:00'),"$lte":ISODate('2021-06-02T20:30:00.000+00:00')}}},{"$sort":{"_id":1}},{"$limit":1000}], { allowDiskUse: true });
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "webpush.campaign_action_logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"optimizedPipeline" : true,
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 183411,
"totalKeysExamined" : 257959,
"totalDocsExamined" : 257959,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"works" : 258061,
"advanced" : 100,
"needTime" : 257960,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"totalDataSizeSorted" : 187871472,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 533,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"docsExamined" : 257959,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 265,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
},
"keysExamined" : 257959,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
},
"allPlansExecution" : [
{
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"totalKeysExamined" : 257959,
"totalDocsExamined" : 257959,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"works" : 258060,
"advanced" : 100,
"needTime" : 257960,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"totalDataSizeSorted" : 187871472,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 533,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"docsExamined" : 257959,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 265,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
},
"keysExamined" : 257959,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
{
"nReturned" : 70,
"executionTimeMillisEstimate" : 178092,
"totalKeysExamined" : 258060,
"totalDocsExamined" : 258060,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 70,
"executionTimeMillisEstimate" : 178092,
"works" : 258060,
"advanced" : 70,
"needTime" : 257990,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"nReturned" : 70,
"executionTimeMillisEstimate" : 178057,
"works" : 258060,
"advanced" : 70,
"needTime" : 257990,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"docsExamined" : 258060,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 258060,
"executionTimeMillisEstimate" : 645,
"works" : 258060,
"advanced" : 258060,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 258060,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
}
]
},
"serverInfo" : {
"host" : "ip-10-0-3-171",
"port" : 27017,
"version" : "4.4.6",
"gitVersion" : "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
"ok" : 1
}
Faster query for 10 minutes:
> db.logs.explain('allPlansExecution').aggregate([{"$match":{"created_at":{"$gte":ISODate('2021-06-02T20:00:00.000+00:00'),"$lte":ISODate('2021-06-02T20:10:00.000+00:00')}}},{"$sort":{"_id":1}},{"$limit":1000}], { allowDiskUse: true });
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "webpush.campaign_action_logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"optimizedPipeline" : true,
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 1000,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1000,
"executionTimeMillis" : 1502,
"totalKeysExamined" : 58027,
"totalDocsExamined" : 58027,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1000,
"executionTimeMillisEstimate" : 122,
"works" : 59029,
"advanced" : 1000,
"needTime" : 58028,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"totalDataSizeSorted" : 42213931,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 96,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"docsExamined" : 58027,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 40,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
},
"keysExamined" : 58027,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
},
"allPlansExecution" : [
{
"nReturned" : 101,
"executionTimeMillisEstimate" : 122,
"totalKeysExamined" : 58027,
"totalDocsExamined" : 58027,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 101,
"executionTimeMillisEstimate" : 122,
"works" : 58129,
"advanced" : 101,
"needTime" : 58028,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 0,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"totalDataSizeSorted" : 42213931,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 96,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 1,
"docsExamined" : 58027,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 40,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
},
"keysExamined" : 58027,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
{
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"totalKeysExamined" : 58129,
"totalDocsExamined" : 58129,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"works" : 58129,
"advanced" : 3,
"needTime" : 58126,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"limitAmount" : 1000,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"works" : 58129,
"advanced" : 3,
"needTime" : 58126,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"docsExamined" : 58129,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58129,
"executionTimeMillisEstimate" : 17,
"works" : 58129,
"advanced" : 58129,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 58129,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
}
]
},
"serverInfo" : {
"host" : "ip-10-0-3-171",
"port" : 27017,
"version" : "4.4.6",
"gitVersion" : "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
"ok" : 1
}
Questions:
If the issue is the amount of doc scanned and we used index, how can I solve it? I do not think that looping over 5000k results in a sort order in a 300k docs should cuz an issue but maybe I'm wrong.
Why the LIMIT stage (with another stage of FETCH) causing the issue?
Thank you very much for all experts in advance!
The issue is the sort. Because you are using skip/limit on a sorted set, it must load the entire set from disk, then sort it in memory (possibly spilling to disk) in order to apply the skip/limit.
This means that if you have 100k documents matching the time range, you load all 100k to get the first batch, and then load 100k again to get the second batch.
An index in mongodb can support a sort if the index includes the field that is sorted on, and any fields that precede that key in the index creation spec are matched with an equality predicate.
Fields matched with inequality or ranges can be listed after the sort key.
The existing index on { _id:1 } is inefficient because it is non-selective and therefore requires reading the entire collection from diks.
The index on { created_at:1 } requires loading only those document that match the query, but they must then be sorted in memory.
To support this query, create an index on { _id:1, created_at:1 }. This index should significantly improve the performance because it can both eliminate the in-memory sort, and only require the query executor to load from disk those documents that match the query. This also has the benefit that the query executor can terminate as soon as the limit is satisfied.

Sort of MongoDB using index

Below is the status of the index status of the collection that I want to let you see.
> db.histories.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "development.histories"
},
{
"v" : 1,
"key" : {
"hoge_id" : 1,
"created_at" : 1
},
"name" : "hoge_id_1_created_at_1",
"ns" : "development.histories",
"background" : true
},
{
"v" : 1,
"key" : {
"created_at" : 1
},
"name" : "created_at_1",
"ns" : "development.histories",
"background" : true
}
]
And, I executed the following query.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
And, the result was below.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 0,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
The result is fast, I guess it's due to creating index on created_at.
ref. "totalDocsExamined" : 1, "executionTimeMillis" : 0
Then, I did exection the following query. The difference of previous is the field used for sort.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
And, the result was below.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"id" : -1
},
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1215,
"totalKeysExamined" : 1034353,
"totalDocsExamined" : 1034353,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 1120,
"works" : 1034357,
"advanced" : 1,
"needTime" : 1034355,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"id" : -1
},
"memUsage" : 297,
"memLimit" : 33554432,
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 950,
"works" : 1034355,
"advanced" : 0,
"needTime" : 1,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 650,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1034353,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 330,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1034353,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
>
The result is late this time.
ref. "totalDocsExamined" : 1034353, "executionTimeMillis" : 1215
About totalDocsExamined, That's all in all documents.
Regardress that id is enable for index as created_at, but, when it is sorted using id, the result is late?
For your 1st query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
MongoDB is optimizing the performance by using the compound index on hoge_id and created_at. It firstly looks at the hoge_id and then it uses the index of created_at to sort the query results. In this way, the sort operation can be very fast because of efficient usage of compound index.
However, for your 2nd query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
Since there is no compound index on hoge_id and id(you only have a single index on id), MongoDB is actually manually sorting results by id.
More info on sorting with compound index can be found here.

What index to use for a Mongo request with distinct and query?

I have a Mongo collection which hold millions of IoT device data.
The structure of the document is like this :
{ ObjectID:"...", device:"DEVICE3", topic:"TEMP", vhost:"client1", date:ISODate("2017-08-23T08:00:00.000Z"), value:23.5 }
I have a Rest API with a request that finds all the devices for one specific vhost.
The request looks like that : db.data.distinct("device", { vhost:"client1" })
I added an index on vhost and device : db.data.createIndex( { vhost:1, device:1 }) but it is still a lot of examined documents. What kind of index can I use to optimize the request ?
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 15848881,
"executionTimeMillis" : 42425,
"totalKeysExamined" : 15848881,
"totalDocsExamined" : 15848881,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 36240,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 15848881,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 9890,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 15848881,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
In final, there is about 30 distinct device.
EDIT :
Here is the queryPlanner as asked :
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "thingsplay.data",
"indexFilterSet" : false,
"parsedQuery" : {
"vhost" : {
"$eq" : "client1"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
The result :
[
"F000105",
"F000107",
"F000109",
"F000110",
"F000113",
"F000119",
"F000121",
"F000124",
"F000128",
"F000131",
"F000134",
"F000138",
"F000144",
"F000146",
"F000147",
"F000148",
"F000149",
"F000150",
"F000153",
"F000155",
"F000156",
"F000159",
"F000161",
"F000164",
"F000166",
"F000167",
"F000168",
"F000169",
"F000170",
"F000171",
"F000172",
"F000181",
"F000183",
"F000184",
"F000187",
"F000190",
"F000192",
"F000193",
"F000203",
"F000204",
"F000205",
"F000208",
"F000209",
"F000215",
"F000221",
"F000223",
"F000243",
"F000249",
"F000250",
"F000251",
"F000253",
"F000255",
"S0E190E",
"S0E1A45",
"S0E1AC0",
"SYS_STATUS_ID",
"TS4D9292",
"TS4D9294",
"TS4D9296",
"TS4D9297",
"TS4D9298",
"TS4D9299",
"TS4D929B",
"TS4D929D",
"TS4D929F",
"TS4D92A0",
"TS4D92A2",
"TS4D92A6",
"TS4D92AA",
"TS4D92B1",
"TS4D92B2",
"TS4D92B3",
"TS4D92B4",
"TS4D92C2"
]

MongoDB Not Using Index for $in or $or Queries

I have a MongoDB collection with about 350k documents in it, and I am doing simple count queries based on one of the integer fields, usually using $in. The field is indexed with both db.myColl.createIndex({indexedField: 1}) and db.myColl.createIndex({indexedField: -1}).
When I run a query matching one value, the response comes quickly, as expected:
> db.myColl.explain("executionStats").count({indexedField: 1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$eq" : 1
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "COUNT_SCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 171,
"totalKeysExamined" : 354783,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 101,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "COUNT_SCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 91,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"keysExamined" : 354783,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
However, when I attempt to query for more than one value for indexedField using $in, it slows to a crawl:
> db.myColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$in" : [
1,
2
]
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : -1
},
"indexName" : "indexedField_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[2.0, 2.0]",
"[1.0, 1.0]"
]
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 354782,
"executionTimeMillis" : 215153,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 214871,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 748,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Using $or instead of $in yields similar bad results:
> db.myColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"indexedField" : {
"$eq" : 1
}
},
{
"indexedField" : {
"$eq" : 2
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 219269,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 219170,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "SUBPLAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219090,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219040,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 686,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Any idea what is going wrong here?
You should be able to reproduce this by applying the following script to a Mongo installation.
Create a file create_test_database.js:
db.indexTestColl.createIndex({indexedField: 1});
var RECORDS_TO_CREATE = 5000000;
for (var i = 0; i < RECORDS_TO_CREATE; i++)
{
// Populate indexedField with random numbers [1 - 3].
db.indexTestColl.insertOne({"indexedField": NumberInt((Math.random() * 10) % 3 + 1)});
if ((i + 1) % 10000 == 0) print("Inserted " + (i + 1) + " documents.");
}
Create and populate the collection:
mongo localhost:27017/indexTestDb create_test_database.js
Then test it with these queries:
use indexTestDb
db.indexTestColl.explain("executionStats").count({indexedField: 1})
db.indexTestColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
db.indexTestColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
Am I correct in assuming that the $in and $or queries should benefit from the index being there?

Difficulty optimizing Mongo distinct query to use indexes

I am having difficulty persuading Mongo to run a distinct query that looks like it should be covered by the indexes without fetching a large number of documents in the collection.
My documents have the general form:
{
_tenantId: 'someString',
_productCategory: 'some string from a smallish set'
...
}
I have an index on (_tenantId, _productCategory).
I want to find out what the set of distinct product categories is for a given tenant, so the query is:
db.products.distinct( '_productCategory', { _tenantId: '463171c3-d15f-4699-893d-3046327f8e1f'})
This runs rather slowly (several seconds for a collection of around half a million products against a local DB, which is Mongo 3.2.9). Against our pre-production SaaS-based Mongo (which is probably more memory constrained than my local instance which has free run of my machine) it take several 10s of seconds for the same data.
Explaining the query yields:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "engage-prod.products",
"indexFilterSet" : false,
"parsedQuery" : {
"_tenantId" : {
"$eq" : "463171c3-d15f-4699-893d-3046327f8e1f"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_tenantId" : 1,
"_productCategory" : 1
},
"indexName" : "_tenantId_1__productCategory_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_tenantId" : [
"[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1f\"]"
],
"_productCategory" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 406871,
"executionTimeMillis" : 358,
"totalKeysExamined" : 406871,
"totalDocsExamined" : 406871,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 406871,
"executionTimeMillisEstimate" : 80,
"works" : 406872,
"advanced" : 406871,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3178,
"restoreState" : 3178,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 406871,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 406871,
"executionTimeMillisEstimate" : 40,
"works" : 406872,
"advanced" : 406871,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3178,
"restoreState" : 3178,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_tenantId" : 1,
"_productCategory" : 1
},
"indexName" : "_tenantId_1__productCategory_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_tenantId" : [
"[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1f\"]"
],
"_productCategory" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 406871,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "Stevens-MacBook-Pro.local",
"port" : 27017,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}
Note that even though it runs an IXSCAN it still returns over 400K documents (nReturned).
If I create a compound field _tenantAndProductCategory containing a lexical concatenation (with a : separator) and index that so it's a single field index, then the query:
db.products.explain('executionStats').distinct( '_productTenantAndCategory', { _productTenantAndCategory: {$gte: '463171c3-d15f-4699-893d-3046327f8e1f',$lt: '463171c3-d15f-4699-893d-3046327f8e1g'}})
works entirely within the index and yields:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "engage-prod.products",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"_productTenantAndCategory" : {
"$lt" : "463171c3-d15f-4699-893d-3046327f8e1g"
}
},
{
"_productTenantAndCategory" : {
"$gte" : "463171c3-d15f-4699-893d-3046327f8e1f"
}
}
]
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"_id" : 0,
"_productTenantAndCategory" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"keyPattern" : {
"_productTenantAndCategory" : 1
},
"indexName" : "_productTenantAndCategory_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_productTenantAndCategory" : [
"[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1g\")"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 62,
"executionTimeMillis" : 0,
"totalKeysExamined" : 63,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 62,
"executionTimeMillisEstimate" : 0,
"works" : 63,
"advanced" : 62,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"_id" : 0,
"_productTenantAndCategory" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"nReturned" : 62,
"executionTimeMillisEstimate" : 0,
"works" : 63,
"advanced" : 62,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_productTenantAndCategory" : 1
},
"indexName" : "_productTenantAndCategory_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_productTenantAndCategory" : [
"[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1g\")"
]
},
"keysExamined" : 63
}
}
},
"serverInfo" : {
"host" : "Stevens-MacBook-Pro.local",
"port" : 27017,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}
Having to build single field indexes with manually compounded keys for all the aggregation queries I need is not a very desirable path to follow. Since all the information is present in the compound index I started with, why can't Mongo execute the original distinct query with cover by that index? Is there anything I can do to overcome this in the way of query optimization?
Note This is actually a sub-problem of a slightly more complex one involving an aggregation pipeline to actually count the number of occurrences of each category, but I am restricting my question for now to the simpler distinct query since it seems to capture the essence of failure to use an index that should cover things (which I was also seeing in the aggregation pipeline case), while being a simpler overall query.