Below is the status of the index status of the collection that I want to let you see.
> db.histories.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "development.histories"
},
{
"v" : 1,
"key" : {
"hoge_id" : 1,
"created_at" : 1
},
"name" : "hoge_id_1_created_at_1",
"ns" : "development.histories",
"background" : true
},
{
"v" : 1,
"key" : {
"created_at" : 1
},
"name" : "created_at_1",
"ns" : "development.histories",
"background" : true
}
]
And, I executed the following query.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
And, the result was below.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 0,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
The result is fast, I guess it's due to creating index on created_at.
ref. "totalDocsExamined" : 1, "executionTimeMillis" : 0
Then, I did exection the following query. The difference of previous is the field used for sort.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
And, the result was below.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"id" : -1
},
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1215,
"totalKeysExamined" : 1034353,
"totalDocsExamined" : 1034353,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 1120,
"works" : 1034357,
"advanced" : 1,
"needTime" : 1034355,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"id" : -1
},
"memUsage" : 297,
"memLimit" : 33554432,
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 950,
"works" : 1034355,
"advanced" : 0,
"needTime" : 1,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 650,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1034353,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 330,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1034353,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
>
The result is late this time.
ref. "totalDocsExamined" : 1034353, "executionTimeMillis" : 1215
About totalDocsExamined, That's all in all documents.
Regardress that id is enable for index as created_at, but, when it is sorted using id, the result is late?
For your 1st query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
MongoDB is optimizing the performance by using the compound index on hoge_id and created_at. It firstly looks at the hoge_id and then it uses the index of created_at to sort the query results. In this way, the sort operation can be very fast because of efficient usage of compound index.
However, for your 2nd query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
Since there is no compound index on hoge_id and id(you only have a single index on id), MongoDB is actually manually sorting results by id.
More info on sorting with compound index can be found here.
Related
I have two env mongodbs,
the difference between them is:
test mongodb version: 3.2.20 , prod mongodb version : 4.0.18
and test env query plan first stage is Limit , however the other is Sort.
in my test env, it's very quick and totalDocsExamined == limit
they both hit the index:
{
"v" : 1,
"key" : {
"appIds" : 1,
"ctime" : -1,
"background" : 1
},
"name" : "appIds_1_ctime_-1_background_1",
"ns" : "newsmine.newstoapp"
}
query: db.newstoapp.find({"appIds":{"$in":[999]}}).sort({"ctime":-1}).limit(10).explain('executionStats')
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "newsmine.newstoapp",
"indexFilterSet" : false,
"parsedQuery" : {
"appIds" : {
"$in" : [
999
]
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 10,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"appIds" : 1,
"ctime" : -1,
"background" : 1
},
"indexName" : "appIds_1_ctime_-1_background_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"appIds" : [
"[999.0, 999.0]"
],
"ctime" : [
"[MaxKey, MinKey]"
],
"background" : [
"[MinKey, MaxKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 10,
"executionTimeMillis" : 0,
"totalKeysExamined" : 10,
"totalDocsExamined" : 10,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 10,
"executionTimeMillisEstimate" : 0,
"works" : 11,
"advanced" : 10,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 10,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 10,
"executionTimeMillisEstimate" : 0,
"works" : 10,
"advanced" : 10,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 10,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10,
"executionTimeMillisEstimate" : 0,
"works" : 10,
"advanced" : 10,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"appIds" : 1,
"ctime" : -1,
"background" : 1
},
"indexName" : "appIds_1_ctime_-1_background_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"appIds" : [
"[999.0, 999.0]"
],
"ctime" : [
"[MaxKey, MinKey]"
],
"background" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 10,
"dupsTested" : 10,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "",
"port" : ,
"version" : "3.2.20",
"gitVersion" : "a7a144f40b70bfe290906eb33ff2714933544af8"
},
"ok" : 1
}
in my prod env, it's getting slow query
query: datamongo:PRIMARY> db.newstoapp.find({"appIds":{"$in":[1460]}}).sort({"ctime":-1}).limit(10).explain('executionStats')
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "newsmine.newstoapp",
"indexFilterSet" : false,
"parsedQuery" : {
"appIds" : {
"$eq" : 1460
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"ctime" : -1
},
"limitAmount" : 10,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"appIds" : 1,
"ctime" : -1,
"background" : 1
},
"indexName" : "appIds_1_ctime_-1_background_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"appIds" : [
"[1460.0, 1460.0]"
],
"ctime" : [
"[MaxKey, MinKey]"
],
"background" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"ctime" : -1
},
"limitAmount" : 10,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"appIds" : 1
},
"indexName" : "appIds_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"appIds" : [
"[1460.0, 1460.0]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 10,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"appIds" : {
"$eq" : 1460
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"ctime" : 1
},
"indexName" : "ctime_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"ctime" : [
"[MaxKey, MinKey]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 10,
"executionTimeMillis" : 40,
"totalKeysExamined" : 405,
"totalDocsExamined" : 405,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 10,
"executionTimeMillisEstimate" : 3,
"works" : 418,
"advanced" : 10,
"needTime" : 407,
"needYield" : 0,
"saveState" : 9,
"restoreState" : 9,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"ctime" : -1
},
"memUsage" : 8471,
"memLimit" : 33554432,
"limitAmount" : 10,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 405,
"executionTimeMillisEstimate" : 3,
"works" : 407,
"advanced" : 405,
"needTime" : 1,
"needYield" : 0,
"saveState" : 9,
"restoreState" : 9,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 405,
"executionTimeMillisEstimate" : 3,
"works" : 406,
"advanced" : 405,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9,
"restoreState" : 9,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 405,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 405,
"executionTimeMillisEstimate" : 1,
"works" : 406,
"advanced" : 405,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9,
"restoreState" : 9,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"appIds" : 1,
"ctime" : -1,
"background" : 1
},
"indexName" : "appIds_1_ctime_-1_background_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"appIds" : [
"[1460.0, 1460.0]"
],
"ctime" : [
"[MaxKey, MinKey]"
],
"background" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 405,
"seeks" : 1,
"dupsTested" : 405,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "",
"port" : ,
"version" : "4.0.18",
"gitVersion" : "6883bdfb8b8cff32176b1fd176df04da9165fd67"
},
"ok" : 1,
"operationTime" : Timestamp(1629988625, 146),
"$clusterTime" : {
"clusterTime" : Timestamp(1629988625, 146),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
i followed Joe, changed my index, ctime before appIds.
but it does not work very well. there still be slow log for my new sql. it's hard to find out why it is slow
2021-10-19T22:38:16.918+0800 I COMMAND [conn2434281164] command newsmine.newstoapp command: find { find: "newstoapp", filter: { appIds: { $elemMatch: { $in: [ 2433 ] } }, ctime: { $gte: 0 } }, sort: { ctime: -1 }, hint: { ctime: -1, appIds: 1 }, skip: 0, limit: 50, batchSize: 50, $readPreference: { mode: "secondaryPreferred" }, $db: "newsmine" } planSummary: IXSCAN { ctime: -1, appIds: 1 } keysExamined:1471582 docsExamined:50 cursorExhausted:1 numYields:11496 nreturned:50 reslen:34043 locks:{ Global: { acquireCount: { r: 11497 } }, Database: { acquireCount: { r: 11497 } }, Collection: { acquireCount: { r: 11497 } } } storage:{ data: { bytesRead: 44958, timeReadingMicros: 618 } } protocol:op_query 7038ms
The cause of the slowness is that MongoDB 4.0.18 hash a blocking sort stage, so all matching documents must be found, retrieved, and sorted in memory before returning the requested batch.
In prior versions of MongoDB it was found that under certain conditions using a multi-key index to support a sort would provide incorrect result.
I never fully understood these conditions or why the results were incorrect, so if you are able to find those details, please edit or comment.
Prior to MongoDB 3.4 the index metadata contained a boolean value to indicate whether or not the index was multi-key (indexed a field that contained an array for at least one document).
MongoDB 3.4 introduced a new index version that also keeps track of which fields in the index are multi-key.
MongoDB 3.6 introduced a change to sorting to avoid the situations where results would be incorrect. This is why your query has a sort stage and is taking longer.
There are a couple things you could try to get back to the previous behavior without a blocking sort:
Drop and rebuild the index.
The existing index is version 1, which does not track multi-key paths. When rebuilding, the index should be created at version 2, which does track these, and may permit the query executor to use the index for sorting.
Create a new index with ctime before appIds.
A multi-key index has an entry in the index for each value in the indexed array. This may cause the query planner to assume it will disrupt sorting on a following key.
An index on {ctime:-1, appIds:1, background:1} would place the sort key ahead of the multi-key field, and while this may require reading more of the index, it may also permit the query executor to use the index for sorting.
I'm facing with a strange issue. The explain of the query shows usage of index with a fast execution time in all stages but the last stage of LIMIT breaks it all. I have execution time of 60s and more!
The DB is used for marketing tool and we collect data on the campaign activity. We have 100k+ records in DB per each hour and I want to select all the 100k in batches of 5k (I tried to reduce it to 1k also) in order to make statistics aggregation.
Just for tests, I will add an example of the query explanation.
If I reduce the created_at date range to 10 minutes and I set the limit to 1000, it works fast.
If I set the created_at date range to 30 minutes and I set the limit to 1000, it stock again.
I have 8 CPU and 64 memory
Storage is with 6000 IOPS
The table include 900 million records in total
I have the following indexes:
created_at: 1
_id: 1
{created_at: 1, _id: 1} - not used in query
Slow query for 30 minutes
> db.logs.explain('allPlansExecution').aggregate([{"$match":{"created_at":{"$gte":ISODate('2021-06-02T20:00:00.000+00:00'),"$lte":ISODate('2021-06-02T20:30:00.000+00:00')}}},{"$sort":{"_id":1}},{"$limit":1000}], { allowDiskUse: true });
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "webpush.campaign_action_logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"optimizedPipeline" : true,
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 183411,
"totalKeysExamined" : 257959,
"totalDocsExamined" : 257959,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"works" : 258061,
"advanced" : 100,
"needTime" : 257960,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"totalDataSizeSorted" : 187871472,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 533,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"docsExamined" : 257959,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 265,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
},
"keysExamined" : 257959,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
},
"allPlansExecution" : [
{
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"totalKeysExamined" : 257959,
"totalDocsExamined" : 257959,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 625,
"works" : 258060,
"advanced" : 100,
"needTime" : 257960,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 100,
"type" : "simple",
"totalDataSizeSorted" : 187871472,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 533,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"docsExamined" : 257959,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 257959,
"executionTimeMillisEstimate" : 265,
"works" : 257960,
"advanced" : 257959,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622665800000)]"
]
},
"keysExamined" : 257959,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
{
"nReturned" : 70,
"executionTimeMillisEstimate" : 178092,
"totalKeysExamined" : 258060,
"totalDocsExamined" : 258060,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 70,
"executionTimeMillisEstimate" : 178092,
"works" : 258060,
"advanced" : 70,
"needTime" : 257990,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:30:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"nReturned" : 70,
"executionTimeMillisEstimate" : 178057,
"works" : 258060,
"advanced" : 70,
"needTime" : 257990,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"docsExamined" : 258060,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 258060,
"executionTimeMillisEstimate" : 645,
"works" : 258060,
"advanced" : 258060,
"needTime" : 0,
"needYield" : 0,
"saveState" : 9571,
"restoreState" : 9571,
"isEOF" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 258060,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
}
]
},
"serverInfo" : {
"host" : "ip-10-0-3-171",
"port" : 27017,
"version" : "4.4.6",
"gitVersion" : "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
"ok" : 1
}
Faster query for 10 minutes:
> db.logs.explain('allPlansExecution').aggregate([{"$match":{"created_at":{"$gte":ISODate('2021-06-02T20:00:00.000+00:00'),"$lte":ISODate('2021-06-02T20:10:00.000+00:00')}}},{"$sort":{"_id":1}},{"$limit":1000}], { allowDiskUse: true });
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "webpush.campaign_action_logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"optimizedPipeline" : true,
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 1000,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1000,
"executionTimeMillis" : 1502,
"totalKeysExamined" : 58027,
"totalDocsExamined" : 58027,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1000,
"executionTimeMillisEstimate" : 122,
"works" : 59029,
"advanced" : 1000,
"needTime" : 58028,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"totalDataSizeSorted" : 42213931,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 96,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"docsExamined" : 58027,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 40,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
},
"keysExamined" : 58027,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
},
"allPlansExecution" : [
{
"nReturned" : 101,
"executionTimeMillisEstimate" : 122,
"totalKeysExamined" : 58027,
"totalDocsExamined" : 58027,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 101,
"executionTimeMillisEstimate" : 122,
"works" : 58129,
"advanced" : 101,
"needTime" : 58028,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 0,
"sortPattern" : {
"_id" : 1
},
"memLimit" : 104857600,
"limitAmount" : 1000,
"type" : "simple",
"totalDataSizeSorted" : 42213931,
"usedDisk" : false,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 96,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 1,
"docsExamined" : 58027,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58027,
"executionTimeMillisEstimate" : 40,
"works" : 58028,
"advanced" : 58027,
"needTime" : 0,
"needYield" : 0,
"saveState" : 121,
"restoreState" : 121,
"isEOF" : 1,
"keyPattern" : {
"created_at" : 1
},
"indexName" : "created_at_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"created_at" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"created_at" : [
"[new Date(1622664000000), new Date(1622664600000)]"
]
},
"keysExamined" : 58027,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
{
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"totalKeysExamined" : 58129,
"totalDocsExamined" : 58129,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"works" : 58129,
"advanced" : 3,
"needTime" : 58126,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"limitAmount" : 1000,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"created_at" : {
"$lte" : ISODate("2021-06-02T20:10:00Z")
}
},
{
"created_at" : {
"$gte" : ISODate("2021-06-02T20:00:00Z")
}
}
]
},
"nReturned" : 3,
"executionTimeMillisEstimate" : 935,
"works" : 58129,
"advanced" : 3,
"needTime" : 58126,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"docsExamined" : 58129,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 58129,
"executionTimeMillisEstimate" : 17,
"works" : 58129,
"advanced" : 58129,
"needTime" : 0,
"needYield" : 0,
"saveState" : 122,
"restoreState" : 122,
"isEOF" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 58129,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
}
]
},
"serverInfo" : {
"host" : "ip-10-0-3-171",
"port" : 27017,
"version" : "4.4.6",
"gitVersion" : "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
"ok" : 1
}
Questions:
If the issue is the amount of doc scanned and we used index, how can I solve it? I do not think that looping over 5000k results in a sort order in a 300k docs should cuz an issue but maybe I'm wrong.
Why the LIMIT stage (with another stage of FETCH) causing the issue?
Thank you very much for all experts in advance!
The issue is the sort. Because you are using skip/limit on a sorted set, it must load the entire set from disk, then sort it in memory (possibly spilling to disk) in order to apply the skip/limit.
This means that if you have 100k documents matching the time range, you load all 100k to get the first batch, and then load 100k again to get the second batch.
An index in mongodb can support a sort if the index includes the field that is sorted on, and any fields that precede that key in the index creation spec are matched with an equality predicate.
Fields matched with inequality or ranges can be listed after the sort key.
The existing index on { _id:1 } is inefficient because it is non-selective and therefore requires reading the entire collection from diks.
The index on { created_at:1 } requires loading only those document that match the query, but they must then be sorted in memory.
To support this query, create an index on { _id:1, created_at:1 }. This index should significantly improve the performance because it can both eliminate the in-memory sort, and only require the query executor to load from disk those documents that match the query. This also has the benefit that the query executor can terminate as soon as the limit is satisfied.
I am running standalone mongodb server with version 3.4. I am using following query on my collection which contains around 1.8 million document out of which around 1 million document are in "ARCHIVED" status.
db.tender_listing.find({ "tender_id" : { "$gt" : "d"} , "workflow_status" : { "$in" : [ "ARCHIVED"]}}).limit(4000).sort({tender_id:1}).hint({workflow_status:1, tender_id:1}).explain('executionStats')
Each query stage has executionTimeMillisEstimate of not more than 100ms but the total executionTimeMillis is 30992.
For what operation query is taking this much extra time? Also how can I optimise same?
Following is the output
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "ofbTenders.tender_listing",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"workflow_status" : {
"$eq" : "ARCHIVED"
}
},
{
"tender_id" : {
"$gt" : "d"
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 4000,
"executionTimeMillis" : 30992,
"totalKeysExamined" : 4000,
"totalDocsExamined" : 4000,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 90,
"works" : 6129,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 80,
"works" : 6128,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 4000,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 10,
"works" : 4000,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
},
"keysExamined" : 4000,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "ofb59-Latitude-3450",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
I am not sure but you can try below option.
1) instead of using $in for workflow you can give directly "workflow_status" : "ARCHIVED"
2) change order of fields in find first workflow status and then tender_id.
3) run query execution plan without hint. Let MongoDB decide which index to use.
I have a Mongo collection which hold millions of IoT device data.
The structure of the document is like this :
{ ObjectID:"...", device:"DEVICE3", topic:"TEMP", vhost:"client1", date:ISODate("2017-08-23T08:00:00.000Z"), value:23.5 }
I have a Rest API with a request that finds all the devices for one specific vhost.
The request looks like that : db.data.distinct("device", { vhost:"client1" })
I added an index on vhost and device : db.data.createIndex( { vhost:1, device:1 }) but it is still a lot of examined documents. What kind of index can I use to optimize the request ?
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 15848881,
"executionTimeMillis" : 42425,
"totalKeysExamined" : 15848881,
"totalDocsExamined" : 15848881,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 36240,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 15848881,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 9890,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 15848881,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
In final, there is about 30 distinct device.
EDIT :
Here is the queryPlanner as asked :
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "thingsplay.data",
"indexFilterSet" : false,
"parsedQuery" : {
"vhost" : {
"$eq" : "client1"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
The result :
[
"F000105",
"F000107",
"F000109",
"F000110",
"F000113",
"F000119",
"F000121",
"F000124",
"F000128",
"F000131",
"F000134",
"F000138",
"F000144",
"F000146",
"F000147",
"F000148",
"F000149",
"F000150",
"F000153",
"F000155",
"F000156",
"F000159",
"F000161",
"F000164",
"F000166",
"F000167",
"F000168",
"F000169",
"F000170",
"F000171",
"F000172",
"F000181",
"F000183",
"F000184",
"F000187",
"F000190",
"F000192",
"F000193",
"F000203",
"F000204",
"F000205",
"F000208",
"F000209",
"F000215",
"F000221",
"F000223",
"F000243",
"F000249",
"F000250",
"F000251",
"F000253",
"F000255",
"S0E190E",
"S0E1A45",
"S0E1AC0",
"SYS_STATUS_ID",
"TS4D9292",
"TS4D9294",
"TS4D9296",
"TS4D9297",
"TS4D9298",
"TS4D9299",
"TS4D929B",
"TS4D929D",
"TS4D929F",
"TS4D92A0",
"TS4D92A2",
"TS4D92A6",
"TS4D92AA",
"TS4D92B1",
"TS4D92B2",
"TS4D92B3",
"TS4D92B4",
"TS4D92C2"
]
I have a MongoDB collection with about 350k documents in it, and I am doing simple count queries based on one of the integer fields, usually using $in. The field is indexed with both db.myColl.createIndex({indexedField: 1}) and db.myColl.createIndex({indexedField: -1}).
When I run a query matching one value, the response comes quickly, as expected:
> db.myColl.explain("executionStats").count({indexedField: 1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$eq" : 1
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "COUNT_SCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 171,
"totalKeysExamined" : 354783,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 101,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "COUNT_SCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 91,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"keysExamined" : 354783,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
However, when I attempt to query for more than one value for indexedField using $in, it slows to a crawl:
> db.myColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$in" : [
1,
2
]
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : -1
},
"indexName" : "indexedField_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[2.0, 2.0]",
"[1.0, 1.0]"
]
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 354782,
"executionTimeMillis" : 215153,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 214871,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 748,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Using $or instead of $in yields similar bad results:
> db.myColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"indexedField" : {
"$eq" : 1
}
},
{
"indexedField" : {
"$eq" : 2
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 219269,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 219170,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "SUBPLAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219090,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219040,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 686,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Any idea what is going wrong here?
You should be able to reproduce this by applying the following script to a Mongo installation.
Create a file create_test_database.js:
db.indexTestColl.createIndex({indexedField: 1});
var RECORDS_TO_CREATE = 5000000;
for (var i = 0; i < RECORDS_TO_CREATE; i++)
{
// Populate indexedField with random numbers [1 - 3].
db.indexTestColl.insertOne({"indexedField": NumberInt((Math.random() * 10) % 3 + 1)});
if ((i + 1) % 10000 == 0) print("Inserted " + (i + 1) + " documents.");
}
Create and populate the collection:
mongo localhost:27017/indexTestDb create_test_database.js
Then test it with these queries:
use indexTestDb
db.indexTestColl.explain("executionStats").count({indexedField: 1})
db.indexTestColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
db.indexTestColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
Am I correct in assuming that the $in and $or queries should benefit from the index being there?