I have several questions about how MongoDB chooses which index will be the most efficient for retrieving data.
I have the following query :
db.getCollection('myCollection').find({
"TenantId":"1a1a1a1a",
"ContractId": 1111,
"Lists":1111,
"Email":{"$exists":true,"$nin":["",null]},
"Optouts.Media":{"$nin":["EMAIL",null]},
"Deleted":false
})
This query is used in a mapReduce that failed. When I run it with count(), I get a result of 290,000 in 47 seconds.
So, to understand what happens, I executed the query again in explain mode. Indeed, the index used (TenantId_1_ContractId_1_GUID_1_Deleted_1) does not seem to be the best one.
I re-ran the same query with hint(TenantId_1_ContractId_1_Lists_1_Deleted_1), and this time I got the result in 2 seconds.
How does MongoDB select the index used to retrieve the data?
Good to know:
For the current ContractId I have 1.6 million entries.
When filtering on Lists, I get 700,000 entries.
In the documents, Lists is an array of values.
I retrieved the plan execution details to try to get more information, but I can't figure it out. See below the result of explain('allPlansExecution').
{
"queryPlanner" : {
"mongosPlannerVersion" : 1,
"winningPlan" : {
"stage" : "SINGLE_SHARD",
"shards" : [
{
"shardName" : "rs1",
"connectionString" : "...",
"serverInfo" : {...},
"plannerVersion" : 1,
"namespace" : "",
"indexFilterSet" : false,
"parsedQuery" : {...},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {...},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"GUID" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_GUID_1_Deleted_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"GUID" : [],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {...},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"Lists" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_Lists_1_Deleted_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"Lists" : [
"Lists"
],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...}
}
},
{...}
]
}
]
}
},
"executionStats" : {...},
"allPlansExecution" : [
undefined,
{
"shardName" : "rs1",
"allPlans" : [
{
"nReturned" : 0,
"executionTimeMillisEstimate" : 51,
"totalKeysExamined" : 10618,
"totalDocsExamined" : 10618,
"executionStages" : {
"stage" : "FETCH",
"filter" : {...},
"nReturned" : 0,
"executionTimeMillisEstimate" : 51,
"works" : 10618,
"advanced" : 0,
"needTime" : 10618,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 10618,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10618,
"executionTimeMillisEstimate" : 20,
"works" : 10618,
"advanced" : 10618,
"needTime" : 0,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"Lists" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_Lists_1_Deleted_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"Lists" : [
"Lists"
],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...},
"keysExamined" : 10618,
"seeks" : 1,
"dupsTested" : 10618,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
{
"nReturned" : 101,
"executionTimeMillisEstimate" : 0,
"totalKeysExamined" : 10618,
"totalDocsExamined" : 10616,
"executionStages" : {
"stage" : "FETCH",
"filter" : {...},
"nReturned" : 101,
"executionTimeMillisEstimate" : 0,
"works" : 10618,
"advanced" : 101,
"needTime" : 10517,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 10616,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10616,
"executionTimeMillisEstimate" : 0,
"works" : 10618,
"advanced" : 10616,
"needTime" : 2,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"GUID" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_GUID_1_Deleted_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"GUID" : [],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...},
"keysExamined" : 10618,
"seeks" : 3,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
]
}
]
}
}
I have replaced some lines with {...}; if you want more details, don't hesitate to ask me.
Thanks a lot for your help!
Related
I have a mongo document as below.
{
"_id":"1234",
"empId" : "1234",
"manager":"456",
"name" :"Mike",
"mgrHierarchy":"456>678>789>901>999",
"mgrs": [
"456",
"678",
"789",
"901",
"999"
]
}
Goal is to get the hierarchy for the given empId. Using mgrHierarchy for sorting, so that the hierarchy can be retrieved easily.
mgrs - will have all the manager from 1st level to the top level.
mgrs and mgrHierarchy hold the same information in different formats.
Have index on empId, mgrHierarchy, mgrs, mgrs.0, mgrs.1, mgrs.2, mgrs.3, mgrs.4, .... This collection has around 20 indexes in total.
Query :-
db.test.find({ "$or" : [ { "mgrs.2" : "901"} , { "_id" : { "$in" : [ "901"]}} , { "mgrs.0" : "901"} , { "mgrs.1" : "901"} , { "mgrs.4" : "901"} , { "mgrs.3" : "901"}]}).sort({ mgrHierarchy: 1 })
A count with this query returns 100k.
The query itself fails with a memory error:
Exec error: OperationFailed: Sort operation used more than the maximum
33554432 bytes of RAM. Add an index, or specify a smaller limit.,
state: FAILURE.
explain("executionStats") output,
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.test",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"_id" : {
"$eq" : "901"
}
},
{
"mgrs.0" : {
"$eq" : "901"
}
},
{
"mgrs.1" : {
"$eq" : "901"
}
},
{
"mgrs.2" : {
"$eq" : "901"
}
},
{
"mgrs.3" : {
"$eq" : "901"
}
},
{
"mgrs.4" : {
"$eq" : "901"
}
}
]
},
"winningPlan" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "PROJECTION",
"transformBy" : {
},
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"mgrHierarchy" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[\"901\", \"901\"]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"mgrs.0" : 1
},
"indexName" : "mgrs.0_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"mgrs.0" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.0" : [
"[\"901\", \"901\"]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"mgrs.1" : 1
},
"indexName" : "mgrs.1_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"mgrs.1" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.1" : [
"[\"901\", \"901\"]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"mgrs.2" : 1
},
"indexName" : "mgrs.2_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.2" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.2" : [
"[\"901\", \"901\"]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"mgrs.3" : 1
},
"indexName" : "mgrs.3_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.3" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.3" : [
"[\"901\", \"901\"]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"mgrs.4" : 1
},
"indexName" : "mgrs.4_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.4" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.4" : [
"[\"901\", \"901\"]"
]
}
}
]
}
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : false,
"errorMessage" : "Exec error: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit., state: FAILURE",
"errorCode" : 96,
"nReturned" : 0,
"executionTimeMillis" : 1120,
"totalKeysExamined" : 47184,
"totalDocsExamined" : 47184,
"executionStages" : {
"stage" : "SUBPLAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 1109,
"works" : 47191,
"advanced" : 0,
"needTime" : 47190,
"needYield" : 0,
"saveState" : 2067,
"restoreState" : 2067,
"isEOF" : 0,
"invalidates" : 0,
"inputStage" : {
"stage" : "PROJECTION",
"nReturned" : 0,
"executionTimeMillisEstimate" : 232,
"works" : 47191,
"advanced" : 0,
"needTime" : 47190,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"transformBy" : {
},
"inputStage" : {
"stage" : "SORT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 221,
"works" : 47191,
"advanced" : 0,
"needTime" : 47190,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"sortPattern" : {
"mgrHierarchy" : 1
},
"memUsage" : 33555113,
"memLimit" : 33554432,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 47184,
"executionTimeMillisEstimate" : 201,
"works" : 47190,
"advanced" : 47184,
"needTime" : 6,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 47184,
"executionTimeMillisEstimate" : 141,
"works" : 47189,
"advanced" : 47184,
"needTime" : 5,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 47184,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "OR",
"nReturned" : 47184,
"executionTimeMillisEstimate" : 91,
"works" : 47189,
"advanced" : 47184,
"needTime" : 5,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"dupsTested" : 47184,
"dupsDropped" : 0,
"recordIdsForgotten" : 0,
"inputStages" : [
{
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[\"901\", \"901\"]"
]
},
"keysExamined" : 1,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 22,
"executionTimeMillisEstimate" : 0,
"works" : 23,
"advanced" : 22,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"mgrs.0" : 1
},
"indexName" : "mgrs.0_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"mgrs.0" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.0" : [
"[\"901\", \"901\"]"
]
},
"keysExamined" : 22,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 349,
"executionTimeMillisEstimate" : 0,
"works" : 350,
"advanced" : 349,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"mgrs.1" : 1
},
"indexName" : "mgrs.1_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"mgrs.1" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.1" : [
"[\"901\", \"901\"]"
]
},
"keysExamined" : 349,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 2859,
"executionTimeMillisEstimate" : 0,
"works" : 2860,
"advanced" : 2859,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"mgrs.2" : 1
},
"indexName" : "mgrs.2_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.2" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.2" : [
"[\"901\", \"901\"]"
]
},
"keysExamined" : 2859,
"seeks" : 1,
"dupsTested" : 2859,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 16475,
"executionTimeMillisEstimate" : 40,
"works" : 16476,
"advanced" : 16475,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"mgrs.3" : 1
},
"indexName" : "mgrs.3_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.3" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.3" : [
"[\"901\", \"901\"]"
]
},
"keysExamined" : 16475,
"seeks" : 1,
"dupsTested" : 16475,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 27478,
"executionTimeMillisEstimate" : 20,
"works" : 27478,
"advanced" : 27478,
"needTime" : 0,
"needYield" : 0,
"saveState" : 371,
"restoreState" : 371,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"mgrs.4" : 1
},
"indexName" : "mgrs.4_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"mgrs.4" : [
"mgrs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"mgrs.4" : [
"[\"901\", \"822717775\"]"
]
},
"keysExamined" : 27478,
"seeks" : 1,
"dupsTested" : 27478,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
]
}
}
}
}
}
}
},
"serverInfo" : {
"host" : "test",
"port" : 10904,
"version" : "3.4.14",
"gitVersion" : "fd954412dfc10e4d1e3e2dd4fac040f8b476b268"
},
"ok" : 1
}
I have indexes on all of these fields, so I am not sure why I am still getting this error. Any pointers?
I am running a standalone MongoDB server, version 3.4. I am using the following query on my collection, which contains around 1.8 million documents, of which around 1 million are in "ARCHIVED" status.
db.tender_listing.find({ "tender_id" : { "$gt" : "d"} , "workflow_status" : { "$in" : [ "ARCHIVED"]}}).limit(4000).sort({tender_id:1}).hint({workflow_status:1, tender_id:1}).explain('executionStats')
Each query stage has an executionTimeMillisEstimate of no more than 100 ms, but the total executionTimeMillis is 30992.
Which operation is the query spending all this extra time on? Also, how can I optimise it?
The output is as follows:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "ofbTenders.tender_listing",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"workflow_status" : {
"$eq" : "ARCHIVED"
}
},
{
"tender_id" : {
"$gt" : "d"
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 4000,
"executionTimeMillis" : 30992,
"totalKeysExamined" : 4000,
"totalDocsExamined" : 4000,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 90,
"works" : 6129,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 4000,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 80,
"works" : 6128,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 2128,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 4000,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 4000,
"executionTimeMillisEstimate" : 10,
"works" : 4000,
"advanced" : 4000,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2128,
"restoreState" : 2128,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"workflow_status" : 1,
"tender_id" : 1
},
"indexName" : "workflow_status_1_tender_id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"workflow_status" : [
"[\"ARCHIVED\", \"ARCHIVED\"]"
],
"tender_id" : [
"(\"d\", {})"
]
},
"keysExamined" : 4000,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "ofb59-Latitude-3450",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
I am not sure, but you can try the options below.
1) Instead of using $in for workflow_status, give the value directly: "workflow_status" : "ARCHIVED"
2) Change the order of the fields in find(): workflow_status first, then tender_id.
3) Run the query execution plan without hint() and let MongoDB decide which index to use.
Below are the indexes currently defined on the collection in question.
> db.histories.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "development.histories"
},
{
"v" : 1,
"key" : {
"hoge_id" : 1,
"created_at" : 1
},
"name" : "hoge_id_1_created_at_1",
"ns" : "development.histories",
"background" : true
},
{
"v" : 1,
"key" : {
"created_at" : 1
},
"name" : "created_at_1",
"ns" : "development.histories",
"background" : true
}
]
And, I executed the following query.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
And, the result was below.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 0,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
The result is fast; I guess that is due to the index on created_at.
ref. "totalDocsExamined" : 1, "executionTimeMillis" : 0
Then I executed the following query. The only difference from the previous one is the field used for sorting.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
And, the result was below.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"id" : -1
},
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1215,
"totalKeysExamined" : 1034353,
"totalDocsExamined" : 1034353,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 1120,
"works" : 1034357,
"advanced" : 1,
"needTime" : 1034355,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"id" : -1
},
"memUsage" : 297,
"memLimit" : 33554432,
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 950,
"works" : 1034355,
"advanced" : 0,
"needTime" : 1,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 650,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1034353,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 330,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1034353,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
>
This time the result is slow.
ref. "totalDocsExamined" : 1034353, "executionTimeMillis" : 1215
As for totalDocsExamined, that is every document in the collection.
Even though id is indexed just like created_at, why is the result slow when sorting by id?
For your 1st query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
MongoDB optimizes this query by using the compound index on hoge_id and created_at. It first matches on hoge_id and then uses the created_at part of the same index to return the results already sorted. This way the sort is very fast, because the compound index is used efficiently.
However, for your 2nd query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
Since there is no compound index on hoge_id and id (the only single-field indexes you have are on _id and created_at), MongoDB has to fetch every matching document and sort the results by id itself, in memory.
More info on sorting with compound index can be found here.
I have a Mongo collection which holds millions of IoT device data records.
The structure of the document is like this :
{ ObjectID:"...", device:"DEVICE3", topic:"TEMP", vhost:"client1", date:ISODate("2017-08-23T08:00:00.000Z"), value:23.5 }
I have a Rest API with a request that finds all the devices for one specific vhost.
The request looks like that : db.data.distinct("device", { vhost:"client1" })
I added an index on vhost and device: db.data.createIndex( { vhost:1, device:1 }), but a lot of documents are still examined. What kind of index can I use to optimize the request?
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 15848881,
"executionTimeMillis" : 42425,
"totalKeysExamined" : 15848881,
"totalDocsExamined" : 15848881,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 36240,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 15848881,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 15848881,
"executionTimeMillisEstimate" : 9890,
"works" : 15848882,
"advanced" : 15848881,
"needTime" : 0,
"needYield" : 0,
"saveState" : 123949,
"restoreState" : 123949,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 15848881,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
In the end, there are about 30 distinct devices.
EDIT :
Here is the queryPlanner as asked :
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "thingsplay.data",
"indexFilterSet" : false,
"parsedQuery" : {
"vhost" : {
"$eq" : "client1"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"vhost" : 1,
"device" : 1
},
"indexName" : "vhost_1_device_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"vhost" : [
"[\"client1\", \"client1\"]"
],
"device" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
The result :
[
"F000105",
"F000107",
"F000109",
"F000110",
"F000113",
"F000119",
"F000121",
"F000124",
"F000128",
"F000131",
"F000134",
"F000138",
"F000144",
"F000146",
"F000147",
"F000148",
"F000149",
"F000150",
"F000153",
"F000155",
"F000156",
"F000159",
"F000161",
"F000164",
"F000166",
"F000167",
"F000168",
"F000169",
"F000170",
"F000171",
"F000172",
"F000181",
"F000183",
"F000184",
"F000187",
"F000190",
"F000192",
"F000193",
"F000203",
"F000204",
"F000205",
"F000208",
"F000209",
"F000215",
"F000221",
"F000223",
"F000243",
"F000249",
"F000250",
"F000251",
"F000253",
"F000255",
"S0E190E",
"S0E1A45",
"S0E1AC0",
"SYS_STATUS_ID",
"TS4D9292",
"TS4D9294",
"TS4D9296",
"TS4D9297",
"TS4D9298",
"TS4D9299",
"TS4D929B",
"TS4D929D",
"TS4D929F",
"TS4D92A0",
"TS4D92A2",
"TS4D92A6",
"TS4D92AA",
"TS4D92B1",
"TS4D92B2",
"TS4D92B3",
"TS4D92B4",
"TS4D92C2"
]
I have a MongoDB collection with about 350k documents in it, and I am doing simple count queries based on one of the integer fields, usually using $in. The field is indexed with both db.myColl.createIndex({indexedField: 1}) and db.myColl.createIndex({indexedField: -1}).
When I run a query matching one value, the response comes quickly, as expected:
> db.myColl.explain("executionStats").count({indexedField: 1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$eq" : 1
}
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "COUNT_SCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 171,
"totalKeysExamined" : 354783,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 101,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "COUNT_SCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 91,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2772,
"restoreState" : 2772,
"isEOF" : 1,
"invalidates" : 0,
"keysExamined" : 354783,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"indexedField" : 1
},
"startKeyInclusive" : true,
"endKey" : {
"indexedField" : 1
},
"endKeyInclusive" : true
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
However, when I attempt to query for more than one value for indexedField using $in, it slows to a crawl:
> db.myColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"indexedField" : {
"$in" : [
1,
2
]
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : -1
},
"indexName" : "indexedField_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[2.0, 2.0]",
"[1.0, 1.0]"
]
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 354782,
"executionTimeMillis" : 215153,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 214871,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 748,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11371,
"restoreState" : 11371,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Using $or instead of $in yields similar bad results:
> db.myColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDb.myColl",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"indexedField" : {
"$eq" : 1
}
},
{
"indexedField" : {
"$eq" : 2
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 219269,
"totalKeysExamined" : 354782,
"totalDocsExamined" : 354782,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 219170,
"works" : 354783,
"advanced" : 0,
"needTime" : 354782,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 354782,
"nSkipped" : 0,
"inputStage" : {
"stage" : "SUBPLAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219090,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11384,
"restoreState" : 11384,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 219040,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 354782,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 354782,
"executionTimeMillisEstimate" : 686,
"works" : 354783,
"advanced" : 354782,
"needTime" : 0,
"needYield" : 0,
"saveState" : 11383,
"restoreState" : 11383,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"indexedField" : 1
},
"indexName" : "indexedField_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"indexedField" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"indexedField" : [
"[1.0, 1.0]",
"[2.0, 2.0]"
]
},
"keysExamined" : 354782,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "...",
"port" : 27017,
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1
}
Any idea what is going wrong here?
You should be able to reproduce this by applying the following script to a Mongo installation.
Create a file create_test_database.js:
// Reproduction script for the mongo shell: builds indexTestColl with an
// ascending index on indexedField and fills it with RECORDS_TO_CREATE documents.
// Relies on the shell-provided globals `db`, `NumberInt` and `print`.
db.indexTestColl.createIndex({indexedField: 1});
var RECORDS_TO_CREATE = 5000000;
var insertedCount = 0;
while (insertedCount < RECORDS_TO_CREATE)
{
// Populate indexedField with random numbers [1 - 3].
var randomValue = (Math.random() * 10) % 3 + 1;
db.indexTestColl.insertOne({"indexedField": NumberInt(randomValue)});
insertedCount += 1;
// Report progress after every 10000 inserted documents.
if (insertedCount % 10000 === 0)
{
print("Inserted " + insertedCount + " documents.");
}
}
Create and populate the collection:
mongo localhost:27017/indexTestDb create_test_database.js
Then test it with these queries:
use indexTestDb
db.indexTestColl.explain("executionStats").count({indexedField: 1})
db.indexTestColl.explain("executionStats").count({indexedField: {$in: [1, 2]}})
db.indexTestColl.explain("executionStats").count({$or: [{indexedField: 1}, {indexedField: 2}] })
Am I correct in assuming that the $in and $or queries should benefit from the index being there?