MongoDB efficient way to search for objectId field with null using Index? - mongodb

I am trying to find an efficient way to search for items in which a specific field is null.
In MongoDB, I have a folder schema with a parent field that holds the ObjectId of the parent folder, and parent is indexed. For root folders, the parent field is null.
When I try to find all the root folders with parent: null, explain shows indexOnly: false:
db.folders.find({parent: null}, {parent: 1, _id: 0}).explain()
{
"cursor" : "BtreeCursor parent_1",
"isMultiKey" : false,
"n" : 126,
"nscannedObjects" : 126,
"nscanned" : 126,
"nscannedObjectsAllPlans" : 126,
"nscannedAllPlans" : 126,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"parent" : [
[
null,
null
]
]
},
"server" : "c268.candidate.36:10268",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 126,
"children" : [
{
"type" : "IXSCAN",
"works" : 127,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ parent: 1.0 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['parent']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 126,
"children" : []
}
]
}
]
}
]
}
}
I tried using $type to find the folders whose parent is not an ObjectId, but indexOnly is still false.
db.folders.find({parent: {$ne: {$type: 7}}}, {parent: 1, _id: 0}).explain()
"indexOnly": false
Is there a way to search for null values using only the index? If not, is there a better value to store instead of null so that the search can be answered from the index?
Additional
example of root folder
{
"_id" : ObjectId("55a04a2d754971030059b7ad"),
"active" : true,
"modified" : ISODate("2016-02-05T22:30:08.053Z"),
"created" : ISODate("2015-07-10T22:41:49.009Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "2nd Grade",
"parent" : null,
"clientModified" : ISODate("2016-02-05T22:30:07.872Z"),
"userCreated" : ISODate("2015-07-10T22:41:48.328Z"),
"ancestors" : [],
"__v" : 2
}
example of child folder
{
"_id" : ObjectId("56d0b4edb6f05e03009bcabc"),
"active" : true,
"modified" : ISODate("2016-02-26T20:26:21.328Z"),
"created" : ISODate("2016-02-26T20:26:21.328Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "music",
"parent" : ObjectId("55a04a2d754971030059b7ad"),
"clientModified" : ISODate("2016-02-26T20:26:20.398Z"),
"userCreated" : ISODate("2016-02-26T20:26:20.398Z"),
"ancestors" : [
ObjectId("55a04a2d754971030059b7ad")
],
"__v" : 0
}
db.folders.getIndexes()
{
"0" : {
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "app29099188.folders"
},
"1" : {
"v" : 1,
"key" : {
"user" : 1,
"_fts" : "text",
"_ftsx" : 1
},
"name" : "user_1_name_text",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null,
"weights" : {
"name" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 2
},
"2" : {
"v" : 1,
"key" : {
"user" : 1,
"parent" : 1
},
"name" : "user_1_parent_1",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null
},
"3" : {
"v" : 1,
"key" : {
"parent" : 1.0000000000000000
},
"name" : "parent_1",
"ns" : "app29099188.folders"
}
}

After comment - update:
The way to eliminate the document scan is to store an actual value in the parent field. It can be a zeroed ObjectId or just "/" to denote a root.
db.satoko.insert({"test":"sdsf", parent: "/"})
db.satoko.insert({"test":"sds33f", parent: "/"})
db.satoko.insert({"parent":ObjectId("56d8b2879bd059e7247a6096"), "test":"sdsf"})
explain results:
db.satoko.find({parent:{$eq:"/"} }, {parent: 1, _id: 0}).explain("allPlansExecution")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.satoko",
"indexFilterSet" : false,
"parsedQuery" : {
"parent" : {
"$eq" : "/"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 0,
"totalKeysExamined" : 2,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
},
"keysExamined" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "greg",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}

Related

MongoDB not using index when sorting

I'm using mongo 4.0.12 and I'm trying to tune my most executed query:
db.getCollection('ServiceInvoice').find(
{
"Provider.ParentId": "60f9d7631b1f243eb82903ee",
"Provider._id": "60f9d803fa27e34fdc4ec159",
"Environment": 1,
"Status": 2,
"IssuedOn":
{
"$gte": { DateTime: new Date("2022-02-01T00:00:00Z") },
"$lte": { DateTime: new Date("2022-02-01T23:59:59Z") }
}
}).limit(50).skip(1050).sort({ "IssueOn.DateTime": -1 })
using an index like:
{
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
}
and gives me this explain:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.ServiceInvoice",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"Environment" : {
"$eq" : 1.0
}
},
{
"Provider.ParentId" : {
"$eq" : "60f9d7631b1f243eb82903ee"
}
},
{
"Provider._id" : {
"$eq" : "60f9d803fa27e34fdc4ec159"
}
},
{
"Status" : {
"$eq" : 2.0
}
},
{
"IssuedOn" : {
"$lte" : {
"DateTime" : ISODate("2022-02-01T23:59:59.000Z")
}
}
},
{
"IssuedOn" : {
"$gte" : {
"DateTime" : ISODate("2022-02-01T00:00:00.000Z")
}
}
}
]
},
"winningPlan" : {
"stage" : "SKIP",
"skipAmount" : 0,
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"IssueOn.DateTime" : -1.0
},
"limitAmount" : 1100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
},
"indexName" : "Environment_1_Provider.ParentId_1_Provider._id_1_Status_1_IssueOn_1_IssueOn.DateTime_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"Environment" : [],
"Provider.ParentId" : [],
"Provider._id" : [],
"Status" : [],
"IssuedOn" : [],
"IssuedOn.DateTime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"Environment" : [
"[1.0, 1.0]"
],
"Provider.ParentId" : [
"[\"60f9d7631b1f243eb82903ee\", \"60f9d7631b1f243eb82903ee\"]"
],
"Provider._id" : [
"[\"60f9d803fa27e34fdc4ec159\", \"60f9d803fa27e34fdc4ec159\"]"
],
"Status" : [
"[2.0, 2.0]"
],
"IssuedOn" : [
"[{ DateTime: new Date(1643673600000) }, { DateTime: new Date(1643759999000) }]"
],
"IssuedOn.DateTime" : [
"[MaxKey, MinKey]"
]
}
}
}
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 50,
"executionTimeMillis" : 99,
"totalKeysExamined" : 31622,
"totalDocsExamined" : 31622,
"executionStages" : {
"stage" : "SKIP",
"nReturned" : 50,
"executionTimeMillisEstimate" : 6,
"works" : 32725,
"advanced" : 50,
"needTime" : 32674,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"skipAmount" : 0,
"inputStage" : {
"stage" : "SORT",
"nReturned" : 1100,
"executionTimeMillisEstimate" : 6,
"works" : 32725,
"advanced" : 1100,
"needTime" : 31624,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"IssueOn.DateTime" : -1.0
},
"memUsage" : 3057213,
"memLimit" : 33554432,
"limitAmount" : 1100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 4,
"works" : 31624,
"advanced" : 31622,
"needTime" : 1,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 3,
"works" : 31623,
"advanced" : 31622,
"needTime" : 0,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 31622,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 1,
"works" : 31623,
"advanced" : 31622,
"needTime" : 0,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
},
"indexName" : "Environment_1_Provider.ParentId_1_Provider._id_1_Status_1_IssueOn_1_IssueOn.DateTime_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"Environment" : [],
"Provider.ParentId" : [],
"Provider._id" : [],
"Status" : [],
"IssuedOn" : [],
"IssuedOn.DateTime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"Environment" : [
"[1.0, 1.0]"
],
"Provider.ParentId" : [
"[\"60f9d7631b1f243eb82903ee\", \"60f9d7631b1f243eb82903ee\"]"
],
"Provider._id" : [
"[\"60f9d803fa27e34fdc4ec159\", \"60f9d803fa27e34fdc4ec159\"]"
],
"Status" : [
"[2.0, 2.0]"
],
"IssuedOn" : [
"[{ DateTime: new Date(1643673600000) }, { DateTime: new Date(1643759999000) }]"
],
"IssuedOn.DateTime" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 31622,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"allPlansExecution" : []
},
"serverInfo" : {
"host" : "d4ef6b3e9c6c",
"port" : 27017,
"version" : "4.0.12",
"gitVersion" : "5776e3cbf9e7afe86e6b29e22520ffb6766e95d4"
},
"ok" : 1.0
}
However, dbKoda keeps telling me that I must create an index for sorting.
I've already tried creating a separate index on IssuedOn.DateTime, but it keeps recommending the creation and I don't see any effect.
How can I solve this problem? (Changes to the document fields are not an option.)
According to these threads - MongoDB - Index not being used when sorting and limiting on ranged query and https://emptysqua.re/blog/optimizing-mongodb-compound-indexes/
A compound index should be created following this order:
Equality Tests: Add all equality-tested fields to the compound index, in any order;
Sort Fields (ascending / descending only matters if there are multiple sort fields): Add sort fields to the index in the same order and direction as your query's sort;
Range Filters: First, add the range filter for the field with the lowest cardinality (fewest distinct values in the collection). Then the next lowest-cardinality range filter, and so on to the highest-cardinality.
So, the solution was creating an index like this:
{
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn.DateTime" : -1.0,
"IssuedOn" : 1.0
}
And now the query uses the index for sorting and fetches only the records in range.

Mongo Distinct Index for Dot Notated

I have a MongoDB v4.2.2 deployment hosted on Atlas. It has a collection
objects with two fields: metadata, an array of objects in which each
object has a key field, and a second field named model. I added a regular compound index
model->metadata.key. When I run the command
db.objects.explain('executionStats').distinct('metadata.key',
{model: ObjectId('5e18aff58a5aaffdc3d6f26d')}) from the console, it
doesn't use my model->metadata.key index and doesn't use DISTINCT_SCAN.
But if I move the data to another collection, let's say objectKeys, that
has two fields, key and model, add a regular compound
index model->key, and run the command
db.objectKeys.explain('executionStats').distinct('key', {model:
ObjectId('5e18aff58a5aaffdc3d6f26d')}), it successfully applies DISTINCT_SCAN
and uses the index.
Question: how can I force MongoDB to use indexes for a dot-notated
field during a distinct operation?
UPDATE DETAILS:
1.
db.objects.getIndexes();
{
"v" : 2,
"key" : {
"model" : 1,
"metadata.key" : 1,
"metadata.value" : 1
},
"name" : "model_1_metadata.key_1_metadata.value_1",
"ns" : "my_db.objects",
"background" : true
}
db.objects.explain('executionStats').distinct('metadata.key', {model: ObjectId('5e18aff58a5aaffdc3d6f26d')})
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"keyPattern" : {
"model" : 1,
"metadata.key" : 1,
"metadata.value" : 1
},
"indexName" : "model_1_metadata.key_1_metadata.value_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"model" : [ ],
"metadata.key" : [
"metadata"
],
"metadata.value" : [
"metadata"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"model" : [
"[ObjectId('5e18aff58a5aaffdc3d6f26d'), ObjectId('5e18aff58a5aaffdc3d6f26d')]"
],
"metadata.key" : [
"[MinKey, MaxKey]"
],
"metadata.value" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 0,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
2.
db.object_keys.getIndexes();
{
"v" : 2,
"key" : {
"models" : 1,
"key" : 1
},
"name" : "models_1_key_1",
"ns" : "my_db.object_keys",
"background" : true
},
db.object_keys.explain('executionStats').distinct('key', {models: ObjectId('5e18aff58a5aaffdc3d6f26d')})
"executionStages" : {
"stage" : "PROJECTION_COVERED",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"transformBy" : {
"_id" : 0,
"key" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"keyPattern" : {
"models" : 1,
"key" : 1
},
"indexName" : "models_1_key_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"models" : [
"models"
],
"key" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"models" : [
"[ObjectId('5e18aff58a5aaffdc3d6f26d'), ObjectId('5e18aff58a5aaffdc3d6f26d')]"
],
"key" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 0
}
}
So, as you can see, the first case uses IXSCAN (an index scan) and the second case uses DISTINCT_SCAN, which is considered faster. The difference is that in the first case the metadata field is an array of objects, while in the second case the key field is a string.

Sort of MongoDB using index

Below is the current index status of the collection I want to show you.
> db.histories.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "development.histories"
},
{
"v" : 1,
"key" : {
"hoge_id" : 1,
"created_at" : 1
},
"name" : "hoge_id_1_created_at_1",
"ns" : "development.histories",
"background" : true
},
{
"v" : 1,
"key" : {
"created_at" : 1
},
"name" : "created_at_1",
"ns" : "development.histories",
"background" : true
}
]
And, I executed the following query.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
And, the result was below.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 0,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
The result is fast; I guess that is due to the index on created_at.
ref. "totalDocsExamined" : 1, "executionTimeMillis" : 0
Then I executed the following query. The difference from the previous one is the field used for sorting.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
And, the result was below.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"id" : -1
},
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1215,
"totalKeysExamined" : 1034353,
"totalDocsExamined" : 1034353,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 1120,
"works" : 1034357,
"advanced" : 1,
"needTime" : 1034355,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"id" : -1
},
"memUsage" : 297,
"memLimit" : 33554432,
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 950,
"works" : 1034355,
"advanced" : 0,
"needTime" : 1,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 650,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1034353,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 330,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1034353,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
>
The result is slow this time.
ref. "totalDocsExamined" : 1034353, "executionTimeMillis" : 1215
As for totalDocsExamined, that is all of the documents.
Even though id is indexed just like created_at, why is the result slow when sorting by id?
For your 1st query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
MongoDB optimizes the query by using the compound index on hoge_id and created_at. It first matches on hoge_id and then uses the created_at part of the index to return the results in sorted order. This makes the sort operation very fast because the compound index is used efficiently.
However, for your 2nd query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
Since there is no compound index on hoge_id and id (you only have a single-field index on id), MongoDB actually has to sort the results by id itself, in memory.
More info on sorting with compound index can be found here.

Mongo 3.2 Sub-Document Index issue

Recently we upgraded our MongoDB from 2.6 (MMAPv1) to 3.2 (MMAPv1). After the upgrade, indexes on sub-document fields are not working. I did a small proof of concept on both databases using query explain.
In MongoDB 3.2, the sub-document index is not considered. Can anybody suggest a fix for this?
This is a sample document:
{
"_id" : ObjectId("58bff13e4e6904293cc206b4"),
"Value" : NumberLong(158),
"OVGuid" : NumberLong(0),
"Name" : "User 08/03/2017 03.55.42.782",
"CreateDate" : ISODate("2017-03-08T11:55:42.783Z"),
"RoleLst" : [
{
"_id" : NumberLong(146),
"Name" : "Role1"
},
{
"_id" : NumberLong(108),
"Name" : "Role2"
},
{
"_id" : NumberLong(29),
"Name" : "Role3"
}
]
}
I inserted nearly 100,000 documents with an index on "RoleLst._id" (db.User.createIndex({"RoleLst._id":1})) in both MongoDB 2.6 and 3.2.
Then I ran explain on the following query:
db.User.find({ "RoleLst" : { "$elemMatch" : { "_id" :NumberLong(200)}}}).explain()
This is the result I got from 3.2:
Explain for sub-document query
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "SubDocmentIndexChecking.User",
"indexFilterSet" : false,
"parsedQuery" : {
"RoleLst" : {
"$elemMatch" : {
"_id" : {
"$eq" : NumberLong(200)
}
}
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"RoleLst" : {
"$elemMatch" : {
"_id" : {
"$eq" : NumberLong(200)
}
}
}
},
"direction" : "forward"
},
"rejectedPlans" : []
},
"serverInfo" : {
"host" : "******",
"port" : ******,
"version" : "3.2.10",
"gitVersion" : "79d9b3ab5ce20f51c272b4411202710a082d0317"
},
"ok" : 1.0
}
This is the result I got from 2.6
Explain for sub-document query
{
"cursor" : "BtreeCursor RoleLst._id_1",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"RoleLst._id" : [
[
NumberLong(200),
NumberLong(200)
]
]
},
"server" : "***********",
"filterSet" : false,
"stats" : {
"type" : "KEEP_MUTATIONS",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 0,
"children" : [
{
"type" : "IXSCAN",
"works" : 1,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ RoleLst._id: 1.0 }",
"isMultiKey" : 1,
"boundsVerbose" : "field #0['RoleLst._id']: [200, 200]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 0,
"children" : []
}
]
}
]
}
}

Why indexOnly==false

I have a collection with index:
{
"UserId" : 1,
"ShareId" : 1,
"ParentId" : 1,
"DeletedDate" : 1
}
If I run this query:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026"),
"DeletedDate":null},
{_id:0, ShareId:1}).explain()
output says that "indexOnly" : false:
{
"cursor" : "BtreeCursor UserId_1_ShareId_1_ParentId_1_DeletedDate_1",
"isMultiKey" : false,
"n" : 2120,
"nscannedObjects" : 2120,
"nscanned" : 2120,
"nscannedObjectsAllPlans" : 2318,
"nscannedAllPlans" : 2320,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 21,
"nChunkSkips" : 0,
"millis" : 42,
"indexBounds" : {
"UserId" : [
[
ObjectId("5450d837f32a1e098c844e2a"),
ObjectId("5450d837f32a1e098c844e2a")
]
],
"ShareId" : [
[
ObjectId("5450d879f32a1e098c844e94"),
ObjectId("5450d879f32a1e098c844e94")
]
],
"ParentId" : [
[
ObjectId("5450d8af6a092a0b74a44026"),
ObjectId("5450d8af6a092a0b74a44026")
]
],
"DeletedDate" : [
[
null,
null
]
]
},
"server" : "mongowecntprod:27017",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 0,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 2120,
"children" : [
{
"type" : "IXSCAN",
"works" : 2121,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ UserId: 1, ShareId: 1, ParentId: 1, DeletedDate: 1 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['UserId']: [ObjectId('5450d837f32a1e098c844e2a'), ObjectId('5450d837f32a1e098c844e2a')], field #1['ShareId']: [ObjectId('5450d879f32a1e098c844e94'), ObjectId('5450d879f32a1e098c844e94')], field #2['ParentId']: [ObjectId('5450d8af6a092a0b74a44026'), ObjectId('5450d8af6a092a0b74a44026')], field #3['DeletedDate']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 2120,
"children" : []
}
]
}
]
}
]
}
}
But if I run the query without DeletedDate:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026")},
{_id:0, ShareId:1}).explain()
then "indexOnly" is true.
How can I change the first query so that indexOnly is true?
Let me give you a simple example that will hopefully demonstrate what you're seeing when you are querying for a field being null:
db.nullexplain.find()
{ "_id" : ObjectId("5456759f51a9d5271dc55bba"), "a" : 1 }
{ "_id" : ObjectId("545675a251a9d5271dc55bbb"), "a" : null }
{ "_id" : ObjectId("545675a551a9d5271dc55bbc") }
db.nullexplain.ensureIndex({a:1})
db.nullexplain.count({a:1}).count()
1
db.nullexplain.count({a:null}).count()
2
Do you see the issue? When "a" is present and explicitly set to null, it's indexed as null.
When "a" is not present in the document, it's also indexed as null.
When you query:
db.nullexplain.find({a:null},{_id:0,a:1})
{ "a" : null }
{ }
How can we derive from the index only whether the return document should have the field "a" set to null or if the field should not be present at all?
The answer is we cannot and therefore we must examine the document itself.
db.nullexplain.find({a:null},{_id:0,a:1}).explain()
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 2,
"nscannedObjects" : 3,
"nscanned" : 3,
"nscannedObjectsAllPlans" : 3,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"server" : "Asyas-MacBook-Pro.local:27017",
"filterSet" : false
}
Hope this helps you understand why querying for DeletedDate:null has to check the document and cannot be answered from the index.