Mongo 3.2 Sub-Document Index issue - mongodb

We recently upgraded MongoDB from 2.6 (MMAPv1) to 3.2 (MMAPv1). After the upgrade, indexes on sub-document fields are no longer being used. I did a small proof of concept on both versions using query explain.
In MongoDB 3.2 the sub-document index is not considered. Can anybody suggest a fix for this?
This is sample mongo document,
{
"_id" : ObjectId("58bff13e4e6904293cc206b4"),
"Value" : NumberLong(158),
"OVGuid" : NumberLong(0),
"Name" : "User 08/03/2017 03.55.42.782",
"CreateDate" : ISODate("2017-03-08T11:55:42.783Z"),
"RoleLst" : [
{
"_id" : NumberLong(146),
"Name" : "Role1"
},
{
"_id" : NumberLong(108),
"Name" : "Role2"
},
{
"_id" : NumberLong(29),
"Name" : "Role3"
}
]
}
I inserted nearly 100,000 documents, with an index on "RoleLst._id" (db.User.createIndex({"RoleLst._id":1})), in both MongoDB 2.6 and 3.2.
Then I tried query explain
db.User.find({ "RoleLst" : { "$elemMatch" : { "_id" :NumberLong(200)}}}).explain()
This is the result I got from 3.2:
Explain for sub-document query
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "SubDocmentIndexChecking.User",
"indexFilterSet" : false,
"parsedQuery" : {
"RoleLst" : {
"$elemMatch" : {
"_id" : {
"$eq" : NumberLong(200)
}
}
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"RoleLst" : {
"$elemMatch" : {
"_id" : {
"$eq" : NumberLong(200)
}
}
}
},
"direction" : "forward"
},
"rejectedPlans" : []
},
"serverInfo" : {
"host" : "******",
"port" : ******,
"version" : "3.2.10",
"gitVersion" : "79d9b3ab5ce20f51c272b4411202710a082d0317"
},
"ok" : 1.0
}
This is the result I got from 2.6
Explain for sub-document query
{
"cursor" : "BtreeCursor RoleLst._id_1",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 0,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 0,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"RoleLst._id" : [
[
NumberLong(200),
NumberLong(200)
]
]
},
"server" : "***********",
"filterSet" : false,
"stats" : {
"type" : "KEEP_MUTATIONS",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 0,
"children" : [
{
"type" : "IXSCAN",
"works" : 1,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 0,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ RoleLst._id: 1.0 }",
"isMultiKey" : 1,
"boundsVerbose" : "field #0['RoleLst._id']: [200, 200]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 0,
"children" : []
}
]
}
]
}
}

Related

MongoDB not using index when sorting

I'm using mongo 4.0.12 and I'm trying to tune my most executed query:
db.getCollection('ServiceInvoice').find(
{
"Provider.ParentId": "60f9d7631b1f243eb82903ee",
"Provider._id": "60f9d803fa27e34fdc4ec159",
"Environment": 1,
"Status": 2,
"IssuedOn":
{
"$gte": { DateTime: new Date("2022-02-01T00:00:00Z") },
"$lte": { DateTime: new Date("2022-02-01T23:59:59Z") }
}
}).limit(50).skip(1050).sort({ "IssueOn.DateTime": -1 })
using an index like:
{
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
}
and gives me this explain:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.ServiceInvoice",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"Environment" : {
"$eq" : 1.0
}
},
{
"Provider.ParentId" : {
"$eq" : "60f9d7631b1f243eb82903ee"
}
},
{
"Provider._id" : {
"$eq" : "60f9d803fa27e34fdc4ec159"
}
},
{
"Status" : {
"$eq" : 2.0
}
},
{
"IssuedOn" : {
"$lte" : {
"DateTime" : ISODate("2022-02-01T23:59:59.000Z")
}
}
},
{
"IssuedOn" : {
"$gte" : {
"DateTime" : ISODate("2022-02-01T00:00:00.000Z")
}
}
}
]
},
"winningPlan" : {
"stage" : "SKIP",
"skipAmount" : 0,
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"IssueOn.DateTime" : -1.0
},
"limitAmount" : 1100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
},
"indexName" : "Environment_1_Provider.ParentId_1_Provider._id_1_Status_1_IssueOn_1_IssueOn.DateTime_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"Environment" : [],
"Provider.ParentId" : [],
"Provider._id" : [],
"Status" : [],
"IssuedOn" : [],
"IssuedOn.DateTime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"Environment" : [
"[1.0, 1.0]"
],
"Provider.ParentId" : [
"[\"60f9d7631b1f243eb82903ee\", \"60f9d7631b1f243eb82903ee\"]"
],
"Provider._id" : [
"[\"60f9d803fa27e34fdc4ec159\", \"60f9d803fa27e34fdc4ec159\"]"
],
"Status" : [
"[2.0, 2.0]"
],
"IssuedOn" : [
"[{ DateTime: new Date(1643673600000) }, { DateTime: new Date(1643759999000) }]"
],
"IssuedOn.DateTime" : [
"[MaxKey, MinKey]"
]
}
}
}
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 50,
"executionTimeMillis" : 99,
"totalKeysExamined" : 31622,
"totalDocsExamined" : 31622,
"executionStages" : {
"stage" : "SKIP",
"nReturned" : 50,
"executionTimeMillisEstimate" : 6,
"works" : 32725,
"advanced" : 50,
"needTime" : 32674,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"skipAmount" : 0,
"inputStage" : {
"stage" : "SORT",
"nReturned" : 1100,
"executionTimeMillisEstimate" : 6,
"works" : 32725,
"advanced" : 1100,
"needTime" : 31624,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"IssueOn.DateTime" : -1.0
},
"memUsage" : 3057213,
"memLimit" : 33554432,
"limitAmount" : 1100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 4,
"works" : 31624,
"advanced" : 31622,
"needTime" : 1,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 3,
"works" : 31623,
"advanced" : 31622,
"needTime" : 0,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 31622,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 31622,
"executionTimeMillisEstimate" : 1,
"works" : 31623,
"advanced" : 31622,
"needTime" : 0,
"needYield" : 0,
"saveState" : 255,
"restoreState" : 255,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn" : 1.0,
"IssuedOn.DateTime" : -1.0
},
"indexName" : "Environment_1_Provider.ParentId_1_Provider._id_1_Status_1_IssueOn_1_IssueOn.DateTime_-1",
"isMultiKey" : false,
"multiKeyPaths" : {
"Environment" : [],
"Provider.ParentId" : [],
"Provider._id" : [],
"Status" : [],
"IssuedOn" : [],
"IssuedOn.DateTime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"Environment" : [
"[1.0, 1.0]"
],
"Provider.ParentId" : [
"[\"60f9d7631b1f243eb82903ee\", \"60f9d7631b1f243eb82903ee\"]"
],
"Provider._id" : [
"[\"60f9d803fa27e34fdc4ec159\", \"60f9d803fa27e34fdc4ec159\"]"
],
"Status" : [
"[2.0, 2.0]"
],
"IssuedOn" : [
"[{ DateTime: new Date(1643673600000) }, { DateTime: new Date(1643759999000) }]"
],
"IssuedOn.DateTime" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 31622,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"allPlansExecution" : []
},
"serverInfo" : {
"host" : "d4ef6b3e9c6c",
"port" : 27017,
"version" : "4.0.12",
"gitVersion" : "5776e3cbf9e7afe86e6b29e22520ffb6766e95d4"
},
"ok" : 1.0
}
However, dbKoda keeps telling me that I must create an index for sorting.
I've already tried creating a separate index on IssuedOn.DateTime, but it keeps recommending the creation and I don't see any effect.
How can I solve this problem? (Changes to the document fields are not an option).
According to these threads - MongoDB - Index not being used when sorting and limiting on ranged query and https://emptysqua.re/blog/optimizing-mongodb-compound-indexes/
A compound index should be created following this order:
Equality Tests: Add all equality-tested fields to the compound index, in any order;
Sort Fields (ascending / descending only matters if there are multiple sort fields): Add sort fields to the index in the same order and direction as your query's sort;
Range Filters: First, add the range filter for the field with the lowest cardinality (fewest distinct values in the collection). Then the next lowest-cardinality range filter, and so on to the highest-cardinality.
So, the solution was creating an index like this:
{
"Environment" : 1.0,
"Provider.ParentId" : 1.0,
"Provider._id" : 1.0,
"Status" : 1.0,
"IssuedOn.DateTime" : -1.0,
"IssuedOn" : 1.0
}
And now the query uses the index for sorting and fetches only the records in range.

Sort of MongoDB using index

Below are the indexes currently defined on the collection in question.
> db.histories.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "development.histories"
},
{
"v" : 1,
"key" : {
"hoge_id" : 1,
"created_at" : 1
},
"name" : "hoge_id_1_created_at_1",
"ns" : "development.histories",
"background" : true
},
{
"v" : 1,
"key" : {
"created_at" : 1
},
"name" : "created_at_1",
"ns" : "development.histories",
"background" : true
}
]
And, I executed the following query.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
And, the result was below.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 0,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 1,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
The result is fast; I guess that is due to the index on created_at.
ref. "totalDocsExamined" : 1, "executionTimeMillis" : 0
Then I executed the following query. The only difference from the previous one is the field used for sorting.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
And, the result was below.
> db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "development.histories",
"indexFilterSet" : false,
"parsedQuery" : {
"hoge_id" : {
"$eq" : ObjectId("5a5c171010ebfb1a2c901008")
}
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"id" : -1
},
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1215,
"totalKeysExamined" : 1034353,
"totalDocsExamined" : 1034353,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 1120,
"works" : 1034357,
"advanced" : 1,
"needTime" : 1034355,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"id" : -1
},
"memUsage" : 297,
"memLimit" : 33554432,
"limitAmount" : 1,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 950,
"works" : 1034355,
"advanced" : 0,
"needTime" : 1,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 650,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1034353,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1034353,
"executionTimeMillisEstimate" : 330,
"works" : 1034354,
"advanced" : 1034353,
"needTime" : 0,
"needYield" : 0,
"saveState" : 8080,
"restoreState" : 8080,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"hoge_id" : 1,
"created_at" : 1
},
"indexName" : "hoge_id_1_created_at_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"hoge_id" : [
"[ObjectId('5a5c171010ebfb1a2c901008'), ObjectId('5a5c171010ebfb1a2c901008')]"
],
"created_at" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 1034353,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "b9cb1b8d1fc1",
"port" : 27017,
"version" : "3.2.18",
"gitVersion" : "4c1bae566c0c00f996a2feb16febf84936ecaf6f"
},
"ok" : 1
}
>
The result is slow this time.
ref. "totalDocsExamined" : 1034353, "executionTimeMillis" : 1215
As for totalDocsExamined, that number is every document in the collection.
Even though id is indexed just like created_at, why is the result slow when sorting by id?
For your 1st query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { created_at: -1 } ).limit(1).explain("executionStats");
MongoDB is optimizing the performance by using the compound index on hoge_id and created_at. It firstly looks at the hoge_id and then it uses the index of created_at to sort the query results. In this way, the sort operation can be very fast because of efficient usage of compound index.
However, for your 2nd query:
db.histories.find({hoge_id: ObjectId("5a5c171010ebfb1a2c901008")}).sort( { id: -1 } ).limit(1).explain("executionStats");
Since there is no compound index on hoge_id and id (you only have a single-field index on id), MongoDB cannot read the results in sorted order from an index and has to sort them by id itself, in memory.
More info on sorting with compound index can be found here.

MongoDB multikey index performance

Background
I have a collection of users with structure of documents like this:
{
"_id" : ObjectId("54e61137cca5d2ff0a8b4567"),
"login" : "test1",
"emails" : [
{
"email" : "test1#example.com",
"is_primary" : true,
"_id" : ObjectId("57baf3e97323afb2688e639c")
},
{
"email" : "test1_1#example.com",
"is_primary" : false,
"_id" : ObjectId("57baf3e97323afb2688e639d")
}
]
}
Indexes:
{
"v" : 1,
"key" : {
"login" : 1
},
"name" : "login_1",
"ns" : "mydb.users",
"background" : true
},
{
"v" : 1,
"key" : {
"emails.email" : 1
},
"name" : "emails.email_1",
"ns" : "mydb.users"
}
Count of documents is ~700000
Scenario
To explain the search of users by login, I make this:
rs0:PRIMARY> db.users.explain('executionStats').find({'login' : /test123123123/})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "mydb.users",
"indexFilterSet" : false,
"parsedQuery" : {
"login" : /test123123123/
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"login" : /test123123123/
},
"keyPattern" : {
"login" : 1
},
"indexName" : "login_1",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"login" : [
"[\"\", {})",
"[/test123123123/, /test123123123/]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 1040,
"totalKeysExamined" : 698993,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 0,
"executionTimeMillisEstimate" : 930,
"works" : 698994,
"advanced" : 0,
"needTime" : 698993,
"needFetch" : 0,
"saveState" : 5460,
"restoreState" : 5460,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"login" : /test123123123/
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 920,
"works" : 698993,
"advanced" : 0,
"needTime" : 698993,
"needFetch" : 0,
"saveState" : 5460,
"restoreState" : 5460,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"login" : 1
},
"indexName" : "login_1",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"login" : [
"[\"\", {})",
"[/test123123123/, /test123123123/]"
]
},
"keysExamined" : 698993,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0
}
}
},
"serverInfo" : {
"host" : "myhost",
"port" : 27017,
"version" : "3.0.12",
"gitVersion" : "33934938e0e95d534cebbaff656cde916b9c3573"
},
"ok" : 1
}
As you can see, executionStats.executionStages.inputStage.nReturned is 0 and executionStats.totalDocsExamined is also 0. That's fine; I guess there are no documents whose login matches the pattern I entered. But if I want to search users by email, I do the following:
rs0:PRIMARY> db.users.explain('executionStats').find({'emails.email' : /test123123123/})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "mydb.users",
"indexFilterSet" : false,
"parsedQuery" : {
"emails.email" : /test123123123/
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"emails.email" : /test123123123/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"emails.email" : 1
},
"indexName" : "emails.email_1",
"isMultiKey" : true,
"direction" : "forward",
"indexBounds" : {
"emails.email" : [
"[\"\", {})",
"[/test123123123/, /test123123123/]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 7666,
"totalKeysExamined" : 699016,
"totalDocsExamined" : 698993,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"emails.email" : /test123123123/
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 7355,
"works" : 699017,
"advanced" : 0,
"needTime" : 699016,
"needFetch" : 0,
"saveState" : 5462,
"restoreState" : 5462,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 698993,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 698993,
"executionTimeMillisEstimate" : 1630,
"works" : 699016,
"advanced" : 698993,
"needTime" : 23,
"needFetch" : 0,
"saveState" : 5462,
"restoreState" : 5462,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"emails.email" : 1
},
"indexName" : "emails.email_1",
"isMultiKey" : true,
"direction" : "forward",
"indexBounds" : {
"emails.email" : [
"[\"\", {})",
"[/test123123123/, /test123123123/]"
]
},
"keysExamined" : 699016,
"dupsTested" : 699016,
"dupsDropped" : 23,
"seenInvalidated" : 0,
"matchTested" : 0
}
}
},
"serverInfo" : {
"host" : "myhost",
"port" : 27017,
"version" : "3.0.12",
"gitVersion" : "33934938e0e95d534cebbaff656cde916b9c3573"
},
"ok" : 1
}
And here executionStats.executionStages.inputStage.nReturned (and executionStats.totalDocsExamined) is equal 698993 (executionStats.nReturned is 0 like in first query)
Question
Why, when I search using the multikey index (emails.email), does the IXSCAN stage return my whole collection, so that the FETCH stage examines every document? When I search using the non-multikey index (login), the IXSCAN stage filters the keys as expected and the FETCH stage only receives what I want.
UPD: when I use a regular expression like /^smth/ instead of /smth/, the scan on the emails.email field also returns 0 elements. Why do multikey and ordinary indexes give different results for a regular expression like /smth/?
Because it is multikey index.
explained here
When a query filter specifies an exact match for an array as a whole, MongoDB can use the multikey index to look up the first element of the query array but cannot use the multikey index scan to find the whole array. Instead, after using the multikey index to look up the first element of the query array, MongoDB retrieves the associated documents and filters for documents whose array matches the array in the query.

MongoDB efficient way to search for objectId field with null using Index?

I am trying to find an efficient way to search for items in which specific field is null.
In MongoDB, I have a folder schema with a parent field that holds the ObjectId of the parent folder, and parent is indexed. For root folders, the parent field is null.
When I try to find all the root folders with parent: null, explain displays indexOnly: false
db.folders.find({parent: null}, {parent: 1, _id: 0}).explain()
{
"cursor" : "BtreeCursor parent_1",
"isMultiKey" : false,
"n" : 126,
"nscannedObjects" : 126,
"nscanned" : 126,
"nscannedObjectsAllPlans" : 126,
"nscannedAllPlans" : 126,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"parent" : [
[
null,
null
]
]
},
"server" : "c268.candidate.36:10268",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 126,
"children" : [
{
"type" : "IXSCAN",
"works" : 127,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ parent: 1.0 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['parent']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 126,
"children" : []
}
]
}
]
}
]
}
}
I tried using $type to find the folders whose parent is not an ObjectId, but still got indexOnly: false.
db.folders.find({parent: {$ne: {$type: 7}}}, {parent: 1, _id: 0}).explain()
"indexOnly": false
Is there a way to search null value only using index? If not, is there a better value to store instead of null to be able to search with index?
Additional
example of root folder
{
"_id" : ObjectId("55a04a2d754971030059b7ad"),
"active" : true,
"modified" : ISODate("2016-02-05T22:30:08.053Z"),
"created" : ISODate("2015-07-10T22:41:49.009Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "2nd Grade",
"parent" : null,
"clientModified" : ISODate("2016-02-05T22:30:07.872Z"),
"userCreated" : ISODate("2015-07-10T22:41:48.328Z"),
"ancestors" : [],
"__v" : 2
}
example of child folder
{
"_id" : ObjectId("56d0b4edb6f05e03009bcabc"),
"active" : true,
"modified" : ISODate("2016-02-26T20:26:21.328Z"),
"created" : ISODate("2016-02-26T20:26:21.328Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "music",
"parent" : ObjectId("55a04a2d754971030059b7ad"),
"clientModified" : ISODate("2016-02-26T20:26:20.398Z"),
"userCreated" : ISODate("2016-02-26T20:26:20.398Z"),
"ancestors" : [
ObjectId("55a04a2d754971030059b7ad")
],
"__v" : 0
}
db.folders.getIndexes()
{
"0" : {
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "app29099188.folders"
},
"1" : {
"v" : 1,
"key" : {
"user" : 1,
"_fts" : "text",
"_ftsx" : 1
},
"name" : "user_1_name_text",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null,
"weights" : {
"name" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 2
},
"2" : {
"v" : 1,
"key" : {
"user" : 1,
"parent" : 1
},
"name" : "user_1_parent_1",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null
},
"3" : {
"v" : 1,
"key" : {
"parent" : 1.0000000000000000
},
"name" : "parent_1",
"ns" : "app29099188.folders"
}
}
After comment - update:
The way to eliminate the document scan is to store an actual value in the parent field. It can be a zeroed ObjectId or just "/" as a root marker.
db.satoko.insert({"test":"sdsf", parent: "/"})
db.satoko.insert({"test":"sds33f", parent: "/"})
db.satoko.insert({"parent":ObjectId("56d8b2879bd059e7247a6096"), "test":"sdsf"})
explain results:
db.satoko.find({parent:{$eq:"/"} }, {parent: 1, _id: 0}).explain("allPlansExecution")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.satoko",
"indexFilterSet" : false,
"parsedQuery" : {
"parent" : {
"$eq" : "/"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 0,
"totalKeysExamined" : 2,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
},
"keysExamined" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "greg",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}

Why indexOnly==false

I have a collection with index:
{
"UserId" : 1,
"ShareId" : 1,
"ParentId" : 1,
"DeletedDate" : 1
}
If I run this query:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026"),
"DeletedDate":null},
{_id:0, ShareId:1}).explain()
output says that "indexOnly" : false:
{
"cursor" : "BtreeCursor UserId_1_ShareId_1_ParentId_1_DeletedDate_1",
"isMultiKey" : false,
"n" : 2120,
"nscannedObjects" : 2120,
"nscanned" : 2120,
"nscannedObjectsAllPlans" : 2318,
"nscannedAllPlans" : 2320,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 21,
"nChunkSkips" : 0,
"millis" : 42,
"indexBounds" : {
"UserId" : [
[
ObjectId("5450d837f32a1e098c844e2a"),
ObjectId("5450d837f32a1e098c844e2a")
]
],
"ShareId" : [
[
ObjectId("5450d879f32a1e098c844e94"),
ObjectId("5450d879f32a1e098c844e94")
]
],
"ParentId" : [
[
ObjectId("5450d8af6a092a0b74a44026"),
ObjectId("5450d8af6a092a0b74a44026")
]
],
"DeletedDate" : [
[
null,
null
]
]
},
"server" : "mongowecntprod:27017",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 0,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 2120,
"children" : [
{
"type" : "IXSCAN",
"works" : 2121,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ UserId: 1, ShareId: 1, ParentId: 1, DeletedDate: 1 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['UserId']: [ObjectId('5450d837f32a1e098c844e2a'), ObjectId('5450d837f32a1e098c844e2a')], field #1['ShareId']: [ObjectId('5450d879f32a1e098c844e94'), ObjectId('5450d879f32a1e098c844e94')], field #2['ParentId']: [ObjectId('5450d8af6a092a0b74a44026'), ObjectId('5450d8af6a092a0b74a44026')], field #3['DeletedDate']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 2120,
"children" : []
}
]
}
]
}
]
}
}
but if I run the query without DeletedDate:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026")},
{_id:0, ShareId:1}).explain()
then "indexOnly" is true.
How can I change the first query so that indexOnly is true?
Let me give you a simple example that will hopefully demonstrate what you're seeing when you are querying for a field being null:
db.nullexplain.find()
{ "_id" : ObjectId("5456759f51a9d5271dc55bba"), "a" : 1 }
{ "_id" : ObjectId("545675a251a9d5271dc55bbb"), "a" : null }
{ "_id" : ObjectId("545675a551a9d5271dc55bbc") }
db.nullexplain.ensureIndex({a:1})
db.nullexplain.find({a:1}).count()
1
db.nullexplain.find({a:null}).count()
2
Do you see the issue? When "a" is present and explicitly set to null, it's indexed as null.
When "a" is not present in the document, it's also indexed as null.
When you query:
db.nullexplain.find({a:null},{_id:0,a:1})
{ "a" : null }
{ }
How can we derive from the index only whether the return document should have the field "a" set to null or if the field should not be present at all?
The answer is we cannot and therefore we must examine the document itself.
db.nullexplain.find({a:null},{_id:0,a:1}).explain()
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 2,
"nscannedObjects" : 3,
"nscanned" : 3,
"nscannedObjectsAllPlans" : 3,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"server" : "Asyas-MacBook-Pro.local:27017",
"filterSet" : false
}
Hope this helps you understand why querying for DeletedDate:null has to check the document and cannot be answered from the index.