Related
I am trying to find an efficient way to search for items in which specific field is null.
In the MongoDB, I have folder schema which has parent field of its parent folder's ObjectId and parent is indexed. For root folders, parent fields are null.
When I try to find all the root folders with parent:null, explain displays indexOnly: false
db.folders.find({parent: null}, {parent: 1, _id: 0}).explain()
{
"cursor" : "BtreeCursor parent_1",
"isMultiKey" : false,
"n" : 126,
"nscannedObjects" : 126,
"nscanned" : 126,
"nscannedObjectsAllPlans" : 126,
"nscannedAllPlans" : 126,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 1,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"parent" : [
[
null,
null
]
]
},
"server" : "c268.candidate.36:10268",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 128,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 126,
"children" : [
{
"type" : "IXSCAN",
"works" : 127,
"yields" : 1,
"unyields" : 1,
"invalidates" : 0,
"advanced" : 126,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ parent: 1.0 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['parent']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 126,
"children" : []
}
]
}
]
}
]
}
}
I tried to use $type to find any ones which parent folder is not ObjectId. But still indexOnly:false.
db.folders.find({parent: {$ne: {$type: 7}}}, {parent: 1, _id: 0}).explain()
"indexOnly": false
Is there a way to search null value only using index? If not, is there a better value to store instead of null to be able to search with index?
Additional
example of root folder
{
"_id" : ObjectId("55a04a2d754971030059b7ad"),
"active" : true,
"modified" : ISODate("2016-02-05T22:30:08.053Z"),
"created" : ISODate("2015-07-10T22:41:49.009Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "2nd Grade",
"parent" : null,
"clientModified" : ISODate("2016-02-05T22:30:07.872Z"),
"userCreated" : ISODate("2015-07-10T22:41:48.328Z"),
"ancestors" : [],
"__v" : 2
}
example of child folder
{
"_id" : ObjectId("56d0b4edb6f05e03009bcabc"),
"active" : true,
"modified" : ISODate("2016-02-26T20:26:21.328Z"),
"created" : ISODate("2016-02-26T20:26:21.328Z"),
"user" : ObjectId("54d3ae187a738c0300f59e61"),
"name" : "music",
"parent" : ObjectId("55a04a2d754971030059b7ad"),
"clientModified" : ISODate("2016-02-26T20:26:20.398Z"),
"userCreated" : ISODate("2016-02-26T20:26:20.398Z"),
"ancestors" : [
ObjectId("55a04a2d754971030059b7ad")
],
"__v" : 0
}
db.folders.getIndexes()
{
"0" : {
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "app29099188.folders"
},
"1" : {
"v" : 1,
"key" : {
"user" : 1,
"_fts" : "text",
"_ftsx" : 1
},
"name" : "user_1_name_text",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null,
"weights" : {
"name" : 1
},
"default_language" : "english",
"language_override" : "language",
"textIndexVersion" : 2
},
"2" : {
"v" : 1,
"key" : {
"user" : 1,
"parent" : 1
},
"name" : "user_1_parent_1",
"ns" : "app29099188.folders",
"background" : true,
"safe" : null
},
"3" : {
"v" : 1,
"key" : {
"parent" : 1.0000000000000000
},
"name" : "parent_1",
"ns" : "app29099188.folders"
}
}
After comment - update:
The way to eliminate docScan is to have a value in parent field. It can be zeroed objectId or just "/" as a root.
db.satoko.insert({"test":"sdsf", parent: "/"})
db.satoko.insert({"test":"sds33f", parent: "/"})
db.satoko.insert({"parent":ObjectId("56d8b2879bd059e7247a6096"), "test":"sdsf"})
explain results:
db.satoko.find({parent:{$eq:"/"} }, {parent: 1, _id: 0}).explain("allPlansExec
ution")
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.satoko",
"indexFilterSet" : false,
"parsedQuery" : {
"parent" : {
"$eq" : "/"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 0,
"totalKeysExamined" : 2,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"parent" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 0,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"parent" : 1
},
"indexName" : "parent_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[\"/\", \"/\"]"
]
},
"keysExamined" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "greg",
"port" : 27017,
"version" : "3.2.3",
"gitVersion" : "b326ba837cf6f49d65c2f85e1b70f6f31ece7937"
},
"ok" : 1
}
I have a collection with index:
{
"UserId" : 1,
"ShareId" : 1,
"ParentId" : 1,
"DeletedDate" : 1
}
If I making query:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026"),
"DeletedDate":null},
{_id:0, ShareId:1}).explain()
output says that "indexOnly" : false:
{
"cursor" : "BtreeCursor UserId_1_ShareId_1_ParentId_1_DeletedDate_1",
"isMultiKey" : false,
"n" : 2120,
"nscannedObjects" : 2120,
"nscanned" : 2120,
"nscannedObjectsAllPlans" : 2318,
"nscannedAllPlans" : 2320,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 21,
"nChunkSkips" : 0,
"millis" : 42,
"indexBounds" : {
"UserId" : [
[
ObjectId("5450d837f32a1e098c844e2a"),
ObjectId("5450d837f32a1e098c844e2a")
]
],
"ShareId" : [
[
ObjectId("5450d879f32a1e098c844e94"),
ObjectId("5450d879f32a1e098c844e94")
]
],
"ParentId" : [
[
ObjectId("5450d8af6a092a0b74a44026"),
ObjectId("5450d8af6a092a0b74a44026")
]
],
"DeletedDate" : [
[
null,
null
]
]
},
"server" : "mongowecntprod:27017",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 0,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 2124,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 2,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 2120,
"children" : [
{
"type" : "IXSCAN",
"works" : 2121,
"yields" : 21,
"unyields" : 21,
"invalidates" : 0,
"advanced" : 2120,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ UserId: 1, ShareId: 1, ParentId: 1, DeletedDate: 1 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['UserId']: [ObjectId('5450d837f32a1e098c844e2a'), ObjectId('5450d837f32a1e098c844e2a')], field #1['ShareId']: [ObjectId('5450d879f32a1e098c844e94'), ObjectId('5450d879f32a1e098c844e94')], field #2['ParentId']: [ObjectId('5450d8af6a092a0b74a44026'), ObjectId('5450d8af6a092a0b74a44026')], field #3['DeletedDate']: [null, null]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 2120,
"children" : []
}
]
}
]
}
]
}
}
but if I making query without DeletedDate:
db.Files.find({ "UserId" : ObjectId("5450d837f32a1e098c844e2a"),
"ShareId" : ObjectId("5450d879f32a1e098c844e94"),
"ParentId" : ObjectId("5450d8af6a092a0b74a44026")},
{_id:0, ShareId:1}).explain()
then "indexOnly" is true.
How I can change first query to making indexOnly=true?
Let me give you a simple example that will hopefully demonstrate what you're seeing when you are querying for a field being null:
db.nullexplain.find()
{ "_id" : ObjectId("5456759f51a9d5271dc55bba"), "a" : 1 }
{ "_id" : ObjectId("545675a251a9d5271dc55bbb"), "a" : null }
{ "_id" : ObjectId("545675a551a9d5271dc55bbc") }
db.nullexplain.ensureIndex({a:1})
db,nullexplain.count({a:1}).count()
1
db.nullexplain.count({a:null}).count()
2
Do you see the issue? When "a" is present and explicitly set to null, it's indexed as null.
When "a" is not present in the document, it's also indexed as null.
When you query:
db.nullexplain.find({a:null},{_id:0,a:1})
{ "a" : null }
{ }
How can we derive from the index only whether the return document should have the field "a" set to null or if the field should not be present at all?
The answer is we cannot and therefore we must examine the document itself.
db.nullexplain.find({a:null},{_id:0,a:1}).explain()
{
"cursor" : "BasicCursor",
"isMultiKey" : false,
"n" : 2,
"nscannedObjects" : 3,
"nscanned" : 3,
"nscannedObjectsAllPlans" : 3,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 3,
"server" : "Asyas-MacBook-Pro.local:27017",
"filterSet" : false
}
Hope this helps you understand why querying for DeletedDate:null has to check the document and cannot be answered from the index.
first of all, I googled and searched this forum but found no direct answer to my question, so I decided to ask a new one.
I have a "sensors" collection containing about 20k sensors. My query is very simple:
QueryBuilder qb = new QueryBuilder().and(
new QueryBuilder().put("q1").lessThan(rb).get(),
new QueryBuilder().put("q3").greaterThan(ra).get()
);
DBCursor cursor = sensorColl.find(qb.get());
begin = System.currentTimeMillis();
while (cursor.hasNext()) {
cursor.next();
}
long totalSearchTime = System.currentTimeMillis()-begin;
logger.debug("totalSearchTime: {}", totalSearchTime);
which shows the totalSearchTime is 316647ms! I repeated this code snippet multiple time and on average it takes similar time to complete. Here is the explain() for the query:
{
"cursor" : "BtreeCursor q1_1_q3_-1",
"isMultiKey" : false,
"n" : 16905,
"nscannedObjects" : 16905,
"nscanned" : 16905,
"nscannedObjectsAllPlans" : 16905,
"nscannedAllPlans" : 16905,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 132,
"nChunkSkips" : 0,
"millis" : 102,
"indexBounds" : {
"q1" : [
[
-Infinity,
30
]
],
"q3" : [
[
Infinity,
10
]
]
},
"server" : "localhost:27017",
"filterSet" : false,
"stats" : {
"type" : "FETCH",
"works" : 16907,
"yields" : 132,
"unyields" : 132,
"invalidates" : 0,
"advanced" : 16905,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 0,
"children" : [
{
"type" : "IXSCAN",
"works" : 16906,
"yields" : 132,
"unyields" : 132,
"invalidates" : 0,
"advanced" : 16905,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ q1: 1.0, q3: -1.0 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['q1']: [-inf.0, 30.0), field #1['q3']: [inf.0, 10.0)",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 16905,
"children" : []
}
]
}
}
And here is the stats()
{
"ns" : "sensor_db.sensors",
"count" : 16999,
"size" : 272697744,
"avgObjSize" : 16041,
"storageSize" : 315080704,
"numExtents" : 15,
"nindexes" : 2,
"lastExtentSize" : 84451328,
"paddingFactor" : 1.006,
"systemFlags" : 0,
"userFlags" : 1,
"totalIndexSize" : 1888656,
"indexSizes" : {
"_id_" : 981120,
"q1_1_q3_-1" : 907536
},
"ok" : 1
}
I run my app in my test system (my laptop) with Intel(R) Core(TM)2 Duo CPU T7100 # 1.80GHz, 800 MHz and 1.5GB RAM (of which ~700MB are free at the time the query was run). The disk space is 58GB free so that is a lot.
I hope those information is enough for analysis. Thanks a lot for any suggestion!
I use MongoDB 2.6.4.
My indexes looks like this:
{
"v" : 1,
"key" : {
"isFolder" : 1
},
"name" : "isFolder_1",
"ns" : "Tenant_51.files",
"background" : true
},
{
"v" : 1,
"key" : {
"isForeign" : 1
},
"name" : "isForeign_1",
"ns" : "Tenant_51.files",
"background" : true
},
My query looks like this:
db.files.find({ isFolder: true, isForeign: false }).explain(true)
For some reason, it chooses to use only 1 index (VERY SLOW: 680 seconds!!)
It looks like it does calculate the Complex Plan, however, decides not to use it, and I don't understand why.
Here is the execution plan:
{
"cursor" : "BtreeCursor isFolder_1",
"isMultiKey" : false,
"n" : 107441,
"nscannedObjects" : 110580,
"nscanned" : 110580,
"nscannedObjectsAllPlans" : 110689,
"nscannedAllPlans" : 110801,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 21056,
"nChunkSkips" : 0,
"millis" : 679121,
"indexBounds" : {
"isFolder" : [
[
true,
true
]
]
},
"allPlans" : [
{
"cursor" : "BtreeCursor isFolder_1",
"isMultiKey" : false,
"n" : 107441,
"nscannedObjects" : 110580,
"nscanned" : 110580,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"isFolder" : [
[
true,
true
]
]
}
},
{
"cursor" : "BtreeCursor isForeign_1",
"isMultiKey" : false,
"n" : 68,
"nscannedObjects" : 109,
"nscanned" : 110,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"isForeign" : [
[
false,
false
]
]
}
},
{
"cursor" : "Complex Plan",
"n" : 42,
"nscannedObjects" : 0,
"nscanned" : 111,
"nChunkSkips" : 0
}
],
"server" : "XXX",
"filterSet" : false,
"stats" : {
"type" : "KEEP_MUTATIONS",
"works" : 128743,
"yields" : 21056,
"unyields" : 21056,
"invalidates" : 13834,
"advanced" : 107441,
"needTime" : 3140,
"needFetch" : 18161,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 128743,
"yields" : 21056,
"unyields" : 21056,
"invalidates" : 13834,
"advanced" : 107441,
"needTime" : 3140,
"needFetch" : 18161,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 107441,
"children" : [
{
"type" : "IXSCAN",
"works" : 110581,
"yields" : 21056,
"unyields" : 21056,
"invalidates" : 13834,
"advanced" : 110580,
"needTime" : 1,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ isFolder: 1 }",
"isMultiKey" : 0,
"boundsVerbose" : "field #0['isFolder']: [true, true]",
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 110580,
"children" : [ ]
}
]
}
]
}
}
From the MongoDB docs about indexing:
MongoDB can only use one index to support any given operation.
The solution, however, is as easy as the explanation: Use a compound index.
db.files.ensureIndex({isFolder:1,isForeign:1})
The collection is a sharded collection over the hashed field.
The following query should definitly be indexOnly but explain shows otherwise.
db.collection.ensureIndex({field : "hashed"})
db.collection.ensureIndex({field : 1, "field2" : 1, "field3" : 1})
db.collection.find(
{
field : 100
}
,{field : 1, _id : 0}
)
//.hint({ "field" : 1, "field2" : 1, "field3" : 1})
//.hint({ "field" : "hashed"})
.explain()
"cursor" : "BtreeCursor field_hashed",
"nscannedObjects" : 1,
"nscanned" : 1,
"indexOnly" : false,
I tested to hint both indexes but none of them generate a covered query.
I would appreciate any help or suggestions.
explain():
{
"clusteredType" : "ParallelSort",
"shards" : {
"repset12" : [
{
"cursor" : "BtreeCursor field_hashed",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"field" : [
[
NumberLong(5346856657151215906),
NumberLong(5346856657151215906)
]
]
},
"server" : "server",
"filterSet" : false,
"stats" : {
"type" : "PROJECTION",
"works" : 3,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "KEEP_MUTATIONS",
"works" : 3,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "SHARDING_FILTER",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"chunkSkips" : 0,
"children" : [
{
"type" : "FETCH",
"works" : 1,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 0,
"forcedFetches" : 0,
"matchTested" : 1,
"children" : [
{
"type" : "IXSCAN",
"works" : 1,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"keyPattern" : "{ field: \"hashed\" }",
"boundsVerbose" : "field #0['field']: [5346856657151215906, 5346856657151215906]",
"isMultiKey" : 0,
"yieldMovedCursor" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0,
"keysExamined" : 1,
"children" : []
}
]
}
]
}
]
}
]
}
}
]
},
"cursor" : "BtreeCursor field_hashed",
"n" : 1,
"nChunkSkips" : 0,
"nYields" : 0,
"nscanned" : 1,
"nscannedAllPlans" : 2,
"nscannedObjects" : 1,
"nscannedObjectsAllPlans" : 2,
"millisShardTotal" : 0,
"millisShardAvg" : 0,
"numQueries" : 1,
"numShards" : 1,
"indexBounds" : {
"field" : [
[
NumberLong(5346856657151215906),
NumberLong(5346856657151215906)
]
]
},
"millis" : 1
}
As at MongoDB 2.6, you won't get a fully covered sharded query because there is an extra query to check if the shard in question owns that document (see SERVER-5022 in the MongoDB issue tracker).
The mongos router filters documents that are found on a shard but that should not live there according to the sharded cluster metadata.
Documents can exist on more than one shard if:
There is a chunk migration in progress: documents are copied from a donor shard to a destination shard and are not removed from the donor shard until the chunk migration successfully completes.
Documents have been "orphaned" on a shard as a result of a failed migration or incomplete clean up. There is a cleanupOrphaned admin command in MongoDB 2.6 which can be run against a sharded mongod to delete orphaned documents.
This covered query limitation is noted in the Limits: Covered Queries in Sharded Clusters section of the MongoDB documentation but should also be highlighted in the tutorial on Creating Covered Queries. I've raised DOCS-3820 to make this more obvious.