How to query Mongodb on Single Field with multiple ranges efficiently? - mongodb

I am trying to build custom geospatial indexes using MongoDB's B-tree indexes, as I find Mongo's native implementation limiting for my own case. In order to fulfill my geospatial queries which will effectively search Mongo using a compound index, I need to filter by the location.locIndexKey field with multiple ranges, among other fields.
So far, the only solution I could come up with to support this kind of query was using Mongo's $or operator. However, this performed badly since it's an $or query and Mongo has to examine the same keys on the index again and again. In order to overcome this inefficiency, I need a way to make Mongo use multiple index bounds on that field instead of replicating the query with an $or clause for each defined bound in the query.
This is my query:
db.users.find({
"gender":2,
"preferences.feed.gender":1,
"age":{"$gte":18,"$lte":55},
"feedPrefChangeDay":{"$gte":1553461200,"$lte":1554066000},
"$or":[{"location.locIndexKey":{"$gte":NumberLong(1493233547543052300),"$lte":NumberLong(1493242343636074500)}},{"location.locIndexKey":{"$gte":NumberLong(1493242343636074500),"$lte":NumberLong(1493251139729096700)}},{"location.locIndexKey":{"$gte":NumberLong(1493287011295953000),"$lte":NumberLong(1493287148734906400)}}]
}).limit(20);
As you can see, in order to express multiple ranges on field location.locIndexKey, I had to use the $or operator. This is the shortened version of query planner's execution stats:
{
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 17762,
"totalKeysExamined" : 196192,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196193,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 20,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196193,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "OR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196192,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"recordIdsForgotten" : 0,
"inputStages" : [
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 10,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493569998101151700, 1493572197124407300]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493587581697261600, 1493587590287196200]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493981215449940000, 1493990011542962200]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
If you check indexBounds above, you will see that each range for location.locIndexKey is applied in its own separate index scan, and the scans are then combined with $or. However, if I choose to run the same query using Mongo's native geospatial operator $geoWithin:
db.users.find({
"gender" : 2.0,
"preferences.feed.gender" : 1.0,
"age" : {
"$gte" : 18.0,
"$lte" : 55.0
},
"feedPrefChangeDay" : {
"$gte" : 1553461200.0,
"$lte" : 1554066000.0
},
"location.loc" : {
"$geoWithin" : {
"$centerSphere" : [
[
0.0,
0.0
],
0.00784806152880239
]
}
}
}).limit(20);
I get the following response from the query planner:
{
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 7,
"totalKeysExamined" : 4506,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 20,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"location.loc" : {
"$geoWithin" : {
"$centerSphere" : [
[
0.0,
0.0
],
0.00784806152880239
]
}
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.loc" : "2dsphere"
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.loc_2dsphere",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.loc" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.loc" : [
"[360287970189639680, 360287970189639680]",
"[378302368699121664, 378302368699121664]",
"[382805968326492160, 382805968326492160]",
"[383931868233334784, 383931868233334784]",
"[384213343210045440, 384213343210045440]",
"[384230935396089856, 384230935396089856]",
"[384235333442600960, 384235333442600960]",
"[384236432954228736, 384236432954228736]",
"[384236432954228737, 384236982710042623]",
"[384266119768178688, 384266119768178688]",
"[384266119768178689, 384274915861200895]",
"[384274915861200897, 384283711954223103]",
"[384283711954223104, 384283711954223104]",
"[384283711954223105, 384318896326311935]",
"[384318896326311937, 384354080698400767]",
"[1080863910568919040, 1080863910568919040]",
"[1134907106097364992, 1134907106097364992]",
"[1148417904979476480, 1148417904979476480]",
"[1151795604700004352, 1151795604700004352]",
"[1152640029630136320, 1152640029630136320]",
"[1152789563211513857, 1152798359304536063]",
"[1152798359304536064, 1152798359304536064]",
"[1152798359304536065, 1152807155397558271]",
"[1152833543676624896, 1152833543676624896]",
"[1152846737816158208, 1152846737816158208]",
"[1152850036351041536, 1152850036351041536]",
"[1152850586106855425, 1152851135862669311]",
"[1152851135862669312, 1152851135862669312]",
"[1152851135862669313, 1152859931955691519]",
"[1152868728048713728, 1152868728048713728]",
"[1152877524141735937, 1152886320234758143]",
"[1152886320234758145, 1152921504606846975]",
"[1152921504606846977, 1152956688978935807]",
"[1152956688978935809, 1152991873351024639]",
"[1152991873351024640, 1152991873351024640]",
"[1152991873351024641, 1152992423106838527]",
"[1152992972862652416, 1152992972862652416]",
"[1152996271397535744, 1152996271397535744]",
"[1153009465537069056, 1153009465537069056]",
"[1153035853816135681, 1153044649909157887]",
"[1153044649909157888, 1153044649909157888]",
"[1153044649909157889, 1153053446002180095]",
"[1153202979583557632, 1153202979583557632]",
"[1154047404513689600, 1154047404513689600]",
"[1157425104234217472, 1157425104234217472]",
"[1170935903116328960, 1170935903116328960]",
"[1224979098644774912, 1224979098644774912]",
"[1921488928515293185, 1921524112887382015]",
"[1921524112887382017, 1921559297259470847]",
"[1921559297259470848, 1921559297259470848]",
"[1921559297259470849, 1921594481631559679]",
"[1921606026503651329, 1921606576259465215]",
"[1921606576259465216, 1921606576259465216]",
"[1921607675771092992, 1921607675771092992]",
"[1921612073817604096, 1921612073817604096]",
"[1921629666003648512, 1921629666003648512]",
"[1921911140980359168, 1921911140980359168]",
"[1923037040887201792, 1923037040887201792]",
"[1927540640514572288, 1927540640514572288]",
"[1945555039024054272, 1945555039024054272]"
]
},
"keysExamined" : 4506,
"seeks" : 4506,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
As you can see, Mongo takes advantage of multiple index bounds to fulfill this query, and it's much more efficient.
I believe the inefficiency in the original query happens because Mongo's query planner doesn't check what's inside the $or expression. I think it should be clever enough to recognize that there is just one field with multiple ranges inside the expression and build the query using multiple index bounds. Sadly, this is not the case.
My question: is there any way that I can force Mongo to use multiple index bounds for my query, so that it's as efficient as the native geospatial query?
Any help would be appreciated.
Thanks!

Related

Mongo Distinct Index for Dot Notated

I have MongoDB v4.2.2 hosted on Atlas. I have a collection
objects with two fields: metadata, an array of objects where each
object has a field key, and a second field named model. I added a regular compound index
model->metadata.key. When I run the command
db.objects.explain('executionStats').distinct('metadata.key',
{model: ObjectId('5e18aff58a5aaffdc3d6f26d')}) from the console, it
doesn't use my model->metadata.key index and doesn't use DISTINCT_SCAN.
But if I move the data to another collection, let's say objectKeys, that
has two fields, key and model, add a regular compound
index model->key again, and run the command
db.objectKeys.explain('executionStats').distinct('key', {model:
ObjectId('5e18aff58a5aaffdc3d6f26d')}), it successfully applies DISTINCT_SCAN
and uses the index.
Question: how can I force MongoDB to use an index for a dot-notated
field during a distinct operation?
UPDATE DETAILS:
1.
db.objects.getIndexes();
{
"v" : 2,
"key" : {
"model" : 1,
"metadata.key" : 1,
"metadata.value" : 1
},
"name" : "model_1_metadata.key_1_metadata.value_1",
"ns" : "my_db.objects",
"background" : true
}
db.objects.explain('executionStats').distinct('metadata.key', {model: ObjectId('5e18aff58a5aaffdc3d6f26d')})
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"keyPattern" : {
"model" : 1,
"metadata.key" : 1,
"metadata.value" : 1
},
"indexName" : "model_1_metadata.key_1_metadata.value_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"model" : [ ],
"metadata.key" : [
"metadata"
],
"metadata.value" : [
"metadata"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"model" : [
"[ObjectId('5e18aff58a5aaffdc3d6f26d'), ObjectId('5e18aff58a5aaffdc3d6f26d')]"
],
"metadata.key" : [
"[MinKey, MaxKey]"
],
"metadata.value" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 0,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
2.
db.object_keys.getIndexes();
{
"v" : 2,
"key" : {
"models" : 1,
"key" : 1
},
"name" : "models_1_key_1",
"ns" : "my_db.object_keys",
"background" : true
},
db.object_keys.explain('executionStats').distinct('key', {models: ObjectId('5e18aff58a5aaffdc3d6f26d')})
"executionStages" : {
"stage" : "PROJECTION_COVERED",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"transformBy" : {
"_id" : 0,
"key" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"keyPattern" : {
"models" : 1,
"key" : 1
},
"indexName" : "models_1_key_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"models" : [
"models"
],
"key" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"models" : [
"[ObjectId('5e18aff58a5aaffdc3d6f26d'), ObjectId('5e18aff58a5aaffdc3d6f26d')]"
],
"key" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 0
}
}
So as you can see, in the first case it uses IXSCAN (an index scan) and in the second case it uses DISTINCT_SCAN, which is considered faster. The difference is that in the first case the metadata field is an array of objects, and in the second case the key field is a string.

Index management with MongoDB

I have several questions about how MongoDB chooses the index that will be most efficient for retrieving data.
I have the following query :
db.getCollection('myCollection').find({
"TenantId":"1a1a1a1a",
"ContractId": 1111,
"Lists":1111,
"Email":{"$exists":true,"$nin":["",null]},
"Optouts.Media":{"$nin":["EMAIL",null]},
"Deleted":false
})
This query is used in a mapReduce which failed. When I run it with count(), I get a result of 290000 in 47s.
So, to understand what happens, I executed the query again in explain mode. And indeed, the index used doesn't seem to be the best one (TenantId_1_ContractId_1_GUID_1_Deleted_1).
I re-ran the same query with hint(TenantId_1_ContractId_1_Lists_1_Deleted_1) and this time I got the result in 2 sec.
How does Mongo select the index to retrieve data?
Good to know:
For the current ContractId I have 1.6 million entries.
When filtering on Lists, I get 700 000 entries.
In the documents, Lists is an array of values.
I have retrieved the plan execution details to try to get more information, but I can't figure it out. See below the result of explain('allPlansExecution').
{
"queryPlanner" : {
"mongosPlannerVersion" : 1,
"winningPlan" : {
"stage" : "SINGLE_SHARD",
"shards" : [
{
"shardName" : "rs1",
"connectionString" : "...",
"serverInfo" : {...},
"plannerVersion" : 1,
"namespace" : "",
"indexFilterSet" : false,
"parsedQuery" : {...},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {...},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"GUID" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_GUID_1_Deleted_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"GUID" : [],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {...},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"Lists" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_Lists_1_Deleted_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"Lists" : [
"Lists"
],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...}
}
},
{...}
]
}
]
}
},
"executionStats" : {...},
"allPlansExecution" : [
undefined,
{
"shardName" : "rs1",
"allPlans" : [
{
"nReturned" : 0,
"executionTimeMillisEstimate" : 51,
"totalKeysExamined" : 10618,
"totalDocsExamined" : 10618,
"executionStages" : {
"stage" : "FETCH",
"filter" : {...},
"nReturned" : 0,
"executionTimeMillisEstimate" : 51,
"works" : 10618,
"advanced" : 0,
"needTime" : 10618,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 10618,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10618,
"executionTimeMillisEstimate" : 20,
"works" : 10618,
"advanced" : 10618,
"needTime" : 0,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"Lists" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_Lists_1_Deleted_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"Lists" : [
"Lists"
],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...},
"keysExamined" : 10618,
"seeks" : 1,
"dupsTested" : 10618,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
{
"nReturned" : 101,
"executionTimeMillisEstimate" : 0,
"totalKeysExamined" : 10618,
"totalDocsExamined" : 10616,
"executionStages" : {
"stage" : "FETCH",
"filter" : {...},
"nReturned" : 101,
"executionTimeMillisEstimate" : 0,
"works" : 10618,
"advanced" : 101,
"needTime" : 10517,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 10616,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10616,
"executionTimeMillisEstimate" : 0,
"works" : 10618,
"advanced" : 10616,
"needTime" : 2,
"needYield" : 0,
"saveState" : 664,
"restoreState" : 664,
"isEOF" : 0,
"invalidates" : 0,
"keyPattern" : {
"TenantId" : 1.0,
"ContractId" : 1.0,
"GUID" : 1.0,
"Deleted" : 1.0
},
"indexName" : "TenantId_1_ContractId_1_GUID_1_Deleted_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"TenantId" : [],
"ContractId" : [],
"GUID" : [],
"Deleted" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {...},
"keysExamined" : 10618,
"seeks" : 3,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
]
}
]
}
}
I have removed some lines (shown as {...}); if you want more details, don't hesitate to ask me.
Thanks a lot for your help!

mongodb text index not working

mongo3.4.6
Hi experts, I am new to MongoDB and I have a problem with indexing. Below I create a text index on RequestId (a string) in the RecordInfo collection:
db.getCollection("RecordInfo").createIndex({"RequestId":"text"})
But when I run the query below:
db.getCollection("RecordInfo").find({"RequestId":"4513456313212313212aaaa"}).explain("executionStats"), you can see it's not using the index:
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 17,
"totalKeysExamined" : 0,
"totalDocsExamined" : 9998,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"RequestId" : {
"$eq" : "4513456313212313212aaaa"
}
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 21,
"works" : 10000,
"advanced" : 1,
"needTime" : 9998,
"needYield" : 0,
"saveState" : 78,
"restoreState" : 78,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 9998
}
},
Then I tried the query below with $text:
db.getCollection("RecordInfo").find({$text:{$search:"4513456313212313212aaaa"}}).explain("executionStats")
It gives me the result below, which I think is pretty good:
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 1,
"totalKeysExamined" : 1,
"totalDocsExamined" : 1,
"executionStages" : {
"stage" : "TEXT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 5,
"advanced" : 1,
"needTime" : 3,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"indexPrefix" : {
},
"indexName" : "RequestId_text",
"parsedTextQuery" : {
"terms" : [
"4513456313212313212aaaa"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 5,
"advanced" : 1,
"needTime" : 3,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsRejected" : 0,
"inputStage" : {
"stage" : "TEXT_OR",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 5,
"advanced" : 1,
"needTime" : 3,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 1,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1,
"executionTimeMillisEstimate" : 0,
"works" : 2,
"advanced" : 1,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "RequestId_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : 1,
"seeks" : 1,
"dupsTested" : 1,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
But why doesn't Mongo use an index by default? So I used hint to force an index query, as below:
db.getCollection("RecordInfo").find({"RequestId":"4513456313212313212aaaa"}).hint("RequestId_text").explain("executionStats"), which gives me this result, which is not that good:
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 93,
"totalKeysExamined" : 49378,
"totalDocsExamined" : 9998,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"RequestId" : {
"$eq" : "4513456313212313212aaaa"
}
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 91,
"works" : 49379,
"advanced" : 1,
"needTime" : 49377,
"needYield" : 0,
"saveState" : 386,
"restoreState" : 386,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 9998,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 9998,
"executionTimeMillisEstimate" : 50,
"works" : 49379,
"advanced" : 9998,
"needTime" : 39380,
"needYield" : 0,
"saveState" : 386,
"restoreState" : 386,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "RequestId_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"_fts" : [
"[MinKey, MaxKey]"
],
"_ftsx" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 49378,
"seeks" : 1,
"dupsTested" : 49378,
"dupsDropped" : 39380,
"seenInvalidated" : 0
}
}
},
To summarize, I want to use an index, or whatever other method, to query the RequestId field (a string column) quickly. But if I force the use of the index, it doesn't seem to work for me and only makes my query worse. If I use the index another way, like below, I get a pretty good query, but I can't specify which field I want to query this way.
db.getCollection("RecordInfo").find({$text:{$search:"4513456313212313212aaaa"}}).explain("executionStats")
Text search only works using the $text query operator.
When you execute db.getCollection("RecordInfo").find({"RequestId":"4513456313212313212aaaa"}), you're running a query for an exact match on that field.
From the question, it seems what you're looking for is a regular index, which you can create using db.getCollection("RecordInfo").createIndex({"RequestId": 1})
If you replace the text index with this one, your query using $text will fail (because there is no longer a text index), but the regular query will use the index and avoid a COLLSCAN.

Covered Query Slow on 100 million+ Doc Collection

I would greatly appreciate any assistance with the following covered query:
db.searches.find({ $text: { $search: "dragon" }},{product_name:1,_id:0} ).limit(30)
The "searches" collection has 126 million documents and our goal is to achieve sub-second query performance.
The format of the collection is very simple:
{
"_id" : ObjectId("584b15d12bb299260a000006"),
"product_name" : "This is a product name",
"price" : 876
}
Both the index and the System Profile result are below. What we don't understand is why the stages are so slow -- total time is 161 seconds. As well, less common words (e.g. dog) are much faster than common words (e.g. dragon) which seems odd given that we're just asking for 30 documents.
This is running on MongoDB 3.4.0 with Wired Tiger. The indexes should be in-memory since they consume less than 50% of the 128GB of memory on the server.
INDEX
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "marketplaces.searches"
},
{
"v" : 2,
"key" : {
"_fts" : "text",
"_ftsx" : 1,
"price" : 1.0
},
"name" : "search_index_name_price",
"ns" : "marketplaces.searches",
"background" : true,
"default_language" : "en",
"language_override" : "en",
"weights" : {
"product_name" : 1
},
"textIndexVersion" : 3
}
]
SYSTEM PROFILE
{
"op" : "query",
"ns" : "marketplaces.searches",
"query" : {
"find" : "searches",
"filter" : {
"$text" : {
"$search" : "dragon"
}
},
"limit" : 30.0,
"singleBatch" : false,
"projection" : {
"product_name" : 1.0,
"_id" : 0.0
}
},
"keysExamined" : 1563505,
"docsExamined" : 1563505,
"cursorExhausted" : true,
"numYield" : 14547,
"locks" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(29098)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(14549)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(14549)
}
}
},
"nreturned" : 30,
"responseLength" : 4132,
"protocol" : "op_command",
"millis" : 161315,
"planSummary" : "IXSCAN { _fts: \"text\", _ftsx: 1, price: 1.0 }",
"execStats" : {
"stage" : "LIMIT",
"nReturned" : 30,
"executionTimeMillisEstimate" : 145005,
"works" : 1563538,
"advanced" : 30,
"needTime" : 1563507,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 30,
"inputStage" : {
"stage" : "PROJECTION",
"nReturned" : 30,
"executionTimeMillisEstimate" : 144635,
"works" : 1563537,
"advanced" : 30,
"needTime" : 1563507,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 0,
"invalidates" : 0,
"transformBy" : {
"product_name" : 1.0,
"_id" : 0.0
},
"inputStage" : {
"stage" : "TEXT",
"nReturned" : 30,
"executionTimeMillisEstimate" : 144258,
"works" : 1563537,
"advanced" : 30,
"needTime" : 1563507,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 0,
"invalidates" : 0,
"indexPrefix" : {},
"indexName" : "search_index_name_price",
"parsedTextQuery" : {
"terms" : [
"dragon"
],
"negatedTerms" : [],
"phrases" : [],
"negatedPhrases" : []
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : 30,
"executionTimeMillisEstimate" : 143819,
"works" : 1563537,
"advanced" : 30,
"needTime" : 1563507,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 0,
"invalidates" : 0,
"docsRejected" : 0,
"inputStage" : {
"stage" : "TEXT_OR",
"nReturned" : 30,
"executionTimeMillisEstimate" : 143608,
"works" : 1563537,
"advanced" : 30,
"needTime" : 1563507,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 0,
"invalidates" : 0,
"docsExamined" : 1563505,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1563505,
"executionTimeMillisEstimate" : 20499,
"works" : 1563506,
"advanced" : 1563505,
"needTime" : 0,
"needYield" : 0,
"saveState" : 14547,
"restoreState" : 14547,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1,
"price" : 1.0
},
"indexName" : "search_index_name_price",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {},
"keysExamined" : 1563505,
"seeks" : 1,
"dupsTested" : 1563505,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
}
},
"ts" : ISODate("2017-04-28T14:34:57.320Z"),
"client" : "127.0.0.1",
"appName" : "MongoDB Shell",
"allUsers" : [],
"user" : ""
}
May consider Elasticsearch unless we can tune this further.

How to improve mongo queries with multikey index in array

I just found out that one of our queries is quite slow. We use a multikey index on an array and are not sure how we can actually improve this query.
Our data structure:
account: [{
accountId:string,
service:string
}]
Index key is:
{"account.accountId":1, "account.service":1}
Here's my query:
db.model.find({"account.accountId":"12345", "account.service":"GameCenter"}).explain();
executionStats (1 million records)
"executionStats" : {
"nReturned" : 1,
"executionTimeMillis" : 325,
"totalKeysExamined" : 122744,
"totalDocsExamined" : 122743,
"executionStages" : {
"stage" : "SHARD_MERGE",
"nReturned" : 1,
"executionTimeMillis" : 325,
"totalKeysExamined" : 122744,
"totalDocsExamined" : 122743,
"totalChildMillis" : NumberLong(834),
"shards" : [
{
"shardName" : "rs1",
"executionSuccess" : true,
"executionStages" : {
"stage" : "SHARDING_FILTER",
"nReturned" : 0,
"executionTimeMillisEstimate" : 300,
"works" : 40999,
"advanced" : 0,
"needTime" : 40998,
"needYield" : 0,
"saveState" : 320,
"restoreState" : 320,
"isEOF" : 1,
"invalidates" : 0,
"chunkSkips" : 0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"account.accountId" : {
"$eq" : "G:8183971619"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 300,
"works" : 40999,
"advanced" : 0,
"needTime" : 40998,
"needYield" : 0,
"saveState" : 320,
"restoreState" : 320,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 40998,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 40998,
"executionTimeMillisEstimate" : 30,
"works" : 40999,
"advanced" : 40998,
"needTime" : 0,
"needYield" : 0,
"saveState" : 320,
"restoreState" : 320,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"account.service" : 1,
"account.accountId" : 1
},
"indexName" : "account.service_1_account.accountId_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"account.service" : [
"[\"GameCenter\", \"GameCenter\"]"
],
"account.accountId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 40998,
"dupsTested" : 40998,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
{
"shardName" : "rs2",
"executionSuccess" : true,
"executionStages" : {
"stage" : "SHARDING_FILTER",
"nReturned" : 1,
"executionTimeMillisEstimate" : 240,
"works" : 40612,
"advanced" : 1,
"needTime" : 40610,
"needYield" : 0,
"saveState" : 317,
"restoreState" : 317,
"isEOF" : 1,
"invalidates" : 0,
"chunkSkips" : 0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"account.accountId" : {
"$eq" : "G:8183971619"
}
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 230,
"works" : 40612,
"advanced" : 1,
"needTime" : 40610,
"needYield" : 0,
"saveState" : 317,
"restoreState" : 317,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 40610,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 40611,
"executionTimeMillisEstimate" : 70,
"works" : 40612,
"advanced" : 40611,
"needTime" : 0,
"needYield" : 0,
"saveState" : 317,
"restoreState" : 317,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"account.service" : 1,
"account.accountId" : 1
},
"indexName" : "account.service_1_account.accountId_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"account.service" : [
"[\"GameCenter\", \"GameCenter\"]"
],
"account.accountId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 40611,
"dupsTested" : 40611,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
},
{
"shardName" : "rs3",
"executionSuccess" : true,
"executionStages" : {
"stage" : "SHARDING_FILTER",
"nReturned" : 0,
"executionTimeMillisEstimate" : 270,
"works" : 41136,
"advanced" : 0,
"needTime" : 41135,
"needYield" : 0,
"saveState" : 321,
"restoreState" : 321,
"isEOF" : 1,
"invalidates" : 0,
"chunkSkips" : 0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"account.accountId" : {
"$eq" : "G:8183971619"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 270,
"works" : 41136,
"advanced" : 0,
"needTime" : 41135,
"needYield" : 0,
"saveState" : 321,
"restoreState" : 321,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 41135,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 41135,
"executionTimeMillisEstimate" : 90,
"works" : 41136,
"advanced" : 41135,
"needTime" : 0,
"needYield" : 0,
"saveState" : 321,
"restoreState" : 321,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"account.service" : 1,
"account.accountId" : 1
},
"indexName" : "account.service_1_account.accountId_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"account.service" : [
"[\"GameCenter\", \"GameCenter\"]"
],
"account.accountId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 41135,
"dupsTested" : 41135,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
]
}
},
To summarise, the query does use the index but scans a lot of documents to get the results (122k / 1 million). We couldn't use a covered index as we need to return some other information too.
Also something weird: I'm querying for a unique accountId, but my indexBounds for account.accountId is:
"account.accountId" : [
"[MinKey, MaxKey]"
]
Thank you for your help!
Mars
After reading the executionStats carefully, I saw that the query is actually scanning every index key for the given service, since the indexBounds for "account.accountId" is [MinKey, MaxKey].
I was able to resolve this issue with $elemMatch:
db.model.find({
account:{
$elemMatch:{
accountId:"111",
service:"facebook"
}
}
})