MongoDB querying array slow - mongodb

We're trying to optimise our read performance on our MongoDB cluster.
We serve a social-media-like application where users are members of one or multiple groups.
We were storing who is in which group and whether he/she is an admin of that group in a separate collection. However we noticed it was quite slow to retrieve the group information for the groups the user is member of. (find(+filter) groupMember documents, populate the groups).
Therefore, we recently migrated all the group members to an array on the group collection documents itself.
The schema now looks as following:
The query we execute is simply:
this.model.find({
members: {
$elemMatch: {
userId: new ObjectId(userId),
},
},
})
We expected this to be much more performant because you don't need to populate/lookup anything. The opposite is true, however: after deploying this change we noticed a performance decrease.
We have around 40k group documents where the largest groups have around 3k members, most groups are much smaller however.
The groups are indexed and the index is also used. This is an explain plan:
{
"explainVersion": "1",
"queryPlanner": {
"namespace": "***.groups",
"indexFilterSet": false,
"parsedQuery": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"queryHash": "DCF50157",
"planCacheKey": "DCF50157",
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "FETCH",
"filter": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"members.userId": 1
},
"indexName": "members.userId_1",
"isMultiKey": true,
"multiKeyPaths": {
"members.userId": [
"members"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"members.userId": [
"[ObjectId('61b091ee9b50220e75208eb6'), ObjectId('61b091ee9b50220e75208eb6')]"
]
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 17,
"executionTimeMillis": 0,
"totalKeysExamined": 17,
"totalDocsExamined": 17,
"executionStages": {
"stage": "FETCH",
"filter": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"nReturned": 17,
"executionTimeMillisEstimate": 0,
"works": 18,
"advanced": 17,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 17,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 17,
"executionTimeMillisEstimate": 0,
"works": 18,
"advanced": 17,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": {
"members.userId": 1
},
"indexName": "members.userId_1",
"isMultiKey": true,
"multiKeyPaths": {
"members.userId": [
"members"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"members.userId": [
"[ObjectId('61b091ee9b50220e75208eb6'), ObjectId('61b091ee9b50220e75208eb6')]"
]
},
"keysExamined": 17,
"seeks": 1,
"dupsTested": 17,
"dupsDropped": 0
}
},
"allPlansExecution": []
},
"command": {
"find": "groups",
"filter": {
"members": {
"$elemMatch": {
"userId": "61b091ee9b50220e75208eb6"
}
}
},
"projection": {},
"readConcern": {
"level": "majority"
},
"$db": "***"
},
"serverInfo": {
"host": "***",
"port": 27017,
"version": "6.0.3",
"gitVersion": "f803681c3ae19817d31958965850193de067c516"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
},
"ok": 1,
"operationTime": {
"$timestamp": "7168789227251957761"
}
}
Under load the query takes 300-400ms, which is not acceptable for us.
However right now we don't really know anymore what would be the best next step in improving the solution. Mongo does not advise any additional indexes or schema improvements at this moment.
What can we best do to make this query really performant?

Related

Mongodb - executionStats executionTimeMillis way longer than cumulative of executionTimeMillisEstimate from each stage

I have a mongo aggregation with multiple stages and part of the $match stage is a geoWithin operation across a large set of points.
I was analyzing the aggregation using explain with executionStats and noticed that the execution stats of winning plan had each stage with very low executionTimeMillisEstimate but the overall executionTimeMillis was massive. I'm talking about ~150 fold difference.
I noticed that the queryPlanner has a rejected plan with query utilizing all minor indexes instead of just the location index for geoWithin, which is what is used in the winning plan. But since the winning plan gets cached I didn't think it should matter much.
But again, the difference between the times is too large to be explained just by the rejected plan's construction — what else could be the reason for this?
Execution Plan:
{
"executionSuccess": true,
"nReturned": 101,
"executionTimeMillis": 85264,
"totalKeysExamined": 196,
"totalDocsExamined": 315,
"executionStages": {
"stage": "FETCH",
"filter": {
"$and": [{
"something": {
"$eq": "a"
}
},
{
"other": {
"$eq": "abc"
}
}
]
},
"nReturned": 101,
"executionTimeMillisEstimate": 312,
"works": 196,
"advanced": 101,
"needTime": 95,
"needYield": 0,
"saveState": 88,
"restoreState": 88,
"isEOF": 0,
"docsExamined": 150,
"alreadyHasObj": 150,
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [{
"location": {
"$geoWithin": {
"$centerSphere": [
[
0,
1
],
0.0000783927971443699
]
}
}
},
{},
{}
]
},
"nReturned": 150,
"executionTimeMillisEstimate": 312,
"works": 196,
"advanced": 150,
"needTime": 46,
"needYield": 0,
"saveState": 88,
"restoreState": 88,
"isEOF": 0,
"docsExamined": 165,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 165,
"executionTimeMillisEstimate": 0,
"works": 196,
"advanced": 165,
"needTime": 31,
"needYield": 0,
"saveState": 88,
"restoreState": 88,
"isEOF": 0,
"keyPattern": {
"location": "2dsphere"
},
"indexName": "location",
"isMultiKey": false,
"multiKeyPaths": {
"location": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"location": [
"[0, 1]",
""
]
},
"keysExamined": 196,
"seeks": 32,
"dupsTested": 0,
"dupsDropped": 0
}
}
}
}
The total executionTimeMillis includes a few things that are not accounted for in the individual plan, like:
Time spent planning
The query planner evaluates all of the candidate indexes and plans to determine which to test. This can take non-zero time per candidate plan, and adds to the total execution time.
Lock acquisition
When planning, only a small subset of the index/documents are examined. Once the plan is selected, it is run to completion to obtain the execution stats. If other operations are occurring that cause the query executor to wait on locks, this will increase the total time above the estimate.
Disk latency
Similar to locks, if reading the documents from the disk is very fast during the planning stage, but considerably slower during execution, the overall time will be greater than the estimate
There are probably other considerations as well, If I think of any I'll add them here. If anyone else remembers one I've forgotten, please feel free to suggest an edit!

Mongodb compound index is not in use with query

Hi, can somebody please explain to me how to make a compound index for this simple query?
find({
"userId":{"$in":["5c778c4f52732c06d3384269","5b274b672a35d168960b1bd5","5bc43d6552732c527345c35d","58ec836e899753a22b95fb19"]},
"end":{"$gte": ISODate('2019-05-03 11:05:00+02:00')}
})
I have done two compound indexes:
userId: 1 + end: 1
userId: 1 + end: -1
but the query does not use either of them. What is wrong with these indexes?
Here is the result of explain()
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "WebApp.SessionLog",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"end": {
"$gte": ISODate("2019-05-03T07:05:00Z")
}
},
{
"userId": {
"$in": [
"58ec836e899753a22b95fb19",
"5b274b672a35d168960b1bd5",
"5bc43d6552732c527345c35d",
"5c778c4f52732c06d3384269"
]
}
}
]
},
"winningPlan": {
"stage": "FETCH",
"filter": {
"userId": {
"$in": [
"58ec836e899753a22b95fb19",
"5b274b672a35d168960b1bd5",
"5bc43d6552732c527345c35d",
"5c778c4f52732c06d3384269"
]
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"end": -1
},
"indexName": "end",
"isMultiKey": false,
"multiKeyPaths": {
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"end": [
"[new Date(9223372036854775807), new Date(1556867100000)]"
]
}
}
},
"rejectedPlans": [
{
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1,
"end": -1
},
"indexName": "userId_1_end_-1",
"isMultiKey": false,
"multiKeyPaths": {
"userId": [],
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
],
"end": [
"[new Date(9223372036854775807), new Date(1556867100000)]"
]
}
}
},
{
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1,
"end": 1
},
"indexName": "userId_1_end_1",
"isMultiKey": false,
"multiKeyPaths": {
"userId": [],
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
],
"end": [
"[new Date(1556867100000), new Date(9223372036854775807)]"
]
}
}
},
{
"stage": "FETCH",
"filter": {
"end": {
"$gte": ISODate("2019-05-03T07:05:00Z")
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1
},
"indexName": "userId",
"isMultiKey": false,
"multiKeyPaths": {
"userId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
]
}
}
}
]
},
"serverInfo": {
"host": "mongo1",
"port": 9000,
"version": "3.4.2",
"gitVersion": "3f76e40c105fc223b3e5aac3e20dcd026b83b38b"
},
"ok": 1
}
here is the documents example:
{
"_id": ObjectId("56c49c147569943e7d4e92f4"),
"ip": "213.81.143.50",
"count": 17,
"end": ISODate("2014-12-02T08:39:56Z"),
"userId": "546dda33899753840584752b",
"date": ISODate("2014-12-02T08:36:47Z"),
"logs": [
{
"parameters": {
"action": "dashboard",
"id": null,
"package": ""
},
"ip": "213.81.143.50",
"date": ISODate("2014-12-02T08:36:47Z"),
"presenter": "Dashboard",
"action": "dashboard"
},
{
"parameters": {
"action": "dashboard",
"id": null,
"backlink": ""
},
"ip": "213.81.143.50",
"date": ISODate("2014-12-02T08:36:48Z"),
"presenter": "Dashboard",
"action": "dashboard"
}
]
}
Thanks.
PS: This PS text is only for Stackoverflow editor error.
In the query explain result you can see "indexName": "end" under winningPlan.
This means your query used the index named end.

How to fix Very Slow Query in mongodb

MongoDB shell version v4.0.5
: Very Slow Query | Taking time in fetching | Data Size : 2-3 Million documents in a collection and there are around 13-15 such collections.
System Configuration
: Architecture:
x86_64 CPU op-mode(s):
32-bit, 64-bit Byte Order:
Little Endian CPU(s):
6 On-line CPU(s) list:
0-5 Thread(s) per core:
1 Core(s) per socket:
6 Socket(s):
1 NUMA node(s):
1 Vendor ID:
GenuineIntel CPU family:
6 Model:
79 Stepping:
1 CPU MHz:
2097.570 BogoMIPS:
4195.14 Hypervisor vendor:
VMware Virtualization type:
full L1d cache:
32K L1i cache:
32K L2 cache:
256K L3 cache:
20480K NUMA node0 CPU(s): 0-5
MemTotal: 16432268 kB
I have done indexing on the tables and that is working fine but still, the system is taking a lot of time:
Query:
db.vnms_vccells_5.find({auid:"<1e000097>",
hub_ip:"10.252.0.105",sector_ip:"<1e000046>",
last_updated_time:{$gt:"2016-12-24 05:49:00"}})
.explain("executionStats")
Below is the query planner:
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "opennms.vnms_vccells_5",
"indexFilterSet": false,
"parsedQuery": {
"$and": [{
"auid": {
"$eq": "<1e000097>"
}
}, {
"hub_ip": {
"$eq": "10.252.0.105"
}
}, {
"sector_ip": {
"$eq": "<1e000046>"
}
}, {
"last_updated_time": {
"$gt": "2016-12-24 05:49:00"
}
}]
},
"winningPlan": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"auid": 1,
"hub_ip": 1,
"sector_ip": 1,
"last_updated_time": -1
},
"indexName": "auid_1_hub_ip_1_sector_ip_1_last_updated_time_-1",
"isMultiKey": false,
"multiKeyPaths": {
"auid": [],
"hub_ip": [],
"sector_ip": [],
"last_updated_time": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"auid": ["[\"<1e000097>\", \"<1e000097>\"]"],
"hub_ip": ["[\"10.252.0.105\", \"10.252.0.105\"]"],
"sector_ip": ["[\"<1e000046>\", \"<1e000046>\"]"],
"last_updated_time": ["({}, \"2016-12-24 05:49:00\")"]
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 28788,
"executionTimeMillis": 34989,
"totalKeysExamined": 28788,
"totalDocsExamined": 28788,
"executionStages": {
"stage": "FETCH",
"nReturned": 28788,
"executionTimeMillisEstimate": 34475,
"works": 28789,
"advanced": 28788,
"needTime": 0,
"needYield": 0,
"saveState": 1039,
"restoreState": 1039,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 28788,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 28788,
"executionTimeMillisEstimate": 7730,
"works": 28789,
"advanced": 28788,
"needTime": 0,
"needYield": 0,
"saveState": 1039,
"restoreState": 1039,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"auid": 1,
"hub_ip": 1,
"sector_ip": 1,
"last_updated_time": -1
},
"indexName": "auid_1_hub_ip_1_sector_ip_1_last_updated_time_-1",
"isMultiKey": false,
"multiKeyPaths": {
"auid": [],
"hub_ip": [],
"sector_ip": [],
"last_updated_time": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"auid": ["[\"<1e000097>\", \"<1e000097>\"]"],
"hub_ip": ["[\"10.252.0.105\", \"10.252.0.105\"]"],
"sector_ip": ["[\"<1e000046>\", \"<1e000046>\"]"],
"last_updated_time": ["({}, \"2016-12-24 05:49:00\")"]
},
"keysExamined": 28788,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0
}
}
},
"serverInfo": {
"host": "vnms",
"port": 27017,
"version": "4.0.5",
"gitVersion": "3739429dd92b92d1b0ab120911a23d50bf03c412"
},
"ok": 1
}
mongostat data:
insert query update delete getmore command dirty used flushes vsize res qrw arw net_in net_out conn time
*0 *0 *0 *0 0 3|0 0.0% 80.0% 0 5.26G 3.52G 0|0 3|0 254b 101k 7 Jan 23 10:20:41.082
The query performance is limited by your resources. Here are a few tips
check your resources by following the Operations Checklists.
From the query plan, not only fetching docs was slow, examining indexes keys was also slow. You need to Ensure Indexes Fit in RAM.
Applying additional random indexes does not help and you can cause high numbers of page faults. You can use $indexStats to check your indexes usages.
Try MongoDB's Cloud Manager to help collect performance metrics. It offers free trial.
Query-wise, project only the fields you need instead of fetching the entire document.

MongoDB exact match on multiple document fields

I am trying to build a Python script using PyMongo that will be able to hit a Mongo DB that can get exact matches of n number of objects that may exist in the database. Currently, I have this setup:
db.entries.find({'$or': [<list-of-objects]})
Where the list of objects looks something like this:
[{'email': 'some#email.com', 'zip': '11111'}, {'email': 'another#email.com', 'zip': '11112'}, ...]
Using $or works okay when I have 10 or so items in the list. I am testing now with 100, and it is taking a very long time to return. I have considered using multiple $in filters, but I don't know if that is the best option.
I'm sure there is a better way to handle this, but I am fairly new to Mongo.
EDIT: Output of .explain() below:
{
"executionStats": {
"executionTimeMillis": 228734,
"nReturned": 2,
"totalKeysExamined": 0,
"allPlansExecution": [],
"executionSuccess": true,
"executionStages": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"direction": "forward",
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"docsExamined": 5453000,
"nReturned": 2,
"needTime": 5452999,
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"executionTimeMillisEstimate": 208083,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "COLLSCAN"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 211503,
"transformBy": {
"_id": false
},
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "PROJECTION"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 213671,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "SUBPLAN"
},
"totalDocsExamined": 5453000
},
"queryPlanner": {
"parsedQuery": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"rejectedPlans": [],
"namespace": "db.entries",
"winningPlan": {
"inputStage": {
"transformBy": {
"_id": false
},
"inputStage": {
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"direction": "forward",
"stage": "COLLSCAN"
},
"stage": "PROJECTION"
},
"stage": "SUBPLAN"
},
"indexFilterSet": false,
"plannerVersion": 1
},
"ok": 1.0,
"serverInfo": {
"host": "somehost",
"version": "3.4.6",
"port": 27017,
"gitVersion": "c55eb86ef46ee7aede3b1e2a5d184a7df4bfb5b5"
}
}
I suggest to create a new index (a compound index) as in your case you are using two fields for your search:
db.entries.createIndex( {"email": 1, "zip": 1} )
Now run your query appending the explain() command at your query, you should see that instead of COLLSCAN it has started to use IXSCAN.
To avoid indexing and re-indexing (this query will not just pertain to email/zip, would be dynamic), I build the lists of data with each header and use them as $in arguments, then pass those into an $and. It seems to be working well enough and it hasn't queried longer than 3 minutes.
Example:
{'$and': [{'email': {'$in': ['some#example.com', 'fake#example.com', 'email#example.com']}, 'zipcode': {'$in': ['12345', '11111', '11112']}}]}

MongoDB $or query with index

I need to make a query that uses an index in MongoDB; below I will show a minimal example of my real case.
I have the following collection with the following data:
devsrv(mongod-3.0.4) test> db.teste.find()
{
"_id": ObjectId("57b324c341aaa4b930ef3b92"),
"a": 1,
"b": 1
}
{
"_id": ObjectId("57b324c941aaa4b930ef3b93"),
"a": 1,
"b": 2
}
{
"_id": ObjectId("57b324cd41aaa4b930ef3b94"),
"a": 1,
"b": 3
}
{
"_id": ObjectId("57b324d141aaa4b930ef3b95"),
"a": 1,
"b": 4
}
{
"_id": ObjectId("57b324d541aaa4b930ef3b96"),
"a": 1,
"b": 5
}
{
"_id": ObjectId("57b324da41aaa4b930ef3b97"),
"a": 1,
"b": 6
}
{
"_id": ObjectId("57b324df41aaa4b930ef3b98"),
"a": 1,
"b": 7
}
{
"_id": ObjectId("57b324e441aaa4b930ef3b99"),
"a": 1,
"b": 8
}
{
"_id": ObjectId("57b324f341aaa4b930ef3b9a"),
"a": 1,
"b": ""
}
{
"_id": ObjectId("57b324f641aaa4b930ef3b9b"),
"a": 1,
"b": " "
}
{
"_id": ObjectId("57b324fc41aaa4b930ef3b9c"),
"a": 1,
"b": null
}
{
"_id": ObjectId("57b3250341aaa4b930ef3b9d"),
"a": 1
}
{
"_id": ObjectId("57b46ace41aaa4b930ef3b9e"),
"a": 2
}
And I have the following indexes:
devsrv(mongod-3.0.4) test> db.teste.getIndexes()
[
{
"v": 1,
"key": {
"_id": 1
},
"name": "_id_",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"a": 1,
"b": 1
},
"name": "a_1_b_1",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"b": 1
},
"name": "b_1",
"ns": "test.teste"
}
]
And I need to make a query like this:
devsrv(mongod-3.0.4) test> db.teste.find({$or:[{"b":null},{"b":""},{"b":" "},{"b":{$lt:3}}],"a":1}).explain("executionStats")
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.teste",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
{
"a": {
"$eq": 1
}
}
]
},
"winningPlan": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
}
}
}
},
"rejectedPlans": [
{
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"a": 1,
"b": 1
},
"indexName": "a_1_b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"a": [
"[1.0, 1.0]"
],
"b": [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 6,
"executionTimeMillis": 0,
"totalKeysExamined": 8,
"totalDocsExamined": 14,
"executionStages": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"nReturned": 6,
"executionTimeMillisEstimate": 0,
"works": 10,
"advanced": 6,
"needTime": 2,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 7,
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
},
"keysExamined": 8,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
},
"serverInfo": {
"host": "devsrv",
"port": 27017,
"version": "3.0.4",
"gitVersion": "0481c958daeb2969800511e7475dc66986fa9ed5"
},
"ok": 1
}
But MongoDB isn't using the two indexes together.
Each $or term is effectively a separate query, so it helps to structure your query so that each term aligns with the index you're hoping to use. In this case that means moving the a: 1 part inside of each $or term:
db.teste.find({
$or:[
{a: 1, b: null},
{a: 1, b: ""},
{a: 1, b: " "},
{a: 1, b: {$lt: 3}}
]}).explain('executionStats')
The explain output shows that the a_1_b_1 is used for this query.
But you can simplify this a bit more by using $in to combine the first three terms into one:
db.teste.find({
$or:[
{a: 1, b: {$in: [null, "", " "]}},
{a: 1, b: {$lt: 3}}
]}).explain('executionStats')
This is also able to use the a_1_b_1 index.
The code is
db.teste.explain("executionStats").find({a: 1,
$or:[{b: null},
{b: ""},
{b: " "},
{b: {$lt:3}}]
}).hint({a: 1, b: 1})
Be careful with the hint command as the query optimizer chooses the most efficient query by measuring actual performance of the query with every suitable index.