I need to make a query that uses an index in MongoDB; below is a minimal example of my real case.
I have the following collection with the following data:
devsrv(mongod-3.0.4) test> db.teste.find()
{
"_id": ObjectId("57b324c341aaa4b930ef3b92"),
"a": 1,
"b": 1
}
{
"_id": ObjectId("57b324c941aaa4b930ef3b93"),
"a": 1,
"b": 2
}
{
"_id": ObjectId("57b324cd41aaa4b930ef3b94"),
"a": 1,
"b": 3
}
{
"_id": ObjectId("57b324d141aaa4b930ef3b95"),
"a": 1,
"b": 4
}
{
"_id": ObjectId("57b324d541aaa4b930ef3b96"),
"a": 1,
"b": 5
}
{
"_id": ObjectId("57b324da41aaa4b930ef3b97"),
"a": 1,
"b": 6
}
{
"_id": ObjectId("57b324df41aaa4b930ef3b98"),
"a": 1,
"b": 7
}
{
"_id": ObjectId("57b324e441aaa4b930ef3b99"),
"a": 1,
"b": 8
}
{
"_id": ObjectId("57b324f341aaa4b930ef3b9a"),
"a": 1,
"b": ""
}
{
"_id": ObjectId("57b324f641aaa4b930ef3b9b"),
"a": 1,
"b": " "
}
{
"_id": ObjectId("57b324fc41aaa4b930ef3b9c"),
"a": 1,
"b": null
}
{
"_id": ObjectId("57b3250341aaa4b930ef3b9d"),
"a": 1
}
{
"_id": ObjectId("57b46ace41aaa4b930ef3b9e"),
"a": 2
}
And I have the following indexes:
devsrv(mongod-3.0.4) test> db.teste.getIndexes()
[
{
"v": 1,
"key": {
"_id": 1
},
"name": "_id_",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"a": 1,
"b": 1
},
"name": "a_1_b_1",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"b": 1
},
"name": "b_1",
"ns": "test.teste"
}
]
And I need to run a query like this:
devsrv(mongod-3.0.4) test> db.teste.find({$or:[{"b":null},{"b":""},{"b":" "},{"b":{$lt:3}}],"a":1}).explain("executionStats")
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.teste",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
{
"a": {
"$eq": 1
}
}
]
},
"winningPlan": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
}
}
}
},
"rejectedPlans": [
{
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"a": 1,
"b": 1
},
"indexName": "a_1_b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"a": [
"[1.0, 1.0]"
],
"b": [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 6,
"executionTimeMillis": 0,
"totalKeysExamined": 8,
"totalDocsExamined": 14,
"executionStages": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"nReturned": 6,
"executionTimeMillisEstimate": 0,
"works": 10,
"advanced": 6,
"needTime": 2,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 7,
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
},
"keysExamined": 8,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
},
"serverInfo": {
"host": "devsrv",
"port": 27017,
"version": "3.0.4",
"gitVersion": "0481c958daeb2969800511e7475dc66986fa9ed5"
},
"ok": 1
}
But MongoDB isn't using the two indexes together.
Each $or term is effectively a separate query, so it helps to structure your query so that each term aligns with the index you're hoping to use. In this case that means moving the a: 1 part inside of each $or term:
db.teste.find({
$or:[
{a: 1, b: null},
{a: 1, b: ""},
{a: 1, b: " "},
{a: 1, b: {$lt: 3}}
]}).explain('executionStats')
The explain output shows that the a_1_b_1 index is used for this query.
But you can simplify this a bit more by using $in to combine the first three terms into one:
db.teste.find({
$or:[
{a: 1, b: {$in: [null, "", " "]}},
{a: 1, b: {$lt: 3}}
]}).explain('executionStats')
This is also able to use the a_1_b_1 index.
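If you want to confirm which index the planner picks without reading the full executionStats, a minimal sketch (same collection and query as above) is to look at just the winning plan:
db.teste.find({
  $or: [
    { a: 1, b: { $in: [null, "", " "] } },
    { a: 1, b: { $lt: 3 } }
  ]
}).explain().queryPlanner.winningPlan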
The code is
db.teste.explain("executionStats").find({a: 1,
$or:[{b: null},
{b: ""},
{b: " "},
{b: {$lt:3}}]
}).hint({a: 1, b: 1})
Be careful with the hint() command, though: the query optimizer normally chooses the most efficient plan by measuring the actual performance of the query with every suitable index, and hint() overrides that choice.
Related
We're trying to optimise our read performance on our MongoDB cluster.
We serve a social-media-like application where users are members of one or more groups.
We were storing who is in which group, and whether they are an admin of that group, in a separate collection. However, we noticed it was quite slow to retrieve the group information for the groups a user is a member of (find (+ filter) groupMember documents, then populate the groups).
Therefore we recently migrated all the group members into an array on the group documents themselves.
The schema now looks roughly as follows:
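(Sketch only: the exact schema isn't included here, so everything except members.userId is an assumption inferred from the query and the index in the explain plan below.)
{
  "_id": ObjectId("..."),
  "name": "Some group",                // assumed field
  "members": [
    { "userId": ObjectId("61b091ee9b50220e75208eb6"), "isAdmin": true },  // "isAdmin" is an assumed field name
    ...
  ]
}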
The query we execute is simply:
this.model.find({
members: {
$elemMatch: {
userId: new ObjectId(userId),
},
},
})
We expected this to be much more performant because you don't need to populate/look up anything. The opposite turned out to be true, however: after deploying this change we noticed a performance decrease.
We have around 40k group documents where the largest groups have around 3k members, most groups are much smaller however.
The groups are indexed and the index is also used. This is an explain plan:
{
"explainVersion": "1",
"queryPlanner": {
"namespace": "***.groups",
"indexFilterSet": false,
"parsedQuery": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"queryHash": "DCF50157",
"planCacheKey": "DCF50157",
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "FETCH",
"filter": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"members.userId": 1
},
"indexName": "members.userId_1",
"isMultiKey": true,
"multiKeyPaths": {
"members.userId": [
"members"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"members.userId": [
"[ObjectId('61b091ee9b50220e75208eb6'), ObjectId('61b091ee9b50220e75208eb6')]"
]
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 17,
"executionTimeMillis": 0,
"totalKeysExamined": 17,
"totalDocsExamined": 17,
"executionStages": {
"stage": "FETCH",
"filter": {
"members": {
"$elemMatch": {
"userId": {
"$eq": "61b091ee9b50220e75208eb6"
}
}
}
},
"nReturned": 17,
"executionTimeMillisEstimate": 0,
"works": 18,
"advanced": 17,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 17,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 17,
"executionTimeMillisEstimate": 0,
"works": 18,
"advanced": 17,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": {
"members.userId": 1
},
"indexName": "members.userId_1",
"isMultiKey": true,
"multiKeyPaths": {
"members.userId": [
"members"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"members.userId": [
"[ObjectId('61b091ee9b50220e75208eb6'), ObjectId('61b091ee9b50220e75208eb6')]"
]
},
"keysExamined": 17,
"seeks": 1,
"dupsTested": 17,
"dupsDropped": 0
}
},
"allPlansExecution": []
},
"command": {
"find": "groups",
"filter": {
"members": {
"$elemMatch": {
"userId": "61b091ee9b50220e75208eb6"
}
}
},
"projection": {},
"readConcern": {
"level": "majority"
},
"$db": "***"
},
"serverInfo": {
"host": "***",
"port": 27017,
"version": "6.0.3",
"gitVersion": "f803681c3ae19817d31958965850193de067c516"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
},
"ok": 1,
"operationTime": {
"$timestamp": "7168789227251957761"
}
}
Under load the query takes 300-400ms, which is not acceptable for us.
However, right now we don't really know what the best next step would be to improve the solution. MongoDB does not suggest any additional indexes or schema improvements at this moment.
What can we best do to make this query really performant?
I have documents with this structure:
x = {
"scalar": 1,
"array": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
"array2": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
}
and
y = {
"scalar": 2,
"array": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
}
The end result I'm trying to get is this:
{
"scalar": 3, # SUM of scalar
"array": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
}
I've tried using a double $unwind and then grouping with $push. I'm thinking of using $reduce to get the final result.
Query
One way to do it is with $facet: you want 3 groupings, and $facet can break the pipeline into 3 separate parts so the unwinds don't get mixed. I think this is the simplest way to do it.
Test code here
db.collection.aggregate([
{
"$facet": {
"scalar": [
{
"$project": {
"scalar": 1
}
},
{
"$group": {
"_id": null,
"sum": {
"$sum": "$scalar"
}
}
},
{
"$unset": [
"_id"
]
}
],
"array": [
{
"$project": {
"array": 1
}
},
{
"$unwind": {
"path": "$array"
}
},
{
"$group": {
"_id": "$array.key",
"sum": {
"$sum": "$array.value"
}
}
},
{
"$project": {
"_id": 0,
"key": "$_id",
"value": "$sum"
}
}
],
"array2": [
{
"$project": {
"array2": 1
}
},
{
"$unwind": {
"path": "$array2"
}
},
{
"$group": {
"_id": "$array2.key",
"sum": {
"$sum": "$array2.value"
}
}
},
{
"$project": {
"_id": 0,
"key": "$_id",
"value": "$sum"
}
}
]
}
},
{
"$set": {
"scalar": {
"$arrayElemAt": [
"$scalar.sum",
0
]
}
}
}
])
The other alternative is to unwind both arrays, but then the unwinds and groups get mixed, which I think makes things complicated.
Also, I think $reduce can't be used for grouping in MongoDB, because we can't construct dynamic paths.
If we group-reduce and end up with this data (key=key, value=value):
{"1" : 5, "2" : 3}
and we then see {"key": 1, "value": 5}, how can we check whether the data above contains 1 as a key? We can't construct dynamic paths like $$this.1. The only way is to convert it to an array and back to an object, which would be slow.
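To illustrate that last point, here is a minimal sketch (the collection name and the literal map are just placeholders) of the array/object round trip using $objectToArray and $arrayToObject:
db.collection.aggregate([
  {
    "$project": {
      // turn the map into an array of {k, v} pairs the pipeline can iterate over
      "pairs": { "$objectToArray": { "$literal": { "1": 5, "2": 3 } } }
    }
  },
  {
    "$project": {
      // ... and convert the pairs back into a single object
      "asMap": { "$arrayToObject": "$pairs" }
    }
  }
])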
Hi, can somebody please explain to me how to make a compound index for this simple query?
find({
"userId":{"$in":["5c778c4f52732c06d3384269","5b274b672a35d168960b1bd5","5bc43d6552732c527345c35d","58ec836e899753a22b95fb19"]},
"end":{"$gte": ISODate('2019-05-03 11:05:00+02:00')}
})
I have done two compound indexes:
userId: 1 + end: 1
userId: 1 + end: -1
but the query does not use either of them. What is wrong with these indexes?
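For reference, the two compound indexes would presumably have been created along these lines (the collection name is taken from the namespace in the explain output below):
db.SessionLog.createIndex({ userId: 1, end: 1 })
db.SessionLog.createIndex({ userId: 1, end: -1 })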
Here is the result of explain()
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "WebApp.SessionLog",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"end": {
"$gte": ISODate("2019-05-03T07:05:00Z")
}
},
{
"userId": {
"$in": [
"58ec836e899753a22b95fb19",
"5b274b672a35d168960b1bd5",
"5bc43d6552732c527345c35d",
"5c778c4f52732c06d3384269"
]
}
}
]
},
"winningPlan": {
"stage": "FETCH",
"filter": {
"userId": {
"$in": [
"58ec836e899753a22b95fb19",
"5b274b672a35d168960b1bd5",
"5bc43d6552732c527345c35d",
"5c778c4f52732c06d3384269"
]
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"end": -1
},
"indexName": "end",
"isMultiKey": false,
"multiKeyPaths": {
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"end": [
"[new Date(9223372036854775807), new Date(1556867100000)]"
]
}
}
},
"rejectedPlans": [
{
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1,
"end": -1
},
"indexName": "userId_1_end_-1",
"isMultiKey": false,
"multiKeyPaths": {
"userId": [],
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
],
"end": [
"[new Date(9223372036854775807), new Date(1556867100000)]"
]
}
}
},
{
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1,
"end": 1
},
"indexName": "userId_1_end_1",
"isMultiKey": false,
"multiKeyPaths": {
"userId": [],
"end": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
],
"end": [
"[new Date(1556867100000), new Date(9223372036854775807)]"
]
}
}
},
{
"stage": "FETCH",
"filter": {
"end": {
"$gte": ISODate("2019-05-03T07:05:00Z")
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"userId": 1
},
"indexName": "userId",
"isMultiKey": false,
"multiKeyPaths": {
"userId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"userId": [
"[\"58ec836e899753a22b95fb19\", \"58ec836e899753a22b95fb19\"]",
"[\"5b274b672a35d168960b1bd5\", \"5b274b672a35d168960b1bd5\"]",
"[\"5bc43d6552732c527345c35d\", \"5bc43d6552732c527345c35d\"]",
"[\"5c778c4f52732c06d3384269\", \"5c778c4f52732c06d3384269\"]"
]
}
}
}
]
},
"serverInfo": {
"host": "mongo1",
"port": 9000,
"version": "3.4.2",
"gitVersion": "3f76e40c105fc223b3e5aac3e20dcd026b83b38b"
},
"ok": 1
}
Here is an example document:
{
"_id": ObjectId("56c49c147569943e7d4e92f4"),
"ip": "213.81.143.50",
"count": 17,
"end": ISODate("2014-12-02T08:39:56Z"),
"userId": "546dda33899753840584752b",
"date": ISODate("2014-12-02T08:36:47Z"),
"logs": [
{
"parameters": {
"action": "dashboard",
"id": null,
"package": ""
},
"ip": "213.81.143.50",
"date": ISODate("2014-12-02T08:36:47Z"),
"presenter": "Dashboard",
"action": "dashboard"
},
{
"parameters": {
"action": "dashboard",
"id": null,
"backlink": ""
},
"ip": "213.81.143.50",
"date": ISODate("2014-12-02T08:36:48Z"),
"presenter": "Dashboard",
"action": "dashboard"
}
]
}
Thanks.
In the query explain result you can see "indexName": "end" under winningPlan.
This means your query used the index named end.
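If you want to compare how one of the compound indexes performs against the single-field end index the planner chose, a minimal sketch is to force it with hint() and compare the executionStats (index name and date taken from the explain output above):
db.SessionLog.find({
  userId: { $in: ["5c778c4f52732c06d3384269", "5b274b672a35d168960b1bd5", "5bc43d6552732c527345c35d", "58ec836e899753a22b95fb19"] },
  end: { $gte: ISODate("2019-05-03T07:05:00Z") }
}).hint("userId_1_end_-1").explain("executionStats")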
I have a collection of users with the following documents:
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 }
I want to show the list of users with some criteria and conditional ordering: online users with the most likes at the top of the list, and after the online users, offline users ordered by score and then by likes. The rules are as follows:
If online is 1 must be sort by descending of like.
If online is 0 and score is 1 must be sort by descending of score.
If online is 0 and score is 0 must be sort by descending of like.
So, the result should look like this:
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 }
I have gotten as far as point 2; my query is the following:
db.users.aggregate([
{
$project :
{
"id" : 1,
"name" : 1,
"online: 1,
"like" : 1,
"score" : 1,
"sort" : {
$cond:
{
"if" :
{
$eq : ["$online", true]
},
"then" : "$like",
"else" : "$score"
}
}
}
},
{
$sort :
{
"online" : -1,
"sort" : -1,
"id" : 1
}
},
{
$skip : 0
},
{
$limit : 9
}
])
But the current result I get is the following:
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 },
You can see that, based on point 3, the document { "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 } should be at the top of the group with score 0.
First create an additional field called point with the value (1 - online) * score.
After this, sort the data by:
online desc
point desc (when online = 1, point is always 0; when online = 0, point equals score)
like desc
You can use this query
db.yourtable.aggregate(
[
{ $project:{
"id" : 1,
"name" : 1,
"online": 1,
"like" : 1,
"score" : 1,
point: { $multiply: [
{$subtract: [1,"$online"]}
, "$score"
]}
}
}
,{ $sort : { online: -1, point : -1, like : -1 } }
]
);
Please check the query below:
db.getCollection('yourTable').aggregate([
{
$project :
{
"id" : 1,
"name" : 1,
"online": 1,
"like" : 1,
"score" : 1,
onlineSortLike: {
$cond: {
if: { $and: [{ $eq: ['$online',1 ] }] },
then: '$like',
else: 0,
},
},
sortOfflineScore: {
$cond: {
if: { $and: [{ $eq: ['$online',0] }] },
then: '$score',
else: 0,
},
},
sortOfflineScoreLike: {
$cond: {
if: { $and: [{ $eq: ['$online', 0] }] },
then: '$like',
else: 0,
},
},
}
},
{
$sort :
{
"online" : -1,
"onlineSortLike" : -1,
"sortOfflineScore" : -1,
"sortOfflineScoreLike" : -1
}
},
{
$skip : 0
},
{
$limit : 9
}
])
I am trying to build a Python script using PyMongo that hits a MongoDB database and gets exact matches for n objects that may exist in the database. Currently, I have this setup:
db.entries.find({'$or': [<list-of-objects>]})
Where the list of objects looks something like this:
[{'email': 'some#email.com', 'zip': '11111'}, {'email': 'another#email.com', 'zip': '11112'}, ...]
Using $or works okay when I have 10 or so items in the list. I am testing now with 100, and it is taking a very long time to return. I have considered using multiple $in filters, but I don't know if that is the best option.
I'm sure there is a better way to handle this, but I am fairly new to Mongo.
EDIT: Output of .explain() below:
{
"executionStats": {
"executionTimeMillis": 228734,
"nReturned": 2,
"totalKeysExamined": 0,
"allPlansExecution": [],
"executionSuccess": true,
"executionStages": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"direction": "forward",
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"docsExamined": 5453000,
"nReturned": 2,
"needTime": 5452999,
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"executionTimeMillisEstimate": 208083,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "COLLSCAN"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 211503,
"transformBy": {
"_id": false
},
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "PROJECTION"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 213671,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "SUBPLAN"
},
"totalDocsExamined": 5453000
},
"queryPlanner": {
"parsedQuery": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"rejectedPlans": [],
"namespace": "db.entries",
"winningPlan": {
"inputStage": {
"transformBy": {
"_id": false
},
"inputStage": {
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "some#email.com"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "another#email.com"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"direction": "forward",
"stage": "COLLSCAN"
},
"stage": "PROJECTION"
},
"stage": "SUBPLAN"
},
"indexFilterSet": false,
"plannerVersion": 1
},
"ok": 1.0,
"serverInfo": {
"host": "somehost",
"version": "3.4.6",
"port": 27017,
"gitVersion": "c55eb86ef46ee7aede3b1e2a5d184a7df4bfb5b5"
}
}
I suggest creating a new (compound) index, since in your case you are searching on two fields:
db.entries.createIndex( {"email": 1, "zip": 1} )
Now run your query with explain() appended; you should see that instead of a COLLSCAN it has started to use an IXSCAN.
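For example, something along these lines (field names taken from your sample list; note the question uses zip while the explain output shows zipcode, so use whichever your collection actually stores):
db.entries.find({
  "$or": [
    { "email": "some#email.com", "zip": "11111" },
    { "email": "another#email.com", "zip": "11112" }
  ]
}).explain("executionStats")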
To avoid indexing and re-indexing (this query will not just pertain to email/zip; it will be dynamic), I build lists of the data under each header and use them as $in arguments, then pass those into an $and. It seems to be working well enough, and no query has taken longer than 3 minutes.
Example:
{'$and': [{'email': {'$in': ['some#example.com', 'fake#example.com', 'email#example.com']}, 'zipcode': {'$in': ['12345', '11111', '11112']}}]}