MongoDB find $text search query is changed

So I am running into some weird behavior in my Mongo environment when using the $text operator. I am getting results back when I am expecting none: entries containing the string ABCD are matching, even though no entry contains ABCDE. A wildcard text index is defined ({ '$**': 'text' }).
As you can see in the explain output below, the parsed text query is modifying what I have in $search. Any ideas what might be going on?
> db.catalogentries.find({$text: {$search: 'abcde'}}).pretty().explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.catalogentries",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "abcde",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
},
"winningPlan" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "$**_text",
"parsedTextQuery" : {
"terms" : [
"abcd"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "TEXT_OR",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "$**_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "mongo01",
"port" : 27017,
"version" : "3.2.4",
"gitVersion" : "e2ee9ffcf9f5a94fad76802e28cc978718bb7a30"
},
"ok" : 1
}
It should also be noted that if I search for ABCDF, ABCDD, or ABC, I get no results, as expected.

If your search term isn't an actual word, then you should add $language: 'none' to your query to disable word stemming that can lead to unexpected results like this.
So change your query to:
db.catalogentries.find({$text: {$search: 'abcde', $language: 'none'}})
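For context, the explain output above shows the English stemmer reducing the search term abcde to abcd, which is why ABCD entries match. If stemming is never wanted for this collection, a rough alternative (just a sketch, not required by the fix above; it assumes the index name $**_text shown in the explain output) is to rebuild the wildcard text index with no default language:
db.catalogentries.dropIndex('$**_text')   // index name taken from the explain output above
db.catalogentries.createIndex({ '$**': 'text' }, { default_language: 'none' })   // nothing gets stemmed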

Related

Why is the aggregation framework slower than a simple find query?

I am new to MongoDB and came across some strange behaviour of the aggregation framework.
I have a collection named 'billingData' with approximately 2M documents.
I am comparing two queries which give me the same output, but their execution times are different.
Query 1:
db.billingData.find().sort({"_id":-1}).skip(100000).limit(50)
Execution Plan:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "billingDetails.billingData",
"indexFilterSet" : false,
"parsedQuery" : {},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 50,
"inputStage" : {
"stage" : "SKIP",
"skipAmount" : 100000,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : []
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
"rejectedPlans" : []
},
"serverInfo" : {
"host" : "ip-172-60-62-125",
"port" : 27017,
"version" : "3.6.3",
"gitVersion" : "9586e557d54ef70f9ca4b43c26892cd55257e1a5"
},
"ok" : 1.0
}
Query 2:
db.billingData.aggregate([
{$sort : {"_id":-1}},
{$skip:100000},
{$limit:50}
])
Execution Plan:
{
"stages" : [
{
"$cursor" : {
"query" : {},
"sort" : {
"_id" : -1
},
"limit" : NumberLong(100050),
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "billingDetails.billingData",
"indexFilterSet" : false,
"parsedQuery" : {},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : []
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
}
}
},
"rejectedPlans" : []
}
}
},
{
"$skip" : NumberLong(100000)
}
],
"ok" : 1.0
}
I was expecting the same results from the aggregation framework and the find query, but the find query returned results in 2 seconds while the aggregation took 16 seconds.
In both queries I am sorting my documents in descending order (on the basis of _id) and fetching 50 records after skipping 100,000 records.
Can someone explain why the aggregation framework behaves this way?
What can I do to make its performance similar to the find query?
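To see where the extra time goes, one thing worth trying (just an investigation sketch; explain for aggregate on 3.6 may only report planner information rather than full execution statistics) is to compare the execution statistics of both forms:
// find form: totalKeysExamined, totalDocsExamined and executionTimeMillis
// appear under executionStats.
db.billingData.find().sort({ _id: -1 }).skip(100000).limit(50).explain("executionStats")

// aggregation form: the shell accepts an explain option on aggregate().
db.billingData.aggregate(
  [ { $sort: { _id: -1 } }, { $skip: 100000 }, { $limit: 50 } ],
  { explain: true }
)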

Why are results from Mongo being returned in reverse ObjectId order?

I have a list of news article items which I am tagging with entities and topic tags.
My query:
db["fmetadata"].find({
  '$and': [
    { '$text': { '$search': 'apple trump' } },
    { '$or': [
      { 'entities': { '$elemMatch': { '$regex': 'apple|trump' } } },
      { 'tags': { '$elemMatch': { '$regex': 'apple|trump' } } }
    ] }
  ]
}).explain()
Query plan:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "dfabric.fmetadata",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
{
"$text" : {
"$search" : "apple trump",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"entities" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
},
{
"tags" : {
"$elemMatch" : {
"$regex" : "apple|trump"
}
}
}
]
},
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "title_text_tags_text_entities_text",
"parsedTextQuery" : {
"terms" : [
"appl",
"trump"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title_text_tags_text_entities_text",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
}
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "fabric-dev",
"port" : 27017,
"version" : "4.0.2",
"gitVersion" : "fc1573ba18aee42f97a3bb13b67af7d837826b47"
},
"ok" : 1
}
I see that
["queryPlanner"]["winningPlan"]["inputStage"]["inputStage"]["inputStages"]
"stage": "IXSCAN"
"direction": "backward"
Can someone please explain why?
I was developing a pagination cursor using the > lastId and limit technique. But since results are being returned backwards, I have to use < lastId, which seems counterintuitive.
If I don't sort my results and rely on the natural order, can it be guaranteed that they will always be returned backwards/in reverse?
Edit: as mentioned in the comment below
My objective here is to get an intuition as to why the index was scanned backwards: is it the way I formulated my query, or something else entirely? Whether the ordering is forwards or backwards doesn't matter as much as the consistency of it always remaining so, either always forwards or vice versa.
I came across this question on Stack Overflow, and I believe the accepted answer, together with the comments below it, satisfactorily gives me the intuition I was looking for.
How does MongoDB sort records when no sort order is specified?
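If the underlying goal is just a stable paging order, one option (a sketch, not taken from the linked answer; the lastId value and the limit of 20 are hypothetical) is to make the order explicit with sort() instead of relying on whatever scan direction the planner picks:
// Page forward in ascending _id order regardless of how the index is scanned.
var lastId = ObjectId("000000000000000000000000");   // hypothetical: last _id seen on the previous page
db["fmetadata"].find({
  '$text': { '$search': 'apple trump' },
  '_id': { '$gt': lastId }
}).sort({ _id: 1 }).limit(20)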

Why is MongoDB treating queries differently when served from the same index?

I have a collection where my documents looks like:
{
"_id" : ObjectId("591dbe4a77d4ede22d765250"),
"name" : [
{
"de" : true,
"text" : "Brunhilde"
},
{
"sk" : true,
"text" : "Šimon"
}
]
}
I have defined an index as:
> db.names.createIndex({ 'name.de': 1, 'name.text': 1 }, { name: 'name_de', partialFilterExpression: { 'name.de': { $exists: true } }, collation: { locale: 'de' } });
When I do a query like:
> db.names.find({ 'name.de': true, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
The explain plan looks like:
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"name.text" : {
"$eq" : "Rüdi"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[true, true]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
It does an IXSCAN followed by a FETCH stage with a filter. I've already created a question about the filter here.
More interesting is what happens when I just change the matching part of the query to:
> db.names.find({ 'name.de': { $exists: true }, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
i.e. the expression 'name.de': { $exists: true } should still be a subset of the partialFilterExpression. As stated in the documentation:
To use the partial index, a query must contain the filter expression (or a modified filter expression that specifies a subset of the filter expression) as part of its query condition.
But the explain plan looks like this:
...
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"name.de" : {
"$exists" : true
}
},
{
"name.text" : {
"$eq" : "Rüdi"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[MinKey, MaxKey]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
...
As you can see, the index is used, but the whole filtering happens in the FETCH stage.
The question is: why is the filtering done in the FETCH stage, and what is so different between these two queries that MongoDB treats them differently?
Additionally, a sort query with $exists, such as:
> db.names.find({ 'name.de': { $exists: true } }).sort({ 'name.text': 1 }).collation({ locale: "de" })
behaves the same; the whole filtering and sorting is done after the IXSCAN stage:
...
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"name.text" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"name.de" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
}
...
It even produces incorrect results, as the index is not used for sorting.
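One way to quantify the difference between the two query forms (an investigation sketch only, using nothing beyond the standard explain verbosity) is to compare keys and documents examined:
// Equality form: tight [true, true] bounds on name.de, as in the first plan above.
db.names.find({ 'name.de': true, 'name.text': 'Rüdi' })
  .collation({ locale: 'de' })
  .explain("executionStats")   // compare totalKeysExamined / totalDocsExamined

// $exists form: full [MinKey, MaxKey] bounds, with the filtering pushed to FETCH.
db.names.find({ 'name.de': { $exists: true }, 'name.text': 'Rüdi' })
  .collation({ locale: 'de' })
  .explain("executionStats")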

MongoDB index with $nin seems not to work in combination with $regex

It seems that my index in MongoDB is not correct.
I have created these 3 indexes:
{
_id: 1
}
{
isbn: 1
}
{
_id: 1,
isbn: 1
}
When doing a query with isbn or _id it works perfectly, even with isbn and _id together. For example:
db.getCollection('books').find({
isbn: {
$regex: '^978048627.*'
},
_id: 'vGXejKQH5kw8Kfutk'
}
needs around 3ms.
But let's now say I want to search for an ISBN and need to exclude some _ids, so I do this:
db.getCollection('books').find({
isbn: {
$regex: '^97804862731.*'
},
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Now it's not working as it should. The query took more than 10 seconds!
When I do an isbn search without $regex but with $nin, it works perfectly; again around 3ms for the query. Example:
db.getCollection('books').find({
isbn: '9780486273136',
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Am I doing something wrong? And why is the index not working as it should?
Here is the .explain() output for the 10-second query:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}
Solution
My solution, though I do not know why it works, is to use $and and $ne instead of $nin.
My query now looks like this:
db.getCollection('books').find({
  isbn: { $regex: '^97804862731.*' },
  $and: [
    { _id: { $ne: 'vGXejKQH5kw8Kfutk' } },
    { _id: { $ne: 'another-id' } }
  ]
})
and it takes just around 3ms.
Maybe someone can explain how this can happen?
The explain() output of this query:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}
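One way to narrow down where the time is spent (just an investigation sketch; hint() and the index names are taken from the plans above) is to compare execution statistics for the $nin form against the same query forced onto the single-field isbn index:
// $nin form on the winning compound index (isbn_1__id_1): check totalKeysExamined.
db.getCollection('books').find({
  isbn: { $regex: '^97804862731.*' },
  _id: { $nin: ['vGXejKQH5kw8Kfutk'] }
}).explain("executionStats")

// Same query forced onto the single-field isbn index for comparison.
db.getCollection('books').find({
  isbn: { $regex: '^97804862731.*' },
  _id: { $nin: ['vGXejKQH5kw8Kfutk'] }
}).hint('isbn_1').explain("executionStats")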

Sorting with $in not returning all docs

I have the following query.
db.getCollection('logs').find({'uid.$id': {
'$in': [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]}, levelno: { '$gte': 10 }
}).sort({_id: 1})
This should return 1847 documents. However, when executing it, I only get 1000 documents, which is the cursor's batch size, and then the cursor closes (setting its cursorId to 0) as if all documents were returned.
If I take out the sorting, then I get all 1847 documents.
So my question is, why does it silently fail when using sorting with the $in operator?
EDIT
Using explain gives the following output
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "session.logs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"levelno" : {
"$gte" : 10
}
},
{
"uid.$id" : {
"$in" : [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid.$id" : 1,
"levelno" : 1,
"_id" : 1
},
"indexName" : "uid.$id_1_levelno_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"uid.$id" : [
"[ObjectId('580e3397812de36b86d68c04'), ObjectId('580e3397812de36b86d68c04')]",
"[ObjectId('580e339a812de36b86d68c08'), ObjectId('580e339a812de36b86d68c08')]",
"[ObjectId('580e339a812de36b86d68c09'), ObjectId('580e339a812de36b86d68c09')]",
"[ObjectId('580e33a9812de36b86d68c0a'), ObjectId('580e33a9812de36b86d68c0a')]",
"[ObjectId('580e33a9812de36b86d68c0b'), ObjectId('580e33a9812de36b86d68c0b')]",
"[ObjectId('580e33bd812de36b86d68c11'), ObjectId('580e33bd812de36b86d68c11')]",
"[ObjectId('580e33c0812de36b86d68c13'), ObjectId('580e33c0812de36b86d68c13')]"
],
"levelno" : [
"[10.0, inf.0]"
],
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"_id" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"levelno" : 1,
"_id" : 1,
"uid.$id" : 1
},
"indexName" : "levelno_1__id_1_uid.$id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"levelno" : [
"[10.0, inf.0]"
],
"_id" : [
"[MinKey, MaxKey]"
],
"uid.$id" : [
"[ObjectId('580e3397812de36b86d68c04'), ObjectId('580e3397812de36b86d68c04')]",
"[ObjectId('580e339a812de36b86d68c08'), ObjectId('580e339a812de36b86d68c08')]",
"[ObjectId('580e339a812de36b86d68c09'), ObjectId('580e339a812de36b86d68c09')]",
"[ObjectId('580e33a9812de36b86d68c0a'), ObjectId('580e33a9812de36b86d68c0a')]",
"[ObjectId('580e33a9812de36b86d68c0b'), ObjectId('580e33a9812de36b86d68c0b')]",
"[ObjectId('580e33bd812de36b86d68c11'), ObjectId('580e33bd812de36b86d68c11')]",
"[ObjectId('580e33c0812de36b86d68c13'), ObjectId('580e33c0812de36b86d68c13')]"
]
}
}
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"levelno" : {
"$gte" : 10
}
},
{
"uid.$id" : {
"$in" : [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"ok" : 1
}
What's happening is that this sorted query must be performed in-memory as it's not supported by an index, and this limits the results to 32 MB. This behavior is documented here, with a JIRA about addressing this here.
Furthermore, you can't define an index to support this query as you're sorting on a field that isn't part of the query, and neither of these cases apply:
If the sort keys correspond to the index keys or an index prefix,
MongoDB can use the index to sort the query results. A prefix of a
compound index is a subset that consists of one or more keys at the
start of the index key pattern.
...
An index can support sort operations on a non-prefix subset of the
index key pattern. To do so, the query must include equality
conditions on all the prefix keys that precede the sort keys.
You should be able to work around the limitation by using the aggregation framework, which can be instructed to use temporary files for its pipeline stage outputs if required, via the allowDiskUse: true option:
db.getCollection('logs').aggregate([
{$match: {'uid.$id': {
'$in': [
ObjectId("580e3397812de36b86d68c04"),
ObjectId("580e33a9812de36b86d68c0b"),
ObjectId("580e339a812de36b86d68c09"),
ObjectId("580e339a812de36b86d68c08"),
ObjectId("580e33a9812de36b86d68c0a"),
ObjectId("580e33bd812de36b86d68c11"),
ObjectId("580e33c0812de36b86d68c13")
]}, levelno: { '$gte': 10 }
}},
{$sort: {_id: 1}}
], { allowDiskUse: true })
You can use the objsLeftInBatch() method to determine how many objects are left in the current batch and iterate over them.
You can also override the cursor's batch size and result limit using cursor.batchSize(size) and cursor.limit(limit).
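A minimal sketch of the cursor helpers mentioned above (the batch size of 200 is an arbitrary example value; this only illustrates iteration and does not by itself address the in-memory sort limit):
// Iterate the cursor explicitly; hasNext()/next() pull further batches as needed.
var cursor = db.getCollection('logs').find(
  { 'uid.$id': { $in: [ ObjectId("580e3397812de36b86d68c04") ] }, levelno: { $gte: 10 } }
).batchSize(200);

while (cursor.hasNext()) {
  var doc = cursor.next();
  // printjson(doc);
  // cursor.objsLeftInBatch() reports how many documents remain in the current batch.
}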