MongoDB – Slow text search query with multiple words on a large dataset - mongodb

I have a M10 Atlas instance.
The issue happens in a DB with 2.5m of documents, but only 900k of { status: "active" } ones.
The search is quick when no sorting/projection is performed, but in order to show the most accurate results first, it needs to be done otherwise the results are a mess.
For some reason this text search is being very slow for medium/long phrases and I have no idea how to speed it up.
Having the following document:
{
_id: "FOO123",
track_date: { $date: "2019-03-09T05:49:22.000Z" },
category_id: "foo",
parent_category_id: "foo",
title: "Disco Ssd Solido Kingston 480g Macbook Pro Air iMac Martinez",
status: "active",
site_id: "foo",
seller_id: 9999,
price: 9999,
permalink:
"https://example.com",
secure_thumbnail:
"https://example.com/img.jpg",
images: [
"https://example.com/img.jpg",
],
fluctuation: 80,
last_update: { $date: "2020-12-11T14:00:24.715Z" },
original_price: null,
}
Performing the following query:
db.articles.aggregate([
{
"$match": {
"status": "active",
"site_id": "foo",
"$text": {
"$search": "Disco Ssd solido kingston 480g macbook pro air",
"$caseSensitive": false
}
}
},
{
"$sort": {
"score": {
"$meta": "textScore"
}
}
},
{
"$skip": 0
},
{
"$limit": 10
}
])
Takes around 35s to yield results.
These are the indexes that I have set up for the status and title fields (I also have compound indexes that mix these two and a few more properties; I can show them here if needed):
{
v: 2,
key: { _fts: "text", _ftsx: 1 },
name: "title_text",
language_override: "language",
weights: { title: 1 },
default_language: "english",
ns: "foo.articles",
textIndexVersion: 3,
},
{
v: 2,
key: { status: 1 },
name: "status_1",
ns: "foo.articles",
}
This is the result of explain("executionStats")
Query (as the aggregation did not work with "executionStats"):
collection.find(
{
status: "active",
site_id: "foo",
$text: {
$search: "Disco Ssd solido kingston 480g macbook pro air",
$caseSensitive: false,
},
},
{ score: { $meta: "textScore" } }
)
.sort({ score: { $meta: "textScore" } })
.skip(0)
.limit(10)
.explain("executionStats")
Explain with execution stats:
{
queryPlanner: {
plannerVersion: 1,
namespace: "foo.articles",
indexFilterSet: false,
parsedQuery: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
{
$text: {
$search: "Disco Ssd solido kingston 480g macbook pro air",
$language: "english",
$caseSensitive: false,
$diacriticSensitive: false,
},
},
],
},
winningPlan: {
stage: "PROJECTION_DEFAULT",
transformBy: { score: { $meta: "textScore" } },
inputStage: {
stage: "SORT",
sortPattern: { score: { $meta: "textScore" } },
limitAmount: 10,
inputStage: {
stage: "SORT_KEY_GENERATOR",
inputStage: {
stage: "FETCH",
filter: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
],
},
inputStage: {
stage: "TEXT",
indexPrefix: {},
indexName: "title_text",
parsedTextQuery: {
terms: [
"480g",
"air",
"disco",
"kingston",
"macbook",
"pro",
"solido",
"ssd",
],
negatedTerms: [],
phrases: [],
negatedPhrases: [],
},
textIndexVersion: 3,
inputStage: {
stage: "TEXT_MATCH",
inputStage: {
stage: "TEXT_OR",
inputStages: [
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
],
},
},
},
},
},
},
},
rejectedPlans: [],
},
executionStats: {
executionSuccess: true,
nReturned: 10,
executionTimeMillis: 30458,
totalKeysExamined: 169654,
totalDocsExamined: 279454,
executionStages: {
stage: "PROJECTION_DEFAULT",
nReturned: 10,
executionTimeMillisEstimate: 27307,
works: 309403,
advanced: 10,
needTime: 309392,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
transformBy: { score: { $meta: "textScore" } },
inputStage: {
stage: "SORT",
nReturned: 10,
executionTimeMillisEstimate: 27299,
works: 309403,
advanced: 10,
needTime: 309392,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
sortPattern: { score: { $meta: "textScore" } },
memUsage: 9275,
memLimit: 33554432,
limitAmount: 10,
inputStage: {
stage: "SORT_KEY_GENERATOR",
nReturned: 31606,
executionTimeMillisEstimate: 27262,
works: 309392,
advanced: 31606,
needTime: 277785,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
inputStage: {
stage: "FETCH",
filter: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
],
},
nReturned: 31606,
executionTimeMillisEstimate: 27251,
works: 309391,
advanced: 31606,
needTime: 277784,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsExamined: 139727,
alreadyHasObj: 139727,
inputStage: {
stage: "TEXT",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
indexPrefix: {},
indexName: "title_text",
parsedTextQuery: {
terms: [
"480g",
"air",
"disco",
"kingston",
"macbook",
"pro",
"solido",
"ssd",
],
negatedTerms: [],
phrases: [],
negatedPhrases: [],
},
textIndexVersion: 3,
inputStage: {
stage: "TEXT_MATCH",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsRejected: 0,
inputStage: {
stage: "TEXT_OR",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsExamined: 139727,
inputStages: [
{
stage: "IXSCAN",
nReturned: 291,
executionTimeMillisEstimate: 0,
works: 292,
advanced: 291,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 291,
seeks: 1,
dupsTested: 291,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 26973,
executionTimeMillisEstimate: 121,
works: 26974,
advanced: 26973,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 26973,
seeks: 1,
dupsTested: 26973,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 29687,
executionTimeMillisEstimate: 69,
works: 29688,
advanced: 29687,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 29687,
seeks: 1,
dupsTested: 29687,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 13595,
executionTimeMillisEstimate: 28,
works: 13596,
advanced: 13595,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 13595,
seeks: 1,
dupsTested: 13595,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 10730,
executionTimeMillisEstimate: 28,
works: 10731,
advanced: 10730,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 10730,
seeks: 1,
dupsTested: 10730,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 53358,
executionTimeMillisEstimate: 248,
works: 53359,
advanced: 53358,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 53358,
seeks: 1,
dupsTested: 53358,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 7847,
executionTimeMillisEstimate: 13,
works: 7848,
advanced: 7847,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 7847,
seeks: 1,
dupsTested: 7847,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 27173,
executionTimeMillisEstimate: 17,
works: 27174,
advanced: 27173,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 27173,
seeks: 1,
dupsTested: 27173,
dupsDropped: 0,
},
],
},
},
},
},
},
},
},
},
serverInfo: {
host: "host",
port: "port",
version: "4.2.11",
gitVersion: "ea38428f0c6742c7c2c7f677e73d79e17a2aab96",
},
ok: 1,
$clusterTime: {
clusterTime: Timestamp(1, 1609902178),
signature: {
hash: BinData(0, "cunb+7FEjXmbwN22uz3F+nV0LKE="),
keyId: NumberLong("6911938552179720195"),
},
},
operationTime: Timestamp(1, 1609902178),
}
The issue is not related to the status field, as I've tried removing it and the issue persists.
I don't really mind a query that's a bit slow, but more than half a minute is too much for a search.
At first I was doing an AND query for the text search instead of an OR, but it was even slower for long phrases, so I decided to give the score a try.
I've also tried removing the sort stage, but it still takes ~30s.
I'll take any tip anyone can give me at this point.
EDIT: According to the explain("executionStats") bit, it seems that the sort/projection of score is the time sink, but I've no idea how to improve that in order to get the best matches first

Mongodb Atlas specific feature
Since you are using Atlas, you may find Atlas Search (https://docs.atlas.mongodb.com/atlas-search) a better alternative. It uses the Lucene engine, similar to Elasticsearch, Solr, etc., instead of the rather limited native text index which is available on self-hosted versions of the database.
Atlas Search is way more flexible and in most cases faster than the MongoDB text index. The downsides are a noticeable indexing lag (the search index is only eventually consistent) and a higher demand on disk space (especially for ngram tokenizers), but these are not critical in your case.
Atlas Search is available on clusters running v4.2+.
Give it a try, it's included in your subscription.
The highlighted parts of the question may be confusing and deserve some explanation:
The issue happens in a DB with 2.5m of documents, but only 900k of { status: "active" } ones.
It doesn't really matter how many active documents there are. There is no index intersection, so only the text index is used. Filtering by status and site_id is done only in the FETCH stage. In other words, you search all 2.5m documents regardless of the other filters.
The search is quick when no sorting/projection is performed
$sort is a blocking stage, which means it requires the previous stage to complete. You have $skip-$limit stages to return 10 documents only. Without the $sort stage, the pipeline returns the 10 documents that were found first. With the $sort stage, the pipeline waits for all documents to be searched in order to find the 10 with the highest score.
For some reason this text search is being very slow for medium/long phrases
The reason is obvious - the more words in the phrase, the more searches MongoDB needs to do. It's one search per word, followed by a calculation of the score based on the results of all searches.
Memory shouldn't be an issue for the sorting stage. The limit is 100 MB, then it starts using disk regardless of how much RAM you have.
Lack of memory might be a problem if all indexes and the working set don't fit into RAM. If it is the case it should be highlighted in the "Performance Advisor" tab.
db.collection.totalIndexSize() can also give you some idea of how much RAM you would need for optimal performance. It is not specific to the text index and affects performance of the whole cluster.

Related

mongodb example: why is this not a covered query?

i created the following collection:
students> db.students.find()
[
{ _id: 1, grades: [ 95, 92, 90 ], average: 145 },
{ _id: 2, grades: [ 98, 100, 102 ], average: 145 },
{ _id: 3, grades: [ 95, 110, 100 ], average: 145 }
]
these are the available indexes:
students> db.students.getIndexes()
[
{ v: 2, key: { _id: 1 }, name: '_id_' },
{ v: 2, key: { grades: 1 }, name: 'grades_1' },
{ v: 2, key: { average: 1 }, name: 'average_1' }
]
If I run this query, I expect it to be covered, since it needs just the 'average' field, which is an indexed field.
db.students.find({average:145}, {_id:0, grades:0})
Instead, I get the following output using explain("executionStats"):
[...]
winningPlan: {
stage: 'PROJECTION_DEFAULT',
transformBy: { _id: 0, grades: 0 },
inputStage: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: { average: 1 },
indexName: 'average_1',
isMultiKey: false,
multiKeyPaths: { average: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { average: [ '[145, 145]' ] }
}
}
}
And this:
totalDocsExamined: 3
Which means that the query had to read the documents in the collection... what am i missing?
Thanks
You need to specify projection to extract only indexed fields:
db.students.find({average:145}, {_id:0, average:1})
MongoDB is schemaless, so there is no way to know what fields are in the document until the FETCH phase.
One of the conditions for covered query to happen (from https://docs.mongodb.com/manual/core/query-optimization/#covered-query) is
all the fields returned in the results are in the same index.
The {_id:0, grades:0} spells "I want all fields but grades", and {_id:0, average:1} means "I want only average".

How to make MongoDB use indexes?

I am using a copy of the listingsAndReviews collection from the sample_airbnb sample database.
I have added an index on the address.country field, thus when performing
db.listingsAndReviews.getIndexes ()
I get
[
{ v: 2, key: { _id: 1 }, name: '_id_' },
{
v: 2,
key: { 'address.country': 1 },
name: 'address_country',
background: false,
collation: {
locale: 'en_US',
caseLevel: false,
caseFirst: 'off',
strength: 1,
numericOrdering: false,
alternate: 'non-ignorable',
maxVariable: 'punct',
normalization: false,
backwards: false,
version: '57.1'
}
}
]
Now, I am trying to use the following aggregate:
db.listingsAndReviews.aggregate( [{ $sort: { "address.country": 1 } },
{ $project: { "address.country": 1, _id: 0 } },
{ $group: { _id: "$address.country", count: { $sum: 1 } } }],
{ cursor: { batchSize: 32 }, allowDiskUse: false })
Using explain("executionStats") I see that my index is not used!
{
stages: [
{
'$cursor': {
queryPlanner: {
plannerVersion: 1,
namespace: 'mytests.listingsAndReviews',
indexFilterSet: false,
parsedQuery: {},
queryHash: '6E8AB948',
planCacheKey: '6E8AB948',
winningPlan: {
stage: 'SORT',
sortPattern: { 'address.country': 1 },
memLimit: 104857600,
type: 'simple',
inputStage: {
stage: 'PROJECTION_DEFAULT',
transformBy: { address: { country: true }, _id: false },
inputStage: { stage: 'COLLSCAN', direction: 'forward' }
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 5555,
executionTimeMillis: 17,
totalKeysExamined: 0,
totalDocsExamined: 5555,
executionStages: {
stage: 'SORT',
nReturned: 5555,
executionTimeMillisEstimate: 1,
works: 11113,
advanced: 5555,
needTime: 5557,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
sortPattern: { 'address.country': 1 },
memLimit: 104857600,
type: 'simple',
totalDataSizeSorted: 318167,
usedDisk: false,
inputStage: {
stage: 'PROJECTION_DEFAULT',
nReturned: 5555,
executionTimeMillisEstimate: 1,
works: 5557,
advanced: 5555,
needTime: 1,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
transformBy: { address: { country: true }, _id: false },
inputStage: {
stage: 'COLLSCAN',
nReturned: 5555,
executionTimeMillisEstimate: 0,
works: 5557,
advanced: 5555,
needTime: 1,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
direction: 'forward',
docsExamined: 5555
}
}
}
}
},
nReturned: Long("5555"),
executionTimeMillisEstimate: Long("15")
},
{
'$group': { _id: '$address.country', count: { '$sum': { '$const': 1 } } },
nReturned: Long("9"),
executionTimeMillisEstimate: Long("15")
}
],
serverInfo: {
host: 'ldp-2103',
port: 27017,
version: '4.4.6',
gitVersion: '72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7'
},
ok: 1
}
Why is the index not used?
I've tried the same query using mongodb-compass and it clearly shows
Index Keys Examined:0

MongoDB very slow $count after $lookup

Help, I am using MongoDB 4.2.6 and writing an aggregation to obtain the count of filtered documents from a collection with 40000+ documents. Before applying the $count stage, I need to $lookup an extra collection as well.
Here is my aggregate
db.exams.aggregate([{
$match: {
schoolId: ObjectId("5d91c9ec098506001b426cb5")
}
}, {
$lookup: {
from: 'students',
localField: 'studentId',
foreignField: '_id',
as: 'student'
}
}, {
$unwind: "$student"
}, {
$match: {
"student.gender": 1
}
},{
$count: 'count'
}])
But it takes more than 10 seconds. I have already added indexes on every ID: exams._id, students._id, exams.studentId, exams.schoolId, student.gender, etc...
Can someone give me some suggestions in order to make the query faster?
Explains:
{
stages: [
{
$cursor: {
query: {
schoolId: ObjectId('5d91c9ec098506001b426cb5')
},
fields: {
_id: 1
},
queryPlanner: {
plannerVersion: 1,
namespace: 'happya.exams',
indexFilterSet: false,
parsedQuery: {
schoolId: {
$eq: ObjectId('5d91c9ec098506001b426cb5')
}
},
queryHash: '9533F340',
planCacheKey: 'CE7F9610',
winningPlan: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: {
schoolId: 1
},
indexName: 'schoolId_1',
isMultiKey: false,
multiKeyPaths: {
schoolId: []
},
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
schoolId: [
"[ObjectId('5d91c9ec098506001b426cb5'), ObjectId('5d91c9ec098506001b426cb5')]"
]
}
}
},
rejectedPlans: [
{
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: {
schoolId: 1,
referenceNo: 1
},
indexName: 'schoolId_1_referenceNo_1',
isMultiKey: false,
multiKeyPaths: {
schoolId: [],
referenceNo: []
},
isUnique: true,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
schoolId: [
"[ObjectId('5d91c9ec098506001b426cb5'), ObjectId('5d91c9ec098506001b426cb5')]"
],
referenceNo: ['[MinKey, MaxKey]']
}
}
}
]
}
}
},
{
$lookup: {
from: 'students',
as: 'student',
localField: 'studentId',
foreignField: '_id',
unwinding: {
preserveNullAndEmptyArrays: false
},
matching: {
gender: {
$eq: 1
}
}
}
},
{
$group: {
_id: {
$const: null
},
count: {
$sum: {
$const: 1
}
}
}
},
{
$project: {
_id: false,
count: true
}
}
],
serverInfo: {
host: 'a98010d6dcf4',
port: 27017,
version: '4.2.6',
gitVersion: '20364840b8f1af16917e4c23c1b5f5efd8b352f8'
},
ok: 1,
$clusterTime: {
clusterTime: Timestamp(1597720010, 1),
signature: {
hash: BinData(0, '1PiNaAzDzNRrnZl/mpVJP4oneyU='),
keyId: NumberLong('6819213090182135813')
}
},
operationTime: Timestamp(1597720010, 1)
};

Sort exceeds limit on text search?

I have defined an index like this:
db.imageProperties.createIndex(
{
"imageProperties.cameraMaker": "text",
"imageProperties.cameraModel": "text",
"imageProperties.dateTimeOriginal": -1,
},
{ name: "TextIndex" }
)
But, when I try to run a query with a sort like this:
db.imageProperties.find( { $text: { $search: "nikon" } }, {"imagePath" : 1, _id: 0 } ).sort( { "imageProperties.dateTimeOriginal": -1 } )
I get this error:
Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command :: caused by :: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
It is my understanding from reading the documentation that it would be possible to combine text search with sorting by creating a combined index as I have done.
This is the output from .explain() on the above query:
> db.imageProperties.find( { $text: { $search: "nikon" } }, {"imagePath" : 1, _id: 0 } ).sort( { "imageProperties.dateTimeOriginal": -1 } ).explain()
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "olavt-images.imageProperties",
"indexFilterSet": false,
"parsedQuery": {
"$text": {
"$search": "nikon",
"$language": "english",
"$caseSensitive": false,
"$diacriticSensitive": false
}
},
"queryHash": "1DCFCE0B",
"planCacheKey": "650B3A8E",
"winningPlan": {
"stage": "PROJECTION_SIMPLE",
"transformBy": {
"imagePath": 1,
"_id": 0
},
"inputStage": {
"stage": "SORT",
"sortPattern": {
"imageProperties.dateTimeOriginal": -1
},
"inputStage": {
"stage": "SORT_KEY_GENERATOR",
"inputStage": {
"stage": "TEXT",
"indexPrefix": {
},
"indexName": "TextIndex",
"parsedTextQuery": {
"terms": [
"nikon"
],
"negatedTerms": [],
"phrases": [],
"negatedPhrases": []
},
"textIndexVersion": 3,
"inputStage": {
"stage": "TEXT_MATCH",
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "OR",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"_fts": "text",
"_ftsx": 1,
"imageProperties.dateTimeOriginal": -1
},
"indexName": "TextIndex",
"isMultiKey": true,
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "backward",
"indexBounds": {
}
}
}
}
}
}
}
}
},
"rejectedPlans": []
},
"serverInfo": {
"host": "4794df1ed9c4",
"port": 27017,
"version": "4.2.5",
"gitVersion": "2261279b51ea13df08ae708ff278f0679c59dc32"
},
"ok": 1
}
How can I get the desired behavior?
The error suggests that sorting the result requires more memory than what is configured.
The field imagePath that you want to project is not covered by the TextIndex. Try adding a new index:
db.imageProperties.createIndex(
{
"imageProperties.cameraMaker": "text",
"imageProperties.cameraModel": "text",
"imageProperties.dateTimeOriginal": -1,
"imagePath": 1
}
)
Then try the following steps:
Check that the indexes are created successfully by running:
db.imageProperties.getIndexes()
Check whether the correct index is being used:
db.imageProperties.find( { $text: { $search: "nikon" } }, {"imagePath" : 1, _id: 0 } )
.sort( { "imageProperties.dateTimeOriginal": -1 } ).explain()
If you only want a limited number of results, also add a limit:
db.imageProperties.find( { $text: { $search: "nikon" } }, {"imagePath" : 1, _id: 0 } )
.sort( { "imageProperties.dateTimeOriginal": -1 } ).limit(100)
You can also allow disk usage by using aggregation framework with allowDiskUse
db.imageProperties.aggregate([{
$match: { $text: { $search: "nikon" } }
}, {
$sort: { "imageProperties.dateTimeOriginal": -1 }
} , {
$project: { imagePath: 1 }
}], {
allowDiskUse: true
})

mongodb runs slow on IXSCAN

I cannot figure out what is wrong with some requests in MongoDB. I created an index which matches all the fields from the request, but it didn't help.
Is the problem in the index? Which fields should I include in the index?
Here is a part of the slow log:
query: {
$query: {
moderation.blocked: false,
project_id: "9fd6db37-049f-4af3-bea4-8301345dc109",
dates.posted: { $gt: 1458720426.177019 },
parent_ids: "31cd1be3-adbd-4108-a05e-58c283176738",
moderation.visible: true },
$orderby: { dates.posted: 1 }
}
planSummary: IXSCAN {
parent_ids: 1,
dates.posted: 1,
owner_id: 1,
project_id: 1,
moderation.blocked: 1,
moderation.visible: -1
}
ntoreturn:0
ntoskip:0
nscanned:0
nscannedObjects:0
keyUpdates:0
writeConflicts:0
numYields:1
nreturned:0
reslen:20
locks:{ Global: { acquireCount: { r: 4 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } } } 235ms
explain() for same query
{
'queryPlanner': {
'plannerVersion': 1,
'parsedQuery': {
'$and': [{
'moderation.blocked': {
'$eq': False
}
}, {
'moderation.visible': {
'$eq': True
}
}, {
'parent_ids': {
'$eq': '31cd1be3-adbd-4108-a05e-58c283176738'
}
}, {
'project_id': {
'$eq': '9fd6db37-049f-4af3-bea4-8301345dc109'
}
}, {
'dates.posted': {
'$gt': 1458720426.177019
}
}]
},
'namespace': 'comments.comments',
'rejectedPlans': [],
'winningPlan': {
'filter': {
'moderation.visible': {
'$eq': True
}
},
'stage': 'FETCH',
'inputStage': {
'indexName': 'parent_ids_1_dates.posted_1_owner_id_1_project_id_1_moderation.blocked_1_moderation.visible_-1',
'keyPattern': {
'parent_ids': 1,
'moderation.blocked': 1,
'owner_id': 1,
'moderation.visible': -1,
'dates.posted': 1,
'project_id': 1
},
'isMultiKey': True,
'indexBounds': {
'parent_ids': ['["31cd1be3-adbd-4108-a05e-58c283176738", "31cd1be3-adbd-4108-a05e-58c283176738"]'],
'moderation.blocked': ['[false, false]'],
'owner_id': ['[MinKey, MaxKey]'],
'moderation.visible': ['[MaxKey, MinKey]'],
'dates.posted': ['(1458720426.177019, inf.0]'],
'project_id': ['["9fd6db37-049f-4af3-bea4-8301345dc109", "9fd6db37-049f-4af3-bea4-8301345dc109"]']
},
'stage': 'IXSCAN',
'direction': 'forward'
}
},
'indexFilterSet': False
},
'executionStats': {
'totalDocsExamined': 0,
'allPlansExecution': [],
'totalKeysExamined': 0,
'executionSuccess': True,
'nReturned': 0,
'executionStages': {
'invalidates': 0,
'filter': {
'moderation.visible': {
'$eq': True
}
},
'isEOF': 1,
'needTime': 0,
'alreadyHasObj': 0,
'advanced': 0,
'inputStage': {
'invalidates': 0,
'isEOF': 1,
'needTime': 0,
'dupsDropped': 0,
'dupsTested': 0,
'keyPattern': {
'parent_ids': 1,
'moderation.blocked': 1,
'owner_id': 1,
'moderation.visible': -1,
'dates.posted': 1,
'project_id': 1
},
'advanced': 0,
'seenInvalidated': 0,
'nReturned': 0,
'indexBounds': {
'parent_ids': ['["31cd1be3-adbd-4108-a05e-58c283176738", "31cd1be3-adbd-4108-a05e-58c283176738"]'],
'moderation.blocked': ['[false, false]'],
'owner_id': ['[MinKey, MaxKey]'],
'moderation.visible': ['[MaxKey, MinKey]'],
'dates.posted': ['(1458720426.177019, inf.0]'],
'project_id': ['["9fd6db37-049f-4af3-bea4-8301345dc109", "9fd6db37-049f-4af3-bea4-8301345dc109"]']
},
'needFetch': 0,
'saveState': 0,
'keysExamined': 0,
'matchTested': 0,
'executionTimeMillisEstimate': 0,
'indexName': 'parent_ids_1_dates.posted_1_owner_id_1_project_id_1_moderation.blocked_1_moderation.visible_-1',
'restoreState': 0,
'works': 1,
'direction': 'forward',
'stage': 'IXSCAN',
'isMultiKey': True
},
'stage': 'FETCH',
'saveState': 0,
'docsExamined': 0,
'nReturned': 0,
'restoreState': 0,
'works': 1,
'executionTimeMillisEstimate': 0,
'needFetch': 0
},
'executionTimeMillis': 0
}