I have field 'user_name' populated with data.
This code gives me no results:
history = db.history
history.create_index([('user_name', 'text')])
history.find({'$text' : {'$search' : 'a'}})
But when I specify the exact name, it works:
history.find({'$text' : {'$search' : 'exact name'}})
Here is the output of explain() for 'a' search:
{
"executionSuccess": true,
"nReturned": 0,
"executionTimeMillis": 0,
"totalKeysExamined": 0,
"totalDocsExamined": 0,
"executionStages": {
"stage": "TEXT",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"indexPrefix": {},
"indexName": "user_name_text",
"parsedTextQuery": { "terms": [], "negatedTerms": [], "phrases": [], "negatedPhrases": [] },
"textIndexVersion": 3,
"inputStage": {
"stage": "TEXT_MATCH",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 0,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsRejected": 0,
"inputStage": {
"stage": "FETCH",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 0,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": { "stage": "OR", "nReturned": 0, "executionTimeMillisEstimate": 0, "works": 0, "advanced": 0, "needTime": 0, "needYield": 0, "saveState": 0, "restoreState": 0, "isEOF": 1, "dupsTested": 0, "dupsDropped": 0 }
}
}
},
"allPlansExecution": []
}
Here is the output of explain() for exact match of username ('akkcess'):
{
"executionSuccess": true,
"nReturned": 39,
"executionTimeMillis": 1,
"totalKeysExamined": 39,
"totalDocsExamined": 39,
"executionStages": {
"stage": "TEXT",
"nReturned": 39,
"executionTimeMillisEstimate": 0,
"works": 40,
"advanced": 39,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"indexPrefix": {},
"indexName": "user_name_text",
"parsedTextQuery": { "terms": ["akkcess"], "negatedTerms": [], "phrases": [], "negatedPhrases": [] },
"textIndexVersion": 3,
"inputStage": {
"stage": "TEXT_MATCH",
"nReturned": 39,
"executionTimeMillisEstimate": 0,
"works": 40,
"advanced": 39,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsRejected": 0,
"inputStage": {
"stage": "FETCH",
"nReturned": 39,
"executionTimeMillisEstimate": 0,
"works": 40,
"advanced": 39,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 39,
"alreadyHasObj": 0,
"inputStage": {
"stage": "OR",
"nReturned": 39,
"executionTimeMillisEstimate": 0,
"works": 40,
"advanced": 39,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"dupsTested": 39,
"dupsDropped": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 39,
"executionTimeMillisEstimate": 0,
"works": 40,
"advanced": 39,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": { "_fts": "text", "_ftsx": 1 },
"indexName": "user_name_text",
"isMultiKey": false,
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "backward",
"indexBounds": {},
"keysExamined": 39,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
}
}
},
"allPlansExecution": []
}
Do you have any idea why it behaves this way?
According to the docs and tutorials, this should work.
"a" is almost surely a stop word. Almost every natural language text would include it. Therefore if it was searched for, you'd get every single document in the result set. Since this isn't very useful, text search drops stop words like "a" from the query.
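Separately, MongoDB text search does include exact (phrase) matching functionality, but it requires the search string to be wrapped in double quotes, which you haven't done. Your posted query therefore uses regular stemmed matching, not exact matching. For an exact phrase match, the query would be: history.find({'$text' : {'$search' : '"exact name"'}})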
I have the following format of data in my collection,
{
"dName": "d1",
"city": "c1",
"state": "s1"
}, {
"dName": "d2",
"city": "c1",
"state": "s1"
}, {
"dName": "d2",
"city": "c1",
"state": "s2"
}
I have a compound index on all three fields combined.
dName is unique across documents. I want to get a list of dNames, given a city and a state. I have found that the following queries both do this:
db.collection.find({city: 'c1', state: 's1'}, {dName: 1, _id: 0}); -> returns [{dName: 'd1'}, {dName: 'd2'}]
db.collection.distinct('dName', {city: 'c1', state: 's1'}); -> returns ['d1', 'd2']
The first one returns an array of objects and the second one returns an array of strings. Other than that, is there any performance benefit to using one over the other? I think distinct is costlier, since it has to maintain the uniqueness of the response. Is that true?
Winning plans for both queries,
Of find query (1)
{
...
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"dName" : 1.0,
"_id" : 0.0
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"city" : 1,
"state" : 1,
"dName" : 1
},
"indexName" : "city_1_state_1_dName_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"city" : [],
"state" : [],
"dName" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"city" : [
"[\"c1\", \"c1\"]"
],
"state" : [
"[\"s1\", \"s1\"]"
],
"dName" : [
"[MinKey, MaxKey]"
]
}
}
}
...
}
Of distinct query (2)
{
...
"winningPlan": {
"stage": "PROJECTION",
"transformBy": {"_id": 0, "dName": 1},
"inputStage": {
"stage": "DISTINCT_SCAN",
"keyPattern": {
"city": 1,
"state": 1,
"dName": 1
},
"indexName": "city_1_state_1_dName_1",
"isMultiKey": false,
"multiKeyPaths": {
"city": [],
"state": [],
"dName": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"city": [
"[\"c1\", \"c1\"]"
],
"state": [
"[\"s1\", \"s1\"]"
],
"dName": [
"[MinKey, MaxKey]"
]
}
}
}
...
}
I've executed one and the same query several times, and the execution time varies from about a second to more than 20 seconds.
MongoDB version is 3.2.10.
Below is the output from explain method of a fast and a slow query.
Fast query:
{
"executionTimeMillis": 309,
"allPlansExecution": [
{
"shardName": "sh001-rs",
"allPlans": []
}
],
"totalKeysExamined": 18478,
"nReturned": 15096,
"executionStages": {
"executionTimeMillis": 309,
"shards": [
{
"shardName": "sh001-rs",
"executionSuccess": true,
"executionStages": {
"needYield": 0,
"docsExamined": 18378,
"saveState": 144,
"restoreState": 144,
"isEOF": 1,
"inputStage": {
"saveState": 144,
"isEOF": 1,
"seenInvalidated": 0,
"keysExamined": 18478,
"nReturned": 18378,
"invalidates": 0,
"keyPattern": {
"_id": 1
},
"isUnique": true,
"needTime": 99,
"isMultiKey": false,
"executionTimeMillisEstimate": 30,
"dupsTested": 0,
"restoreState": 144,
"direction": "forward",
"indexName": "_id_",
"isSparse": false,
"advanced": 18378,
"stage": "IXSCAN",
"dupsDropped": 0,
"needYield": 0,
"isPartial": false,
"indexBounds": {
"_id": []
},
"works": 18478,
"indexVersion": 1
},
"nReturned": 15096,
"needTime": 3381,
"filter": {
"available": {
"$gt": 0
}
},
"executionTimeMillisEstimate": 180,
"alreadyHasObj": 0,
"invalidates": 0,
"works": 18478,
"advanced": 15096,
"stage": "FETCH"
}
}
],
"nReturned": 15096,
"totalKeysExamined": 18478,
"totalChildMillis": 251,
"totalDocsExamined": 18378,
"stage": "SINGLE_SHARD"
},
"totalDocsExamined": 18378
}
Slow query:
{
"executionTimeMillis": 16139,
"allPlansExecution": [
{
"shardName": "sh001-rs",
"allPlans": []
}
],
"totalKeysExamined": 18478,
"nReturned": 15096,
"executionStages": {
"executionTimeMillis": 16139,
"shards": [
{
"shardName": "sh001-rs",
"executionSuccess": true,
"executionStages": {
"needYield": 0,
"docsExamined": 18378,
"saveState": 677,
"restoreState": 677,
"isEOF": 1,
"inputStage": {
"saveState": 677,
"isEOF": 1,
"seenInvalidated": 0,
"keysExamined": 18478,
"nReturned": 18378,
"invalidates": 0,
"keyPattern": {
"_id": 1
},
"isUnique": true,
"needTime": 99,
"isMultiKey": false,
"executionTimeMillisEstimate": 270,
"dupsTested": 0,
"restoreState": 677,
"direction": "forward",
"indexName": "_id_",
"isSparse": false,
"advanced": 18378,
"stage": "IXSCAN",
"dupsDropped": 0,
"needYield": 0,
"isPartial": false,
"indexBounds": {
"_id": []
},
"works": 18478,
"indexVersion": 1
},
"nReturned": 15096,
"needTime": 3381,
"filter": {
"available": {
"$gt": 0
}
},
"executionTimeMillisEstimate": 14518,
"alreadyHasObj": 0,
"invalidates": 0,
"works": 18478,
"advanced": 15096,
"stage": "FETCH"
}
}
],
"nReturned": 15096,
"totalKeysExamined": 18478,
"totalChildMillis": 16076,
"totalDocsExamined": 18378,
"stage": "SINGLE_SHARD"
},
"totalDocsExamined": 18378
}
The results from db.stats():
{
"raw" : {
"sh001-rs/host101-prod:27017,host102-prod:27018" : {
"db" : "records",
"collections" : 2,
"objects" : 124335,
"avgObjSize" : 48253.87085695902,
"dataSize" : 5999645033,
"storageSize" : 5008375808,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 17960960,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000018")
}
},
"sh002-rs/host101-prod:27018,host102-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 100643,
"avgObjSize" : 58044.42780918693,
"dataSize" : 5841765348,
"storageSize" : 4884041728,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 13737984,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000002")
}
},
"sh003-rs/host103-prod:27017,host104-prod:27018" : {
"db" : "records",
"collections" : 2,
"objects" : 191296,
"avgObjSize" : 31400.14176459518,
"dataSize" : 6006721519,
"storageSize" : 5967814656,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 32346112,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000012")
}
},
"sh004-rs/host103-prod:27018,host104-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 100904,
"avgObjSize" : 58444.951716482996,
"dataSize" : 5897329408,
"storageSize" : 5684531200,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 14114816,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff000000000000000c")
}
},
"sh005-rs/host105-prod:27017,host106-prod:27018" : {
"db" : "records",
"collections" : 16,
"objects" : 851626,
"avgObjSize" : 10900.204212882181,
"dataSize" : 9282897313,
"storageSize" : 7225233408,
"numExtents" : 0,
"indexes" : 43,
"indexSize" : 31690752,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff000000000000000e")
}
},
"sh006-rs/host105-prod:27018,host106-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 100946,
"avgObjSize" : 58688.667386523484,
"dataSize" : 5924386218,
"storageSize" : 7723163648,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 13565952,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000059")
}
},
"sh007-rs/host107-prod:27017,host108-prod:27018" : {
"db" : "records",
"collections" : 2,
"objects" : 100988,
"avgObjSize" : 58563.519497366025,
"dataSize" : 5914212707,
"storageSize" : 4643889152,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 14073856,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff000000000000000c")
}
},
"sh008-rs/host107-prod:27018,host108-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 100747,
"avgObjSize" : 58695.07362005817,
"dataSize" : 5913352582,
"storageSize" : 4877357056,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 13676544,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000002")
}
},
"sh009-rs/host109-prod:27017,host110-prod:27018" : {
"db" : "records",
"collections" : 4,
"objects" : 69101,
"avgObjSize" : 152884.28821580007,
"dataSize" : 10564457200,
"storageSize" : 16441020352,
"numExtents" : 32,
"indexes" : 17,
"indexSize" : 26171376,
"fileSize" : 19251855360,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000000")
}
},
"sh010-rs/host110-prod:27017,host113-prod:27018" : {
"db" : "records",
"collections" : 4,
"objects" : 69148,
"avgObjSize" : 152176.07311852838,
"dataSize" : 10522671104,
"storageSize" : 16439971776,
"numExtents" : 32,
"indexes" : 17,
"indexSize" : 26269488,
"fileSize" : 19251855360,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000002")
}
},
"sh011-rs/host109-prod:27018,host111-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 77687,
"avgObjSize" : 75111.53102835738,
"dataSize" : 5835189511,
"storageSize" : 5171572736,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 9543680,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000002")
}
},
"sh012-rs/host114-prod:27017,host115-prod:27018" : {
"db" : "records",
"collections" : 4,
"objects" : 91151,
"avgObjSize" : 115459.23068315213,
"dataSize" : 10524224336,
"storageSize" : 16454213568,
"numExtents" : 32,
"indexes" : 17,
"indexSize" : 42793184,
"fileSize" : 19251855360,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000028")
}
},
"sh013-rs/host114-prod:27018,host115-prod:27017" : {
"db" : "records",
"collections" : 2,
"objects" : 99992,
"avgObjSize" : 58494.27406192495,
"dataSize" : 5848959452,
"storageSize" : 6180712448,
"numExtents" : 0,
"indexes" : 17,
"indexSize" : 13615104,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff00000000000000a5")
}
},
"sh014-rs/host111-prod:27018,host113-prod:27017" : {
"db" : "records",
"collections" : 4,
"objects" : 91498,
"avgObjSize" : 114842.1660801329,
"dataSize" : 10507828512,
"storageSize" : 16454213568,
"numExtents" : 32,
"indexes" : 17,
"indexSize" : 42646016,
"fileSize" : 19251855360,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000004")
}
}
},
"objects" : 2170062,
"avgObjSize" : 48193.523844940835,
"dataSize" : 104583640243,
"storageSize" : 123156111104,
"numExtents" : 128,
"indexes" : 264,
"indexSize" : 312205824,
"fileSize" : 77007421440,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
I've noticed that it is mainly the "saveState" and "restoreState" counts that differ. What could be the possible reason for this execution time variance?
Thanks in advance.
I don't know if this is sufficient as an answer for you, but it is possible to get different numbers when running the same query.
It depends on:
how many other operations are happening at the moment on your server
whether the requested data (in this case, the index) is already in memory (RAM).
Here you can also find the documentation for the output of your explain()