MongoDB (text search with the relevant field) aggregation problem - mongodb

I have the MongoDB aggregation query
db.data.aggregate([{ "$match" : { "$text" : { "$search" : "STORAGE TYPE" } } },
{ "$group" :
{ "_id" :{"doc_type": "$doc_type" ,"title" : "$title", "player_name" : "$player_name", "player_type" : "INSTITUTION", "country_code" :"$country_code" },
"number_records" : { "$sum" : 1}
}
},
{"$match" : {"doc_type": "PATENT"} },
{"$sort":{"number_records" : -1}},
{"$limit" : 10}],
{"allowDiskuse" : true}
)
When I try to execute the above code, it keeps running for a long time and I do not get any output. Can anyone help me?
When I ran explain() on it, it showed the following output:
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"$text" : {
"$search" : "STORAGE TYPE"
}
},
{
"doc_type" : "PATENT"
}
]
},
"fields" : {
"country_code" : 1,
"doc_type" : 1,
"player_name" : 1,
"title" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "datadocuments.data",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"doc_type" : {
"$eq" : "PATENT"
}
},
{
"$text" : {
"$search" : "STORAGE TYPE",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"doc_type" : {
"$eq" : "PATENT"
}
},
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "title",
"parsedTextQuery" : {
"terms" : [
"storag",
"type"
],
"negatedTerms" : [ ],
"phrases" : [ ],
"negatedPhrases" : [ ]
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1
},
"indexName" : "title",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
}
}
]
}
}
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"doc_type" : "$doc_type",
"title" : "$title",
"player_name" : "$player_name",
"player_type" : {
"$const" : "INSTITUTION"
},
"country_code" : "$country_code"
},
"number_records" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"number_records" : -1
},
"limit" : NumberLong("10")
}
}
],
"ok" : 1
}
I couldn't figure out the mistake. Is there a problem in the aggregation? If not, how can I improve the performance?

Your error comes from your second $match stage: at that point the field doc_type no longer exists at the top level; after the $group stage it is available as _id.doc_type instead. However, you had better merge this stage into the first $match, which improves performance by reducing the number of documents passed to the $group stage.
Your improved query will be:
db.data.aggregate([
{"$match" : { "$text" : { "$search" : "STORAGE TYPE" },"doc_type": "PATENT" } },
{ "$group" :
{ "_id" :{"doc_type": "$doc_type" ,"title" : "$title", "player_name" : "$player_name", "player_type" : "INSTITUTION", "country_code" :"$country_code" },
"number_records" : { "$sum" : 1}
}
},
{"$sort":{"number_records" : -1}},
{"$limit" : 10}],
{"allowDiskuse" : true}
)

Related

MongoDB Query plan not using compound index

I am trying MongoDB with a dataset about the company profile margin for learning purpose. Here is the sample document
{
"parent_comp" : 1
"child_comp" : 101
"profit" : NumberLong(70320020)
}
I have created two indexes i.e one on child_comp field and the other one is a compound index with parent_comp, child_comp, and last_outage_timestamp.
For the below query, I executed the explain command to see the query plan.
MongoDB Enterprise > db.data.find({ "$and" : [{ "parent_comp" : 951, "child_comp" : 9351, "profit" : { "$gte" : { "$numberLong" : "500000000" } } }, { "profit" : { "$lte" : { "$numberLong" : "1000000000" } } }] }).sort({"profit" : 1}).limit(3).explain();
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.data",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"queryHash" : "B570EF0C",
"planCacheKey" : "187EF74B",
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"child_comp" : {
"$eq" : 9351
}
},
{
"parent_comp" : {
"$eq" : 951
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"profit" : 1
},
"indexName" : "profit_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"profit" : 1
},
"limitAmount" : 3,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"parent_comp" : {
"$eq" : 951
}
},
{
"profit" : {
"$lte" : {
"$numberLong" : "1000000000"
}
}
},
{
"profit" : {
"$gte" : {
"$numberLong" : "500000000"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"child_comp" : 1
},
"indexName" : "child_comp_index",
"isMultiKey" : false,
"multiKeyPaths" : {
"child_comp" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"child_comp" : [
"[9351.0, 9351.0]"
]
}
}
}
}
},
{
"stage" : "LIMIT",
"limitAmount" : 3,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent_comp" : 1,
"child_comp" : 1,
"profit" : 1
},
"indexName" : "parent_comp_1_child_comp_1_profit_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent_comp" : [ ],
"child_comp" : [ ],
"profit" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent_comp" : [
"[951.0, 951.0]"
],
"child_comp" : [
"[9351.0, 9351.0]"
],
"profit" : [
"[{ $numberLong: \"500000000\" }, { $numberLong: \"1000000000\" }]"
]
}
}
}
}
]
},
"serverInfo" : {
"host" : "localhost",
"port" : 27017,
"version" : "4.2.8",
"gitVersion" : "43d25888249164d76d5e04dd6cf38f6111e21f5f"
},
"ok" : 1
}
As you can see, the winning plan used a single-field index instead of the compound index. Could you please let me know why the compound index was not used?
Your query is sorting on profit, and the compound index does not include the field you are sorting on; hence, using the compound index would necessitate an additional sort stage.
The trade-offs and reasoning are further explained in the docs.
See also https://www.alexbevi.com/blog/2020/05/16/optimizing-mongodb-compound-indexes-the-equality-sort-range-esr-rule/.

Mongo Find complete at 0.105 sec, But count takes 1.0059 sec: Total 30024 records

My count query is very slow. I have created indexes, and I believe that is why the find query returns records in 0.105 sec.
db.collectionname.find({}) => 0.105 sec
When I run count with the same query, it takes 1.0059 sec.
db.collectionname.count({}) => 1.0059 sec
There are 2 lakh (200,000) records in total in my collection, and the search result is 30024 records.
Please guide me on what steps I need to take.
Mongo Version is : 3.2
db.getCollection('beta').find({"status":"1",
"type.und.value":"beta","state":{"$in":["a","b"]},
"changed":{"$gte":1463804682},"uid":{"$ne":"333"},
"price":{"$ne":[]}}).explain()
/* 1 */ {
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "beta",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"changed" : 1.0
},
"indexName" : "changed_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"changed" : [
"[1463804682.0, inf.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid" : 1.0
},
"indexName" : "uid_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[MinKey, \"333\")",
"(\"333\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"price" : 1.0
},
"indexName" : "price_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"price" : [
"[MinKey, undefined)",
"(undefined, [])",
"([], MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"type.und.value" : 1.0
},
"indexName" : "type.und.value_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"type.und.value" : [
"[\"beta\", \"beta\"]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"changed" : -1
},
"indexName" : "_changed",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"changed" : [
"[inf.0, 1463804682.0]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"status" : 1.0
},
"indexName" : "status_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"status" : [
"[\"1\", \"1\"]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "xxx",
"port" : xxx,
"version" : "3.2.13",
"gitVersion" : "xxx"
},
"ok" : 1.0 }
==========================================
db.getCollection('beta').explain().count({"status":"1",
"type.und.value":"beta","state":{"$in":["a","b"]},
"changed":{"$gte":1463804682},"uid":{"$ne":"333"},
"price":{"$ne":[]}})
/* 1 */ {
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "property.beta",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"changed" : 1.0
},
"indexName" : "changed_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"changed" : [
"[1463804682.0, inf.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"uid" : 1.0
},
"indexName" : "uid_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"uid" : [
"[MinKey, \"333\")",
"(\"333\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"price" : 1.0
},
"indexName" : "price_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"price" : [
"[MinKey, undefined)",
"(undefined, [])",
"([], MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "1"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"type.und.value" : 1.0
},
"indexName" : "type.und.value_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"type.und.value" : [
"[\"beta\", \"beta\"]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"status" : {
"$eq" : "1"
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"changed" : -1
},
"indexName" : "_changed",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"changed" : [
"[inf.0, 1463804682.0]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"type.und.value" : {
"$eq" : "beta"
}
},
{
"changed" : {
"$gte" : 1463804682.0
}
},
{
"state" : {
"$in" : [
"a",
"b"
]
}
},
{
"$not" : {
"price" : {
"$eq" : []
}
}
},
{
"$not" : {
"uid" : {
"$eq" : "333"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"status" : 1.0
},
"indexName" : "status_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"status" : [
"[\"1\", \"1\"]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "xxx",
"port" : xxx,
"version" : "3.2.13",
"gitVersion" : "xxx"
},
"ok" : 1.0 }

Mongo date range index with filters

We have the below query
db.Comment.find(
{
$and: [
{ reportCount: { $gt: 0 } },
{ assignee: { $exists: false } },
{ creationDate: { $gt: new Date(1507831097809) } },
{ creationDate: { $lt: new Date(1508522297966) } },
{ siteId: 'MAIN' },
{ parent: { $exists: false } },
{ status: 'ACTIVE' }
]
})
.sort({ creationDate: 1 })
And we have an index
{
"v" : 2,
"key" : {
"creationDate" : 1,
"reportCount" : 1,
"label" : 1
}
}
Here are explain results:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myNameSpace",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"siteId" : 1.0,
"updatedDate" : 1.0,
"label" : 1.0
},
"indexName" : "siteId_1_updatedDate_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"siteId" : [],
"updatedDate" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"siteId" : [
"[\"MAIN\", \"MAIN\"]"
],
"updatedDate" : [
"[MinKey, MaxKey]"
],
"label" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
{
"stage" : "SORT",
"sortPattern" : {
"creationDate" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"creationDate" : {
"$lt" : ISODate("2017-10-20T17:58:17.966Z")
}
},
{
"creationDate" : {
"$gt" : ISODate("2017-10-12T17:58:17.809Z")
}
},
{
"reportCount" : {
"$gt" : 0.0
}
}
]
},
"inputStage" : {
"stage" : "AND_SORTED",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"parent" : 1.0
},
"indexName" : "parent_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"parent" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"parent" : [
"[null, null]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"assignee" : 1.0
},
"indexName" : "assignee_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"assignee" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"assignee" : [
"[null, null]"
]
}
}
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 19,
"executionTimeMillis" : 8,
"totalKeysExamined" : 533,
"totalDocsExamined" : 56,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"siteId" : {
"$eq" : "MAIN"
}
},
{
"status" : {
"$eq" : "ACTIVE"
}
},
{
"$nor" : [
{
"assignee" : {
"$exists" : true
}
}
]
},
{
"$nor" : [
{
"parent" : {
"$exists" : true
}
}
]
}
]
},
"nReturned" : 19,
"executionTimeMillisEstimate" : 0,
"works" : 534,
"advanced" : 19,
"needTime" : 513,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 56,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 56,
"executionTimeMillisEstimate" : 0,
"works" : 533,
"advanced" : 56,
"needTime" : 476,
"needYield" : 0,
"saveState" : 20,
"restoreState" : 20,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"creationDate" : 1.0,
"reportCount" : 1.0,
"label" : 1.0
},
"indexName" : "creationDate_1_reportCount_1_label_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"creationDate" : [],
"reportCount" : [],
"label" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"creationDate" : [
"(new Date(1507831097809), new Date(1508522297966))"
],
"reportCount" : [
"(0.0, inf.0]"
],
"label" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 533,
"seeks" : 477,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"ok" : 1.0
}
The query is still taking 700-800 ms to return the data. How can I change the index to make the query run faster? Please disregard the values "keysExamined" : 533 and "seeks" : 477; this is just test data.
It looks like it's using an index, but only the first field in the index? Also, isMultiKey is false?
A few key points from the explain plan output:
The query addresses the following attributes: siteId, status, creationDate, reportCount, assignee, parent
The winning plan has two stages:
IXSCAN uses creationDate_1_reportCount_1_label_1; it performs indexed lookups on creationDate and reportCount to identify 56 documents, which are then forwarded to the FETCH stage
FETCH receives 56 documents from the IXSCAN stage and then interrogates these documents to apply the siteId, status, assignee and parent filters. This interrogation causes 37 documents to be discarded, resulting in 19 documents being returned.
So, your index covers just 2 of the 6 attributes in your query and the remaining 4 attributes in your query are applied by examining the documents not the index. If you want this query to be fully index covered then create the following index:
db.collection.createIndex(
{siteId: 1, status: 1, creationDate: 1, reportCount: 1, assignee: 1, parent: 1}
)
If you re-run with this index in place then you should find that (a) MongoDB chooses this index and (b) the number of documents forwarded by the IXSCAN stage is the same as the number of documents returned by your find call.
I say "should find" because there are other aspects here which might result in MongoDB choosing a different index, e.g. the use of $nor and the sort stage (creationDate: 1). I would recommend tweaking the index, running with explain 'on' after each tweak, and looking for these key items in the executionStats sub-document:
"nReturned"
"totalKeysExamined"
"totalDocsExamined"
A simple rule of thumb is this: the closer totalKeysExamined is to nReturned and the closer totalDocsExamined is to zero ... the better your index coverage.
There is also the question of the cost of an index (in terms of impact on write times and index storage), so I'd suggest considering your non-functional requirements: can your desired elapsed times be achieved without full index coverage? If not, then you should proceed with empirical testing, but be prepared to tweak your choice in response to what the explain() output tells you.

MongoDB query not using intersection indexes

I have MongoDB 3.2.8 with a collection and a few indexes on different fields. I'm executing a query with many filters, expecting index intersection to kick in, but for some reason it does not. Additionally, it is not showing up in the explain(true) allPlans output.
The query
{
"$and": [
{
"category": {
"$in": [
1,
5
]
}
},
{
"city": {
"$in": [
"y"
]
}
},
{
"neighbourhood": {
"$in": [
"x",
null
]
}
},
{
"$or": [
{
"price": {
"$lte": 3
}
},
{
"price": null
}
]
}
]
}
db.items.find(query).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "xxx.items",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"price" : {
"$eq" : null
}
},
{
"price" : {
"$lte" : 3
}
}
]
},
{
"category" : {
"$in" : [
1,
5
]
}
},
{
"city" : {
"$in" : [
"y"
]
}
},
{
"neighbourhood" : {
"$in" : [
null,
"x"
]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"price" : {
"$eq" : null
}
},
{
"price" : {
"$lte" : 3
}
}
]
},
{
"category" : {
"$in" : [
1,
5
]
}
},
{
"neighbourhood" : {
"$in" : [
null,
"x"
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"city" : 1,
"updatedAt" : 1
},
"indexName" : "city_updatedAt",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"city" : [
"[\"y\", \"y\"]"
],
"updatedAt" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"category" : {
"$in" : [
1,
5
]
}
},
{
"city" : {
"$in" : [
"y"
]
}
},
{
"neighbourhood" : {
"$in" : [
null,
"x"
]
}
}
]
},
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$or" : [
{
"price" : {
"$eq" : null
}
},
{
"price" : {
"$lte" : 3
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"price" : 1
},
"indexName" : "price_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"price" : [
"[null, null]",
"[-inf.0, 3.0]"
]
}
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"price" : {
"$eq" : null
}
},
{
"price" : {
"$lte" : 3
}
}
]
},
{
"category" : {
"$in" : [
1,
5
]
}
},
{
"neighbourhood" : {
"$in" : [
null,
"x"
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"city" : 1
},
"indexName" : "city_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"city" : [
"[\"y\", \"y\"]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"price" : {
"$eq" : null
}
},
{
"price" : {
"$lte" : 3
}
}
]
},
{
"city" : {
"$in" : [
"y"
]
}
},
{
"neighbourhood" : {
"$in" : [
null,
"x"
]
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"category" : 1,
"updatedAt" : -1
},
"indexName" : "category",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"category" : [
"[1.0, 1.0]",
"[5.0, 5.0]"
],
"updatedAt" : [
"[MaxKey, MinKey]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "xxx",
"port" : 27017,
"version" : "3.2.8",
"gitVersion" : "ed70e33130c977bda0024c125b56d159573dbaf0"
},
"ok" : 1
}
db.items.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "xxx.items"
},
{
"v" : 1,
"unique" : true,
"key" : {
"RecordID" : 1
},
"name" : "RecordID",
"ns" : "xxx.items"
},
{
"v" : 1,
"key" : {
"SubCatID" : 1,
"updatedAt" : -1
},
"name" : "category",
"ns" : "xxx.items"
},
{
"v" : 1,
"key" : {
"updatedAt" : -1
},
"name" : "updatedAt_-1",
"ns" : "xxx.items",
"background" : true
},
{
"v" : 1,
"key" : {
"city" : 1
},
"name" : "city_1",
"ns" : "xxx.items"
},
{
"v" : 1,
"key" : {
"search" : 1
},
"name" : "search_1",
"ns" : "xxx.items",
"background" : true
},
{
"v" : 1,
"key" : {
"rooms" : 1
},
"name" : "rooms_1",
"ns" : "xxx.items",
"background" : true
},
{
"v" : 1,
"key" : {
"price" : 1
},
"name" : "price_1",
"background" : true,
"ns" : "xxx.items"
},
{
"v" : 1,
"key" : {
"city" : 1,
"updatedAt" : 1
},
"name" : "city_updatedAt",
"ns" : "xxx.items",
"background" : true
}
]
Sample record: db.items.find().limit(1)[0]
{
"_id" : ObjectId("568ee714578df40300ac65b0"),
"type" : "z",
"CatID" : 2,
"category" : 5,
"RecordID" : "1469882",
"title" : "x - x x",
"subtitle" : "x/x' - 7 x",
"subtitle2" : "3,700,000",
"someProps1" : false,
"someProps2" : 14,
"subtite3" : "some title",
"type" : 3,
"img" : "x",
"URL" : "y",
"someProps" : 0,
"latitude" : 31.809843,
"longitude" : 35.191562,
"Map_address" : {
"adress" : "x - x x",
"lat" : 31.809843,
"long" : 35.191562
},
"created_at" : ISODate("2016-01-07T22:30:44.425Z"),
"updated_at" : ISODate("2016-01-14T15:53:43.110Z"),
"price" : 3700000,
"rooms" : 7,
"updatedAt" : 1452292244,
"search" : "x - x x 14-01-2016",
"street" : null
}

Mongo $group too slow

I have a MongoDB collection of about 168,200,000 documents. I am trying to get the average of a certain field with $group, and I am using $match before the $group in the pipeline so that the index on client.city is used. But the query takes about 5 minutes to run, which is very slow.
Here are the things I tried:
db.ar12.aggregate(
{$match:{'client.city':'New York'}},
{'$group':{'_id':'client.city', 'avg':{'$avg':'$length'}}}
)
db.ar12.aggregate(
{$match:{'client.city':'New York'}},
{'$group':{'_id':null, 'avg':{'$avg':'$length'}}}
)
db.ar12.aggregate(
{$match:{'client.city':'New York'}},
{$project: {'length':1}},
{'$group':{'_id':null, 'avg':{'$avg':'$length'}}}
)
All 3 queries take about the same time. The number of documents with client.city equal to New York is 1,231,672, and find({'client.city':'New York'}).count() takes a second to run.
> db.version()
3.2.0
EDIT
Here's the explain result... As for the comment about adding a compound index with length: would that help, even though I am not searching by length and I want all lengths?
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"client.city" : "New York"
},
"fields" : {
"length" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "clients.ar12",
"indexFilterSet" : false,
"parsedQuery" : {
"client.city" : {
"$eq" : "New York"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"client.city" : 1
},
"indexName" : "client.city_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"client.city" : [
"[\"New York\", \"New York\"]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$project" : {
"length" : true
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"total" : {
"$avg" : "$length"
}
}
}
],
"ok" : 1
}
EDIT 2
I have added a compound index on client.city and length, but to no avail; the query is still too slow. I tried these 2 queries:
db.ar12.aggregate(
{$match: {'client.city':'New York'}},
{$project: {'client.city':1, 'length':1}},
{'$group':{'_id':'$client.city', 'avg':{'$avg':'$length'}}}
)
The above query wasn't using the compound index, so I tried this to force using it, and still nothing changed:
db.ar12.aggregate(
{$match: { $and : [{'client.city':'New York'}, {'length':{'$gt':0}}]}},
{$project: {'client.city':1, 'length':1}},
{'$group':{'_id':'$client.city', 'avg':{'$avg':'$length'}}}
)
below is the explain of the last query:
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"client.city" : "New York"
},
{
"length" : {
"$gt" : 0
}
}
]
},
"fields" : {
"client.city" : 1,
"length" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "clients.ar12",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"client.city" : {
"$eq" : "New York"
}
},
{
"length" : {
"$gt" : 0
}
}
]
},
"winningPlan" : {
"stage" : "CACHED_PLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"client.city" : 1,
"length" : 1
},
"indexName" : "client.city_1_length_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"client.city" : [
"[\"New York\", \"New York\"]"
],
"length" : [
"(0.0, inf.0]"
]
}
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$project" : {
"client" : {
"city" : true
},
"length" : true
}
},
{
"$group" : {
"_id" : "$client.city",
"avg" : {
"$avg" : "$length"
}
}
}
],
"ok" : 1
}
I have found a workaround: length ranges from 1 to 70, so in Python I iterated from 1 to 70 and found the count of each length for each city,
db.ar12.find({'client.city':'New York', 'length':i}).count()
which is very fast, and then calculated the average in Python; it takes about 2 seconds to run.
This is not the best solution, since I have other queries to run and I don't know if I can find a workaround for all of them...