I am working on a project where we will have almost 5 million documents in a collection. Each document will be around 18,571 bytes in size, with 120 to 150 fields.
I have to return my response in less than 1 second, and the MongoDB query will perform about 10 to 15 group-by operations in a faceted query on at most 300,000 documents.
This is my first time handling this much data where I have to return responses in real time.
I have added indexes, which reduced the response time to 5 to 6 seconds, but I still need it to be less than 1 second.
Below is sample query:
db.sample.aggregation(
"$match":{
"$and":[
{"is_new": <true/false>},
{"brand":<some-brand>},
{"year":{"$gte":<some-year>,"$lte":<some-year>}},
{"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
]
},
{
"$facet":{
"data":[{
"$project":{
"_id":"_id",
"brand":"$brand_name",
"model":"$model_name",
<will have almost 20 keys with lookup>
}
}],
"count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
"price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
<will have 12-15 group by>
}
}
)
Below is sample document:
{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" : {
"type" : "Point",
"coordinates" : [
-24.08180236816406,
31.507198333740234
]
},
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" : {
"camera_unit" : true
}
}
Below is the explain output
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"is_new" : true
},
{
"year" : {
"$gte" : 2018,
"$lte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" : {
"$eq" : true
}
},
{
"year" : {
"$lte" : 2018
}
},
{
"year" : {
"$gte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"is_new" : 1,
"year" : 1,
"sp_id" : 1
},
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"is_new" : [ ],
"year" : [ ],
"sp_id" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"is_new" : [
"[true, true]"
],
"year" : [
"[2018.0, 2018.0]"
],
"sp_id" : [
"[47590.0, 47590.0]",
"[48333.0, 48333.0]",
"[51333.0, 51333.0]",
<range of 40,000 seller_ids>
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" : {
"_id" : "$_id",
"brand_name" : "$brand_name",
"feature1" : "$feature1",
"feature2" : "$feature2",
"feature3" : "$feature3",
"feature4" : "$feature4",
"feature5" : "$feature5",
"feature6" : "$feature6",
"feature7" : "$feature7",
"feature8" : "$feature8",
"feature9" : "$feature9",
"feature10" : "$feature10",
"feature11" : "$feature11",
"feature12" : "$feature12",
"feature13" : "$feature13",
"feature14" : "$feature14",
"feature15" : "$feature15",
"feature16" : "$feature16",
"feature17" : "$feature17",
"feature18" : "$feature18",
"feature19" : "$feature19",
"feature20" : "$feature20"
}
}
],
"count" : [
{
"$group" : {
"_id" : "$_id"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"count" : true
}
}
],
"feature1" : [
{
"$match" : {
"feature1" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature1",
"name" : {
"$first" : "$feature1"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature2" : [
{
"$match" : {
"feature2" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature2",
"name" : {
"$first" : "$feature2"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature3" : [
{
"$match" : {
"feature3" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature3",
"name" : {
"$first" : "$feature3"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature4" : [
{
"$match" : {
"feature4" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature4",
"name" : {
"$first" : "$feature4"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature5" : [
{
"$match" : {
"feature5" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature5",
"name" : {
"$first" : "$fuel"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature6" : [
{
"$match" : {
"feature6" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature6",
"name" : {
"$first" : "$feature6"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature7" : [
{
"$match" : {
"feature7" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature7",
"name" : {
"$first" : "$feature7"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature8" : [
{
"$match" : {
"feature8" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature8",
"name" : {
"$first" : "$feature8"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature9" : [
{
"$match" : {
"feature9" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature9",
"name" : {
"$first" : "$feature9"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature10" : [
{
"$match" : {
"feature10" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature10",
"name" : {
"$first" : "$feature10"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"feature11" : [
{
"$match" : {
"feature11" : {
"$exists" : true
}
}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
]
}
}
],
"ok" : 1
}
If this information is not sufficient to find a solution, I will provide more.
I have been stuck on this for the last month.
Any help would be appreciated.
Related
I have an issue in MongoDB: I'm trying to build a very complex aggregate query, and it works almost as I want it to, but I still have trouble. The problem is that I need to move a specific field along so I can use it later.
My aggregate looks like this right now.
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
}])
and it will return data like this.
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : false
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
As you can see, the last document has preview.photo.primary on it, and this is the field I want to return when I'm done with my aggregate query.
My final query looks like this:
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
},{
'$unwind':'$media'
},{
'$project' : {
'preview' : 1,
'media': 1,
}
}, {
'$group': {
'_id':'$media._id',
'primary': {
'$first':'$preview'
}
}
}])
The problem here is that when I return $preview so I can find its primary value, it always returns only the first element, where the value does not exist; if I use $push instead, the problem is that I get everything.
Is there a way to pick the right primary value in my result? I have tried $addFields too, but without any luck.
Travel_sights data:
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"city_id" : ObjectId("5b6d0cb6222d4c70b803eaeb"),
"activated" : true,
"deleted" : false,
"url" : "url is here",
"name" : "title of it here",
"updated_at" : ISODate("2018-08-22T17:22:27.000Z"),
"content" : "content here",
"preview" : {
"photo" : [
{
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
},
{
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
},
{
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : true
},
{
"id" : ObjectId("5b7d9baa1fbad410de638bbb"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bae1fbad410e10f32f9"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb11fbad441494a663e"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb41fbad4ff97273402"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bb71fbad4ff99527e82"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bbb1fbad410de638bbc"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bbe1fbad410e10f32fa"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc11fbad441494a663f"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc41fbad4ff97273403"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bc71fbad4ff99527e83"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bca1fbad410de638bbd"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bcd1fbad441494a6640"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bd01fbad4ff97273404"),
"sort" : 0
}
]
}
}
3 sample photo documents with their bind data:
{
"_id" : ObjectId("5b7affea1fbad441494a663b"),
"file-name" : "55575110311__0F115282-B5A0-4654-AA44-B7DC2C682992.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
]
}
{
"_id" : ObjectId("5b7b002d1fbad441494a663c"),
"file-name" : "55575110748__E7B07EFD-9F7E-40D6-8B57-38F708E4C0C0.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is secoudn demo!",
"title" : "demo 3"
}
{
"_id" : ObjectId("5b7b00351fbad441494a663d"),
"file-name" : "paris2.jpg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is a demo1 :)",
"title" : "demo"
}
You can filter the array down to the elements where the primary field exists using the $filter aggregation operator, then $group by the media._id field and take the $first document value.
Finally, your query will be:
db.getCollection("travel_sights").aggregate([
{ "$match": {
"preview.photo" : { "$exists":true },
"_id": { "$in" : [ ObjectId("5b7af9701fbad410e10f32f7") ] }
}},
{ "$addFields": {
"preview.photo": {
"$arrayElemAt": [
{ "$filter": {
"input": "$preview.photo",
"as": "photo",
"cond": { "$ne": [ "$$photo.primary", undefined ] }
}}, 0
]
}
}},
{ "$lookup":{
"from" : "media_data",
"localField" : "_id",
"foreignField": "bind",
"as": "media"
}},
{ "$unwind":"$media" },
{ "$project" : { "preview" : 1, "media": 1, }},
{ "$group": {
"_id": "$media._id",
"primary": { "$first": "$preview" }
}}
])
I have a collection containing many product documents that have this structure. Each document represents a cartesian product record with a resulting product price.
{
"name": "PRD_SV_HB2_SVH",
"criterias": [
{
"type": "PREMIUM_REGION",
"value": "COD_RP_KZH"
},
{
"type": "ACCIDENT",
"value": "COD_UZ_EIN"
},
{
"type": "AGE_GROUP",
"value": "COD_LA_G36"
},
{
"type": "PRICE_MODEL",
"value": "COD_TM_HO2"
},
{
"type": "PRICE_TABLE",
"value": "PRT_SU_HB2_V001_2009010"
},
{
"type": "DEDUCTIBLE",
"value": "COD_SB_HO4"
}
],
"price": {
"pricingElements": {
"BASE_PRICE": {
"currency": "CHF",
"amount": 67.8
}
}
},
"priceType": "STANDARD",
"_class": "a.b.c.Product"
}
When querying the collection for a unique cartesian product record I use the following query:
db.product.find({ "name": "PRD_SV_HB2_SVH", "$and": [
{ "criterias": { "$elemMatch": { "value": "COD_LA_G36" } } },
{ "criterias": { "$elemMatch": { "value": "COD_SB_HO4" } } },
{ "criterias": { "$elemMatch": { "value": "COD_UZ_EIN" } } },
{ "criterias": { "$elemMatch": { "value": "COD_RP_KZH" } } },
{ "criterias": { "$elemMatch": { "value": "COD_TM_HO2" } } },
{ "criterias": { "$elemMatch": { "value": "PRT_SU_HB2_V001_2009010" } } }
]
})
The query takes more than 2 seconds to produce a result, which is not satisfactory. When I run explain on the same query, I can see that MongoDB uses the index named name, but it does not use the dedicated index name_value for this query.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "productEngine.product",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
},
{
"name" : {
"$eq" : "PRD_SV_HB2_SVH"
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1
},
"indexName" : "name",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1,
"criteria.value" : 1
},
"indexName" : "name_value",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ],
"criteria.value" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
],
"criteria.value" : [
"[MinKey, MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1,
"priceType" : 1,
"criteria.value" : 1
},
"indexName" : "name_priceType_value",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ],
"priceType" : [ ],
"criteria.value" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
],
"priceType" : [
"[MinKey, MaxKey]"
],
"criteria.value" : [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "1a63040d1b73",
"port" : 27018,
"version" : "3.4.10",
"gitVersion" : "078f28920cb24de0dd479b5ea6c66c644f6326e9"
},
"ok" : 1
}
I currently created the name_value compound index like this:
{"name":1, "criteria.value":1}
Is this the correct way to create a compound index on nested document fields, or am I missing something here? Why isn't it using the name_value index?
I have two Mongo queries; the only difference between them is the merchantId value, yet the two queries give me different winning plans.
First Query
db.transactions.find({"created":{"$gte":1527465600000,"$lte":1527551999000},"merchantId":940,"additionalInformation.REQUESTOR":{"$ne":"MOTO"},"$or":[{"paymentMode":{"$ne":"UPI"}},{"bankCode":{"$ne":"GTEZ"}}]}).sort({ _id: -1 }).limit(200).explain()
Output of above query
{
"queryPlanner" : {
"plannerVersion" : 1,
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 940
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : -1
},
"limitAmount" : 200,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created" : 1,
"merchantId" : 1
},
"indexName" : "created_1_merchantId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"created" : [
"[1527465600000.0, 1527551999000.0]"
],
"merchantId" : [
"[940.0, 940.0]"
]
}
}
}
}
}
},
"serverInfo" : {
},
"ok" : 1
}
Second Query
db.transactions.find({"created":{"$gte":1527465600000,"$lte":1527551999000},"merchantId":1429,"additionalInformation.REQUESTOR":{"$ne":"MOTO"},"$or":[{"paymentMode":{"$ne":"UPI"}},{"bankCode":{"$ne":"GTEZ"}}]}).sort({ _id: -1 }).limit(200).explain()
Output of above query
{
"queryPlanner" : {
"plannerVersion" : 1,
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 1429
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 200,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 1429
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
"serverInfo" : {
},
"ok" : 1
}
As you can see, the only parameter that differs is merchantId, yet explain gives a different winning plan, and IXSCAN shows different indexes being used. In the first query the created_1_merchantId_1 index is used, and in the second query the _id_ index is used. The first query takes 40 seconds to return results, while the second takes 1 second. Quick help will be highly appreciated.
I am new to Mongo, and the query below performs really slowly on a record set of over 2 million records.
Query
db.testCollection.aggregate({
$match: {
active: {
$ne: false
}
}
}, {
$group: {
_id: {
productName: "$productName",
model: "$model",
version: "$version",
uid: "$uid"
},
total: {
$sum: 1
}
}
}, {
$project: {
total: 1,
model: "$_id.model",
version: "$_id.version",
uid: "$_id.uid",
productName: "$_id.productName"
}
}, {
$sort: {
model: 1
}
})
explain()
{
"stages" : [
{
"$cursor" : {
"query" : {
"active" : {
"$ne" : false
}
},
"fields" : {
"version" : 1,
"productName" : 1,
"model" : 1,
"uid" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "fms2.device",
"indexFilterSet" : false,
"parsedQuery" : {
"$nor" : [
{
"active" : {
"$eq" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1
},
"indexName" : "active",
"isMultiKey" : false,
"multiKeyPaths" : {
"active" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[MinKey, false)",
"(false, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"productName" : "$productName",
"model" : "$model",
"version" : "$version",
"uid" : "$uid"
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : true,
"total" : true,
"model" : "$_id.model",
"version" : "$_id.version",
"uid" : "$_id.uid",
"productName" : "$_id.productName"
}
},
{
"$sort" : {
"sortKey" : {
"model" : 1
}
}
}
],
"ok" : 1
}
Is there a way to optimize this query further? I had a look at https://docs.mongodb.com/manual/core/aggregation-pipeline-optimization/ as well, but most of the suggestions there are not applicable to this query.
Not sure if it matters, but the result of this aggregation ends up with only 20-30 records.
I have three queries to get the needed data, grouped by currency and price/profit.
One query takes ~1.3 seconds on 230,000 items.
One item looks like this:
{
"_id" : ObjectId("590e59fca0404a6e5577302b"),
"make_name" : "Peugeot",
"model_name" : "307",
"car_id" : NumberInt("396554354"),
"title" : "Sell",
"description" : "Cool",
"site_name" : "olx.ua",
"first_registration" : ISODate("2002-01-01T00:00:00.000Z"),
"fuel" : "Petrol",
"mileage" : NumberInt("250000"),
"category" : "Limousine",
"horse_power" : null,
"cubic_capacity" : NumberInt("1600"),
"transmission" : "Manual",
"price" : NumberInt("5050"),
"currency" : "USD",
"negotiable" : true,
"profit" : NumberInt("-8"),
"owners_count" : NumberInt("2"),
"color" : NumberInt("3"),
"condition" : NumberInt("4"),
"updated_at" : ISODate("2017-06-01T03:51:34.000Z"),
"rear_camera" : false,
"ABS" : false,
"four_wheel_drive" : false,
"bluetooth" : false,
"board_computer" : false,
"cd_player" : false,
"electric_mirrors" : false,
"electric_windows" : true,
"parking_assistance" : false,
"handsfree" : false,
"guarantee" : false,
"head_up_display" : false,
"has_inspection" : false,
"air_conditioning" : false,
"alloy_wheel_rims" : false,
"multi_func_steering_wheel" : false,
"navigation" : false,
"non_smoking_car" : false,
"panorama_roof" : false,
"particle_filter" : false,
"rain_sensor" : false,
"full_service_history" : false,
"power_steering" : false,
"sunroof" : false,
"seat_heating" : false,
"sports_suspension" : false,
"sports_seats" : false,
"pre_heating" : false,
"start_stop" : false,
"taxi" : false,
"tax_paid" : true,
"cruise_control" : false,
"xenon_headlights" : true,
"security" : false,
"sport_package" : false,
"business" : true,
"damaged" : false,
"price_100" : 5000,
"profit_100" : 0
},
My query is:
db.cars.aggregate([{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01") } },
{ 'first_registration': { '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0 } },
{ 'price': { '$lte': 60000 } },
{ 'profit': { '$exists': true } },
{ 'profit': { '$gte': -20000 } },
{ 'profit': { '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': {
'$subtract': ['$price',
{ '$mod': ['$price', 100] }]
},
'profit': { '$subtract': ['$profit', { '$mod': ['$profit', 100] }] }
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
I need to get the first item in each group for a given price/profit.
Example: if 10 cars have a price/profit of 100-160 USD, only one car will be returned by such a query, because the group (data) point for these cars is price 100, profit 100. I hope it works this way.
The first "match" query takes around 0.012 seconds to get 150,000 items.
So the issue is in the group query, I think.
I tried to precompute the subtract and mod math operations:
db.cars.find({
'profit': {'$exists': true},
'price_100': {'$exists': false}, }).snapshot().forEach(function(doc){
db.cars.update({_id:doc._id}, {$set:{
"price_100":doc.price - (doc.price % 100),
"profit_100": doc.profit - (doc.profit % 100)
}});
});
Then my query started to look like:
db.cars.aggregate(
[
{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01") } },
{ 'first_registration': { '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0 } },
{ 'price': { '$lte': 60000 } },
{ 'profit': { '$exists': true } },
{ 'profit': { '$gte': -20000 } },
{ 'profit': { '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': '$price_100',
'profit': '$profit_100',
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
Unfortunately, it takes 300 milliseconds more than the original.
Explain output for my query:
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$exists" : true
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"currency" : "USD"
},
{
"damaged" : false
}
]
},
"fields" : {
"car_id" : NumberInt("1"),
"currency" : NumberInt("1"),
"price" : NumberInt("1"),
"price_100" : NumberInt("1"),
"profit" : NumberInt("1"),
"profit_100" : NumberInt("1"),
"_id" : NumberInt("0")
},
"queryPlanner" : {
"plannerVersion" : NumberInt("1"),
"namespace" : "master_test.cars",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"price" : "$price_100",
"profit" : "$profit_100"
},
"car_id" : {
"$first" : "$car_id"
},
"currency" : {
"$first" : "$currency"
},
"price" : {
"$first" : "$price"
},
"profit" : {
"$first" : "$profit"
}
}
}
],
"ok" : 1
}
Why 3 queries, one might ask? I have 3 currencies (USD, EUR and PLN) in my database, so I make 3 requests. Currently I don't know how to unify them into one query.
UPDATE FOR NEIL:
After implementing your suggestions, I was able to reduce time from 1.3 seconds to 1 second.
Query looks like:
db.cars.aggregate([{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01"), '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0, '$lte': 60000 } },
{ 'profit': { '$exists': true, '$gte': -20000, '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': {
'$subtract': ['$price',
{ '$mod': ['$price', 100] }]
},
'profit': { '$subtract': ['$profit', { '$mod': ['$profit', 100] }] }
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
And explain:
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z"),
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0,
"$lte" : 60000
}
},
{
"profit" : {
"$exists" : true,
"$gte" : -20000,
"$lte" : 30000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"currency" : "USD"
},
{
"damaged" : false
}
]
},
"fields" : {
"car_id" : NumberInt("1"),
"currency" : NumberInt("1"),
"price" : NumberInt("1"),
"profit" : NumberInt("1"),
"_id" : NumberInt("0")
},
"queryPlanner" : {
"plannerVersion" : NumberInt("1"),
"namespace" : "master_test.cars",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"updated_at" : 1,
"currency" : 1,
"damaged" : 1
},
"indexName" : "updated_at_1_currency_1_damaged_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"updated_at" : [ ],
"currency" : [ ],
"damaged" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt("2"),
"direction" : "forward",
"indexBounds" : {
"updated_at" : [
"[new Date(1496534400000), new Date(9223372036854775807)]"
],
"currency" : [
"[\"USD\", \"USD\"]"
],
"damaged" : [
"[false, false]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"price" : {
"$subtract" : [
"$price",
{
"$mod" : [
"$price",
{
"$const" : 100
}
]
}
]
},
"profit" : {
"$subtract" : [
"$profit",
{
"$mod" : [
"$profit",
{
"$const" : 100
}
]
}
]
}
},
"car_id" : {
"$first" : "$car_id"
},
"currency" : {
"$first" : "$currency"
},
"price" : {
"$first" : "$price"
},
"profit" : {
"$first" : "$profit"
}
}
}
],
"ok" : 1
}
Running on the pre-built fields price_100 and profit_100 still takes 1.3 seconds, but the non-prebuilt query now takes 300 ms less, nice!