I have a collection containing many product documents that have this structure. Each document represents a cartesian product record with a resulting product price.
{
"name": "PRD_SV_HB2_SVH",
"criterias": [
{
"type": "PREMIUM_REGION",
"value": "COD_RP_KZH"
},
{
"type": "ACCIDENT",
"value": "COD_UZ_EIN"
},
{
"type": "AGE_GROUP",
"value": "COD_LA_G36"
},
{
"type": "PRICE_MODEL",
"value": "COD_TM_HO2"
},
{
"type": "PRICE_TABLE",
"value": "PRT_SU_HB2_V001_2009010"
},
{
"type": "DEDUCTIBLE",
"value": "COD_SB_HO4"
}
],
"price": {
"pricingElements": {
"BASE_PRICE": {
"currency": "CHF",
"amount": 67.8
}
}
},
"priceType": "STANDARD",
"_class": "a.b.c.Product"
}
When querying the collection for a unique cartesian product record I use the following query:
db.product.find({ "name": "PRD_SV_HB2_SVH", "$and": [
{ "criterias": { "$elemMatch": { "value": "COD_LA_G36" } } },
{ "criterias": { "$elemMatch": { "value": "COD_SB_HO4" } } },
{ "criterias": { "$elemMatch": { "value": "COD_UZ_EIN" } } },
{ "criterias": { "$elemMatch": { "value": "COD_RP_KZH" } } },
{ "criterias": { "$elemMatch": { "value": "COD_TM_HO2" } } },
{ "criterias": { "$elemMatch": { "value": "PRT_SU_HB2_V001_2009010" } } }
]
})
The query takes more than 2 seconds to produce a result, which is not satisfactory. When I run explain on the same query, I can see that MongoDB uses the index called name, but it does not use the dedicated name_value index for this query.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "productEngine.product",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
},
{
"name" : {
"$eq" : "PRD_SV_HB2_SVH"
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1
},
"indexName" : "name",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1,
"criteria.value" : 1
},
"indexName" : "name_value",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ],
"criteria.value" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
],
"criteria.value" : [
"[MinKey, MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_LA_G36"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_SB_HO4"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_UZ_EIN"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_RP_KZH"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "COD_TM_HO2"
}
}
}
},
{
"criterias" : {
"$elemMatch" : {
"value" : {
"$eq" : "PRT_SU_HB2_V001_2009010"
}
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name" : 1,
"priceType" : 1,
"criteria.value" : 1
},
"indexName" : "name_priceType_value",
"isMultiKey" : false,
"multiKeyPaths" : {
"name" : [ ],
"priceType" : [ ],
"criteria.value" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name" : [
"[\"PRD_SV_HB2_SVH\", \"PRD_SV_HB2_SVH\"]"
],
"priceType" : [
"[MinKey, MaxKey]"
],
"criteria.value" : [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "1a63040d1b73",
"port" : 27018,
"version" : "3.4.10",
"gitVersion" : "078f28920cb24de0dd479b5ea6c66c644f6326e9"
},
"ok" : 1
}
I currently created the name_value compound index like this:
{"name":1, "criteria.value":1}
Is this the correct way to create a compound index on nested document fields, or am I missing something here? Why isn't it using the name_value index?
Related
I am working on a project where we will have almost 5 million documents in a collection. Each document will be around 18571 bytes in size, with 120 to 150 fields.
I have to return my response in less than 1 second, and the mongo query will perform almost 10 to 15 group-by operations in a faceted query on a maximum of 300,000 documents.
This is my first time handling this much of data where I have to return responses in real time.
I have implemented indexes and they reduced the response time to 5 to 6 seconds, but I still need it to be less than 1 second.
Below is sample query:
db.sample.aggregation(
"$match":{
"$and":[
{"is_new": <true/false>},
{"brand":<some-brand>},
{"year":{"$gte":<some-year>,"$lte":<some-year>}},
{"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
]
},
{
"$facet":{
"data":[{
"$project":{
"_id":"_id",
"brand":"$brand_name",
"model":"$model_name",
<will have almost 20 keys with lookup>
}
}],
"count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
"price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
<will have 12-15 group by>
}
}
)
Below is sample document:
{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" : {
"type" : "Point",
"coordinates" : [
-24.08180236816406,
31.507198333740234
]
},
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" : {
"camera_unit" : true
}
}
Below is the explain output
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"is_new" : true
},
{
"year" : {
"$gte" : 2018,
"$lte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" : {
"$eq" : true
}
},
{
"year" : {
"$lte" : 2018
}
},
{
"year" : {
"$gte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"is_new" : 1,
"year" : 1,
"sp_id" : 1
},
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"is_new" : [ ],
"year" : [ ],
"sp_id" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"is_new" : [
"[true, true]"
],
"year" : [
"[2018.0, 2018.0]"
],
"sp_id" : [
"[47590.0, 47590.0]",
"[48333.0, 48333.0]",
"[51333.0, 51333.0]",
<range of 40,000 seller_ids>
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" : {
"_id" : "$_id",
"brand_name" : "$brand_name",
"feature1" : "$feature1",
"feature2" : "$feature2",
"feature3" : "$feature3",
"feature4" : "$feature4",
"feature5" : "$feature5",
"feature6" : "$feature6",
"feature7" : "$feature7",
"feature8" : "$feature8",
"feature9" : "$feature9",
"feature10" : "$feature10",
"feature11" : "$feature11",
"feature12" : "$feature12",
"feature13" : "$feature13",
"feature14" : "$feature14",
"feature15" : "$feature15",
"feature16" : "$feature16",
"feature17" : "$feature17",
"feature18" : "$feature18",
"feature19" : "$feature19",
"feature20" : "$feature20"
}
}
],
"count" : [
{
"$group" : {
"_id" : "$_id"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"count" : true
}
}
],
"feature1" : [
{
"$match" : {
"feature1" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature1",
"name" : {
"$first" : "$feature1"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature2" : [
{
"$match" : {
"feature2" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature2",
"name" : {
"$first" : "$feature2"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature3" : [
{
"$match" : {
"feature3" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature3",
"name" : {
"$first" : "$feature3"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature4" : [
{
"$match" : {
"feature4" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature4",
"name" : {
"$first" : "$feature4"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature5" : [
{
"$match" : {
"feature5" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature5",
"name" : {
"$first" : "$fuel"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature6" : [
{
"$match" : {
"feature6" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature6",
"name" : {
"$first" : "$feature6"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature7" : [
{
"$match" : {
"feature7" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature7",
"name" : {
"$first" : "$feature7"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature8" : [
{
"$match" : {
"feature8" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature8",
"name" : {
"$first" : "$feature8"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature9" : [
{
"$match" : {
"feature9" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature9",
"name" : {
"$first" : "$feature9"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature10" : [
{
"$match" : {
"feature10" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature10",
"name" : {
"$first" : "$feature10"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"feature11" : [
{
"$match" : {
"feature11" : {
"$exists" : true
}
}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
]
}
}
],
"ok" : 1
}
If this information is not sufficient to find a solution, I will provide more.
I have been stuck on this for the last month.
Any help would be appreciated.
I have two mongo queries. The only difference between them is the value of the merchantId field, yet the two queries give me different winning plans.
First Query
db.transactions.find({"created":{"$gte":1527465600000,"$lte":1527551999000},"merchantId":940,"additionalInformation.REQUESTOR":{"$ne":"MOTO"},"$or":[{"paymentMode":{"$ne":"UPI"}},{"bankCode":{"$ne":"GTEZ"}}]}).sort({ _id: -1 }).limit(200).explain()
Output of above query
{
"queryPlanner" : {
"plannerVersion" : 1,
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 940
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : -1
},
"limitAmount" : 200,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"created" : 1,
"merchantId" : 1
},
"indexName" : "created_1_merchantId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"created" : [
"[1527465600000.0, 1527551999000.0]"
],
"merchantId" : [
"[940.0, 940.0]"
]
}
}
}
}
}
},
"serverInfo" : {
},
"ok" : 1
}
Second Query
db.transactions.find({"created":{"$gte":1527465600000,"$lte":1527551999000},"merchantId":1429,"additionalInformation.REQUESTOR":{"$ne":"MOTO"},"$or":[{"paymentMode":{"$ne":"UPI"}},{"bankCode":{"$ne":"GTEZ"}}]}).sort({ _id: -1 }).limit(200).explain()
Output of above query
{
"queryPlanner" : {
"plannerVersion" : 1,
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 1429
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 200,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$or" : [
{
"$not" : {
"bankCode" : {
"$eq" : "GTEZ"
}
}
},
{
"$not" : {
"paymentMode" : {
"$eq" : "UPI"
}
}
}
]
},
{
"merchantId" : {
"$eq" : 1429
}
},
{
"created" : {
"$lte" : 1527551999000
}
},
{
"created" : {
"$gte" : 1527465600000
}
},
{
"$not" : {
"additionalInformation.REQUESTOR" : {
"$eq" : "MOTO"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
"serverInfo" : {
},
"ok" : 1
}
As you can see, the only parameter that differs is merchantId, yet explain gives a different winning plan, and the IXSCAN stage shows that different indexes are used. In the first query the created_1_merchantId_1 index is used, and in the second query the _id_ index is used. The first query takes 40 seconds to return results, while the second query takes 1 second. Quick help will be highly appreciated.
I am new to mongo, and the query below performs really slowly on a record set of over 2 million records.
Query
db.testCollection.aggregate({
$match: {
active: {
$ne: false
}
}
}, {
$group: {
_id: {
productName: "$productName",
model: "$model",
version: "$version",
uid: "$uid"
},
total: {
$sum: 1
}
}
}, {
$project: {
total: 1,
model: "$_id.model",
version: "$_id.version",
uid: "$_id.uid",
productName: "$_id.productName"
}
}, {
$sort: {
model: 1
}
})
explain()
{
"stages" : [
{
"$cursor" : {
"query" : {
"active" : {
"$ne" : false
}
},
"fields" : {
"version" : 1,
"productName" : 1,
"model" : 1,
"uid" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "fms2.device",
"indexFilterSet" : false,
"parsedQuery" : {
"$nor" : [
{
"active" : {
"$eq" : false
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"active" : 1
},
"indexName" : "active",
"isMultiKey" : false,
"multiKeyPaths" : {
"active" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"active" : [
"[MinKey, false)",
"(false, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"productName" : "$productName",
"model" : "$model",
"version" : "$version",
"uid" : "$uid"
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : true,
"total" : true,
"model" : "$_id.model",
"version" : "$_id.version",
"uid" : "$_id.uid",
"productName" : "$_id.productName"
}
},
{
"$sort" : {
"sortKey" : {
"model" : 1
}
}
}
],
"ok" : 1
}
Is there a way to optimize this query further? I had a look at https://docs.mongodb.com/manual/core/aggregation-pipeline-optimization/ as well, but most of the suggestions there are not applicable to this query.
Not sure if it matters, but the result of this aggregation ends up with only 20-30 records.
I have three queries that get the data I need, grouped by currency and price/profit.
One query takes ~1.3 seconds on 230 000 items.
One item looks like this:
{
"_id" : ObjectId("590e59fca0404a6e5577302b"),
"make_name" : "Peugeot",
"model_name" : "307",
"car_id" : NumberInt("396554354"),
"title" : "Sell",
"description" : "Cool",
"site_name" : "olx.ua",
"first_registration" : ISODate("2002-01-01T00:00:00.000Z"),
"fuel" : "Petrol",
"mileage" : NumberInt("250000"),
"category" : "Limousine",
"horse_power" : null,
"cubic_capacity" : NumberInt("1600"),
"transmission" : "Manual",
"price" : NumberInt("5050"),
"currency" : "USD",
"negotiable" : true,
"profit" : NumberInt("-8"),
"owners_count" : NumberInt("2"),
"color" : NumberInt("3"),
"condition" : NumberInt("4"),
"updated_at" : ISODate("2017-06-01T03:51:34.000Z"),
"rear_camera" : false,
"ABS" : false,
"four_wheel_drive" : false,
"bluetooth" : false,
"board_computer" : false,
"cd_player" : false,
"electric_mirrors" : false,
"electric_windows" : true,
"parking_assistance" : false,
"handsfree" : false,
"guarantee" : false,
"head_up_display" : false,
"has_inspection" : false,
"air_conditioning" : false,
"alloy_wheel_rims" : false,
"multi_func_steering_wheel" : false,
"navigation" : false,
"non_smoking_car" : false,
"panorama_roof" : false,
"particle_filter" : false,
"rain_sensor" : false,
"full_service_history" : false,
"power_steering" : false,
"sunroof" : false,
"seat_heating" : false,
"sports_suspension" : false,
"sports_seats" : false,
"pre_heating" : false,
"start_stop" : false,
"taxi" : false,
"tax_paid" : true,
"cruise_control" : false,
"xenon_headlights" : true,
"security" : false,
"sport_package" : false,
"business" : true,
"damaged" : false,
"price_100" : 5000,
"profit_100" : 0
},
My query is:
db.cars.aggregate([{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01") } },
{ 'first_registration': { '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0 } },
{ 'price': { '$lte': 60000 } },
{ 'profit': { '$exists': true } },
{ 'profit': { '$gte': -20000 } },
{ 'profit': { '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': {
'$subtract': ['$price',
{ '$mod': ['$price', 100] }]
},
'profit': { '$subtract': ['$profit', { '$mod': ['$profit', 100] }] }
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
I need to get first item in a group of specified price/profit.
Example: 10 cars have a price/profit of 100-160 USD, so only one car will be returned for such a query, because the group (data) point for this car is price 100, profit 100. At least, that is how I hope it works.
First "match" query takes around 0.012 seconds to get 150 000 items.
So the issue is in group query, I think.
I tried to pre-build math operations subtract and mod:
db.cars.find({
'profit': {'$exists': true},
'price_100': {'$exists': false}, }).snapshot().forEach(function(doc){
db.cars.update({_id:doc._id}, {$set:{
"price_100":doc.price - (doc.price % 100),
"profit_100": doc.profit - (doc.profit % 100)
}});
});
Then my query started to look like:
db.cars.aggregate(
[
{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01") } },
{ 'first_registration': { '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0 } },
{ 'price': { '$lte': 60000 } },
{ 'profit': { '$exists': true } },
{ 'profit': { '$gte': -20000 } },
{ 'profit': { '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': '$price_100',
'profit': '$profit_100',
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
Unfortunately, it takes 300 milliseconds more than the original.
Explain output for my query:
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$exists" : true
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"currency" : "USD"
},
{
"damaged" : false
}
]
},
"fields" : {
"car_id" : NumberInt("1"),
"currency" : NumberInt("1"),
"price" : NumberInt("1"),
"price_100" : NumberInt("1"),
"profit" : NumberInt("1"),
"profit_100" : NumberInt("1"),
"_id" : NumberInt("0")
},
"queryPlanner" : {
"plannerVersion" : NumberInt("1"),
"namespace" : "master_test.cars",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"price" : "$price_100",
"profit" : "$profit_100"
},
"car_id" : {
"$first" : "$car_id"
},
"currency" : {
"$first" : "$currency"
},
"price" : {
"$first" : "$price"
},
"profit" : {
"$first" : "$profit"
}
}
}
],
"ok" : 1
}
Why 3 queries, one might ask? I have 3 currencies in my database (USD, EUR and PLN), so I make 3 requests. Currently I don't know how to unify the query.
UPDATE FOR NEIL:
After implementing your suggestions, I was able to reduce time from 1.3 seconds to 1 second.
Query looks like:
db.cars.aggregate([{
'$match': {
'$and': [
{ 'first_registration': { '$gte': ISODate("2000-01-01"), '$lte': ISODate("2017-01-01") } },
{ 'price': { '$gte': 0, '$lte': 60000 } },
{ 'profit': { '$exists': true, '$gte': -20000, '$lte': 30000 } },
{ 'updated_at': { '$gte': ISODate("2017-06-04") } },
{ 'currency': 'USD' },
{ 'damaged': false }]
}
},
{
'$group': {
'_id': {
'price': {
'$subtract': ['$price',
{ '$mod': ['$price', 100] }]
},
'profit': { '$subtract': ['$profit', { '$mod': ['$profit', 100] }] }
},
'car_id': { '$first': '$car_id' },
'currency': { '$first': '$currency' },
'price': { '$first': '$price' },
'profit': { '$first': '$profit' }
}
}])
And explain:
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z"),
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0,
"$lte" : 60000
}
},
{
"profit" : {
"$exists" : true,
"$gte" : -20000,
"$lte" : 30000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"currency" : "USD"
},
{
"damaged" : false
}
]
},
"fields" : {
"car_id" : NumberInt("1"),
"currency" : NumberInt("1"),
"price" : NumberInt("1"),
"profit" : NumberInt("1"),
"_id" : NumberInt("0")
},
"queryPlanner" : {
"plannerVersion" : NumberInt("1"),
"namespace" : "master_test.cars",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"currency" : {
"$eq" : "USD"
}
},
{
"damaged" : {
"$eq" : false
}
},
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"updated_at" : {
"$gte" : ISODate("2017-06-04T00:00:00.000Z")
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"first_registration" : {
"$lte" : ISODate("2017-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$lte" : 60000
}
},
{
"profit" : {
"$lte" : 30000
}
},
{
"first_registration" : {
"$gte" : ISODate("2000-01-01T00:00:00.000Z")
}
},
{
"price" : {
"$gte" : 0
}
},
{
"profit" : {
"$gte" : -20000
}
},
{
"profit" : {
"$exists" : true
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"updated_at" : 1,
"currency" : 1,
"damaged" : 1
},
"indexName" : "updated_at_1_currency_1_damaged_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"updated_at" : [ ],
"currency" : [ ],
"damaged" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt("2"),
"direction" : "forward",
"indexBounds" : {
"updated_at" : [
"[new Date(1496534400000), new Date(9223372036854775807)]"
],
"currency" : [
"[\"USD\", \"USD\"]"
],
"damaged" : [
"[false, false]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : {
"price" : {
"$subtract" : [
"$price",
{
"$mod" : [
"$price",
{
"$const" : 100
}
]
}
]
},
"profit" : {
"$subtract" : [
"$profit",
{
"$mod" : [
"$profit",
{
"$const" : 100
}
]
}
]
}
},
"car_id" : {
"$first" : "$car_id"
},
"currency" : {
"$first" : "$currency"
},
"price" : {
"$first" : "$price"
},
"profit" : {
"$first" : "$profit"
}
}
}
],
"ok" : 1
}
Running on the pre-built fields price_100 and profit_100 still takes 1.3 seconds, but the non-prebuilt query now takes 300 ms less, nice!
My aggregation is pretty slow. I've already made it a little faster (from 3000 ms to 200 ms) by putting the $match stage before the $unwind stage. Is there any other way to improve my aggregation? In the end there'll be just one result (the last one based on timestamp). The unwind part is the longest operation, if I'm right, yet I really do need it.
db.CpuInfo.aggregate([
{"$match":
{
"timestamp": {"$gte":1464764400},
'hostname': 'baklap4'
}
},
{ "$unwind": "$cpuList" },
{ "$group":
{ "_id":
{ "interval":
{ "$subtract": [
"$timestamp",
{ "$mod": [ "$timestamp", 60 * 5 ] }
]}
},
"avgCPULoad": { "$avg": "$cpuList.load" },
"timestamp": { "$max": "$timestamp" }
}
},
{ "$project": { "_id": 0, "avgCPULoad": 1, "timestamp": 1 } },
{$sort: {'timestamp': -1}},
{$limit: 1}
])
The items in my collection are all similar to this:
{
"_id": ObjectId("574d6175da461e77030041b7"),
"hostname": "VPS",
"timestamp": NumberLong(1460040691),
"cpuCores": NumberLong(2),
"cpuList": [
{
"name": "cpu1",
"load": 3.4
},
{
"name": "cpu2",
"load": 0.7
}
]
}
I've added the explain option to my aggregation and this is the result:
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"timestamp" : {
"$gte" : 1464732000
},
"hostname" : "baklap4"
},
"fields" : {
"cpuList" : 1,
"timestamp" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "prototyping.CpuInfo",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"hostname" : {
"$eq" : "baklap4"
}
},
{
"timestamp" : {
"$gte" : 1464732000
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"hostname" : {
"$eq" : "baklap4"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"timestamp" : NumberLong(1)
},
"indexName" : "timestamp_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"timestamp" : [
"[1464732000.0, inf.0]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$unwind" : {
"path" : "$cpuList"
}
},
{
"$group" : {
"_id" : {
"interval" : {
"$subtract" : [
"$timestamp",
{
"$mod" : [
"$timestamp",
{
"$const" : 300
}
]
}
]
}
},
"avgCPULoad" : {
"$avg" : "$cpuList.load"
},
"timestamp" : {
"$max" : "$timestamp"
}
}
},
{
"$project" : {
"_id" : false,
"timestamp" : true,
"avgCPULoad" : true
}
},
{
"$sort" : {
"sortKey" : {
"timestamp" : -1
},
"limit" : NumberLong(1)
}
}
],
"ok" : 1
}
When I look at my collection, I see that timestamp and _id are indexed:
db.CpuInfo.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "prototyping.CpuInfo"
},
{
"v" : 1,
"key" : {
"timestamp" : NumberLong(1)
},
"name" : "timestamp_1",
"ns" : "prototyping.CpuInfo",
"sparse" : false
}
]