mongodb aggregate query not using index sort

Hi, my example document is below.
{
"_id" : ObjectId("5ee2234fde52e50d9520f6d7"),
"ClientMessageId" : "EAAB38DD88551BF180FA005056BB48A4",
"ClientMessageType" : "UpdateSalesModelAndVendor",
"Sku" : "HBV00000XXXXX",
"TraceId" : "EAAB38DD885504F180FA005056BB48A4",
"Data" : [
{
"_id" : ObjectId("5ee2234fde52e50d9520f6d8"),
"Sku" : "HBV00000XXXXX",
"IsActive" : false,
"Version" : NumberInt(1),
"CommandName" : "UpdateSalesModelAndVendorCommand",
"TraceId" : "EAAB38DD885504F180FA005056BB48A4",
"ClientMessageType" : "UpdateSalesModelAndVendor",
"ClientMessageId" : "EAAB38DD88551BF180FA005056BB48A4",
"ClientUsername" : "USER",
"ClientDateTime" : ISODate("2020-06-10T16:38:55.000+0000"),
"ReceivedAt" : ISODate("2020-06-10T16:42:23.992+0000"),
"InfoMessages" : null,
"WarningMessages" : null,
"ErrorMessages" : [
]
}
],
"MessageType" : "Listing.RetailListingCreated",
"__v" : NumberInt(0)
}
I have these indexes (the default _id index plus two custom ones):
[
{
"v" : 2.0,
"key" : {
"_id" : 1.0
},
"name" : "_id_"
},
{
"v" : 2.0,
"key" : {
"Data.Sku" : 1.0
},
"name" : "Data.Sku_1",
"background" : true
},
{
"v" : 2.0,
"key" : {
"Data.Sku" : 1.0,
"Data.ReceivedAt" : -1.0
},
"name" : "Data.Sku_1_Data.ReceivedAt_-1",
"background" : true
}
]
This query takes 0.8 ms (index used):
db.eventlogs.aggregate([
{ $sort: { "Data.Sku": -1 } },
{ $unwind: "$Data" },
{ "$skip": 0 }, { "$limit": 50 },
],
{ allowDiskUse: true }
)
but this query takes over 2 minutes (index not used):
db.eventlogs.aggregate([
{ $sort: { "Data.ReceivedAt": -1 } },
{ $unwind: "$Data" },
{ "$skip": 0 }, { "$limit": 50 },
],
{ allowDiskUse: true }
)
How can I solve this problem? Should I create a new index for Data.ReceivedAt?
db.eventlogs.createIndex( { "Data.ReceivedAt": -1}, { "background": true});
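The existing compound index leads with Data.Sku, so a $sort on Data.ReceivedAt alone generally cannot use it; the single-field index proposed above is the usual candidate. A quick way to confirm what the server actually does (just a sketch, assuming that index has been built) is to request the plan:

// Sketch: ask the server for the plan of the slow pipeline. A winningPlan whose
// input stage is an IXSCAN on the Data.ReceivedAt index (rather than a COLLSCAN
// followed by an in-memory SORT) means the leading $sort is served by the index.
db.eventlogs.aggregate(
  [
    { $sort: { "Data.ReceivedAt": -1 } },
    { $unwind: "$Data" },
    { $skip: 0 }, { $limit: 50 }
  ],
  { explain: true, allowDiskUse: true }
)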

Related

MongoDB slow facet query using multiple group by

I am working on a project where we will have almost 5 million documents in a collection, each around 18,571 bytes with 120 to 150 fields.
I have to return my response in less than 1 second, and the Mongo query performs roughly 10 to 15 group-bys in a faceted query over at most 300,000 documents.
This is my first time handling this much data while having to return responses in real time.
I have implemented indexes and they reduced the response time to 5 to 6 seconds, but I still need it under 1 second.
Below is the sample query:
db.sample.aggregate([
  {
    "$match": {
      "$and": [
        { "is_new": <true/false> },
        { "brand": <some-brand> },
        { "year": { "$gte": <some-year>, "$lte": <some-year> } },
        { "seller_id": { "$in": [ <array-of-seller-ids-may-have-40,000-seller-ids> ] } }
      ]
    }
  },
  {
    "$facet": {
      "data": [{
        "$project": {
          "_id": "$_id",
          "brand": "$brand_name",
          "model": "$model_name",
          <will have almost 20 keys with lookup>
        }
      }],
      "count": [ { "$group": { "_id": "$_id" } }, { "$count": "vin_count" } ],
      "price": [ { "$bucketAuto": { "groupBy": "$price", "buckets": 1 } } ],
      <will have 12-15 group by>
    }
  }
])
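For reference, each per-feature branch shown in the explain output below follows the same $match-on-$exists, $group, $sort-by-count pattern. A more compact way to write one such branch (only a sketch, not the original pipeline; filter values are taken from the explain output below, and the redundant name field, which duplicates _id, is dropped) is $sortByCount, available since MongoDB 3.4:

// Sketch only: one facet branch rewritten with $sortByCount, which groups on an
// expression and sorts by count descending in a single stage.
db.sample.aggregate([
  { "$match": { "is_new": true, "year": { "$gte": 2018, "$lte": 2018 } } },
  { "$facet": {
      "feature1": [
        { "$match": { "feature1": { "$exists": true } } },
        { "$sortByCount": "$feature1" }
      ],
      "price": [
        { "$bucketAuto": { "groupBy": "$price", "buckets": 1 } }
      ]
  }}
])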
Below is sample document:
{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" : {
"type" : "Point",
"coordinates" : [
-24.08180236816406,
31.507198333740234
]
},
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" : {
"camera_unit" : true
}
}
Below is the explain output
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"is_new" : true
},
{
"year" : {
"$gte" : 2018,
"$lte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" : {
"$eq" : true
}
},
{
"year" : {
"$lte" : 2018
}
},
{
"year" : {
"$gte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"is_new" : 1,
"year" : 1,
"sp_id" : 1
},
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"is_new" : [ ],
"year" : [ ],
"sp_id" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"is_new" : [
"[true, true]"
],
"year" : [
"[2018.0, 2018.0]"
],
"sp_id" : [
"[47590.0, 47590.0]",
"[48333.0, 48333.0]",
"[51333.0, 51333.0]",
<range of 40,000 seller_ids>
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" : {
"_id" : "$_id",
"brand_name" : "$brand_name",
"feature1" : "$feature1",
"feature2" : "$feature2",
"feature3" : "$feature3",
"feature4" : "$feature4",
"feature5" : "$feature5",
"feature6" : "$feature6",
"feature7" : "$feature7",
"feature8" : "$feature8",
"feature9" : "$feature9",
"feature10" : "$feature10",
"feature11" : "$feature11",
"feature12" : "$feature12",
"feature13" : "$feature13",
"feature14" : "$feature14",
"feature15" : "$feature15",
"feature16" : "$feature16",
"feature17" : "$feature17",
"feature18" : "$feature18",
"feature19" : "$feature19",
"feature20" : "$feature20"
}
}
],
"count" : [
{
"$group" : {
"_id" : "$_id"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"count" : true
}
}
],
"feature1" : [
{
"$match" : {
"feature1" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature1",
"name" : {
"$first" : "$feature1"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature2" : [
{
"$match" : {
"feature2" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature2",
"name" : {
"$first" : "$feature2"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature3" : [
{
"$match" : {
"feature3" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature3",
"name" : {
"$first" : "$feature3"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature4" : [
{
"$match" : {
"feature4" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature4",
"name" : {
"$first" : "$feature4"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature5" : [
{
"$match" : {
"feature5" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature5",
"name" : {
"$first" : "$fuel"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature6" : [
{
"$match" : {
"feature6" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature6",
"name" : {
"$first" : "$feature6"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature7" : [
{
"$match" : {
"feature7" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature7",
"name" : {
"$first" : "$feature7"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature8" : [
{
"$match" : {
"feature8" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature8",
"name" : {
"$first" : "$feature8"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature9" : [
{
"$match" : {
"feature9" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature9",
"name" : {
"$first" : "$feature9"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature10" : [
{
"$match" : {
"feature10" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature10",
"name" : {
"$first" : "$feature10"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"feature11" : [
{
"$match" : {
"feature11" : {
"$exists" : true
}
}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
]
}
}
],
"ok" : 1
}
If this information is not complete enough for a solution, I will provide more.
I have been stuck on this for the last month.
Any help would be appreciated.

Return specific array value field in aggregate

I have an issue in MongoDB. I'm trying to build a very complex aggregate query, and it works almost as I want, but I still have trouble: the problem is that I need to move a specific field so I can use it later.
My aggregation looks like this right now:
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
}])
and it returns data like this:
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : false
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
As you can see, the last document has preview.photo.primary on it, and this is the field I want to return when I'm done with my aggregate query.
My final query looks like this:
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
},{
'$unwind':'$media'
},{
'$project' : {
'preview' : 1,
'media': 1,
}
}, {
'$group': {
'_id':'$media._id',
'primary': {
'$first':'$preview'
}
}
}])
The problem here is that when I return $preview so I can find the primary from it, it always returns only the first element, where the value does not exist; if I use $push instead, the problem is that I get everything.
Is there a way I can pick the right primary value in my result? I have tried $addFields too, but without any kind of luck.
Travel_sights data:
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"city_id" : ObjectId("5b6d0cb6222d4c70b803eaeb"),
"activated" : true,
"deleted" : false,
"url" : "url is here",
"name" : "title of it here",
"updated_at" : ISODate("2018-08-22T17:22:27.000Z"),
"content" : "content here",
"preview" : {
"photo" : [
{
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
},
{
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
},
{
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : true
},
{
"id" : ObjectId("5b7d9baa1fbad410de638bbb"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bae1fbad410e10f32f9"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb11fbad441494a663e"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb41fbad4ff97273402"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bb71fbad4ff99527e82"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bbb1fbad410de638bbc"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bbe1fbad410e10f32fa"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc11fbad441494a663f"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc41fbad4ff97273403"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bc71fbad4ff99527e83"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bca1fbad410de638bbd"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bcd1fbad441494a6640"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bd01fbad4ff97273404"),
"sort" : 0
}
]
}
}
Three sample photo bind documents:
{
"_id" : ObjectId("5b7affea1fbad441494a663b"),
"file-name" : "55575110311__0F115282-B5A0-4654-AA44-B7DC2C682992.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
]
}
{
"_id" : ObjectId("5b7b002d1fbad441494a663c"),
"file-name" : "55575110748__E7B07EFD-9F7E-40D6-8B57-38F708E4C0C0.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is secoudn demo!",
"title" : "demo 3"
}
{
"_id" : ObjectId("5b7b00351fbad441494a663d"),
"file-name" : "paris2.jpg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is a demo1 :)",
"title" : "demo"
}
You can filter the element out of the array where the primary field exists using the $filter aggregation operator, and then easily $group on the media._id field and take the $first document value.
Finally, your query will be:
db.getCollection("travel_sights").aggregate([
{ "$match": {
"preview.photo" : { "$exists":true },
"_id": { "$in" : [ ObjectId("5b7af9701fbad410e10f32f7") ] }
}},
{ "$addFields": {
"preview.photo": {
"$arrayElemAt": [
{ "$filter": {
"input": "$preview.photo",
"as": "photo",
"cond": { "$ne": [ "$$photo.primary", undefined ] }
}}, 0
]
}
}},
{ "$lookup":{
"from" : "media_data",
"localField" : "_id",
"foreignField": "bind",
"as": "media"
}},
{ "$unwind":"$media" },
{ "$project" : { "preview" : 1, "media": 1, }},
{ "$group": {
"_id": "$media._id",
"primary": { "$first": "$preview" }
}}
])
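One possible variation (an assumption about the intent, not part of the original answer): since the photo array also contains entries with "primary" : false, which would pass the $ne-to-undefined check, the condition can be made stricter so only the photo explicitly flagged true is kept:

// Only this stage changes; the rest of the pipeline stays the same.
{ "$addFields": {
  "preview.photo": {
    "$arrayElemAt": [
      { "$filter": {
        "input": "$preview.photo",
        "as": "photo",
        // keep only the element explicitly flagged as the primary photo
        "cond": { "$eq": [ "$$photo.primary", true ] }
      }}, 0
    ]
  }
}}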

Slow Aggregation MongoDB

My aggregation is pretty slow. I've already made it a little faster (from 3000 ms to 200 ms) by placing the $match stage before the $unwind stage. Is there any other way to improve my aggregation? In the end there will be just one result (the latest one based on timestamp). If I'm right, the $unwind stage is the longest operation, yet I really do need it.
db.CpuInfo.aggregate([
{"$match":
{
"timestamp": {"$gte":1464764400},
'hostname': 'baklap4'
}
},
{ "$unwind": "$cpuList" },
{ "$group":
{ "_id":
{ "interval":
{ "$subtract": [
"$timestamp",
{ "$mod": [ "$timestamp", 60 * 5 ] }
]}
},
"avgCPULoad": { "$avg": "$cpuList.load" },
"timestamp": { "$max": "$timestamp" }
}
},
{ "$project": { "_id": 0, "avgCPULoad": 1, "timestamp": 1 } },
{$sort: {'timestamp': -1}},
{$limit: 1}
])
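Since every document carries its own cpuList, the per-document average can also be computed with the $avg expression (usable in $project since MongoDB 3.2) instead of unwinding. This is only a sketch, with the caveat noted in the comment:

// Sketch (not the original pipeline): avoid $unwind by averaging the array per
// document first. Caveat: averaging the per-document averages matches the unwound
// result only when every document has the same number of cpuList entries (as here,
// where each host reports a fixed cpuCores).
db.CpuInfo.aggregate([
  { "$match": { "timestamp": { "$gte": 1464764400 }, "hostname": "baklap4" } },
  { "$project": { "timestamp": 1, "docAvg": { "$avg": "$cpuList.load" } } },
  { "$group": {
      "_id": { "interval": { "$subtract": [ "$timestamp", { "$mod": [ "$timestamp", 300 ] } ] } },
      "avgCPULoad": { "$avg": "$docAvg" },
      "timestamp": { "$max": "$timestamp" }
  }},
  { "$project": { "_id": 0, "avgCPULoad": 1, "timestamp": 1 } },
  { "$sort": { "timestamp": -1 } },
  { "$limit": 1 }
])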
The items in my collection are all similar to this:
{
"_id": ObjectId("574d6175da461e77030041b7"),
"hostname": "VPS",
"timestamp": NumberLong(1460040691),
"cpuCores": NumberLong(2),
"cpuList": [
{
"name": "cpu1",
"load": 3.4
},
{
"name": "cpu2",
"load": 0.7
}
]
}
I've added the explain option to my aggregation and this is the result:
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"timestamp" : {
"$gte" : 1464732000
},
"hostname" : "baklap4"
},
"fields" : {
"cpuList" : 1,
"timestamp" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "prototyping.CpuInfo",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"hostname" : {
"$eq" : "baklap4"
}
},
{
"timestamp" : {
"$gte" : 1464732000
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"hostname" : {
"$eq" : "baklap4"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"timestamp" : NumberLong(1)
},
"indexName" : "timestamp_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"timestamp" : [
"[1464732000.0, inf.0]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$unwind" : {
"path" : "$cpuList"
}
},
{
"$group" : {
"_id" : {
"interval" : {
"$subtract" : [
"$timestamp",
{
"$mod" : [
"$timestamp",
{
"$const" : 300
}
]
}
]
}
},
"avgCPULoad" : {
"$avg" : "$cpuList.load"
},
"timestamp" : {
"$max" : "$timestamp"
}
}
},
{
"$project" : {
"_id" : false,
"timestamp" : true,
"avgCPULoad" : true
}
},
{
"$sort" : {
"sortKey" : {
"timestamp" : -1
},
"limit" : NumberLong(1)
}
}
],
"ok" : 1
}
When I look at my collection, I see that timestamp and _id are indexed:
db.CpuInfo.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "prototyping.CpuInfo"
},
{
"v" : 1,
"key" : {
"timestamp" : NumberLong(1)
},
"name" : "timestamp_1",
"ns" : "prototyping.CpuInfo",
"sparse" : false
}
]
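Since the winning plan above scans only timestamp_1 and then filters hostname during the FETCH stage, a compound index covering both $match predicates might cut down the number of documents fetched. This is only a suggestion sketched here, not something from the original post:

// Sketch: equality field first, range field second, so both predicates become
// index bounds instead of a post-fetch filter.
db.CpuInfo.createIndex({ "hostname": 1, "timestamp": 1 })

// Then re-run the pipeline with the explain option and check that the IXSCAN
// bounds now include both hostname and timestamp.
db.CpuInfo.aggregate(
  [
    { "$match": { "timestamp": { "$gte": 1464764400 }, "hostname": "baklap4" } },
    { "$unwind": "$cpuList" }
  ],
  { "explain": true }
)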

mongodb aggregation $group and then $push a object

This is my data:
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like this (actually I hope the _id can become userId too):
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now:
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
I tried to read the MongoDB documentation and found out I can $push, but somehow I cannot find a way to push such an object, which is not defined anywhere in the whole document. I want to have the timestamp as well, but I don't know how I should modify the $group (or other stages?) to do so. Thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}

Use field value as key

I am doing this query
db.analytics.aggregate([
{
$match: {"event":"USER_SENTIMENT"}
},
{ $group: {
_id: {brand:"$data.brandId",sentiment:"$data.sentiment"},
count: {$sum : 1}
}
},
{ $group: {
_id: "$_id.brand",
sentiments: {$addToSet : {sentiment:"$_id.sentiment", count:"$count"}}
}
}
])
Which generates this:
{
"result" : [
{
"_id" : 57,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 4
}
]
},
{
"_id" : 376,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 1
},
{
"sentiment" : "Happy",
"count" : 1
},
{
"sentiment" : "Confused",
"count" : 1
}
]
}
],
"ok" : 1
}
But what I want is this:
[
{
"_id" : 57,
"Meh" : 4
},
{
"_id" : 376,
"Meh" : 1,
"Happy" : 1,
"Confused" : 1
}
]
Any idea on how to transform that? The blocking point for me is to transform a value into a key.
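One possible approach, sketched here under the assumption that you are on MongoDB 3.6 or newer (where $arrayToObject, $mergeObjects, and $replaceRoot are all available), is to push the sentiments as k/v pairs and then convert that array into an object:

db.analytics.aggregate([
  { $match: { "event": "USER_SENTIMENT" } },
  { $group: {
      _id: { brand: "$data.brandId", sentiment: "$data.sentiment" },
      count: { $sum: 1 }
  }},
  // Collect each brand's sentiments in the {k, v} shape that $arrayToObject expects.
  { $group: {
      _id: "$_id.brand",
      sentiments: { $push: { k: "$_id.sentiment", v: "$count" } }
  }},
  // Promote the k/v pairs to real keys and merge them alongside _id.
  { $replaceRoot: {
      newRoot: { $mergeObjects: [ { _id: "$_id" }, { $arrayToObject: "$sentiments" } ] }
  }}
])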