mongodb multiple aggregations in single operation - mongodb

I have an item collection with following documents.
{ "item" : "i1", "category" : "c1", "brand" : "b1" }
{ "item" : "i2", "category" : "c2", "brand" : "b1" }
{ "item" : "i3", "category" : "c1", "brand" : "b2" }
{ "item" : "i4", "category" : "c2", "brand" : "b1" }
{ "item" : "i5", "category" : "c1", "brand" : "b2" }
I want to separate aggregation results --> count by category, count by brand. Please note, it is not count by (category,brand)
I am able to do this using map-reduce using following code.
map = function(){
emit({type:"category",category:this.category},1);
emit({type:"brand",brand:this.brand},1);
}
reduce = function(key, values){
return Array.sum(values)
}
db.item.mapReduce(map,reduce,{out:{inline:1}})
And the result is
{
"results" : [
{
"_id" : {
"type" : "brand",
"brand" : "b1"
},
"value" : 3
},
{
"_id" : {
"type" : "brand",
"brand" : "b2"
},
"value" : 2
},
{
"_id" : {
"type" : "category",
"category" : "c1"
},
"value" : 3
},
{
"_id" : {
"type" : "category",
"category" : "c2"
},
"value" : 2
}
],
"timeMillis" : 21,
"counts" : {
"input" : 5,
"emit" : 10,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
I can get same results by firing two different aggregation commands as below.
db.item.aggregate({$group:{_id:"$category",count:{$sum:1}}})
db.item.aggregate({$group:{_id:"$brand",count:{$sum:1}}})
Is there anyway I can do the same using aggregation framework by single aggregation command.
I have simplified my case here, but in actual I need this grouping from fields in array of subdocuments. Assume the above is structure after I do unwind.
It is a real-time query (someone waiting for response), though on smaller dataset, so execution time is important.
I am using MongoDB 2.4.

Starting in Mongo 3.4, the $facet aggregation stage greatly simplifies this type of use case by processing multiple aggregation pipelines within a single stage on the same set of input documents:
// { "item" : "i1", "category" : "c1", "brand" : "b1" }
// { "item" : "i2", "category" : "c2", "brand" : "b1" }
// { "item" : "i3", "category" : "c1", "brand" : "b2" }
// { "item" : "i4", "category" : "c2", "brand" : "b1" }
// { "item" : "i5", "category" : "c1", "brand" : "b2" }
db.collection.aggregate(
{ $facet: {
categories: [{ $group: { _id: "$category", count: { "$sum": 1 } } }],
brands: [{ $group: { _id: "$brand", count: { "$sum": 1 } } }]
}}
)
// {
// "categories" : [
// { "_id" : "c1", "count" : 3 },
// { "_id" : "c2", "count" : 2 }
// ],
// "brands" : [
// { "_id" : "b1", "count" : 3 },
// { "_id" : "b2", "count" : 2 }
// ]
// }

Over a large data set I would say that your current mapReduce approach would be the best one, because the aggregation technique for this would not work well with large data. But possibly over a reasonably small size it might just be what you need:
db.items.aggregate([
{ "$group": {
"_id": null,
"categories": { "$push": "$category" },
"brands": { "$push": "$brand" }
}},
{ "$project": {
"_id": {
"categories": "$categories",
"brands": "$brands"
},
"categories": 1
}},
{ "$unwind": "$categories" },
{ "$group": {
"_id": {
"brands": "$_id.brands",
"category": "$categories"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.brands",
"categories": { "$push": {
"category": "$_id.category",
"count": "$count"
}},
}},
{ "$project": {
"_id": "$categories",
"brands": "$_id"
}},
{ "$unwind": "$brands" },
{ "$group": {
"_id": {
"categories": "$_id",
"brand": "$brands"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"categories": { "$first": "$_id.categories" },
"brands": { "$push": {
"brand": "$_id.brand",
"count": "$count"
}}
}}
])
Not really the same as the mapReduce output, you could throw in some more stages to change the output format, but this should be usable:
{
"_id" : null,
"categories" : [
{
"category" : "c2",
"count" : 2
},
{
"category" : "c1",
"count" : 3
}
],
"brands" : [
{
"brand" : "b2",
"count" : 2
},
{
"brand" : "b1",
"count" : 3
}
]
}
As you can see, this involves a fair bit of shuffling between arrays in order to group each set of either "category" or "brand" within the same pipeline process. Again I will say, this will not do well for large data, but for something like "items in an order" it would probably do nicely.
Of course as you say, you have simplified somewhat, so the first grouping key on null is either going to be something else or either narrowed down to do that null case by an earlier $match stage, which is probably what you want to do.

Related

match element in the array with aggregation

i have mongo db collection the follwing structure
{
{
"_id" : ObjectId("63e37afe7a3453d5014c011b"),
"schemaVersion" : NumberInt(1),
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"List" : [
{
"countryId" : NumberInt(1),
"name" : "Afghanistan",
},{
"countryId" : NumberInt(1),
"name" : "India",
},
{
"countryId" : NumberInt(1),
"name" : "USA",
}
}
i need to match the value with id, id1, id2, collectionName and name in the list to get country id for example if match the below value
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"name" : "Afghanistan",
i need result
{
"countryId" : 1,
"name" : "Afghanistan",
}
i tried like below
db.country_admin.aggregate([
{ $match: { collectionName: "Country" } },
{ $unwind : '$countryList' },
{ $project : { _id : 0, 'countryList.name' : 1, 'countryList.countryId' : 1 } }
]).pretty()
and i have following output
[
{
"List" : {
"countryId" : 1.0,
"name" : "Afghanistan"
}
},
{
"List" : {
"countryId" : 2.0,
"name" : "india"
}
},
{
"List" : {
"countryId" : 3.0,
"name" : "USA"
}
}]```
You can try using $filter to avoid $unwind like this example:
First $match by your desired condition(s).
Then $filter and get the first element (as "List.name": "Afghanistan" is used into $match stage there will be at least one result).
And output only values you want using $project.
db.collection.aggregate([
{
"$match": {
"Id": ObjectId("63e37afe7a3453d5014c0112"),
"Id1": ObjectId("63e37afe7a3453d5014c0113"),
"Id2": ObjectId("63e37afe7a3453d5014c0114"),
"collectionName": "Country",
"List.name": "Afghanistan",
}
},
{
"$project": {
"country": {
"$arrayElemAt": [
{
"$filter": {
"input": "$List",
"cond": {
"$eq": [
"$$this.name",
"Afghanistan"
]
}
}
},
0
]
}
}
},
{
"$project": {
"_id": 0,
"countryId": "$country.countryId",
"name": "$country.name"
}
}
])
Example here
By the way, using $unwind is also possible and you can check this example

Mongodb how to reduce the array within the matching key and calculate avg

{
"_id" : {
"state" : "NY",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 18.75,
"name" : "PU"
},
{
"id" : "21",
"score" : 25.0,
"name" : "PU"
},
{
"id" : "23",
"score" : 25.0,
"name" : "CL"
},
{
"id" : "23",
"score" : 56.25,
"name" : "CL"
}
]
}
Desired result:
Match the key with id within the array and calculate avg of score.
{
"_id" : {
"state" : "New York",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 21.875,
"name" : "PU"
},
{
"id" : "23",
"score" : 40.625,
"name" : "CL"
}
]
}
Thank you in advance.
Query
(returns the expected result)
unwind List
group with including the id, and find avg
fix the structure to be similar with the document you want
group back to restore the document structure (reverse the unwind)
if 2 sames ids have different name(if possible to happen)
query will make them seperated members in the array.
(alternativly it could make them same member and pack the names in an array, but that would produce different schema from the one you expect to see)
Test code here
db.collection.aggregate([
{
"$unwind": {
"path": "$List"
}
},
{
"$group": {
"_id": {
"state": "$_id.state",
"st": "$_id.st",
"id": "$List.id",
"name": "$List.name"
},
"avg": {
"$avg": "$List.score"
}
}
},
{
"$project": {
"_id": {
"state": "$_id.state",
"st": "$_id.st"
},
"List": {
"name": "$_id.name",
"id": "$_id.id",
"avg": "$avg"
}
}
},
{
"$group": {
"_id": "$_id",
"List": {
"$push": "$List"
}
}
}
])

Mongodb aggregate with cond and query value

I'm new to mongodb. I need to know how it is possible to query item for set to the value with aggregate
Data
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA"
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB"
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC"
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD"
}
]
mongoshell
Assume $check is false
db.getCollection('test').aggregate(
[
{
"$group": {
"_id": "$id",
//...,
"item": {
"$last": {
"$cond": [
{"$eq": ["$check", true]},
"YES",
* * ANSWER **,
}
]
}
},
}
]
)
So i need the result for item is all the name contain with same parent_id as string of array
Expect result
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC",
"item" : ["CCCC"]
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD",
"item" : ["AAAA","BBBB","DDDD"]
}
]
Try this..
Sample live demo
db.collection.aggregate([
{
"$group": {
"_id": "$parent_id",
"item": {
"$push": "$name"
},
"data": {
"$push": {
"_id": "$_id",
"name": "$name"
}
}
}
},
{
"$unwind": "$data"
},
{
"$project": {
"_id": "$data._id",
"parent_id": "$_id",
"name": "$data.name",
"item": 1
}
}
])

mongodb aggregate sum item as nested data

Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])

How to query a mongo collection to return the full document with virtual fields containing calculated values from the sub-document?

I'm trying to query a collection for a specific document that contains a sub-document. The sub-document contains values for which I'd like to obtain
the highest and lowest scores from that sub-document and return that result as virtual fields to the original document.
I have the following dataset:
{
"_id" : "d0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e",
"name" : "Addison Hunt",
"tests" : [
{
"name" : "lorem",
"score" : 79
},
{
"name" : "vallum",
"score" : 100
},
{
"name" : "ipsum",
"score" : 65
}
],
"created_at" : 1401488865684,
"class" : "dolor sit amit",
"user_id" : "005G5635231325O4VIAU"
}
In mongo 2.4, how can I query mongo once to return the following result:
{
"_id" : "d0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e",
"name" : "Addison Hunt",
"tests" : [
{
"name" : "lorem",
"score" : 79
},
{
"name" : "vallum",
"score" : 100
},
{
"name" : "ipsum",
"score" : 65
}
],
"created_at" : 1401488865684,
"class" : "dolor sit amit",
"user_id" : "005G5635231325O4VIAU",
"worst_test": {
"name" : "ipsum",
"score" : 65
},
"best_test": {
"name" : "vallum",
"score" : 100
}
}
Where "best_test" and "worst_test" are virtual fields representing the tests with the highest and lowest scores, respectively.
I've tried with many different ways and the closest I've gotten is with this query:
db.students.aggregate([
{ $match: {
'_id': 'd0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e'
}},
{ $unwind: '$tests' },
{ $sort: {'tests.score': 1} },
{ $group: {
_id: '$_id',
student_tests: {$push: "$$ROOT"},
worst_test: {$first: '$tests'},
best_test: { $last: '$tests' }
}}
]);
Which yields this result:
{
"_id" : "d0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e",
"student_tests" : [
{
"name" : "Addison Hunt",
"tests" : [
{
"name" : "ipsum",
"score" : 65
}
],
"created_at" : 1401488865684,
"class" : "dolor sit amit",
"user_id" : "005G5635231325O4VIAU",
},
{
"name" : "Addison Hunt",
"tests" : [
{
"name" : "lorem",
"score" : 79
}
],
"created_at" : 1401488865684,
"class" : "dolor sit amit",
"user_id" : "005G5635231325O4VIAU",
},
{
"name" : "Addison Hunt",
"tests" : [
{
"name" : "vallum",
"score" : 100
}
],
"created_at" : 1401488865684,
"class" : "dolor sit amit",
"user_id" : "005G5635231325O4VIAU",
},
],
"worst_test": {
"name" : "ipsum",
"score" : 65
},
"best_test": {
"name" : "vallum",
"score" : 100
}
}
If you are using $$ROOT then in fact you are using MongoDB 2.6 as this is an aggregation variable only introduced in that version.
But while handy for various things, all it does is represent the entire document at the present stage of the pipeline where used. To do what you want and return the original document unmodified but with additional fields, you could use it in $project stage before the $unwind to assign to the _id field, but really you don't have exactly the same document as you would still need to $project at the end in order to get the correct document shape out of those elements.
You best bet is just projecting the fields, but keeping an un-altered copy of the array before any $sort is applied:
db.students.aggregate([
{ "$match": {
"_id": "d0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e"
}},
{ "$project": {
"name": 1,
"tests": 1,
"created_at": 1,
"class": 1,
"user_id": 1,
"testCopy": "$tests"
}},
{ "$unwind": "$testCopy" },
{ "$sort": { "testCopy.score": 1 } },
{ "$group": {
"_id: "$_id",
"tests": { "$first": "$tests" },
"created_at": { "$first": "$created_at" },
"class": { "$first": "$class" },
"user_id": { "$first": "$user_id" },
"worst_test": { "$first": "$testCopy" },
"best_test": { "$last": "$testCopy" }
}}
]);
Or using $$ROOT as mentioned before, alternately just placing the fields under the _id individually in the $project:
db.students.aggregate([
{ "$match": {
"_id": "d0e78492342f9f-f843ec7-4bd14g3h-bh34j3a9-02d6ah32k8e6b79e"
}},
{ "$project": {
"_id": "$$ROOT",
"tests": 1
}},
{ "$unwind": "$tests" },
{ "$sort": { "tests.score": 1 } },
{ "$group": {
"_id": "$_id",
"aworst_test": { "$first": "$tests" },
"abest_test": { "$last": "$tests" }
}},
{ "$project": {
"_id": "$_id._id",
"tests": "$_id.tests",
"created_at": "$_id.created_at",
"class": "$_id.class",
"user_id": "$_id.user_id",
"worst_test": "$aworst_test",
"best_test": "$abest_test"
}}
]);
But as you see, you are still doing the $project work somewhere in order to get the structure you want, as well as the "renamed fields" to maintain the field order you want as the $project will otherwise "optimize" and "keep" any fields that have not been renamed and "append" new fields after the existing ones.
There really is no simple way to "get all fields" in the same way as you originally found them. Operations like $project and $group are an "all or nothing" affair, where they only explicitly produce what you tell them to.