MongoDB query to get count of array values for specific groups - mongodb

I am trying to achieve a group which tells me the count of occurrence of color and the entity count corresponding to each color.
I have written a query, But this is giving me twice the count because of unwind i guess.
My Query :
db.message.aggregate([
{$unwind: '$entities'},
{ "$group": {
"_id": {
"color": "$color",
"entities" :"$entities"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.color",
"count": { "$sum": "$count" },
"entities": {
"$push": {
"entity": "$_id.entities",
"count": "$count"
},
}
}}
])
Database Message Data :
{
"_id" : ObjectId("5c55f99abf6dc481ccfa7f67"),
"color" : "Red",
"channel" : "google",
"content" : "red color",
"entities" : [
"a",
"b"
]
},
{
"_id" : ObjectId("5c57f0a0bf6dc44424e8d371"),
"color" : "Red",
"channel" : "google",
"content" : "red color",
"entities" : [
"a",
"b"
]
},{
"_id" : ObjectId("5c5947a4bf6dc462603d87da"),
"color" : "Blue",
"channel" : "google",
"content" : "yo yo",
"entities" : [
"a",
"b"
]
},
{
"_id" : ObjectId("5c6151b9bf6dc4bee8dac8c9"),
"color" : "Blue",
"handle" : "IBM",
"channel" : "twitter",
"content" : "yo yo",
"entities" : [
"a",
"b",
"c"
]
}
(EDITED) I am Expecting the following Output :
{
"color" : "Red",
"count" : 2,
"entities" : [
{
"entity" : "a",
"count" : 2
},
{
"entity" : "b",
"count" : 2
}
]
},
{
"color" : "Blue",
"count" : 2,
"entities" : [
{
"entity" : "c",
"count" : 1
},
{
"entity" : "b",
"count" : 2
},
{
"entity" : "a",
"count" : 2
}
]
}
Please Help!.

Related

Mongodb how to reduce the array within the matching key and calculate avg

{
"_id" : {
"state" : "NY",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 18.75,
"name" : "PU"
},
{
"id" : "21",
"score" : 25.0,
"name" : "PU"
},
{
"id" : "23",
"score" : 25.0,
"name" : "CL"
},
{
"id" : "23",
"score" : 56.25,
"name" : "CL"
}
]
}
Desired result:
Match the key with id within the array and calculate avg of score.
{
"_id" : {
"state" : "New York",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 21.875,
"name" : "PU"
},
{
"id" : "23",
"score" : 40.625,
"name" : "CL"
}
]
}
Thank you in advance.
Query
(returns the expected result)
unwind List
group with including the id, and find avg
fix the structure to be similar with the document you want
group back to restore the document structure (reverse the unwind)
if 2 sames ids have different name(if possible to happen)
query will make them seperated members in the array.
(alternativly it could make them same member and pack the names in an array, but that would produce different schema from the one you expect to see)
Test code here
db.collection.aggregate([
{
"$unwind": {
"path": "$List"
}
},
{
"$group": {
"_id": {
"state": "$_id.state",
"st": "$_id.st",
"id": "$List.id",
"name": "$List.name"
},
"avg": {
"$avg": "$List.score"
}
}
},
{
"$project": {
"_id": {
"state": "$_id.state",
"st": "$_id.st"
},
"List": {
"name": "$_id.name",
"id": "$_id.id",
"avg": "$avg"
}
}
},
{
"$group": {
"_id": "$_id",
"List": {
"$push": "$List"
}
}
}
])

Mongodb aggregate with cond and query value

I'm new to mongodb. I need to know how it is possible to query item for set to the value with aggregate
Data
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA"
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB"
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC"
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD"
}
]
mongoshell
Assume $check is false
db.getCollection('test').aggregate(
[
{
"$group": {
"_id": "$id",
//...,
"item": {
"$last": {
"$cond": [
{"$eq": ["$check", true]},
"YES",
* * ANSWER **,
}
]
}
},
}
]
)
So i need the result for item is all the name contain with same parent_id as string of array
Expect result
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC",
"item" : ["CCCC"]
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD",
"item" : ["AAAA","BBBB","DDDD"]
}
]
Try this..
Sample live demo
db.collection.aggregate([
{
"$group": {
"_id": "$parent_id",
"item": {
"$push": "$name"
},
"data": {
"$push": {
"_id": "$_id",
"name": "$name"
}
}
}
},
{
"$unwind": "$data"
},
{
"$project": {
"_id": "$data._id",
"parent_id": "$_id",
"name": "$data.name",
"item": 1
}
}
])

Group multi-dimensional array after unwinding elements

Again with mongoDB. I really like aggregation, but still can't "get it".
So here is my array:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"id_basket" : 1,
"card" : [
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
},
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"id_basket" : 1,
"card" : [
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
},
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
}
],
"full_amount" : "40",
},
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
"id_user" : 97,
}
I want to output something like this:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
}
I've tried many combinations with unwinding, projecting and grouping and failed to get what I want. Can someone help me with this?
You probably shouldn't be using the aggregation framework for tasks like this that do not actually "aggregate" anything between documents. This really is a "projection" task since all you are asking is to "alter" the structure of a document, and that is a task probably better suited to coding in the client after the document is retrieved.
A very good reason for this is that operations like $unwind are very costly in terms of performance. What $unwind does is produce a "copy" of the document content for each array member present, which results in a lot more documents to process.
Think of that like a "SQL Join" with a "one to many" relationship, the only difference being the data is self contained in one document. Processing $unwind simulates the "join" results in that the "master" (one) document contents are reproduced for every "child" (many) document.
In order to counter such operations being done by people, MongoDB 2.6 introduced the $map operator, which processes array elements within the document itself.
So instead of doing multiple ( or any ) $unwind actions, you can instead just process the arrays within the document itself using $map in a $project stage:
db.collection.aggregate([
{ "$project": {
"orders": { "$map": {
"input": "$orders",
"as": "o",
"in": {
"id": "$$o.id",
"name": "$$o.name",
"card": { "$map": {
"input": "$$o.card",
"as": "c",
"in": {
"id": "$$c.id",
"serial": "$$c.serial",
"name": "$$c.name",
"ticket": { "$map": {
"input": "$$c.ticket",
"as": "t",
"in": {
"id": "$$t.id",
"name": "$$t.name",
"price": "$$t.price.price_disp"
}
}}
}
}},
"full_amount": "$$o.full_amount"
}
}},
"rate": 1,
"date": 1
}}
])
The operations are fairly simple there as each "array" is assigned it's own variable name, and for a simple projection operation such as this all that is really left is selecting which fields you want.
In earlier versions, processing using $unwind is much more difficult:
db.collection.aggregate([
{ "$unwind": "$orders" },
{ "$unwind": "$orders.card" },
{ "$unwind": "$orders.card.ticket" },
{ "$group": {
"_id": {
"_id": "$_id",
"orders": {
"id": "$orders.id",
"name": "$orders.name",
"card": {
"id": "$orders.card.id",
"serial": "$orders.card.serial",
"name": "$orders.card.name"
},
"full_amount": "$orders.full_amount"
},
"rate": "$rate",
"date": "$date"
},
"ticket": {
"$push": {
"id": "$orders.card.ticket.id",
"name": "$orders.card.ticket.name",
"price": "$orders.card.ticket.price.price_disp"
}
}
}},
{ "$group": {
"_id": {
"_id": "$_id._id",
"orders": {
"id": "$_id.orders.id",
"name": "$_id.orders.name",
"full_amount": "$_id.orders.full_amount"
},
"rate": "$_id.rate",
"date": "$_id.date"
},
"card": {
"$push": {
"id": "$_id.orders.card.id",
"serial": "$_id.orders.card.serial",
"name": "$_id.orders.card.name",
"ticket": "$ticket"
}
}
}},
{ "$group": {
"_id": "$_id._id",
"orders": {
"$push": {
"id": "$_id.orders.id",
"name": "$_id.orders.name",
"card": "$card",
"full_amount": "$_id.orders.full_amount"
}
},
"rate": { "$first": "$_id.rate" },
"date": { "$first": "$_id.date" }
}}
])
So following through that carefully, you should see that since you $unwind three times it is necessary to $group "three times" as well, while carefully grouping all the distinct values at each "level" and re-constructing the arrays via $push.
This really is not advised at all as was mentioned earlier:
You "are not grouping/aggregating anything" and each sub-document "must" contain a "unique" itentifier because of the "grouping" operations required to re-construct arrays. ( See: NOTE )
The $unwind operation here is very costly. All of the document information is re-produced by a factor of "n" array X "n" array elements and so on. So there is much more data in the aggregation pipeline than your collection or query selection actually contains in itself.
Therefore in conclusion, for the general processing of "reformatting your data" you should instead be processing each document in your code rather than be "throwing it" at the aggregation pipeline to do.
If your document data requires "sufficient" manipulation that makes a "substantial difference" to the returned result size that you deem to be more efficient than pulling the whole document and manipulating in the client, then and "only" then should you be using the $project form as shown with the $map operations.
Sidebar
Your original "tag" here mentions "PHP".
All MongoDB queries including the aggregation have nothing language specific about them and are just "data structures" and are represented as such mostly in the "native form" for those languages (PHP,JavaScript,python,etc), and with "builder methods" for those languages without "native" expressive formats for free structures ( C,C#,Java ).
In all cases, there are simple parsers available for JSON, which is a common "linqua franca" here as the MongoB Shell itself is JavaScript based and understands JSON structre ( as actual JavaScript Objects ) natively.
So when working with such examples use tools like:
json_decode: to get more of an insight into how your native data structure is constructed.
json_encode: in order to check your native data structure against any JSON represented sample.
All content here is just simple "key/value" array() notation, though nested. But it is probably good practice to be aware of the tools and use them regularly.
NOTE:
The data sample you give looks very much like you have "cut and paste" data in order to create multiple items, as various "sub-items" all share the same "id" values.
Your "real" data should not do this! So I hope it does not, but if so then fix it.
In order to make the second example workable ( first is perfectly fine as is ) the data needs to be altered to included "unique" "id" values for each sub-element.
As I used here:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002a",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000031",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000032",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "251",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000033",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000034",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002b",
"name" : "XYZ",
"card" : [
{
"id" : "252",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000035",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000036",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "253",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000037",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000038",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
}
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
}

Aggregate group multiple fields

Given the following dataset:
{ "_id" : 1, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 25, "Q3" : 0, "Q4" : 0 }
{ "_id" : 2, "city" : "Reno", "cat": "roads", "Q1" : 30, "Q2" : 0, "Q3" : 0, "Q4" : 60 }
{ "_id" : 3, "city" : "Yuma", "cat": "parks", "Q1" : 0, "Q2" : 0, "Q3" : 45, "Q4" : 0 }
{ "_id" : 4, "city" : "Reno", "cat": "parks", "Q1" : 35, "Q2" : 0, "Q3" : 0, "Q4" : 0 }
{ "_id" : 5, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 15, "Q3" : 0, "Q4" : 20 }
I'm trying to achieve the following result. It would be great to just return the totals greater than zero, and also compress each city, cat and Qx total to a single record.
{
"city" : "Yuma",
"cat" : "roads",
"Q2total" : 40
},
{
"city" : "Reno",
"cat" : "roads",
"Q1total" : 30
},
{
"city" : "Reno",
"cat" : "roads",
"Q4total" : 60
},
{
"city" : "Yuma",
"cat" : "parks",
"Q3total" : 45
},
{
"city" : "Reno",
"cat" : "parks",
"Q1total" : 35
},
{
"city" : "Yuma",
"cat" : "roads",
"Q4total" : 20
}
Possible?
We could ask, to what end? Your documents already have a nice consistent Object structure which is recommended. Having objects with varying keys is not a great idea. Data is "data" and should not really be the name of the keys.
With that in mind, the aggregation framework actually follows this sense and does not allow for the generation of arbitrary key names from data contained in the document. But you could get a similar result with the output as data points:
db.junk.aggregate([
// Aggregate first to reduce the pipeline documents somewhat
{ "$group": {
"_id": {
"city": "$city",
"cat": "$cat"
},
"Q1": { "$sum": "$Q1" },
"Q2": { "$sum": "$Q2" },
"Q3": { "$sum": "$Q3" },
"Q4": { "$sum": "$Q4" }
}},
// Convert the "quarter" elements to array entries with the same keys
{ "$project": {
"totals": {
"$map": {
"input": { "$literal": [ "Q1", "Q2", "Q3", "Q4" ] },
"as": "el",
"in": { "$cond": [
{ "$eq": [ "$$el", "Q1" ] },
{ "quarter": "$$el", "total": "$Q1" },
{ "$cond": [
{ "$eq": [ "$$el", "Q2" ] },
{ "quarter": "$$el", "total": "$Q2" },
{ "$cond": [
{ "$eq": [ "$$el", "Q3" ] },
{ "quarter": "$$el", "total": "$Q3" },
{ "quarter": "$$el", "total": "$Q4" }
]}
]}
]}
}
}
}},
// Unwind the array produced
{ "$unwind": "$totals" },
// Filter any "0" resutls
{ "$match": { "totals.total": { "$ne": 0 } } },
// Maybe project a prettier "flatter" output
{ "$project": {
"_id": 0,
"city": "$_id.city",
"cat": "$_id.cat",
"quarter": "$totals.quarter",
"total": "$totals.total"
}}
])
Which gives you results like this:
{ "city" : "Reno", "cat" : "parks", "quarter" : "Q1", "total" : 35 }
{ "city" : "Yuma", "cat" : "parks", "quarter" : "Q3", "total" : 45 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q1", "total" : 30 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q4", "total" : 60 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q2", "total" : 40 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q4", "total" : 20 }
You could alternately use mapReduce which allows "some" flexibility with key names. The catch is though that your aggregation is still by "quarter", so you need that as part of the primary key, which cannot be changed once emitted.
Additionally, you cannot "filter" any aggregated results of "0" without a second pass after outputting to a collection, so it's not really of much use for what you want to do, unless you can live with a second mapReduce operation of "transform" query on the output collection.
Worth note is if you look at what is being done in the "second" pipeline stage here with $project and $map you will see that the document structure is essentially being altered to sometime like what you could alternately structure your documents like originally, like this:
{
"city" : "Reno",
"cat" : "parks"
"totals" : [
{ "quarter" : "Q1", "total" : 35 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 0 },
{ "quarter" : "Q4", "total" : 0 }
]
},
{
"city" : "Yuma",
"cat" : "parks"
"totals" : [
{ "quarter" : "Q1", "total" : 0 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 45 },
{ "quarter" : "Q4", "total" : 0 }
]
}
Then the aggregation operation becomes simple for your documents to the same results as shown above:
db.collection.aggregate([
{ "$unwind": "$totals" },
{ "$group": {
"_id": {
"city": "$city",
"cat": "$cat",
"quarter": "$totals.quarter"
},
"ttotal": { "$sum": "$totals.total" }
}},
{ "$match": { "ttotal": { "$ne": 0 } },
{ "$project": {
"_id": 0,
"city": "$_id.city",
"cat": "$_id.cat",
"quarter": "$_id.quarter",
"total": "$ttotal"
}}
])
So it might make more sense to consider structuring your documents in that way to begin with and avoid any overhead required by the document transformation.
I think you'll find that consistent key names makes a far better object model to program to, where you should be reading the data point from the key-value and not the key-name. If you really need to, then it's a simple matter of reading the data from the object and transforming the keys of each already aggregated result in post processing.

mongodb multiple aggregations in single operation

I have an item collection with following documents.
{ "item" : "i1", "category" : "c1", "brand" : "b1" }
{ "item" : "i2", "category" : "c2", "brand" : "b1" }
{ "item" : "i3", "category" : "c1", "brand" : "b2" }
{ "item" : "i4", "category" : "c2", "brand" : "b1" }
{ "item" : "i5", "category" : "c1", "brand" : "b2" }
I want to separate aggregation results --> count by category, count by brand. Please note, it is not count by (category,brand)
I am able to do this using map-reduce using following code.
map = function(){
emit({type:"category",category:this.category},1);
emit({type:"brand",brand:this.brand},1);
}
reduce = function(key, values){
return Array.sum(values)
}
db.item.mapReduce(map,reduce,{out:{inline:1}})
And the result is
{
"results" : [
{
"_id" : {
"type" : "brand",
"brand" : "b1"
},
"value" : 3
},
{
"_id" : {
"type" : "brand",
"brand" : "b2"
},
"value" : 2
},
{
"_id" : {
"type" : "category",
"category" : "c1"
},
"value" : 3
},
{
"_id" : {
"type" : "category",
"category" : "c2"
},
"value" : 2
}
],
"timeMillis" : 21,
"counts" : {
"input" : 5,
"emit" : 10,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
I can get same results by firing two different aggregation commands as below.
db.item.aggregate({$group:{_id:"$category",count:{$sum:1}}})
db.item.aggregate({$group:{_id:"$brand",count:{$sum:1}}})
Is there anyway I can do the same using aggregation framework by single aggregation command.
I have simplified my case here, but in actual I need this grouping from fields in array of subdocuments. Assume the above is structure after I do unwind.
It is a real-time query (someone waiting for response), though on smaller dataset, so execution time is important.
I am using MongoDB 2.4.
Starting in Mongo 3.4, the $facet aggregation stage greatly simplifies this type of use case by processing multiple aggregation pipelines within a single stage on the same set of input documents:
// { "item" : "i1", "category" : "c1", "brand" : "b1" }
// { "item" : "i2", "category" : "c2", "brand" : "b1" }
// { "item" : "i3", "category" : "c1", "brand" : "b2" }
// { "item" : "i4", "category" : "c2", "brand" : "b1" }
// { "item" : "i5", "category" : "c1", "brand" : "b2" }
db.collection.aggregate(
{ $facet: {
categories: [{ $group: { _id: "$category", count: { "$sum": 1 } } }],
brands: [{ $group: { _id: "$brand", count: { "$sum": 1 } } }]
}}
)
// {
// "categories" : [
// { "_id" : "c1", "count" : 3 },
// { "_id" : "c2", "count" : 2 }
// ],
// "brands" : [
// { "_id" : "b1", "count" : 3 },
// { "_id" : "b2", "count" : 2 }
// ]
// }
Over a large data set I would say that your current mapReduce approach would be the best one, because the aggregation technique for this would not work well with large data. But possibly over a reasonably small size it might just be what you need:
db.items.aggregate([
{ "$group": {
"_id": null,
"categories": { "$push": "$category" },
"brands": { "$push": "$brand" }
}},
{ "$project": {
"_id": {
"categories": "$categories",
"brands": "$brands"
},
"categories": 1
}},
{ "$unwind": "$categories" },
{ "$group": {
"_id": {
"brands": "$_id.brands",
"category": "$categories"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.brands",
"categories": { "$push": {
"category": "$_id.category",
"count": "$count"
}},
}},
{ "$project": {
"_id": "$categories",
"brands": "$_id"
}},
{ "$unwind": "$brands" },
{ "$group": {
"_id": {
"categories": "$_id",
"brand": "$brands"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"categories": { "$first": "$_id.categories" },
"brands": { "$push": {
"brand": "$_id.brand",
"count": "$count"
}}
}}
])
Not really the same as the mapReduce output, you could throw in some more stages to change the output format, but this should be usable:
{
"_id" : null,
"categories" : [
{
"category" : "c2",
"count" : 2
},
{
"category" : "c1",
"count" : 3
}
],
"brands" : [
{
"brand" : "b2",
"count" : 2
},
{
"brand" : "b1",
"count" : 3
}
]
}
As you can see, this involves a fair bit of shuffling between arrays in order to group each set of either "category" or "brand" within the same pipeline process. Again I will say, this will not do well for large data, but for something like "items in an order" it would probably do nicely.
Of course as you say, you have simplified somewhat, so the first grouping key on null is either going to be something else or either narrowed down to do that null case by an earlier $match stage, which is probably what you want to do.