Mongodb aggregate with cond and query value - mongodb

I'm new to MongoDB. I need to know how to query related documents and set the result as the value of a field (`item`) inside an aggregate.
Data
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA"
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB"
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC"
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD"
}
]
mongoshell
Assume $check is false
db.getCollection('test').aggregate(
[
{
"$group": {
"_id": "$id",
//...,
"item": {
"$last": {
"$cond": [
{"$eq": ["$check", true]},
"YES",
* * ANSWER **,
}
]
}
},
}
]
)
So I need `item` to be an array of strings containing all the names that share the same parent_id.
Expected result
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC",
"item" : ["CCCC"]
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD",
"item" : ["AAAA","BBBB","DDDD"]
}
]

Try this..
Sample live demo
// Collect every name per parent_id, then fan each group back out so that every
// original record carries the full list of sibling names in "item".
db.collection.aggregate([
  // Group by parent_id: "item" gathers the names, "data" remembers each record.
  {
    "$group": {
      "_id": "$parent_id",
      "item": { "$push": "$name" },
      "data": { "$push": { "_id": "$_id", "name": "$name" } }
    }
  },
  // One output document per remembered record.
  { "$unwind": "$data" },
  // Restore the original field layout and keep the shared "item" array.
  {
    "$project": {
      "_id": "$data._id",
      "parent_id": "$_id",
      "name": "$data.name",
      "item": 1
    }
  }
])

Related

Mongodb how to reduce the array within the matching key and calculate avg

{
"_id" : {
"state" : "NY",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 18.75,
"name" : "PU"
},
{
"id" : "21",
"score" : 25.0,
"name" : "PU"
},
{
"id" : "23",
"score" : 25.0,
"name" : "CL"
},
{
"id" : "23",
"score" : 56.25,
"name" : "CL"
}
]
}
Desired result:
Match the key with id within the array and calculate avg of score.
{
"_id" : {
"state" : "New York",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 21.875,
"name" : "PU"
},
{
"id" : "23",
"score" : 40.625,
"name" : "CL"
}
]
}
Thank you in advance.
Query
(returns the expected result)
unwind List
group with including the id, and find avg
fix the structure to be similar with the document you want
group back to restore the document structure (reverse the unwind)
if two entries with the same id have different names (if that can happen),
the query will make them separate members in the array.
(alternatively it could make them the same member and pack the names in an array, but that would produce a different schema from the one you expect to see)
Test code here
// Average the scores of List entries that share the same id (and name) inside
// each document, keeping the document's original shape.
db.collection.aggregate([
  // 1. One document per List entry.
  { "$unwind": { "path": "$List" } },
  // 2. Group by the parent _id fields plus the entry's id and name and average
  //    the scores. Entries with the same id but different names end up in
  //    separate groups.
  {
    "$group": {
      "_id": {
        "state": "$_id.state",
        "st": "$_id.st",
        "id": "$List.id",
        "name": "$List.name"
      },
      "avg": { "$avg": "$List.score" }
    }
  },
  // 3. Rebuild the document layout. The average is emitted under "score" so each
  //    entry keeps the id/score/name keys of the input (and desired) documents.
  {
    "$project": {
      "_id": { "state": "$_id.state", "st": "$_id.st" },
      "List": { "id": "$_id.id", "score": "$avg", "name": "$_id.name" }
    }
  },
  // 4. Reverse the $unwind: collect the averaged entries back into List.
  {
    "$group": {
      "_id": "$_id",
      "List": { "$push": "$List" }
    }
  }
])

Merge mongodb aggregation results of unread messages count per conversation member into document

I want to aggregate unread messages count per member in a conversation group and seems my aggregation pipeline is working correctly, but, I don't know how to achieve the following results.
Please check the example.
Thread documents
/* 1 */
{
"_id" : ObjectId("60d4efa6a95f446051f31492"),
"latestMessage" : "Voluptatem eos officiis optio dolor est et.",
"type" : "FEED",
"users" : [
{
"_id" : ObjectId("60d4efa6a95f446051f31491"),
"displayName" : "Monique Connelly II"
},
{
"_id" : ObjectId("60d4efa6a95f446051f31490"),
"displayName" : "Ivory Jacobson DDS"
},
{
"_id" : ObjectId("60d4efa6a95f446051f3148f"),
"displayName" : "Ron Weimann"
}
],
"createdBy" : "60d4efa6a95f446051f3148f",
"createdAt" : ISODate("2021-06-24T20:48:38.537Z"),
"modifiedAt" : ISODate("2021-06-24T20:48:38.620Z"),
}
/* 2 */
{
"_id" : ObjectId("60d4efa6a95f446051f31493"),
"type" : "CONVERSATION",
"users" : [
{
"_id" : ObjectId("60d4efa6a95f446051f31491"),
"displayName" : "Monique Connelly II"
},
{
"_id" : ObjectId("60d4efa6a95f446051f31490"),
"displayName" : "Ivory Jacobson DDS"
},
{
"_id" : ObjectId("60d4efa6a95f446051f3148f"),
"displayName" : "Ron Weimann"
}
],
"createdBy" : "60d4efa6a95f446051f3148f",
"createdAt" : ISODate("2021-06-24T20:48:38.569Z"),
"modifiedAt" : ISODate("2021-06-24T20:48:38.569Z"),
}
Message documents
/* 1 */
{
"_id" : ObjectId("60d4efa6a95f446051f31494"),
"content" : "Itaque rerum facere neque fuga aspernatur dolorum.",
"deleted" : false,
"threadId" : ObjectId("60d4efa6a95f446051f31492"),
"type" : "TEXT",
"thread" : {
"$ref" : "threads",
"$id" : ObjectId("60d4efa6a95f446051f31492")
},
"readBy" : [
ObjectId("60d4efa6a95f446051f3148f")
],
"attributes" : [],
"createdBy" : "60d4efa6a95f446051f3148f",
"createdAt" : ISODate("2021-06-24T20:48:38.598Z"),
"modifiedAt" : ISODate("2021-06-24T20:48:38.598Z"),
}
/* 2 */
{
"_id" : ObjectId("60d4efa6a95f446051f31495"),
"content" : "Nisi suscipit iste magni voluptatem.",
"deleted" : false,
"threadId" : ObjectId("60d4efa6a95f446051f31492"),
"type" : "TEXT",
"thread" : {
"$ref" : "threads",
"$id" : ObjectId("60d4efa6a95f446051f31492")
},
"readBy" : [
ObjectId("60d4efa6a95f446051f3148f")
],
"attributes" : [],
"createdBy" : "60d4efa6a95f446051f3148f",
"createdAt" : ISODate("2021-06-24T20:48:38.610Z"),
"modifiedAt" : ISODate("2021-06-24T20:48:38.610Z"),
}
and more...
My aggregation pipeline
// Joins each thread with its messages, flattens messages, users and readBy,
// then produces one counter per (user, thread) pair.
db.threads.aggregate([
{ "$lookup": { "from": "messages", "localField": "_id", "foreignField": "threadId", "as": "message"}},
// After these three unwinds there is one row per (message, user, readBy entry).
{ "$unwind": "$message" },
{ "$unwind": "$users" },
{ "$unwind": "$message.readBy" },
{ "$group": {
"_id": {
"user_id": "$users._id",
"thread_id": "$_id",
},
"unread_messages": {
"$sum": {
// Adds 1 for every unwound row whose readBy entry differs from this user's _id.
"$cond": [
{ "$ne": [ "$users._id", "$message.readBy" ] },
1,
0
]
}
}
}},
// { "$replaceRoot": { "newRoot": { "$mergeObjects": ["$_id", { "count": "$unread_messages" }]} } }
])
Aggregation results
{
"_id" : {
"user_id" : ObjectId("60d4efa6a95f446051f31491"),
"thread_id" : ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages" : 4.0
},
{
"_id" : {
"user_id" : ObjectId("60d4efa6a95f446051f3148f"),
"thread_id" : ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages" : 4.0
},
{
"_id" : {
"user_id" : ObjectId("60d4efa6a95f446051f31490"),
"thread_id" : ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages" : 4.0
}
Results are partially OK, but, I want to keep my original structure of a document and copy aggregation results accordingly to the thread id back to the original document
I would be very grateful if you help me with the aggregation pipeline
Expected results
{
"_id": ObjectId("60d4efa6a95f446051f31492"),
"latestMessage": "Voluptatem eos officiis optio dolor est et.",
"type": "LISTING",
"users": [{
"_id": ObjectId("60d4efa6a95f446051f31491"),
"displayName": "Monique Connelly II"
},
{
"_id": ObjectId("60d4efa6a95f446051f31490"),
"displayName": "Ivory Jacobson DDS"
},
{
"_id": ObjectId("60d4efa6a95f446051f3148f"),
"displayName": "Ron Weimann"
}
],
"createdBy": "60d4efa6a95f446051f3148f",
"createdAt": ISODate("2021-06-24T20:48:38.537Z"),
"modifiedAt": ISODate("2021-06-24T20:48:38.620Z"),
"message": [...],
"stats": [{
"_id": {
"user_id": ObjectId("60d4efa6a95f446051f31491"),
"thread_id": ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages": 4.0
},
{
"_id": {
"user_id": ObjectId("60d4efa6a95f446051f3148f"),
"thread_id": ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages": 4.0
},
{
"_id": {
"user_id": ObjectId("60d4efa6a95f446051f31490"),
"thread_id": ObjectId("60d4efa6a95f446051f31492")
},
"unread_messages": 4.0
}
]
}
I am posting a solution that works perfectly for my case, using the $first + $replaceRoot operations.
The final results have different ids from those in the first post because I re-created the documents
// Same counting pipeline as in the question, extended to carry the thread
// document along and merge the per-user counters back into it as "stats".
db.threads.aggregate([
{ "$lookup": { "from": "messages", "localField": "_id", "foreignField": "threadId", "as": "message"}},
{ "$unwind": "$message" },
{ "$unwind": "$users" },
{ "$unwind": "$message.readBy" },
// First grouping: one counter per (user, thread) pair.
{ "$group": {
"_id": {
"user_id": "$users._id",
"thread_id": "$_id",
},
// Keeps the first unwound row of the group, so "users", "message" and
// "message.readBy" are single values here rather than arrays
// (visible in the "Final results" below).
"thread": {
"$first": "$$ROOT"
},
"unread_messages": {
"$sum": {
// Adds 1 for every unwound row whose readBy entry differs from this user's _id.
"$cond": [
{ "$ne": [ "$users._id", "$message.readBy" ] },
1,
0
]
}
}
}},
// Second grouping: back to one document per thread, collecting the counters.
{ "$group": {
"_id": "$_id.thread_id",
"thread": { "$first": "$thread" },
"stats": {
"$push": {
"userId": "$_id.user_id",
"unreadMessages": "$unread_messages"
}
}
}},
// Promote the saved thread document to the root and attach "stats" to it.
{ "$replaceRoot": { "newRoot": { "$mergeObjects": ["$thread", { stats: "$stats" }]} } }
])
Final results
/* 1 */
{
"_id" : ObjectId("60d574e242e59a48b886c586"),
"latestMessage" : "Molestias quo quod occaecati exercitationem veniam eaque.",
"type" : "LISTING",
"users" : {
"_id" : ObjectId("60d574e242e59a48b886c584"),
"displayName" : "Meggan Vandervort"
},
"unreadMessages" : NumberLong(0),
"createdBy" : "60d574e242e59a48b886c582",
"createdAt" : ISODate("2021-06-25T06:17:06.547Z"),
"modifiedAt" : ISODate("2021-06-25T06:17:06.617Z")
"message" : {
"_id" : ObjectId("60d574e242e59a48b886c58d"),
"content" : "Velit dolores vel.",
"deleted" : false,
"threadId" : ObjectId("60d574e242e59a48b886c586"),
"type" : "TEXT",
"thread" : {
"$ref" : "threads",
"$id" : ObjectId("60d574e242e59a48b886c586")
},
"readBy" : ObjectId("60d574e242e59a48b886c582"),
"attributes" : [],
"createdBy" : "60d574e242e59a48b886c582",
"createdAt" : ISODate("2021-06-25T06:17:06.595Z"),
"modifiedAt" : ISODate("2021-06-25T06:17:06.595Z")
},
"stats" : [
{
"userId" : ObjectId("60d574e242e59a48b886c584"),
"unreadMessages" : 6.0
},
{
"userId" : ObjectId("60d574e242e59a48b886c583"),
"unreadMessages" : 6.0
},
{
"userId" : ObjectId("60d574e242e59a48b886c582"),
"unreadMessages" : 6.0
}
]
}
and more...

Mongodb $lookup joins all collection instead of matching object

so I am trying to do a $lookup with Mongodb but I have a strange output.
I have two collections, "sites" and "consumptions".
sites :
{
"_id" : ObjectId("5b26db6e7f59e825909da106"),
"siteId" : 49,
"industry" : "Commercial Property",
"sub_industry" : "Shopping Center/Shopping Mall",
"square_feet" : 497092,
"latitude" : 41.2161756,
"longitude" : -78.14809154,
"timezone" : "America/New_York",
"timezone_offset" : "-04:00",
"__v" : 0
}
consumptions :
{
"_id" : ObjectId("5b26db907f59e825909f3d2a"),
"timestamp" : 1325382000,
"dttm_utc" : ISODate("2012-01-01T00:40:00Z"),
"value" : 2.8956,
"estimated" : 0,
"anomaly" : "",
"site" : [
{
"_id" : ObjectId("5b26db727f59e825909da16a")
}
],
"__v" : 0
}
This is the $lookup I am trying to do :
// Joins each consumption with the "sites" collection, matching the local
// "site.id" path against the foreign "id" field, and overwrites "site" with
// the joined documents.
// NOTE(review): the sample documents above only show "_id" fields ("site._id"
// in consumptions, "_id" in sites), not "id" — confirm the intended join keys.
db.consumptions.aggregate([
{
$lookup:
{
from: "sites",
localField: "site.id",
foreignField: "id",
as: "site"
}
}
])
The expected output would be to have the detail of the site in each consumption :
{
"_id" : ObjectId("5b26db907f59e825909f3d2a"),
"timestamp" : 1325382000,
"dttm_utc" : ISODate("2012-01-01T00:40:00Z"),
"value" : 2.8956,
"estimated" : 0,
"anomaly" : "",
"site" : [
{
"_id" : ObjectId("5b26db6e7f59e825909da106"),
"siteId" : 49,
"industry" : "Commercial Property",
"sub_industry" : "Shopping Center/Shopping Mall",
"square_feet" : 497092,
"latitude" : 41.2161756,
"longitude" : -78.14809154,
"timezone" : "America/New_York",
"timezone_offset" : "-04:00",
"__v" : 0
}
],
"__v" : 0
}
This is the output I am getting with the $lookup :
{
"_id" : ObjectId("5b26db907f59e825909f3d2a"),
"timestamp" : 1325382000,
"dttm_utc" : ISODate("2012-01-01T00:40:00Z"),
"value" : 2.8956,
"estimated" : 0,
"anomaly" : "",
"site" : [
{
"_id" : ObjectId("5b26db6e7f59e825909da0f3"),
"siteId" : 6,
"industry" : "Commercial Property",
"sub_industry" : "Shopping Center/Shopping Mall",
"square_feet" : 161532,
"latitude" : 34.78300117,
"longitude" : -106.8952497,
"timezone" : "America/Denver",
"timezone_offset" : "-06:00",
"__v" : 0
},
{
"_id" : ObjectId("5b26db6e7f59e825909da0f4"),
"siteId" : 8,
"industry" : "Commercial Property",
"sub_industry" : "Shopping Center/Shopping Mall",
"square_feet" : 823966,
"latitude" : 40.32024733,
"longitude" : -76.40494239,
"timezone" : "America/New_York",
"timezone_offset" : "-04:00",
"__v" : 0
}, ... (all the sites details are listed)
],
"__v" : 0
}
Thank you in advance for your help !
You need to first $unwind the site array so that site._id can be matched against the foreign field _id, and then $group to roll the results back up into arrays again.
// Resolve every entry of the "site" array to its full site document.
db.collection.aggregate([
  // One row per referenced site so localField is a single ObjectId.
  { "$unwind": "$site" },
  // Replace the reference with the matching document(s) from the sites collection.
  { "$lookup": {
    "from": Site.collection.name,
    "localField": "site._id",
    "foreignField": "_id",
    "as": "site"
  }},
  // $lookup always yields an array; flatten it to the single matched site.
  { "$unwind": "$site" },
  // Roll the rows back up into one document per consumption.
  { "$group": {
    "_id": "$_id",
    "value": { "$first": "$value" },
    "estimated": { "$first": "$estimated" },
    "anomaly": { "$first": "$anomaly" },
    "timestamp": { "$first": "$timestamp" },
    "dttm_utc": { "$first": "$dttm_utc" },
    "site": { "$push": "$site" },
    // Carry "__v" through as well, since the expected output keeps it.
    "__v": { "$first": "$__v" }
  }}
])
And if you have mongodb 3.6 then you can try this
// MongoDB 3.6+ variant: resolve every entry of the "site" array with a
// sub-pipeline $lookup.
db.collection.aggregate([
  // One row per referenced site.
  { "$unwind": "$site" },
  // Look up the site whose _id equals the reference captured in $$siteId.
  { "$lookup": {
    "from": Site.collection.name,
    "let": { "siteId": "$site._id" },
    "pipeline": [
      { "$match": { "$expr": { "$eq": [ "$_id", "$$siteId" ] } } }
    ],
    "as": "site"
  }},
  // $lookup always yields an array; flatten it to the single matched site.
  { "$unwind": "$site" },
  // Roll the rows back up into one document per consumption.
  { "$group": {
    "_id": "$_id",
    "value": { "$first": "$value" },
    "estimated": { "$first": "$estimated" },
    "anomaly": { "$first": "$anomaly" },
    "timestamp": { "$first": "$timestamp" },
    "dttm_utc": { "$first": "$dttm_utc" },
    "site": { "$push": "$site" },
    // Carry "__v" through as well, since the expected output keeps it.
    "__v": { "$first": "$__v" }
  }}
])
Make sure that Site.collection.name resolves to the correct collection name.
I think that $lookup doesn't work directly with an array.
Try using $unwind first.

MongoDb aggregation query with $group and $push into subdocument

I have a question regarding the $group argument of MongoDb aggregations. My data structure looks as follows:
My "Event" collection contains this single document:
{
"_id": ObjectId("mongodbobjectid..."),
"name": "Some Event",
"attendeeContainer": {
"min": 0,
"max": 10,
"attendees": [
{
"type": 1,
"status": 2,
"contact": ObjectId("mongodbobjectidHEX1")
},
{
"type": 7,
"status": 4,
"contact": ObjectId("mongodbobjectidHEX2")
}
]
}
}
My "Contact" collection contains these documents:
{
"_id": ObjectId("mongodbobjectidHEX1"),
"name": "John Doe",
"age": 35
},
{
"_id": ObjectId("mongodbobjectidHEX2"),
"name": "Peter Pan",
"age": 60
}
What I want to do is perform an aggregate query on the "Event" collection and get the following result with full "contact" data:
{
"_id": ObjectId("mongodbobjectid..."),
"name": "Some Event",
"attendeeContainer": {
"min": 0,
"max": 10,
"attendees": [
{
"type": 1,
"status": 2,
"contact": {
"_id": ObjectId("mongodbobjectidHEX1"),
"name": "John Doe",
"age": 35
}
},
{
"type": 7,
"status": 4,
"contact": {
"_id": ObjectId("mongodbobjectidHEX2"),
"name": "Peter Pan",
"age": 60
}
}
]
}
}
The arguments I am using right now look as follows (shortened version):
"$unwind" : "$attendeeContainer.attendees",
"$lookup" : { "from" : "contactinfo", "localField" : "attendeeContainer.attendees.contact","foreignField" : "_id", "as" : "contactInfo" },
"$unwind" : "$contactInfo",
"$group" : { "_id": "$_id",
"name": { "$first" : "$name" },
...
"contact": { "$push": { "contact": "$contactInfo"} }
}
However, this leads to the "contact" array being on "Event" level (because of the grouping) instead of one document of the array being at each "attendeeContainer.attendees". How can I push the "contact" array to be at "attendeeContainer.attendees"? (as shown in the desired output above)
I tried things like:
"attendeeContainer.attendees.contact": { "$push": { "contact": "$contactInfo"} }
But mongodb apparently does not allow "." at $group stage.
Try running the following aggregation pipeline, the key is using a final $project pipeline to create the attendeeContainer subdocument:
// Attach the full contact document to every attendee and rebuild the
// attendeeContainer subdocument.
db.event.aggregate([
  // One row per attendee.
  { "$unwind": "$attendeeContainer.attendees" },
  // Store the matching contact next to the attendee's own fields.
  {
    "$lookup" : {
      "from" : "contactinfo",
      "localField" : "attendeeContainer.attendees.contact",
      "foreignField" : "_id",
      "as" : "attendeeContainer.attendees.contactInfo"
    }
  },
  // $lookup yields an array; flatten it to the single matched contact.
  { "$unwind": "$attendeeContainer.attendees.contactInfo" },
  // $group cannot write dotted field names, so collect into top-level fields first.
  {
    "$group": {
      "_id" : "$_id",
      "name": { "$first": "$name" },
      "min" : { "$first": "$attendeeContainer.min" },
      "max" : { "$first": "$attendeeContainer.max" },
      "attendees": { "$push": "$attendeeContainer.attendees" }
    }
  },
  // Move the collected fields back under attendeeContainer.
  {
    "$project": {
      "name": 1,
      "attendeeContainer.min": "$min",
      "attendeeContainer.max": "$max",
      "attendeeContainer.attendees": "$attendees"
    }
  }
])
Debugging Tips
Debugging the pipeline at the 4th stage, you would get the result
// Debug variant: runs the pipeline up to and including the $group stage; the
// final $project stage is commented out so the intermediate shape can be inspected.
db.event.aggregate([
{ "$unwind": "$attendeeContainer.attendees" },
{
"$lookup" : {
"from" : "contactinfo",
"localField" : "attendeeContainer.attendees.contact",
"foreignField" : "_id",
"as" : "attendeeContainer.attendees.contactInfo"
}
},
{ "$unwind": "$attendeeContainer.attendees.contactInfo" },
{
"$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"min" : { "$first": "$attendeeContainer.min" },
"max" : { "$first": "$attendeeContainer.max" },
"attendees": { "$push": "$attendeeContainer.attendees" }
}
}/*,
{
"$project": {
"name": 1,
"attendeeContainer.min": "$min",
"attendeeContainer.max": "$max",
"attendeeContainer.attendees": "$attendees"
}
}*/
])
Pipeline result
{
"_id" : ObjectId("582c789282a9183adc0b53f5"),
"name" : "Some Event",
"min" : 0,
"max" : 10,
"attendees" : [
{
"type" : 1,
"status" : 2,
"contact" : ObjectId("582c787682a9183adc0b53f3"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f3"),
"name" : "John Doe",
"age" : 35
}
},
{
"type" : 7,
"status" : 4,
"contact" : ObjectId("582c787682a9183adc0b53f4"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f4"),
"name" : "Peter Pan",
"age" : 60
}
}
]
}
and the final $project pipeline will give you the desired result:
// Full pipeline: attach the contact document to every attendee, then rebuild
// the attendeeContainer subdocument in the final $project stage.
db.event.aggregate([
  // One row per attendee.
  { "$unwind": "$attendeeContainer.attendees" },
  // Store the matching contact next to the attendee's own fields.
  {
    "$lookup" : {
      "from" : "contactinfo",
      "localField" : "attendeeContainer.attendees.contact",
      "foreignField" : "_id",
      "as" : "attendeeContainer.attendees.contactInfo"
    }
  },
  // $lookup yields an array; flatten it to the single matched contact.
  { "$unwind": "$attendeeContainer.attendees.contactInfo" },
  // Collect the per-attendee rows back into one document per event.
  {
    "$group": {
      "_id": "$_id",
      "name": { "$first": "$name" },
      "min" : { "$first": "$attendeeContainer.min" },
      "max" : { "$first": "$attendeeContainer.max" },
      "attendees": { "$push": "$attendeeContainer.attendees" }
    }
  },
  // Move the collected fields back under attendeeContainer.
  {
    "$project": {
      "name": 1,
      "attendeeContainer.min": "$min",
      "attendeeContainer.max": "$max",
      "attendeeContainer.attendees": "$attendees"
    }
  }
])
Desired/Actual Output
{
"_id" : ObjectId("582c789282a9183adc0b53f5"),
"name" : "Some Event",
"attendeeContainer" : {
"min" : 0,
"max" : 10,
"attendees" : [
{
"type" : 1,
"status" : 2,
"contact" : ObjectId("582c787682a9183adc0b53f3"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f3"),
"name" : "John Doe",
"age" : 35
}
},
{
"type" : 7,
"status" : 4,
"contact" : ObjectId("582c787682a9183adc0b53f4"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f4"),
"name" : "Peter Pan",
"age" : 60
}
}
]
}
}

mongodb multiple aggregations in single operation

I have an item collection with following documents.
{ "item" : "i1", "category" : "c1", "brand" : "b1" }
{ "item" : "i2", "category" : "c2", "brand" : "b1" }
{ "item" : "i3", "category" : "c1", "brand" : "b2" }
{ "item" : "i4", "category" : "c2", "brand" : "b1" }
{ "item" : "i5", "category" : "c1", "brand" : "b2" }
I want to separate aggregation results --> count by category, count by brand. Please note, it is not count by (category,brand)
I am able to do this using map-reduce using following code.
// Map step: every item emits two keys, one tagged "category" and one tagged
// "brand", each with a count of 1.
map = function(){
emit({type:"category",category:this.category},1);
emit({type:"brand",brand:this.brand},1);
}
// Reduce step: sums the emitted counts for a key.
reduce = function(key, values){
return Array.sum(values)
}
// Runs the job over the "item" collection and returns the results inline.
db.item.mapReduce(map,reduce,{out:{inline:1}})
And the result is
{
"results" : [
{
"_id" : {
"type" : "brand",
"brand" : "b1"
},
"value" : 3
},
{
"_id" : {
"type" : "brand",
"brand" : "b2"
},
"value" : 2
},
{
"_id" : {
"type" : "category",
"category" : "c1"
},
"value" : 3
},
{
"_id" : {
"type" : "category",
"category" : "c2"
},
"value" : 2
}
],
"timeMillis" : 21,
"counts" : {
"input" : 5,
"emit" : 10,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
I can get same results by firing two different aggregation commands as below.
db.item.aggregate({$group:{_id:"$category",count:{$sum:1}}})
db.item.aggregate({$group:{_id:"$brand",count:{$sum:1}}})
Is there any way I can do the same with the aggregation framework, using a single aggregation command?
I have simplified my case here, but in actual I need this grouping from fields in array of subdocuments. Assume the above is structure after I do unwind.
It is a real-time query (someone waiting for response), though on smaller dataset, so execution time is important.
I am using MongoDB 2.4.
Starting in Mongo 3.4, the $facet aggregation stage greatly simplifies this type of use case by processing multiple aggregation pipelines within a single stage on the same set of input documents:
// { "item" : "i1", "category" : "c1", "brand" : "b1" }
// { "item" : "i2", "category" : "c2", "brand" : "b1" }
// { "item" : "i3", "category" : "c1", "brand" : "b2" }
// { "item" : "i4", "category" : "c2", "brand" : "b1" }
// { "item" : "i5", "category" : "c1", "brand" : "b2" }
// Run both groupings over the same input documents in a single $facet stage:
// one sub-pipeline counts per category, the other counts per brand.
db.collection.aggregate([
  {
    $facet: {
      categories: [
        { $group: { _id: "$category", count: { "$sum": 1 } } }
      ],
      brands: [
        { $group: { _id: "$brand", count: { "$sum": 1 } } }
      ]
    }
  }
])
// {
// "categories" : [
// { "_id" : "c1", "count" : 3 },
// { "_id" : "c2", "count" : 2 }
// ],
// "brands" : [
// { "_id" : "b1", "count" : 3 },
// { "_id" : "b2", "count" : 2 }
// ]
// }
Over a large data set I would say that your current mapReduce approach would be the best one, because the aggregation technique for this would not work well with large data. But possibly over a reasonably small size it might just be what you need:
// Count items per category and per brand in one pipeline by carrying both value
// lists through as arrays and counting them one after the other.
// The collection is "item", as used by the mapReduce and $group commands in the question.
db.item.aggregate([
  // 1. Collapse everything into one document holding all categories and all brands.
  { "$group": {
    "_id": null,
    "categories": { "$push": "$category" },
    "brands": { "$push": "$brand" }
  }},
  // 2. Park both arrays in _id and keep a working copy of categories to unwind.
  { "$project": {
    "_id": {
      "categories": "$categories",
      "brands": "$brands"
    },
    "categories": 1
  }},
  // 3. Count each category; the brands array rides along in the group key.
  { "$unwind": "$categories" },
  { "$group": {
    "_id": {
      "brands": "$_id.brands",
      "category": "$categories"
    },
    "count": { "$sum": 1 }
  }},
  // 4. Fold the category counts back into one array.
  { "$group": {
    "_id": "$_id.brands",
    "categories": { "$push": {
      "category": "$_id.category",
      "count": "$count"
    }}
  }},
  // 5. Swap roles: park the finished category counts in _id, expose brands to unwind.
  { "$project": {
    "_id": "$categories",
    "brands": "$_id"
  }},
  // 6. Count each brand; the category counts ride along in the group key.
  { "$unwind": "$brands" },
  { "$group": {
    "_id": {
      "categories": "$_id",
      "brand": "$brands"
    },
    "count": { "$sum": 1 }
  }},
  // 7. Final document: the category counts plus the collected brand counts.
  { "$group": {
    "_id": null,
    "categories": { "$first": "$_id.categories" },
    "brands": { "$push": {
      "brand": "$_id.brand",
      "count": "$count"
    }}
  }}
])
Not really the same as the mapReduce output, you could throw in some more stages to change the output format, but this should be usable:
{
"_id" : null,
"categories" : [
{
"category" : "c2",
"count" : 2
},
{
"category" : "c1",
"count" : 3
}
],
"brands" : [
{
"brand" : "b2",
"count" : 2
},
{
"brand" : "b1",
"count" : 3
}
]
}
As you can see, this involves a fair bit of shuffling between arrays in order to group each set of either "category" or "brand" within the same pipeline process. Again I will say, this will not do well for large data, but for something like "items in an order" it would probably do nicely.
Of course as you say, you have simplified somewhat, so the first grouping key on null is either going to be something else or either narrowed down to do that null case by an earlier $match stage, which is probably what you want to do.