MongoDB - Aggregate muliple rows - mongodb

Suppose the following aggregation query:
Machine.aggregate( [ { $match : { $and: [ {"idc": req.query.idc }, {"customer":req.query.customer} ] } } ,{"$group":{_id: {"cluster":"$cluster","idc":"$idc","type":"$type"},"SumCores":{"$sum":"$cores"},"SumMemory": { "$sum":"$memory" }}}, { $sort : { idc : -1, cluster: 1 } } ]);
Which returns:
[
{
"_id": {
"cluster": 1,
"idc": "LH5",
"type": "Virtual"
},
"SumCores": 112,
"SumMemory": 384
},
{
"_id": {
"cluster": 1,
"idc": "LH5",
"type": "Physical"
},
"SumCores": 192,
"SumMemory": 768
},
{
"_id": {
"cluster": 1,
"idc": "LH8",
"type": "Virtual"
},
"SumCores": 232,
"SumMemory": 469
},
{
"_id": {
"cluster": 1,
"idc": "LH8",
"type": "Physical"
},
"SumCores": 256,
"SumMemory": 1024
}
]
Is there a way to change the aggregation to retrieve this desired output:
[
{
"_id": {
"cluster": 1,
"idc": "LH5"
},
"Virtual": {
"SumCores": 112,
"SumMemory": 384
},
"Physical": {
"SumCores": 192,
"SumMemory": 768
}
},
{
"_id": {
"cluster": 1,
"idc": "LH8"
},
"Virtual": {
"SumCores": 232,
"SumMemory": 469
},
"Physical": {
"idc": "LH8",
"type": "Physical"
}
}
]
Assumptions:
There will always be a Physical and Virtual "pair" per IDC/Cluster
I am happy to receive solutions which:
a) Change the aggregation query
b) Receive the existing data and change it into this format by way of a library and/or an algorithm

You are doing all the right things in the query already as you need to $group at the level you have in order to get the correct sums. The only thing remaining is to bring it all together.
Personally I would stick with the "pair" in an array as the final output:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which would give you:
{
"_id" : {
"cluster" : 1,
"idc" : "LH8"
},
"data" : [
{
"type" : "Virtual",
"SumCores" : 232,
"SumMemory" : 469
},
{
"type" : "Physical",
"SumCores" : 256,
"SumMemory" : 1024
}
]
}
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"data" : [
{
"type" : "Virtual",
"SumCores" : 112,
"SumMemory" : 384
},
{
"type" : "Physical",
"SumCores" : 192,
"SumMemory" : 768
}
]
}
But if you really must, then you can filter out the matched elements from the array and put them in their own properties:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which gives you your result:
{
"_id" : {
"cluster" : 1,
"idc" : "LH8"
},
"Physical" : {
"SumCores" : 256,
"SumMemory" : 1024
},
"Virtual" : {
"SumCores" : 232,
"SumMemory" : 469
}
}
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"Physical" : {
"SumCores" : 192,
"SumMemory" : 768
},
"Virtual" : {
"SumCores" : 112,
"SumMemory" : 384
}
}
But the first is just going to give you the same essential data without needing an extra pass through the results.
At any rate it's really just one more $group to bring it all together and then the optional stages if you really must have that data format. But I would personally handle any accessing of the "pair" in the code that needs to deal with it.

Related

Mongo aggregate to exclude objects from array

My sample document
{ "pId":12345, "charges": [
{
"type": "asr",
"dId": 123,
"value": 100
},
{
"type": "asr",
"dId": 124,
"value": 120
},
{
"type": "asp",
"dId": 125,
"value": 130
},
{
"type": "asn",
"dId": 126,
"value": 130
},
{
"type": "aso",
"dId": 127,
"value": 150
}....
] }
Excluded charges input:
charges [
{
"type": "asr",
"dId": 123
},
{
"type": "asr",
"dId": 124
} ...
]
I need to fetch all charges from the sample document except Excluded charges. Can someone help me to solve this?
I tried this
{}
{"$project" :{
"_id" : 0, "pId" : 1,
"charges": { "$filter" : { "input" : "$charges", "as" : "charge",
"cond" :{
{ "$not" : { "$and" : [{ "$eq" : ["$$charge.type", "asr"]}, { "$eq" : ["$$charge.dId", 123]}]}}
}
}}
When I have multiple excluded charges how can we do this
use this :
[
{
'$project': {
'charges': {
'$map': {
'input': {
'$filter': {
'input': '$charges',
'as': 'featuresT',
'cond': {
'$eq': [
{
'$or': [
{
'$and': [
{
'$eq': [
'$$featuresT.type', 'asr'
]
}, {
'$eq': [
'$$featuresT.dId', 123
]
}
]
}, {
'$and': [
{
'$eq': [
'$$featuresT.type', 'asr'
]
}, {
'$eq': [
'$$featuresT.dId', 124
]
}
]
}
]
}, false
]
}
}
},
'as': 'featuresF',
'in': {
'type': '$$featuresF.type',
'dId': '$$featuresF.dId',
'value': '$$featuresF.value'
}
}
}
}
}
]
found a simple way.
db.collection.aggregate([
{
$match: {
"pId": {
$eq: 12345
}
}
},
{
"$project": {
"_id": 0,
"pId": 1,
"charges": {
"$filter": {
"input": "$charges",
"as": "charge",
"cond": {
"$not": {
"$or": [
{
"$and": [
{
"$eq": [
"$$charge.type",
"asr"
]
},
{
"$eq": [
"$$charge.dId",
123
]
}
]
},
{
"$and": [
{
"$eq": [
"$$charge.type",
"asr"
]
},
{
"$eq": [
"$$charge.dId",
124
]
}
]
}
]
}
}
}
}
}
}
])
mongoplayground
$filter to filter charges array
$in with $not to exclude only the values that you want
db.collection.aggregate([
{
"$project": {
"_id": 0,
"pId": 1,
"charges": {
"$filter": {
"input": "$charges",
"cond": {
"$not": {
"$in": [
"$$this.dId",
[123, 124]
]
}
}
}
}
}
}
])
Here is the working example: https://mongoplayground.net/p/0uIdoml384h

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

MongoDB: Get the previous and next element in the embedded array

I have a database which has the following structure:
{
"_id" : ObjectId("59b8d72ab515211f3c161c4b"),
"Transport_event_id" : 1,
"Carrier_id" : 23,
"Payload_id" : 0,
"StartTime" : 214392.0,
"EndTime" : 362707.0,
"Move_events" : [
{
"Timestamp" : 214398,
"x_pos" : 13,
"y_pos" : 202
},{
"Timestamp" : 214845,
"x_pos" : 12,
"y_pos" : 202
},{
"Timestamp" : 216399,
"x_pos" : 12,
"y_pos" : 216
},{
"Timestamp" : 216842,
"x_pos" : 11,
"y_pos" : 216
},{
"Timestamp" : 219586,
"x_pos" : 10,
"y_pos" : 216
}
]
}
I've made the following query which will return the next 2 Elements form a Array after a specific TimeStamp.
var cursor = db.Transport_eventBeta.aggregate([
{ "$match": { "StartTime": { "$lte": query_time } } },
{ "$match": { "EndTime": { "$gte": query_time } } },
{
"$project": {
"Move_events": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$Move_events",
"as": "event",
"cond": { "$lte": ["$$event.Timestamp" , query_time] }
}
}
},
"in": {
"$slice": [
"$Move_events",
{"$size": "$$filtered"},
2
]
}
}
},
"Carrier_id": 1
}
}
])
while (cursor.hasNext()) {
print(cursor.next());
}
What I need are the documents befor and after this specific TimeStamp.
Some kind of this:
"$slice": [
"$Move_events",
{"$size": "$$filtered"} - 1,
2
]
But this doesn't work. How can I solve this problem? 2 separate queries are no option because of the duration.
You can try below aggregation query in 3.4.
The query will filter Move_events to keep events with timestamp less than input timestamp followed by $arrayElemAt to get the Move_events after and before event.
db.Transport_eventBeta.aggregatee([
{
"$match": {
"StartTime": {
"$lte": query_time
},
"EndTime": {
"$gte": query_time
}
}
},
{
"$project": {
"Move_events": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$Move_events",
"as": "event",
"cond": {
"$lte": [
"$$event.Timestamp",
query_time
]
}
}
}
},
"in": [
{
"$arrayElemAt": [
"$Move_events",
{
"$subtract": [
{
"$size": "$$filtered"
},
1
]
}
]
},
{
"$arrayElemAt": [
"$Move_events",
{
"$size": "$$filtered"
}
]
}
]
}
}
}
}
])

MongoDB - Complex Grouping Query

I have the following MongoDB aggregation query which groups by IDC, type and cluster - which works perfectly.
I would like to additionally group "environment", inside this existing grouping. Please see my query below, my existing output, and what I would like to see (desired output).
If you have any questions or wish to see the source (I didn't think it was necessary, as it would take up room on the question, then please comment).
Thanks
Example Source (around 1000 documents):
{
"_id":"55d5dc40281077b6d8af1bfa",
"hostname":"1",
"domain":"domain",
"description":"VMWare ESXi 5",
"cluster":1,
"type":"Physical",
"os":"EXSi",
"idc":"AMS",
"environment":"DR",
"deviceclass":"host",
"cores":64,
"memory":256,
"clusters":0,
"customer":"MnS",
"mounts":[],
"roles":["ESX-HOST"],
"ipset":{"backnet":"1"},
"frontnet":[],
"created":"2015-09-28T11:12:36.526Z"
}
Query:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which gives me the following output:
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"Physical" : {
"SumCores" : 192,
"SumMemory" : 768
},
"Virtual" : {
"SumCores" : 112,
"SumMemory" : 384
}
}
My desired output is:
[
{
"_id": {
"cluster": 1,
"idc": "LH8"
},
"Physical": [
{
"environment": "DR",
"SumCores": 256,
"SumMemory": 1024
},
{
"environment": "PROD",
"SumCores": 256,
"SumMemory": 1024
}
],
"Virtual": [
{
"environment": "DR",
"SumCores": 232,
"SumMemory": 469
},
{
"environment": "PROD",
"SumCores": 232,
"SumMemory": 469
}
]
}
]
Essentially, I want to group the sums based on the environment
Very much as in your initial query ( actually written by myself ), all you really need to do is add in that field detail to the initial _id of $group and then carry that through into the subsequent array entries:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type",
"environment": "$environment"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"environment": "$_id.environment",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"environment": "$$el.environment",
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"environment": "$$el.environment",
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
But you also "really" should be using the query form I recommended you did in the first place, since it is clear that all you want to do is diplay this in a template, and looping array content should be very simple:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type",
"environment": "$environment"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"environment": "$_id.environment",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);

Group Multiple Values in Aggregation

I want to group the all field of a collection with unique total. Let's assume there is collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I find out some similar to your output using aggregation check below code
db.collectionName.aggregate({
"$group": {
"_id": null,
"countryOfIN": {
"$sum": {
"$cond": [{
$eq: ["$country", "IN"]
}, 1, 0]
}
},
"countryOfUK": {
"$sum": {
"$cond": [{
$eq: ["$country", "UK"]
}, 1, 0]
}
},
"countryOfUS": {
"$sum": {
"$cond": [{
$eq: ["$country", "US"]
}, 1, 0]
}
},
"stateOfHR": {
"$sum": {
"$cond": [{
$eq: ["$state", "HR"]
}, 1, 0]
}
},
"stateOfMH": {
"$sum": {
"$cond": [{
$eq: ["$state", "MH"]
}, 1, 0]
}
},
"stateOfLA": {
"$sum": {
"$cond": [{
$eq: ["$state", "LA"]
}, 1, 0]
}
},
"stateOfJK": {
"$sum": {
"$cond": [{
$eq: ["$state", "JK"]
}, 1, 0]
}
},
"operatorOfAIRTEL": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AIRTEL"]
}, 1, 0]
}
},
"operatorOfAT&T": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AT&T"]
}, 1, 0]
}
},
"operatorOfVODAFONE": {
"$sum": {
"$cond": [{
$eq: ["$operator", "VODAFONE"]
}, 1, 0]
}
}
}
}, {
"$group": {
"_id": null,
"country": {
"$push": {
"IN": "$countryOfIN",
"UK": "$countryOfUK",
"US": "$countryOfUS"
}
},
"STATE": {
"$push": {
"HR": "$stateOfHR",
"MH": "$stateOfMH",
"LA": "$stateOfLA",
"JK": "$stateOfJK"
}
},
"operator": {
"$push": {
"AIRTEL": "$operatorOfAIRTEL",
"AT&T": "$operatorOfAT&T",
"VODAFONE": "$operatorOfVODAFONE"
}
}
}
}, {
"$project": {
"_id": 0,
"country": 1,
"STATE": 1,
"operator": 1
}
})
using $cond created groups of matched data and pushed them in second groups to combine.
An output format like you are looking for is not really suited to the aggregation framework since you are tranforming part of your data in to "key" names. The aggregation framework does not do this but rather sticks to database "best practice" as does not transform "data" to "key" names in any way.
You can perform a mapReduce operation instead with allows more flexibilty with the manipulation, but not as good performance due to the need to use JavaScript code to perform the manipulation:
db.collection.mapReduce(
function () {
var obj = {},
doc = this;
delete doc._id;
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
{ "out": { "inline": 1 } }
)
And in the stucture that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But is fairly easy to transform in client code while iterating the results:
var result = {};
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
]).forEach(function(doc) {
if ( !result.hasOwnProperty(doc._id.type) )
result[doc._id.type] = {};
result[doc._id.type][doc._id.key] = doc.count;
})
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}