group first, make bucketauto second in mongodb aggregation - mongodb

I have a dataset structured like that:
{
"id": 1230239,
"group_name": "A",
"confidence": 0.14333882876354542,
},
{
"id": 1230240,
"group_name": "B",
"confidence": 0.4434535,
},
Etc.
It is pretty simple to calculate buckets and the number of items in each confidence bucket using $bucketAuto, like this:
{
"$bucketAuto": {
"groupBy": "$confidence",
"buckets": 4
}
}
But how can I do the same for each group, separately?
I tried this one:
{"$group": {
"_id": "group",
"data": {
"$push": {
"confidence": "$confidence",
}
}
}
},
{
"$bucketAuto": {
"groupBy": "$data.confidence",
"buckets": 4
}
}
But that does not work.
What I need roughly is this as an output:
{ 'groupA':
{
"_id": {
"min": 0.0005225352581638143,
"max": 0.2905137273072962
},
"count": 67
},
{"_id": {
"min": 0.2905137273072962,
"max":0.5531611756507283,
},
"count": 43
},
},
{ 'groupB':
{
"_id": {
"min": 0.0005225352581638143,
"max": 0.2905137273072962
},
"count": 67
},
{"_id": {
"min": 0.2905137273072962,
"max":0.5531611756507283,
},
"count": 43
},
}
Any advice or hint would be appreciated

$facet to the rescue -- the "multigroup" operator. This pipeline:
db.foo.aggregate([
{$facet: {
"groupA": [
{$match: {"group_name": "A"}}
,{$bucketAuto: {
"groupBy": "$confidence",
"buckets": 3
}}
]
,"groupB": [
{$match: {"group_name": "B"}}
,{$bucketAuto: {
"groupBy": "$confidence",
"buckets": 3
}}
]
}}
]);
yields the output you seek:
{
"groupA" : [
{
"_id" : {
"min" : 0.14333882876354542,
"max" : 0.34333882876354543
},
"count" : 2
},
{
"_id" : {
"min" : 0.34333882876354543,
"max" : 0.5433388287635454
},
"count" : 2
},
{
"_id" : {
"min" : 0.5433388287635454,
"max" : 0.5433388287635454
},
"count" : 1
}
],
"groupB" : [
{
"_id" : {
"min" : 0.5433388287635454,
"max" : 0.7433388287635454
// etc. etc.
If you want to go totally dynamic, you'll need to do it in two passes: first get the distinct group names, then build the $facet expression from those names:
// Pass 1: build one $facet sub-pipeline per distinct group_name value.
// fct_stage has to exist before the loop assigns into it; without this
// declaration the first assignment throws a ReferenceError.
var fct_stage = {};
db.foo.distinct("group_name").forEach(function(name) {
    // The facet key is "group" + name (e.g. "groupA"); its sub-pipeline
    // restricts the input to that group and buckets its confidence values.
    fct_stage["group" + name] = [
        {$match: {"group_name": name}}
        ,{$bucketAuto: {
            "groupBy": "$confidence",
            "buckets": 3
        }}
    ];
});
// Pass 2: run all per-group sub-pipelines in a single $facet stage.
db.foo.aggregate([ {$facet: fct_stage} ]);

Related

Retrieve highest score for each game using aggregate in MongoDB

I am working on a database of various games and I want to design a query that returns the top scorer from each game with specific player details.
The document structure is as follows:
db.gaming_system.insertMany(
[
{
"_id": "01",
"name": "GTA 5",
"high_scores": [
{
"hs_id": 1,
"name": "Harry",
"score": 6969
},
{
"hs_id": 2,
"name": "Simon",
"score": 8574
},
{
"hs_id": 3,
"name": "Ethan",
"score": 4261
}
]
},
{
"_id": "02",
"name": "Among Us",
"high_scores": [
{
"hs_id": 1,
"name": "Harry",
"score": 926
},
{
"hs_id": 2,
"name": "Simon",
"score": 741
},
{
"hs_id": 3,
"name": "Ethan",
"score": 841
}
]
}
]
)
I have created a query using aggregate which returns the name of game and the highest score for that game as follows
db.gaming_system.aggregate(
{ "$project": { "maximumscore": { "$max": "$high_scores.score" }, name:1 } },
{ "$group": { "_id": "$_id", Name: { $first: "$name" }, "Highest_Score": { "$max": "$maximumscore" } } },
{ "$sort" : { "_id":1 } }
)
The output from my query is as follows:
{ "_id" : "01", "Name" : "GTA 5", "Highest_Score" : 8574 }
{ "_id" : "02", "Name" : "Among Us", "Highest_Score" : 926 }
I want to generate output which also provides the name of player and "hs_id" of that player who has the highest score for each game as follows:
{ "_id" : "01", "Name" : "GTA 5", "Top_Scorer" : "Simon", "hs_id": 2, "Highest_Score" : 8574 }
{ "_id" : "02", "Name" : "Among Us", "Top_Scorer" : "Harry", "hs_id": 1, "Highest_Score" : 926 }
What should be added to my query using aggregate pipeline?
[
{
$unwind: "$high_scores" // one document per high_scores entry, so the entries can be sorted individually
},
{
$sort: {
"high_scores.score": -1 // highest score first, across all games; the grouping in the next stage separates the games again
}
},
{
// Group by game _id. Because the input is sorted by score in descending order,
// $first picks the values from the highest-scoring entry of each game.
// Top_Scorer receives the whole unwound high_scores sub-document
// (hs_id, name and score in the sample data), not just the player name.
$group: {
_id: "$_id",
name: {
$first: "$name"
},
Top_Scorer: {
$first: "$high_scores"
}
}
}
]

mongo aggregation framework group by quarter/half year/year

I have a database with this schema structure :
{
"name" : "Carl",
"city" : "paris",
"time" : "1-2018",
"notes" : [
"A",
"A",
"B",
"C",
"D"
]
}
And this query using the aggregation framework :
db.getCollection('collection').aggregate(
[{
"$match": {
"$and": [{
"$or": [ {
"time": "1-2018"
}, {
"time": "2-2018"
} ]
}, {
"name": "Carl"
}, {
"city": "paris"
}]
}
}, {
"$unwind": "$notes"
}, {
"$group": {
"_id": {
"notes": "$notes",
"time": "$time"
},
"count": {
"$sum": 1
}
}
}
, {
"$group": {
"_id": "$_id.time",
"count": {
"$sum": 1
}
}
}, {
"$project": {
"_id": 0,
"time": "$_id",
"count": 1
}
}])
It is working correctly and I'm getting these results:
{
"count" : 4.0,
"time" : "2-2018"
}
{
"count" : 4.0,
"time" : "1-2018"
}
My issue is that I'd like to keep the same match stage, and I'd like to group by quarter.
Here the result i'd like to have :
{
"count" : 8.0,
"time" : "1-2018" // here quarter 1
}
Thanks

How to compare and count each value of element with condition in mongoDB pipeline after unwinding?

This is my command I ran in tools->command
{
aggregate : "hashtags",
pipeline:
[
{$unwind:"$time"},
{$match:{"$time":{$gte:NumberInt(1450854385), $lte:NumberInt(1450854385)}}},
{$group:{"_id":"$word","count":{$sum:1}}}
]
}
which gave us this result
Response from server:
{
"result": [
{
"_id": "dear",
"count": NumberInt(1)
},
{
"_id": "ghost",
"count": NumberInt(1)
},
{
"_id": "rat",
"count": NumberInt(1)
},
{
"_id": "police",
"count": NumberInt(1)
},
{
"_id": "bugs",
"count": NumberInt(3)
},
{
"_id": "dog",
"count": NumberInt(2)
},
{
"_id": "batman",
"count": NumberInt(9)
},
{
"_id": "ear",
"count": NumberInt(1)
}
],
"ok": 1
}
The documents are in collection 'hashtags'
The documents inserted are as shown below
1.
{
"_id": ObjectId("567a483bf0058ed6755ab3de"),
"hash_count": NumberInt(1),
"msgids": [
"1583"
],
"time": [
NumberInt(1450854385)
],
"word": "ghost"
}
2.
{
"_id": ObjectId("5679485ff0058ed6755ab3dd"),
"hash_count": NumberInt(1),
"msgids": [
"1563"
],
"time": [
NumberInt(1450788886)
],
"word": "dear"
}
3.
{
"_id": ObjectId("567941aaf0058ed6755ab3dc"),
"hash_count": NumberInt(9),
"msgids": [
"1555",
"1556",
"1557",
"1558",
"1559",
"1561",
"1562",
"1584",
"1585"
],
"time": [
NumberInt(1450787170),
NumberInt(1450787292),
NumberInt(1450787307),
NumberInt(1450787333),
NumberInt(1450787354),
NumberInt(1450787526),
NumberInt(1450787615),
NumberInt(1450855148),
NumberInt(1450855155)
],
"word": "batman"
}
4.
{
"_id": ObjectId("567939cdf0058ed6755ab3d9"),
"hash_count": NumberInt(3),
"msgids": [
"1551",
"1552",
"1586"
],
"time": [
NumberInt(1450785157),
NumberInt(1450785194),
NumberInt(1450856188)
],
"word": "bugs"
}
So I want to count the number of values in the field 'time' which fall between two limits,
such as this
foreach word
{
foreach time
{
if((a<time)&&(time<b))
word[count]++
}
}
but my query is just giving output of the total size of array 'time'.
What is the correct query?
For example,
if the lower bound is 1450787615 and the upper bound is 1450855155,
there are 3 values in 'time' for the word 'batman'.
The answer should be
{
"_id": "batman",
"count": NumberInt(3)
},
for batman. Thank you.
Use the following aggregation pipeline:
db.hashtags.aggregate([
    // Pre-filter on the array field so fewer documents reach $unwind.
    { $match: { time: { $gte: 1450787615, $lte: 1450855155 } } },
    // Emit one document per element of the "time" array.
    { $unwind: "$time" },
    // Keep only the unwound elements that lie within the inclusive bounds.
    { $match: { time: { $gte: 1450787615, $lte: 1450855155 } } },
    // Count the remaining elements for each word.
    { $group: { _id: "$word", count: { $sum: 1 } } }
])
For the given sample documents, this will yield:
/* 0 */
{
"result" : [
{
"_id" : "batman",
"count" : 3
},
{
"_id" : "dear",
"count" : 1
},
{
"_id" : "ghost",
"count" : 1
}
],
"ok" : 1
}

Combining aggregate operations in a single result

I have two aggregate operations that I'd like to combine. The first operation returns, for example:
{ "_id" : "Colors", "count" : 12 }
{ "_id" : "Animals", "count" : 6 }
and the second operation returns, for example:
{ "_id" : "Red", "count" : 10 }
{ "_id" : "Blue", "count" : 9 }
{ "_id" : "Green", "count" : 9 }
{ "_id" : "White", "count" : 7 }
{ "_id" : "Yellow", "count" : 7 }
{ "_id" : "Orange", "count" : 7 }
{ "_id" : "Black", "count" : 5 }
{ "_id" : "Goose", "count" : 4 }
{ "_id" : "Chicken", "count" : 3 }
{ "_id" : "Grey", "count" : 3 }
{ "_id" : "Cat", "count" : 3 }
{ "_id" : "Rabbit", "count" : 3 }
{ "_id" : "Duck", "count" : 3 }
{ "_id" : "Turkey", "count" : 2 }
{ "_id" : "Elephant", "count" : 2 }
{ "_id" : "Shark", "count" : 2 }
{ "_id" : "Fish", "count" : 2 }
{ "_id" : "Tiger", "count" : 2 }
{ "_id" : "Purple", "count" : 1 }
{ "_id" : "Pink", "count" : 1 }
How do I combine the 2 operations to achieve the following?
{ "_id" : "Colors", "count" : 12, "items" :
[
{ "_id" : "Red", "count" : 10 },
{ "_id" : "Blue", "count" : 9 },
{ "_id" : "Green", "count" : 9 },
{ "_id" : "White", "count" : 7 },
{ "_id" : "Yellow", "count" : 7 },
{ "_id" : "Orange", "count" : 7 },
{ "_id" : "Black", "count" : 5 },
{ "_id" : "Grey", "count" : 3 },
{ "_id" : "Purple", "count" : 1 },
{ "_id" : "Pink", "count" : 1 }
]
},
{ "_id" : "Animals", "count" : 6, "items" :
[
{ "_id" : "Goose", "count" : 4 },
{ "_id" : "Chicken", "count" : 3 },
{ "_id" : "Cat", "count" : 3 },
{ "_id" : "Rabbit", "count" : 3 },
{ "_id" : "Duck", "count" : 3 },
{ "_id" : "Turkey", "count" : 2 },
{ "_id" : "Elephant", "count" : 2 },
{ "_id" : "Shark", "count" : 2 },
{ "_id" : "Fish", "count" : 2 },
{ "_id" : "Tiger", "count" : 2 }
]
}
Schema
var ListSchema = new Schema({
created: {
type: Date,
default: Date.now
},
title: {
type: String,
default: '',
trim: true,
required: 'Title cannot be blank'
},
items: {
type: Array,
default: [String],
trim: true
},
creator: {
type: Schema.ObjectId,
ref: 'User'
}
});
Operation 1
db.lists.aggregate(
[
{ $group: { _id: "$title", count: { $sum: 1 } } },
{ $sort: { count: -1 } }
]
)
Operation 2
db.lists.aggregate(
[
{ $unwind: "$items" },
{ $group: { _id: "$items", count: { $sum: 1 } } },
{ $sort: { count: -1 } }
]
)
This really depends on the kind of results you are after in a response. The things you are asking about seem to indicate that you are looking for "facet counts" in a result, but I'll touch on that a bit later.
For a basic result, there is nothing wrong with this as an approach:
Thing.aggregate(
[
{ "$group": {
"_id": {
"type": "$type", "name": "$name"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.type",
"count": { "$sum": "$count" },
"names": {
"$push": { "name": "$_id.name", "count": "$count" }
}
}}
],
function(err,results) {
console.log(JSON.stringify(results, undefined, 2));
callback(err);
}
)
Which should give you a result like this:
[
{
"_id": "colours",
"count": 50102,
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 }
]
},
{
"_id": "animals",
"count": 49898,
"names": [
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
}
]
Where the very basic approach here is to simply $group in two stages, where the first stage aggregates on the combination of keys down to the lowest ( most granular ) grouping level, and then process a $group again to basically "add up" the totals on the highest ( least granular ) grouping level, also thus adding the lower results to an array of items.
But this is not "separated" as it would be in "facet counts", so to do this becomes a little more complex, as well as a little more insane. But first the example:
// Produces a single document whose "facets" object holds two independent
// lists: counts per type and counts per name.
Thing.aggregate(
[
// Stage 1: count documents per (type, name) pair, the most granular level.
{ "$group": {
"_id": {
"type": "$type",
"name": "$name"
},
"count": { "$sum": 1 }
}},
// Stage 2: roll up to one document per type, summing the pair counts and
// collecting the per-name counts of that type into the "names" array.
{ "$group": {
"_id": "$_id.type",
"count": { "$sum": "$count" },
"names": {
"$push": { "name": "$_id.name", "count": "$count" }
}
}},
// Stage 3: collapse everything into one document (_id: null). "types" becomes
// an array of per-type counts, "names" an array of the per-type name arrays.
{ "$group": {
"_id": null,
"types": {
"$push": {
"type": "$_id", "count": "$count"
}
},
"names": { "$push": "$names" }
}},
// "names" is an array of arrays at this point, so it is unwound twice:
// first into the per-type arrays, then into the individual name entries.
{ "$unwind": "$names" },
{ "$unwind": "$names" },
// Re-assemble a single flat "names" array. Grouping on the whole "types"
// array keeps it intact as the _id of the one resulting document.
{ "$group": {
"_id": "$types",
"names": { "$push": "$names" }
}},
// Final shape: both lists under "facets", plus an empty "data" placeholder.
{ "$project": {
"_id": 0,
"facets": {
"types": "$_id",
"names": "$names",
},
"data": { "$literal": [] }
}}
],
function(err,results) {
// The pipeline yields exactly one document, so only results[0] is printed.
console.log(JSON.stringify(results[0], undefined, 2));
callback(err);
}
);
Which will produce output like this:
{
"facets": {
"types": [
{ "type": "colours", "count": 50102 },
{ "type": "animals", "count": 49898 }
],
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 },
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
},
"data": []
}
What should be apparent though is while "possible", the kind of "juggling" going on here in the pipeline to produce this output format is not really efficient. Compared to the first example, there is a lot of overhead in here just to simply split out the results into their own array responses and independently of the grouping keys. This notably becomes more complex with the more "facets" to generate.
Also, as hinted at here in the output, what people generally ask of "facet counts" is that the result "data" is also included in the response ( likely paged ) in addition to the aggregated facets. So the further complications should be apparent right here:
{ "$group": {
"_id": null,
(...)
Where the requirement of this type of operation is to basically "stuff" every piece of data into a single object. In most cases, and certainly where you want the actual data in results ( using 100,000 in this sample ) it becomes completely impractical to follow this approach and will almost certainly exceed the BSON document limit size of 16MB.
In such a case, where you want to produce results and the "facets" of that data in a response, then the best approach here is to run each aggregation and the output page as separate query operations and "stream" the output JSON ( or other format ) back to the receiving client.
As a self contained example:
// Self-contained demo: fills a "things" database with random sample documents,
// then writes a JSON response to stdout containing two facet lists ("names"
// and "types") followed by a 25-document page of data. Each part comes from
// its own query and is written as its results arrive.
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/things');
// Sample values: each key is a "type", each array lists the "name" values
// that may be generated for that type.
var data = {
"colours": [
"Red","Blue","Green","Yellow","Orange"
],
"animals": [
"Lion","Tiger","Bear","Elephant","Monkey"
]
},
dataKeys = Object.keys(data);
var thingSchema = new Schema({
"name": String,
"type": String
});
var Thing = mongoose.model( 'Thing', thingSchema );
// The JSON response goes to stdout; progress messages go to stderr so they
// do not mix with the JSON.
var writer = process.stdout;
mongoose.connection.on("open",function(err) {
if (err) throw err;
// Three steps run one after another: clear, populate, stream the response.
async.series(
[
// Step 1: remove every existing document.
function(callback) {
process.stderr.write("removing\n");
Thing.remove({},callback);
},
// Step 2: insert 100000 random documents through unordered bulk operations.
function(callback) {
process.stderr.write("inserting\n");
var bulk = Thing.collection.initializeUnorderedBulkOp(),
count = 0;
async.whilst(
function() { return count < 100000; },
function(callback) {
// Pick a random type, then a random name belonging to that type.
// NOTE(review): Math.random() takes no parameters, so the argument passed
// here is ignored; the multiplication alone scales the value to an index.
var keyLen = dataKeys.length,
keyIndex = Math.floor(Math.random(keyLen)*keyLen),
type = dataKeys[keyIndex],
types = data[type],
typeLen = types.length,
nameIndex = Math.floor(Math.random(typeLen)*typeLen),
name = types[nameIndex];
var obj = { "type": type, "name": name };
bulk.insert(obj);
count++;
// Send the queued inserts every 1000 documents, then start a fresh bulk
// operation for the next batch.
if ( count % 1000 == 0 ) {
process.stderr.write('insert count: ' + count + "\n");
bulk.execute(function(err,resp) {
bulk = Thing.collection.initializeUnorderedBulkOp();
callback(err);
});
} else {
callback();
}
},
callback
);
},
// Step 3: write the JSON response piece by piece. Each query is started from
// the "end" handler of the previous one so the sections appear in order.
function(callback) {
writer.write("{ \n \"page\": 1,\n \"pageSize\": 25,\n")
writer.write(" \"facets\": {\n"); // open object response
// First facet: document count per name. The aggregate is issued on the
// underlying driver collection (Thing.collection) with a cursor option, and
// the returned object is consumed through "data" / "end" events.
var stream = Thing.collection.aggregate(
[
{ "$group": {
"_id": "$name",
"count": { "$sum": 1 }
}}
],
{
"cursor": {
"batchSize": 1000
}
}
);
var counter = 0;
stream.on("data",function(data) {
// Pause while this item is written, resume afterwards.
stream.pause();
// The first item opens the array; every later item is preceded by a comma.
// NOTE(review): if no "data" event fires, the opening bracket is never
// written but the "end" handler still writes the closing one.
if ( counter == 0 ) {
writer.write(" \"names\": [\n");
} else {
writer.write(",\n");
}
// Rename the grouping key from _id to "name" for the output.
data = { "name": data._id, "count": data.count };
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
// Close the "names" array, then run the second facet: count per type.
writer.write("\n ],\n");
var stream = Thing.collection.aggregate(
[
{ "$group": {
"_id": "$type",
"count": { "$sum": 1 }
}}
],
{
"cursor": {
"batchSize": 1000
}
}
);
var counter = 0;
stream.on("data",function(data) {
stream.pause();
if ( counter == 0 ) {
writer.write(" \"types\": [\n");
} else {
writer.write(",\n");
}
// The type facet also reports its grouping key under the "name" property.
data = { "name": data._id, "count": data.count };
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
// Close "types" and the "facets" object, then stream the data page:
// the first 25 documents returned by an unfiltered find().
writer.write("\n ]\n },\n");
var stream = Thing.find({}).limit(25).stream();
var counter = 0;
stream.on("data",function(data) {
stream.pause();
if ( counter == 0 ) {
writer.write(" \"data\": [\n");
} else {
writer.write(",\n");
}
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
// Close the "data" array and the root object, then finish step 3.
writer.write("\n ]\n}\n");
callback();
});
});
});
}
],
// Runs after all steps (or on the first error): rethrow errors, otherwise exit.
function(err) {
if (err) throw err;
process.exit();
}
);
});
With the output like:
{
"page": 1,
"pageSize": 25,
"facets": {
"names": [
{"name":"Red","count":10007},
{"name":"Tiger","count":10012},
{"name":"Yellow","count":10119},
{"name":"Monkey","count":9970},
{"name":"Elephant","count":10046},
{"name":"Bear","count":10082},
{"name":"Orange","count":9982},
{"name":"Green","count":10005},
{"name":"Blue","count":9884},
{"name":"Lion","count":9893}
],
"types": [
{"name":"colours","count":49997},
{"name":"animals","count":50003}
]
},
"data": [
{"_id":"55bf141f3edc150b6abdcc02","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc81b","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc81c","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc81d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc81e","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc81f","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc820","type":"colours","name":"Green"},
{"_id":"55bf141f3edc150b6abdc821","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc822","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc823","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc824","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc825","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc826","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc827","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc828","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc829","type":"colours","name":"Red"},
{"_id":"55bf141f3edc150b6abdc82a","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc82b","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc82c","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc82d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc82e","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc82f","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc830","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc831","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc832","type":"animals","name":"Elephant"}
]
}
There are some considerations in here, notably that mongoose .aggregate() does not really directly support the standard node stream interface. There is an .each() method available from .cursor() on an aggregate method, but the "stream" implied from the core API method gives a lot more control here, so the .collection method here to get the underlying driver object is preferable. Hopefully a future mongoose release will consider this.
So if your end goal is such a "facet count" alongside the results as demonstrated here, then each aggregation and results make the most sense to "stream" in the way as demonstrated. Without that, the aggregation becomes both overcomplicated as well as very likely to exceed the BSON limit, just as doing otherwise in this case would.

Group Multiple Values in Aggregation

I want to group the all field of a collection with unique total. Let's assume there is collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I found a way to get something similar to your output using aggregation; check the code below:
db.collectionName.aggregate({
"$group": {
"_id": null,
"countryOfIN": {
"$sum": {
"$cond": [{
$eq: ["$country", "IN"]
}, 1, 0]
}
},
"countryOfUK": {
"$sum": {
"$cond": [{
$eq: ["$country", "UK"]
}, 1, 0]
}
},
"countryOfUS": {
"$sum": {
"$cond": [{
$eq: ["$country", "US"]
}, 1, 0]
}
},
"stateOfHR": {
"$sum": {
"$cond": [{
$eq: ["$state", "HR"]
}, 1, 0]
}
},
"stateOfMH": {
"$sum": {
"$cond": [{
$eq: ["$state", "MH"]
}, 1, 0]
}
},
"stateOfLA": {
"$sum": {
"$cond": [{
$eq: ["$state", "LA"]
}, 1, 0]
}
},
"stateOfJK": {
"$sum": {
"$cond": [{
$eq: ["$state", "JK"]
}, 1, 0]
}
},
"operatorOfAIRTEL": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AIRTEL"]
}, 1, 0]
}
},
"operatorOfAT&T": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AT&T"]
}, 1, 0]
}
},
"operatorOfVODAFONE": {
"$sum": {
"$cond": [{
$eq: ["$operator", "VODAFONE"]
}, 1, 0]
}
}
}
}, {
"$group": {
"_id": null,
"country": {
"$push": {
"IN": "$countryOfIN",
"UK": "$countryOfUK",
"US": "$countryOfUS"
}
},
"STATE": {
"$push": {
"HR": "$stateOfHR",
"MH": "$stateOfMH",
"LA": "$stateOfLA",
"JK": "$stateOfJK"
}
},
"operator": {
"$push": {
"AIRTEL": "$operatorOfAIRTEL",
"AT&T": "$operatorOfAT&T",
"VODAFONE": "$operatorOfVODAFONE"
}
}
}
}, {
"$project": {
"_id": 0,
"country": 1,
"STATE": 1,
"operator": 1
}
})
using $cond created groups of matched data and pushed them in second groups to combine.
An output format like the one you are looking for is not really suited to the aggregation framework, since you are transforming part of your data into "key" names. The aggregation framework does not do this but rather sticks to database "best practice", as it does not transform "data" to "key" names in any way.
You can perform a mapReduce operation instead, which allows more flexibility with the manipulation, but with not as good performance due to the need to use JavaScript code to perform the manipulation:
// Counts, for every field except _id, how many documents carry each value.
// Everything is emitted under the single key null, so the result is one
// object shaped { field: { value: count, ... }, ... }.
db.collection.mapReduce(
// Map: called with the current document as `this`.
function () {
var obj = {},
doc = this;
// _id is removed so it is not counted as a field.
delete doc._id;
// For each remaining field build { field: { <value of field>: 1 } }.
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
// Reduce: merges the emitted objects by summing the counters found at each
// outerKey (field name) / innerKey (field value) position. The returned
// object has the same shape as the emitted values.
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
// Create the per-field container the first time the field is seen.
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
// Add to an existing counter, or start it with the incoming count.
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
// Return the result inline instead of writing it to a collection.
{ "out": { "inline": 1 } }
)
And in the structure that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But is fairly easy to transform in client code while iterating the results:
// Collects the counters, keyed first by field name and then by field value.
var result = {};
db.mapex.aggregate([
    // Tag every document with the list of field names to be counted.
    { $project: {
        country: 1,
        state: 1,
        operator: 1,
        type: { $literal: ["country", "state", "operator"] }
    }},
    // One copy of each document per field name.
    { $unwind: "$type" },
    // Count per (field name, field value) pair; the nested $cond selects the
    // value of whichever field "type" names.
    { $group: {
        _id: {
            type: "$type",
            key: { $cond: {
                "if": { $eq: ["$type", "country"] },
                "then": "$country",
                "else": { $cond: {
                    "if": { $eq: ["$type", "state"] },
                    "then": "$state",
                    "else": "$operator"
                }}
            }}
        },
        count: { $sum: 1 }
    }}
]).forEach(function(entry) {
    var field = entry._id.type;
    // Create the per-field container the first time the field is seen.
    if (!result.hasOwnProperty(field)) {
        result[field] = {};
    }
    result[field][entry._id.key] = entry.count;
});
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}