Aggregate analytics data by day/next day logic - mongodb

I wrote a server for mobile app analytics in which I have a sharded (see update!) collection with events as follows:
{
"event": "install",
"userId": "a",
"time": 2014-02-09,
"data" : ...
},
{
"event": "login",
"userId": "a",
"time": 2014-02-12,
"data" : ...
},
{
"event": "install",
"userId": "b",
"time": 2014-4-29,
"data" : ...
},
{
"event": "login",
"userId": "b",
"time": 2014-4-30,
"data" : ...
}
...
I need to select users who have an install event but no login event on the day after the install (in other words, I want to select users who install the app but do not log in the next day). So the output for the above data should be:
{
"userId": "a",
"data" : ...
}
How do I carry out this task with the aggregation framework or map-reduce? Or maybe another solution?

This is a bit tricky one :-)
You can do it with aggregation, if time is just a date field (without time data),
then
having collection
{
"_id" : ObjectId("57694365ef9176ec54960a66"),
"event" : "install",
"userId" : "a",
"time" : ISODate("2014-09-02T00:00:00.000Z")
},{
"_id" : ObjectId("57694365ef9176ec54960a67"),
"event" : "login",
"userId" : "a",
"time" : ISODate("2014-12-02T00:00:00.000Z")
},{
"_id" : ObjectId("57694365ef9176ec54960a68"),
"event" : "install",
"userId" : "b",
"time" : ISODate("2014-04-29T00:00:00.000Z")
},{
"_id" : ObjectId("57694365ef9176ec54960a69"),
"event" : "login",
"userId" : "b",
"time" : ISODate("2014-04-30T00:00:00.000Z")
}
we can use aggregate query:
// Stage 1: only installs are candidates for "installed but never logged in".
var match = {
    $match : {
        "event" : "install"
    }
};
// Stage 2: compute the day after the install. Adding a millisecond count
// to a date with $add yields a date, so nextDay is time + 24h.
var projectNextDayDate = {
    $project : {
        _id : 1,
        event : 1,
        userId : 1,
        time : 1,
        nextDay : {
            $add : ["$time", 24 * 60 * 60 * 1000]
        }
    }
};
// Stage 3: self-join against the same collection, pulling in every event
// whose time equals the install's next day (for ANY user at this point).
var lookup = {
    $lookup : {
        from : "zella",
        localField : "nextDay",
        foreignField : "time",
        as : "mergedDocs"
    }
};
// Stage 4: narrow the joined events to the same user. Note the "$" prefix:
// $filter's input must be a field PATH ("$mergedDocs"), not a bare name.
var nowMatchUsers = {
    $project : {
        _id : 1,
        event : 1,
        userId : 1,
        time : 1,
        nextDay : 1,
        mergedDocs : {
            $filter : {
                input : "$mergedDocs",
                as : "m",
                cond : {
                    $eq : ["$$m.userId", "$userId"]
                }
            }
        }
    }
};
// Stage 5: an empty array means this user had no event on the next day.
var findEmptyArrays = {
    $match : {
        mergedDocs : []
    }
};
// The per-user filter stage must run BEFORE the empty-array match,
// otherwise next-day events from other users mask churned installs.
db.zella.aggregate([match, projectNextDayDate, lookup, nowMatchUsers, findEmptyArrays]);
with this output:
{
"_id" : ObjectId("57694365ef9176ec54960a66"),
"event" : "install",
"userId" : "a",
"time" : ISODate("2014-09-02T00:00:00.000Z"),
"nextDay" : ISODate("2014-09-03T00:00:00.000Z"),
"mergedDocs" : []
}
The assumption here is that time is a date 2014-09-02T00:00:00.000
The other way to merge the collections is to use the user ID as the $lookup join point, but then more logic is needed to filter the result set, and performance could suffer.

You could try running the following aggregation pipeline:
db.test.aggregate([
    // Filter first: restricting to install/login before the $project means
    // the projection (and a possible index on "event") touches fewer docs,
    // with identical results.
    { "$match": { "event": { "$in": ["install", "login"] } } },
    {
        "$project": {
            "event": 1,
            "userId": 1,
            "time": 1,
            "data": 1,
            // Day after this event: time + 24h in milliseconds.
            "dayAfter": {
                "$add": [ "$time", 24 * 60 * 60 * 1000 ]
            }
        }
    },
    // Collect each user's events into a per-user timeline.
    {
        "$group": {
            "_id": "$userId",
            "eventsTimeLine": {
                "$push": {
                    "event": "$event",
                    "time": "$time",
                    "dayAfter": "$dayAfter"
                }
            },
            "data": { "$push": "$data" }
        }
    },
    { "$unwind": "$eventsTimeLine" },
    // "install" sorts before "login", so after this sort $first picks the
    // install entry and $last picks the login entry (or the install again
    // when the user never logged in).
    { "$sort": { "eventsTimeLine.event": 1 } },
    {
        "$group": {
            "_id": "$_id",
            "dayAfterInstall": { "$first": "$eventsTimeLine.dayAfter" },
            "loginTime": { "$last": "$eventsTimeLine.time" },
            "data": { "$first": "$data" }
        }
    },
    // Churned = there is no login whose time equals the day after install.
    {
        "$project": {
            "isChurn": { "$ne": [ "$loginTime", "$dayAfterInstall" ] },
            "userId": "$_id", "data": 1, "_id": 0
        }
    },
    { "$match" : { "isChurn" : true } }
])

Here another solution with mapreduce and aggregation:
// Map phase: key each event by (userId, day-after-install). An "install"
// emits under the day AFTER its time; a "login" emits under its own day.
// A user who logged in the day after installing therefore emits twice
// under the same key, and the reduce phase counts those emits.
var mapFunction = function() {
    if (this.event != 'install' && this.event != 'login'){
        return;
    }
    var value = {data: this.data, count: 1};
    if (this.event == 'install'){
        // The field is called "time" in the collection (not "date").
        var nextDay = new Date(this.time.getTime() + 24 * 60 * 60 * 1000);
        emit({userId:this.userId, nextDayAfterInstall:nextDay}, value );
    } else
    if (this.event == 'login'){
        emit({userId:this.userId, nextDayAfterInstall:this.time}, value );
    }
};
// Reduce phase: sum the per-emit counts, keeping the last data seen.
// The accumulator must start at 0, not 1: MongoDB may call reduce on
// partial results (re-reduce), and a non-zero seed inflates the count
// each time.
var reduceFunction = function(event, values) {
    var value = { data: null, count: 0 };
    for (var index = 0; index < values.length; ++index) {
        value.count += values[index].count;
        value.data = values[index].data;
    }
    return value ;
};
// Run the map-reduce into an output collection; each result document is
// { _id: { userId, nextDayAfterInstall }, value: { data, count } }.
db.events.mapReduce(
mapFunction,
reduceFunction,
{ out: "case1_mr_out" }
)
// Regroup per user: the max count across a user's keys tells us whether
// ANY (userId, day) key received both an install and a login emit.
var groupByUserId = {
$group :
{
_id : { userId: "$_id.userId" },
data : { $last: '$value.data' },
count : { $max: '$value.count' }
}
}
// Keep only users whose keys each saw a single event — i.e. no login
// landed on the day after the install.
var filterWhereOnlyOne = {
$match : {
"count" : 1
}
};
db.case1_mr_out.aggregate([groupByUserId,filterWhereOnlyOne])

Related

How to update array of objects to LowerCase in mongodb?

I need to update the role in team array to lowercase.
db.users.find().pretty().limit(1)
{
"_id" : ObjectId("5d9fd81d3d598088d2ea5dc9"),
"employed" : "USA-Atlanta",
"firstName" : "Rory",
"siteRole" : "super admin",
"status" : "active",
"team" : [
{
"name" : "SALES AND MARKETING",
"displayName" : "S&M",
"role" : "Manager"
}
]
}
I tried this code, but it only works for normal (top-level) fields:
// NOTE(review): this reads and writes a TOP-LEVEL 'role' field, but the
// documents store the role inside the 'team' array ('team.[].role'), so
// doc.role is undefined here and the embedded values are never touched —
// which is the problem being asked about.
db.users.find( {}, { 'role': 1 } ).forEach(function(doc) {
db.users.update(
{ _id: doc._id},
{ $set : { 'role' : doc.role.toLowerCase() } },
{ multi: true }
)
});
sample output
"team" : [
{
"name" : "SALES AND MARKETING",
"displayName" : "S&M",
"role" : "manager"
}
]
I think the below Aggregation query is what you are looking for
// Lower-case every team[].role: an aggregation rebuilds each team array
// with the role folded to lower case, and the cursor loop writes each
// rebuilt array back with updateOne, printing a running count.
var updatedCount = 0;
db.users.aggregate([
    // Only documents that actually carry a team role need rewriting.
    { "$match": { "team.role": { $exists: true } } },
    // Rebuild the team array: merge a lower-cased role over each member,
    // keeping every other member field untouched.
    {
        "$project": {
            "_id": 1,
            "teamModified": {
                "$map": {
                    "input": "$team",
                    "as": "arrayElems",
                    "in": {
                        "$mergeObjects": [
                            "$$arrayElems",
                            { "role": { "$toLower": "$$arrayElems.role" } }
                        ]
                    }
                }
            }
        }
    }
]).forEach(function (userDoc) {
    db.users.updateOne(
        { "_id": userDoc["_id"] },
        { "$set": { "team": userDoc["teamModified"] } }
    );
    printjson(++updatedCount);
});
printjson("DONE!!!")
Note: I haven't tested the script properly in my local, so do let me know if it didn't help you out

Mongo DB - To get the last data for every unique email address - Grouped by Regions

// NOTE(review): $addToSet accumulates into an UNORDERED set, so the $last
// after $unwind picks an arbitrary element — "last" here is not guaranteed
// to be the most recent observation.
db.TYPE_Join_STEP_COUNTER_GPS.aggregate([
// Restrict to the January 2019 observation window.
{ $match: { 'Time.ObservationTimestamp': { $gte: ISODate("2019-01-01T00:00:00+00:00"), $lt: ISODate("2019-02-02T00:00:00+00:00") } } },
{
// One bucket per user email, collecting the distinct sensor values.
$group: {
_id: { users: "$userAccount.userEmail" },
sensorValue: { $addToSet: "$sensorValue.sensorValue" }
}
},
{
$unwind: "$sensorValue"
},
{
// Collapse back to one document per user, keeping the last unwound value.
$group: { _id: "$_id", lastdata: {$last: "$sensorValue"} }
}
])
I got the below output for this query.
/* 1 */
{
"_id" : {
"users" : "test1#gmail.com"
},
"lastdata" : "245"
},
/* 2 */
{
"_id" : {
"users" : "test2#gmail.com"
},
"lastdata" : "135"
}
What I need: the last sensor value for each unique email ID in every region. I tried to add one more group-by condition on the region but it didn't work. Please let me know if I am missing something. Thank you in advance.
Sample Collection:
{
"_id" : ObjectId("5c52f5370ad3076c2398ba83"),
"userAccount" : {
"userId" : "115867679428105538091",
"userEmail" : "test1#gmail.com"
},
"deviceId" : "9312dfc5b63063c7",
"sensorType" : "TYPE_Join_STEP_COUNTER_GPS",
"sensorValue" : {
"sensorValue" : "257",
"regoin" : "Region1"
},
"Time" : {
"ObservationTimestamp" : ISODate("2019-01-30T00:00:00.000+00:00")
}
}
Expected Output:
/* 1 */
{
"_id" : {
"regoin" : "Region1"
},
users: "Test1#gmail.com" , "lastdata" : "135"
users: "Test2#gmail.com" , "lastdata" : "105"
},
/* 2 */
{
"_id" : {
"regoin" : "Region2"
},
users: "Test1#gmail.com" , "lastdata" : "105"
users: "Test2#gmail.com" , "lastdata" : "100"
},
You can use $last accumulator to get the last sensor value
db.TYPE_Join_STEP_COUNTER_GPS.aggregate([
// Restrict to the observation window first.
{ "$match": {
"Time.ObservationTimestamp": {
"$gte": ISODate("2019-01-01T00:00:00+00:00"),
"$lt": ISODate("2019-02-02T00:00:00+00:00")
}
}},
// $last depends on document order, so sort by observation time before
// grouping; without this, "last" is whatever order the server scans.
{ "$sort": { "Time.ObservationTimestamp": 1 } },
// One bucket per (email, region) pair; $last now yields the most recent
// sensor value for that pair. ("regoin" matches the field name as stored.)
{ "$group": {
"_id": { "userEmail": "$userAccount.userEmail", "regoin": "$sensorValue.regoin" },
"sensorValue": { "$last": "$sensorValue.sensorValue" }
}},
// Regroup by region alone, collecting each user's last value.
{ "$group": {
"_id": "$_id.regoin",
"data": {
"$push": {
"user": "$_id.userEmail",
"sensorValue": "$sensorValue"
}
}
}}
])

Find balance from debit and credit entries mongodb

I have a Mongo collection like this:
{
"user_id" : "1",
"branch_id" : "1",
"trans_type":"DEBIT",
"total" : 500
},
{
"user_id" : "1",
"branch_id" : "1",
"trans_type":"CREDIT",
"total" : 200
},
{
"user_id" : "1",
"branch_id" : "3",
"trans_type":"DEBIT",
"total" : 1400
},
{
"user_id" : "2",
"branch_id" : "1",
"trans_type":"DEBIT",
"total" : 100
},
{
"user_id" : "2",
"branch_id" : "1",
"trans_type":"CREDIT",
"total" : 100
}
The expected output is this:
[
{
"user_id":"1",
"branch_id":"1",
"final_balance":"300"
},
{
"user_id":"1",
"branch_id":"3",
"final_balance":"1400"
},
{
"user_id":"2",
"branch_id":"1",
"final_balance":"0"
}
]
Note that in the output I am looking for the final balance after checking out debit and credit entries per user per branch.
Thank you.
That sounds like a simple $group with a $cond would do the job for you:
// One $group does the whole job: group on the (user_id, branch_id) pair
// and use $cond inside $sum so DEBIT rows add their total while CREDIT
// rows subtract it, yielding the net balance per user per branch.
db.collection.aggregate({
$group: {
"_id": { // group by both fields, "user_id" and "branch_id"
"user_id": "$user_id",
"branch_id": "$branch_id"
},
"final_balance": {
$sum: { // calculate the sum of all "total" values
$cond: {
if: { $eq: [ "$trans_type", "DEBIT" ] }, // in case of "DEBIT", we want the stored value for "total"
then: "$total",
else: { $multiply: [ "$total", -1 ] } // otherwise we want the stored value for "total" times -1
}
}
}
}
}, {
$project: { // this is not really needed unless you specifically need the output format you mentioned in the question
"_id": 0,
"user_id": "$_id.user_id",
"branch_id": "$_id.branch_id",
"final_balance": "$final_balance",
}
})
// Running balance per transaction: compute each row's signed movement,
// then take a cumulative sum in date order via $setWindowFields
// (requires MongoDB 5.0+). The original snippet did not parse — it
// contained pseudocode arguments ("where(any condition)") and a stray
// "stage2 =" assignment inside the array literal, which would have
// created an implicit global; both are fixed here.
let docData = await db.Transactions.aggregate([
    {
        // Put any real filter conditions here.
        $match: { /* any condition */ }
    },
    {
        // Signed movement for this row: debit increases, credit decreases.
        $addFields: {
            runningBalance: { $subtract: ['$debit', '$credit'] }
        }
    },
    {
        // Cumulative sum of runningBalance from the first document up to
        // and including the current one, ordered by transaction date.
        $setWindowFields: {
            sortBy: { transaction_date: 1 },
            output: {
                runningTotal: {
                    $sum: "$runningBalance",
                    window: {
                        documents: ["unbounded", "current"]
                    }
                }
            }
        }
    },
    {
        // Final presentation order; replace with any sort specification.
        $sort: { transaction_date: 1 }
    }
]);

RESTHeart filtering and sorting by sub documents property

I m working with mongodb and restheart.
In my nosql db i have a unique document with this structure:
{
"_id": "docID",
"users": [
{
"userID": "12",
"elements": [
{
"elementID": "1492446877599",
"events": [
{
"id": 1,
"date": 356
},
{
"id": 2,
"date": 123
}
]
}
]
},
{
"userID": "11",
"elements": [
{
"elementID": "14924",
"events": [
{
"id": 1,
"date": 123
},
{
"id": 2,
"date": 356
}
]
},
{
"elementID": "14925",
"events": [
{
"id": 1,
"date": 12
},
{
"id": 2,
"date": 36
}
]
}
]
}
I need to filter the user with userID = 11 and order that user's events by ascending date.
i was trying with:
http://myhost:port/myCollection?keys={"users":{"$elemMatch":{"userID":"11"}}}&sort_by={"users.elements.events.date":-1}
but it doesn't work.
// Flatten users -> elements -> events, sort the events by ascending date,
// then rebuild a single document for user "11".
db.v.aggregate([
{ $unwind : '$users'},
{ $match : { 'users.userID' : '11' }} ,
{ $unwind : '$users.elements'},
{ $unwind : '$users.elements.events'},
// Ascending event date, as requested.
{ $sort : {'users.elements.events.date': 1}},
{ $group : {
_id : '$_id',
// NOTE(review): $first keeps only ONE elementID per document, so a user
// with several elements (e.g. 14924 and 14925) is collapsed to the first
// encountered — confirm this matches the intended output.
elementID : { $first : '$users.elements.elementID' },
userID : { $first : '$users.userID' },
events : { $push : '$users.elements.events'}
}
},
// Reshape into the nested "elements" form shown below.
{ $project : {
_id : 1,
userID : 1,
'elements.elementID' : '$elementID',
'elements.events' : '$events'
}
}
]);
This will give you following :
{
"_id" : ObjectId("5911ba55f0d9c285c561ea33"),
"userID" : "11",
"elements" : {
"elementID" : "14924",
"events" : [
{
"id" : 1,
"date" : 123
},
{
"id" : 2,
"date" : 356
}
]
}
}

Combing aggregate operations in a single result

I have two aggregate operations that I'd like to combine. The first operation returns, for example:
{ "_id" : "Colors", "count" : 12 }
{ "_id" : "Animals", "count" : 6 }
and the second operation returns, for example:
{ "_id" : "Red", "count" : 10 }
{ "_id" : "Blue", "count" : 9 }
{ "_id" : "Green", "count" : 9 }
{ "_id" : "White", "count" : 7 }
{ "_id" : "Yellow", "count" : 7 }
{ "_id" : "Orange", "count" : 7 }
{ "_id" : "Black", "count" : 5 }
{ "_id" : "Goose", "count" : 4 }
{ "_id" : "Chicken", "count" : 3 }
{ "_id" : "Grey", "count" : 3 }
{ "_id" : "Cat", "count" : 3 }
{ "_id" : "Rabbit", "count" : 3 }
{ "_id" : "Duck", "count" : 3 }
{ "_id" : "Turkey", "count" : 2 }
{ "_id" : "Elephant", "count" : 2 }
{ "_id" : "Shark", "count" : 2 }
{ "_id" : "Fish", "count" : 2 }
{ "_id" : "Tiger", "count" : 2 }
{ "_id" : "Purple", "count" : 1 }
{ "_id" : "Pink", "count" : 1 }
How do I combine the 2 operations to achieve the following?
{ "_id" : "Colors", "count" : 12, "items" :
[
{ "_id" : "Red", "count" : 10 },
{ "_id" : "Blue", "count" : 9 },
{ "_id" : "Green", "count" : 9 },
{ "_id" : "White", "count" : 7 },
{ "_id" : "Yellow", "count" : 7 },
{ "_id" : "Orange", "count" : 7 },
{ "_id" : "Black", "count" : 5 },
{ "_id" : "Grey", "count" : 3 },
{ "_id" : "Purple", "count" : 1 },
{ "_id" : "Pink", "count" : 1 }
]
},
{ "_id" : "Animals", "count" : 6, "items" :
[
{ "_id" : "Goose", "count" : 4 },
{ "_id" : "Chicken", "count" : 3 },
{ "_id" : "Cat", "count" : 3 },
{ "_id" : "Rabbit", "count" : 3 },
{ "_id" : "Duck", "count" : 3 },
{ "_id" : "Turkey", "count" : 2 },
{ "_id" : "Elephant", "count" : 2 },
{ "_id" : "Shark", "count" : 2 },
{ "_id" : "Fish", "count" : 2 },
{ "_id" : "Tiger", "count" : 2 }
]
}
Schema
// Mongoose schema for a "list" document: a titled, timestamped list of
// string items owned by a User.
var ListSchema = new Schema({
created: {
type: Date,
default: Date.now
},
title: {
type: String,
default: '',
trim: true,
required: 'Title cannot be blank'
},
items: {
// NOTE(review): "default: [String]" supplies an array containing the
// String constructor as the default VALUE; [String] was probably meant
// as the type. Also "trim" applies to String paths, not arrays — verify.
type: Array,
default: [String],
trim: true
},
creator: {
type: Schema.ObjectId,
ref: 'User'
}
});
Operation 1
// Operation 1: one bucket per list title, counting how many lists share
// it, most frequent first.
db.lists.aggregate(
[
{ $group: { _id: "$title", count: { $sum: 1 } } },
{ $sort: { count: -1 } }
]
)
Operation 2
// Operation 2: flatten every list's items and count how often each item
// appears across all lists, most frequent first.
db.lists.aggregate(
[
{ $unwind: "$items" },
{ $group: { _id: "$items", count: { $sum: 1 } } },
{ $sort: { count: -1 } }
]
)
This really depends on the kind of results you are after in a respone. The things you are asking about seem to indicate that you are looking for "facet counts" in a result, but I'll touch on that a bit later.
For as basic result, there is nothing wrong with this as an approach:
// Two-stage rollup: first count each (type, name) pair at the most
// granular level, then regroup by type alone, summing the pair counts
// and pushing the per-name counts into an array.
Thing.aggregate(
[
{ "$group": {
"_id": {
"type": "$type", "name": "$name"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.type",
"count": { "$sum": "$count" },
"names": {
"$push": { "name": "$_id.name", "count": "$count" }
}
}}
],
function(err,results) {
// Pretty-print the aggregation result, then signal completion.
console.log(JSON.stringify(results, undefined, 2));
callback(err);
}
)
Which should give you a result like this:
[
{
"_id": "colours",
"count": 50102,
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 }
]
},
{
"_id": "animals",
"count": 49898,
"names": [
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
}
]
Where the very basic approach here is to simply $group in two stages, where the first stage aggregates on the combination of keys down to the lowest ( most granular ) grouping level, and then process a $group again to basically "add up" the totals on the highest ( least granular ) grouping level, also thus adding the lower results to an array of items.
But this is not "separated" as it would be in "facet counts", so to do this becomes a little more complex, as well as a little more insane. But first the example:
// "Facet count" variant: after the same two-stage rollup, squash
// everything into one document so types and names come back as separate
// arrays. Note the $group on _id: null — the entire result must fit in a
// single 16MB document, which is why the prose below warns against it.
Thing.aggregate(
[
{ "$group": {
"_id": {
"type": "$type",
"name": "$name"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.type",
"count": { "$sum": "$count" },
"names": {
"$push": { "name": "$_id.name", "count": "$count" }
}
}},
// Collapse to one document holding both facet arrays.
{ "$group": {
"_id": null,
"types": {
"$push": {
"type": "$_id", "count": "$count"
}
},
"names": { "$push": "$names" }
}},
// "names" is now an array of arrays; unwind twice to flatten it.
{ "$unwind": "$names" },
{ "$unwind": "$names" },
// Regroup on the (single, shared) "types" array to re-collect the names.
{ "$group": {
"_id": "$types",
"names": { "$push": "$names" }
}},
// Final shape: facets object plus an empty placeholder for paged data.
{ "$project": {
"_id": 0,
"facets": {
"types": "$_id",
"names": "$names",
},
"data": { "$literal": [] }
}}
],
function(err,results) {
console.log(JSON.stringify(results[0], undefined, 2));
callback(err);
}
);
Which will produce output like this:
{
"facets": {
"types": [
{ "type": "colours", "count": 50102 },
{ "type": "animals", "count": 49898 }
],
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 },
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
},
"data": []
}
What should be apparent though is while "possible", the kind of "juggling" going on here in the pipeline to produce this output format is not really efficient. Compared to the first example, there is a lot of overhead in here just to simply split out the results into their own array responses and independently of the grouping keys. This notably becomes more complex with the more "facets" to generate.
Also, as hinted at here in the output, what people generally ask of "facet counts" is that the result "data" is also included in the response ( likely paged ) in addition to the aggregated facets. So the further complications should be apparent right here:
{ "$group": {
"_id": null,
(...)
Where the requirement of this type of operation is to basically "stuff" every piece of data into a single object. In most cases, and certainly where you want the actual data in results ( using 100,000 in this sample ) it becomes completely impractical to follow this approach and will almost certainly exceed the BSON document limit size of 16MB.
In such a case, where you want to produce results and the "facets" of that data in a response, then the best approach here is to run each aggregation and the output page as separate query operations and "stream" the output JSON ( or other format ) back to the receiving client.
As a self contained example:
// End-to-end demo: seed 100k random Things with bulk inserts, then stream
// a facet-count JSON response (names facet, types facet, first data page)
// to stdout without ever building the whole response in memory.
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/things');
// Sample domain: two "types", five "names" each.
var data = {
"colours": [
"Red","Blue","Green","Yellow","Orange"
],
"animals": [
"Lion","Tiger","Bear","Elephant","Monkey"
]
},
dataKeys = Object.keys(data);
var thingSchema = new Schema({
"name": String,
"type": String
});
var Thing = mongoose.model( 'Thing', thingSchema );
// All JSON output goes to stdout; progress messages go to stderr.
var writer = process.stdout;
mongoose.connection.on("open",function(err) {
if (err) throw err;
async.series(
[
// Task 1: clear the collection.
function(callback) {
process.stderr.write("removing\n");
Thing.remove({},callback);
},
// Task 2: insert 100,000 random documents in batches of 1,000.
function(callback) {
process.stderr.write("inserting\n");
var bulk = Thing.collection.initializeUnorderedBulkOp(),
count = 0;
async.whilst(
function() { return count < 100000; },
function(callback) {
// Pick a random type, then a random name within that type.
// NOTE(review): Math.random() ignores its argument; the expression
// still yields a valid random index.
var keyLen = dataKeys.length,
keyIndex = Math.floor(Math.random(keyLen)*keyLen),
type = dataKeys[keyIndex],
types = data[type],
typeLen = types.length,
nameIndex = Math.floor(Math.random(typeLen)*typeLen),
name = types[nameIndex];
var obj = { "type": type, "name": name };
bulk.insert(obj);
count++;
if ( count % 1000 == 0 ) {
// Flush the batch, then start a fresh bulk operation.
process.stderr.write('insert count: ' + count + "\n");
bulk.execute(function(err,resp) {
bulk = Thing.collection.initializeUnorderedBulkOp();
callback(err);
});
} else {
callback();
}
},
callback
);
},
// Task 3: stream the JSON response. Three cursors run one after the
// other, each started from the previous one's "end" handler so the
// output sections appear in order.
function(callback) {
writer.write("{ \n \"page\": 1,\n \"pageSize\": 25,\n")
writer.write(" \"facets\": {\n"); // open object response
// Facet 1: counts per name, via an aggregation cursor.
var stream = Thing.collection.aggregate(
[
{ "$group": {
"_id": "$name",
"count": { "$sum": 1 }
}}
],
{
"cursor": {
"batchSize": 1000
}
}
);
var counter = 0;
stream.on("data",function(data) {
// Pause while writing so output stays ordered under backpressure.
stream.pause();
// First element opens the array; later elements are comma-separated.
if ( counter == 0 ) {
writer.write(" \"names\": [\n");
} else {
writer.write(",\n");
}
data = { "name": data._id, "count": data.count };
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
writer.write("\n ],\n");
// Facet 2: counts per type (same pattern as above).
var stream = Thing.collection.aggregate(
[
{ "$group": {
"_id": "$type",
"count": { "$sum": 1 }
}}
],
{
"cursor": {
"batchSize": 1000
}
}
);
var counter = 0;
stream.on("data",function(data) {
stream.pause();
if ( counter == 0 ) {
writer.write(" \"types\": [\n");
} else {
writer.write(",\n");
}
data = { "name": data._id, "count": data.count };
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
writer.write("\n ]\n },\n");
// Finally, the first page of raw documents (25 of them).
var stream = Thing.find({}).limit(25).stream();
var counter = 0;
stream.on("data",function(data) {
stream.pause();
if ( counter == 0 ) {
writer.write(" \"data\": [\n");
} else {
writer.write(",\n");
}
writer.write(" " + JSON.stringify(data));
counter++;
stream.resume();
});
stream.on("end",function() {
// Close the JSON document and finish the series.
writer.write("\n ]\n}\n");
callback();
});
});
});
}
],
function(err) {
if (err) throw err;
process.exit();
}
);
});
With the output like:
{
"page": 1,
"pageSize": 25,
"facets": {
"names": [
{"name":"Red","count":10007},
{"name":"Tiger","count":10012},
{"name":"Yellow","count":10119},
{"name":"Monkey","count":9970},
{"name":"Elephant","count":10046},
{"name":"Bear","count":10082},
{"name":"Orange","count":9982},
{"name":"Green","count":10005},
{"name":"Blue","count":9884},
{"name":"Lion","count":9893}
],
"types": [
{"name":"colours","count":49997},
{"name":"animals","count":50003}
]
},
"data": [
{"_id":"55bf141f3edc150b6abdcc02","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc81b","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc81c","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc81d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc81e","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc81f","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc820","type":"colours","name":"Green"},
{"_id":"55bf141f3edc150b6abdc821","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc822","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc823","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc824","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc825","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc826","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc827","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc828","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc829","type":"colours","name":"Red"},
{"_id":"55bf141f3edc150b6abdc82a","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc82b","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc82c","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc82d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc82e","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc82f","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc830","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc831","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc832","type":"animals","name":"Elephant"}
]
}
There are some considerations in here, notably that mongoose .aggregate() does not really directly support the standard node stream interface. There is an .each() method available from .cursor() on an aggregate method, but the "stream" implied from the core API method gives a lot more control here, so the .collection method here to get the underlying driver object is preferable. Hopefully a future mongoose release will consider this.
So if your end goal is such a "facet count" alongside the results as demonstrated here, then each aggregation and results make the most sense to "stream" in the way as demonstrated. Without that, the aggregation becomes both overcomplicated as well as very likely to exceed the BSON limit, just as doing otherwise in this case would.