Mongodb Is it possible to aggregate an object? - mongodb

I am trying to aggregate the total sum of packets in this document.
{
"_id" : ObjectId("51a6cd102769c63e65061bda"),
"capture" : "1369885967",
"packets" : {
"0" : "595",
"1" : "596",
"2" : "595",
"3" : "595",
...
}
}
The closest I can get is:
db.collection.aggregate({ $match: { capture : "1369885967" } }, {$group: { _id:null, sum: {$sum:"$packets"}}});
However it returns sum 0, which is obviously wrong.
{ "result" : [ { "_id" : null, "sum" : 0 } ], "ok" : 1 }
How do I get the sum of all the packets?

Since you have the values in an object instead of an array, you'll need to use mapReduce.
// Map: emit every packet count of this document, as an integer, under a
// single null key so that all counts end up in one total.
var mapFunction =
function() {
    // Declare the loop variable; without `var` it is an implicit global
    // (and a ReferenceError in strict mode).
    for (var key in this.packets) {
        // Counts are stored as strings. Always pass the radix so a value with
        // a leading zero can never be interpreted as octal.
        emit(null, parseInt(this.packets[key], 10));
    }
}
// Reduce: collapse every value emitted under one key into its total.
var reduceFunction = function(key, values) {
    var total = Array.sum(values);
    return total;
};
> db.collection.mapReduce(mapFunction, reduceFunction, {out: {inline:1}})
{
"results" : [
{
"_id" : null,
"value" : 2381
}
],
"ok" : 1,
}
If at all possible, you should emit the values as an array of a numeric type instead since that gives you more options (ie aggregation) and (unless the data set is large) probably performance benefits.

If you don't know how many keys are in the packet subdocument and since you also seem to be storing counts as strings (why???) you will have to use mapReduce.
Something like:
// Map: emit every packet count in the document under a single null key.
var m = function() {
    // Iterate the sub-document itself, not the string literal "this.packets".
    for (var f in this.packets) {
        // Unary plus converts the stored string count to a number.
        emit(null, +this.packets[f]);
    }
};
// Reduce: add up all values emitted for the key.
var r = function(k, vals) {
    var sum = 0;
    vals.forEach(function(v) { sum += v; });
    return sum;
};
// The shell method is camel-cased (mapReduce); put your own filter document in `query`.
db.collection.mapReduce(m, r, {out: {inline: 1}, query: { /* your query condition here */ }});

Related

Calculate average using mapreduce in MongoDb

I have a collection of 10 million records which resembles this.
{
"_id" : ObjectId("596dd10bbd1a6628ace1c14c"),
"X" : 13212,
"Z" : 173836,
"userID" : 9354785
}
User ID is unique. I have to calculate the average of X and sum of Z. I can calculate the sum of Z using the following mapReduce function
var mapFunction1 = function() {
emit(this.userID, this.Z);
};
var reduceFunction1 = function() {
return Array.sum(Z);
};
db.transaction.mapReduce(
mapfunction1,
reduceFunction1,
{out:"mapreduce"}
)
How do i calculate the average of X?
I tried Array.avg(Z) but it returns the same output as sum(Z).
It looks like the requirements can be expressed more simply using the Aggregation Pipeline with the $avg and $sum operators.
Input
> db.transactions.find()
{ "_id" : ObjectId("5970e59e26507421fa20bee9"), "X" : 13212, "Z" : 173836, "userID" : 9354785 }
{ "_id" : ObjectId("5970e5a426507421fa20beea"), "X" : 1234, "Z" : 5678, "userID" : 1 }
{ "_id" : ObjectId("5970e5a826507421fa20beeb"), "X" : 100, "Z" : 200, "userID" : 2 }
Aggregation Pipeline
> db.transactions.aggregate([
{
$group : {
_id: "aggregates",
avgX: {
$avg: "$X"
},
sumZ: {
$sum: "$Z"
}
}
}
])
Output
{ "_id" : "aggregates", "avgX" : 4848.666666666667, "sumZ" : 179714 }
Your reduceFunction1 does not declare the (key, values) parameters that MongoDB passes to it.
Try this:
// Map: emit a partial aggregate (running sum and count) per userID.
// This emits Z as the original snippet did; use this.X to average X instead.
var mapFunction1 = function() {
    emit(this.userID, { sum: this.Z, count: 1 });
};
// Reduce: merge partial aggregates. The result has the same shape as the
// emitted value, so it stays correct when MongoDB feeds earlier reduce output
// back into reduce (an average of averages would not be).
var reduceFunction1 = function(varKey, varZ) {
    var merged = { sum: 0, count: 0 };
    for (var i = 0; i < varZ.length; i++) {
        merged.sum += varZ[i].sum;
        merged.count += varZ[i].count;
    }
    return merged;
};
// Finalize: turn the merged aggregate into the average once reduction is done.
var finalizeFunction1 = function(varKey, reduced) {
    return reduced.sum / reduced.count;
};
db.transaction.mapReduce(
    mapFunction1, // was spelled "mapfunction1", which is not defined
    reduceFunction1,
    {out: "mapreduce", finalize: finalizeFunction1}
)

MongoDB - Aggregate fields inside an object

I have the following dataset in MongoDB:
{
_id: 574718ec2bc91f565db33897,
topic: {
T69: 0.9566255761668587
}
},
{
_id: 574718ec2bc91f565db33899,
topic: {
T257: 0.046038051058499445,
T2: 1.8206715756325407,
T31: 0.08838710118945285
}
},
{
_id: 574718ec2bc91f565db33889,
topic: {
T95: 0.37718859499517865,
T40: 0.2620479937270479,
T2: 0.3594989449758472,
T1: 1.9161288780994465
}
}
I've been trying to create an aggregation query which returns the sum of all topics, Tn, over the set of all such documents. Can anyone give me a pointer in the right direction? Since I'm new to MongoDB I couldn't find an answer to this problem (though this seemed related $unwind an object in aggregation framework).
Our best bet here is mapReduce. In our map function all we need is to iterate over the "topic" property and emit the value. To get the total sum in the collection we need to "emit" with null as key value.
In the reduce function we simply use the Array.sum method to return the sum.
db.coll.mapReduce(
    // Map: emit every own field of "topic" under one shared null key.
    function() {
        var topics = this.topic;
        var owns = Object.prototype.hasOwnProperty;
        for (var name in topics) {
            if (!owns.call(topics, name)) {
                continue;
            }
            emit(null, topics[name]);
        }
    },
    // Reduce: total of everything emitted for the key.
    function(key, value) {
        var total = Array.sum(value);
        return total;
    },
    { "out": { "inline": 1 } }
)
which produces:
{
"results" : [
{
"_id" : null,
"value" : 5.826586715844872
}
],
"timeMillis" : 26,
"counts" : {
"input" : 3,
"emit" : 8,
"reduce" : 1,
"output" : 1
},
"ok" : 1
}
If you want the "sum" for each document, simply call emit(this._id, this.topic[key]) in your map function instead of emit(null, this.topic[key])
I think you can't do it with the MongoDB aggregation framework (which works better with collections/arrays of subdocuments), but it is pretty simple with a map/reduce. For example, you can try:
db.YOURCOLLECTION.mapReduce(
    // Map: emit each topic score under its own topic name, so the totals
    // come out per topic.
    function () {
        // Documents without a "topic" sub-document contribute nothing.
        var topic = this.topic || {};
        Object.keys(topic).forEach(function(k) {
            emit(k, topic[k]);
        });
    },
    // Reduce: add up all scores emitted for one topic name.
    function (key, values) {
        return Array.sum(values);
    },
    // mapReduce requires an output target; return the results inline.
    { out: { inline: 1 } }
);

How to detect the re-reduce stage in MongoDB map/reduce?

I use the following map/reduce setup to collect some data into array:
map: function() { emit(this.key, [this.item]); },
reduce: function(key, values) {
var items = [];
values.forEach( function(value) {items.concat(value.item);} );
return items;
},
out: {reduce: "result_collection"}
I want to improve the code and detect if the resulting collection has been changed during the re-reduce stage (when mongo invokes reduce with the current content of the "result_collection").
In other words, how can I tell whether any of the documents emitted by the map contain an "item" that does not exist in the "result_collection" yet (under the same key, of course)?
This information can help at some further processing stages e.g. query "result_collection" to get the documents that have been updated during the map/reduce stage.
If you must do this, use a finalize function to adjust the value after all reduction is finished. You'll have to add more logic to the reduce function to handle the modified output.
I'll show you an example with the simple map-reduce defined by the following map and reduce functions:
// Map: emit this document's v under its k.
var map = function() {
    var groupKey = this.k;
    var amount = this.v;
    emit(groupKey, amount);
};
// Reduce: total of all values emitted under one key.
var reduce = function(key, values) {
    var total = Array.sum(values);
    return total;
};
On documents that look like { "k" : 0, "v" : 1 }, the map-reduce defined by the above functions produces result documents that look like { "_id" : 0, "value" : 17 }. Define a finalize function to modify the final document:
// Finalize: wrap the reduced value in an object whose "m" flag is true.
var finalize = function (key, reducedValue) {
    var wrapped = {};
    wrapped["m"] = true;
    wrapped["v"] = reducedValue;
    return wrapped;
};
Now modify reduce to handle an element of values that might be an object of the above form:
// Reduce that accepts plain numbers as well as { m, v } objects: an object
// contributes its "v" field, anything else is added as-is.
var reduce2 = function(key, values) {
    var total = 0;
    var idx = 0;
    while (idx < values.length) {
        var entry = values[idx];
        total += (typeof entry === "object") ? entry.v : entry;
        idx += 1;
    }
    return total;
};
Output looks like
{ "_id" : 0, "value" : { "m" : true, "v" : 14 } }
{ "_id" : 1, "value" : { "m" : true, "v" : 34 } }
{ "_id" : 2, "value" : { "m" : true, "v" : 8 } }
so you can tell what's been modified by value.m. Your further processing can set value.m to false so you'll see what hasn't been processed yet after each map-reduce.

MongoDB Map-Reduce combine document with dynamic schema

I'm trying what I think should be a simple map reduce, but am having trouble because I can't find a reference of how to write the server side javascript.
Given two documents:
{
"_id" : ObjectId("530c8b58d95cd926144055d9"),
"atomic" : "p",
"doc" : {
"d1" : "t"
},
"array" : ["e"]
},
{
"_id" : ObjectId("530c8b71d95cd926144055da"),
"atomic" : "p",
"doc" : {
"d2" : "r"
},
"array" : ["f"]
}
I would like the result to be
{
"_id" : "p",
"value" : {
"doc" : {
"d1" : "t",
"d2" : "r"
},
"array" : ["e", "f"]
}
}
The map function is:
function () {
emit(
this.atomic,
{doc: this.doc, array: this.array}
);
}
The incorrect reduce function is:
function (key, values) {
var reduced = {doc:{}, array:[]};
values.forEach(function(val){
for(var i = 0; i < val.array.length; i++)
reduced.array.push(val.array[i]);
val.doc.forEach(function(kvp){reduced.doc.add(kvp.key, kvp.value);});
});
return reduced;
}
The part with the array is fine; it is trying to combine the documents that is messing up (i.e. not executing due to a missing function). I've tried all permutations I can think of -- if I add the val.doc to an array then they all show up, it's just that I can't figure out how to merge it into a single document.
The fields in the doc will be dynamic so there is no way to reference it by name.
Any help would be appreciated.
Not sure the reduced.doc.add bit will work.
Maybe try:
function (key, values) {
var reduced = {doc:{}, array:[]};
values.forEach(function(val){
for(var i = 0; i < val.array.length; i++)
reduced.array.push(val.array[i]);
for (kvp in val.doc){
reduced.doc[kvp]=val.doc[kvp];
}
});
return reduced;
}

Mongo: count the number of word occurrences in a set of documents

I have a set of documents in Mongo. Say:
[
{ summary:"This is good" },
{ summary:"This is bad" },
{ summary:"Something that is neither good nor bad" }
]
I'd like to count the number of occurrences of each word (case insensitive), then sort in descending order. The result should be something like:
[
"is": 3,
"bad": 2,
"good": 2,
"this": 2,
"neither": 1,
"nor": 1,
"something": 1,
"that": 1
]
Any idea how to do this? Aggregation framework would be preferred, as I understand it to some degree already :)
MapReduce might be a good fit: it can process the documents on the server without any manipulation on the client (there isn't a feature to split a string on the DB server; that is an open issue).
Start with the map function. In the example below (which likely needs to be more robust), each document is passed to the map function (as this). The code looks for the summary field and if it's there, lowercases it, splits on a space, and then emits a 1 for each word found.
// Map: emit (word, 1) for every non-empty, lower-cased, space-separated token
// of this document's summary, walking the tokens from last to first.
var map = function() {
    var text = this.summary;
    if (!text) {
        return; // nothing to count without a summary
    }
    var words = text.toLowerCase().split(" ");
    var idx = words.length;
    while (idx-- > 0) {
        var word = words[idx];
        // punctuation stripping etc. would go here
        if (word) {
            emit(word, 1);
        }
    }
};
Then, in the reduce function, it sums all of the results found by the map function and returns a discrete total for each word that was emitted above.
// Reduce: add up the counts emitted for one word.
var reduce = function( key, values ) {
    return values.reduce(function(total, v) {
        return total + v;
    }, 0);
};
Finally, execute the mapReduce:
> db.so.mapReduce(map, reduce, {out: "word_count"})
The results with your sample data:
> db.word_count.find().sort({value:-1})
{ "_id" : "is", "value" : 3 }
{ "_id" : "bad", "value" : 2 }
{ "_id" : "good", "value" : 2 }
{ "_id" : "this", "value" : 2 }
{ "_id" : "neither", "value" : 1 }
{ "_id" : "nor", "value" : 1 }
{ "_id" : "something", "value" : 1 }
{ "_id" : "that", "value" : 1 }
A basic MapReduce example
// Map: emit (word, 1) for each lower-cased, space-separated word of summary.
var m = function() {
    // Documents without a string summary contribute nothing.
    if (typeof this.summary !== "string") {
        return;
    }
    var words = this.summary.split(" ");
    for (var i = 0; i < words.length; i++) {
        // Consecutive spaces produce empty tokens; those are not words.
        if (words[i]) {
            emit(words[i].toLowerCase(), 1);
        }
    }
};
// Reduce: sum the counts. The values may already be partial totals from an
// earlier reduce pass, so counting them with v.length would undercount.
var r = function(k, v) {
    var total = 0;
    for (var i = 0; i < v.length; i++) {
        total += v[i];
    }
    return total;
};
// Run the job with m as the map function and r as the reduce function;
// the "merge" output option targets the "words_count" collection.
db.collection.mapReduce(
m, r, { out: { merge: "words_count" } }
)
This will insert word counts into a collection named words_count, which you can sort (and index).
Note that it doesn't use stemming, omit punctuation, handle stop words, etc.
Also note you can optimize the map function by accumulating repeating word(s) occurrences and emitting the count, not just 1
You can use $split.
Try the query below:
db.summary.aggregate([
    // Lower-case first so "This" and "this" count as the same word, then split on spaces.
    { $project : { summary : { $split: [{ $toLower: "$summary" }, " "] } } },
    // One document per word.
    { $unwind : "$summary" },
    // Drop the empty tokens produced by consecutive spaces.
    { $match : { summary : { $ne : "" } } },
    // Count the occurrences of each word.
    { $group : { _id: "$summary" , total : { "$sum" : 1 } } },
    // Most frequent words first.
    { $sort : { total : -1 } }
]);
Old question but since 4.2 this can be done with $regexFindAll now.
db.summaries.aggregate([
    // Collect every word of the summary into the "occurences" array.
    {$project: {
        occurences: {
            $regexFindAll: {
                input: '$summary',
                regex: /\b\w+\b/, // match words
            }
        }
    }},
    // One document per matched word.
    {$unwind: '$occurences'},
    {$group: {
        // Group by the lower-cased word so the count is case-insensitive.
        _id: {$toLower: '$occurences.match'},
        totalOccurences: {
            $sum: 1 // add up total occurences
        }
    }},
    // Most frequent words first.
    {$sort: {
        totalOccurences: -1
    }}
]);
This will output docs in the following format:
{
_id: "matchedwordstring",
totalOccurences: number
}