I have a Collection with many columns: col1, col2, WebsiteCode, CreatedDate, col3, ..., coln.
I want to group by WebsiteCode within a range of CreatedDate.
So I do:
map :
function Map() {
    var key = { WebsiteCode: this.WebsiteCode };
    var val = { Count: 1 };
    emit(key, val);
}
reduce :
function Reduce(key, values) {
var res = {Total:0};
values.forEach(function(value) {
res.Total += value.Count;
});
return res;
}
And query range DateTime:
{ "CreatedDate" : { "$gte" : dateFrom , "$lte" : dateTo } }
Finally, I run this mapreduce command.
The result is not what I expected: many rows have Total = NaN, for example:
{ "_id" : { "WebsiteCode" : "websitecode1" }, "value" : { "Total" : NaN } }
But when I run a count command:
db.collect.find({ "WebsiteCode" : "websitecode1", "CreatedDate" : { "$gte" : dateFrom, "$lte" : dateTo } }).count();
the result is: 927
Could you explain to me what I did wrong?
Your reduce function must return a value with the same shape as the values emitted by map, because MongoDB may feed the output of one reduce pass back in as input to a later pass (a re-reduce). Here the re-reduce reads value.Count from documents that only have a Total field, so res.Total += undefined produces NaN. So you can't use Count in the map and Total in the reduce.
Try this instead:
function Reduce(key, values) {
var res = {Count:0};
values.forEach(function(value) {
res.Count += value.Count;
});
return res;
}
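With that change, running the map-reduce over your date range would look something like this (a minimal sketch: db.collect, dateFrom and dateTo are taken from your question, and "website_counts" is just a placeholder name for the output collection):
db.collect.mapReduce(Map, Reduce, {
    query: { "CreatedDate" : { "$gte" : dateFrom, "$lte" : dateTo } },  // same date-range filter
    out: "website_counts"                                               // placeholder output collection
});
Each resulting document should then look like { "_id" : { "WebsiteCode" : "websitecode1" }, "value" : { "Count" : 927 } }, matching the 927 from your count() query.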
I use the following map/reduce setup to collect some data into an array:
map: function() { emit(this.key, [this.item]); },
reduce: function(key, values) {
var items = [];
values.forEach( function(value) { items = items.concat(value); } );
return items;
},
out: {reduce: "result_collection"}
I want to improve this code to detect whether the resulting collection was changed during the re-reduce stage (when Mongo invokes reduce with the current content of "result_collection").
In other words, how can I know that some documents emitted by the map contain an "item" that does not yet exist in "result_collection" (under the same key, of course)?
This information would help at further processing stages, e.g. querying "result_collection" for the documents that were updated during the map/reduce run.
If you must do this, use a finalize function to adjust the value after all reduction is finished. You'll have to add more logic to the reduce function to handle the modified output.
I'll show you an example with the simple map-reduce defined by the following map and reduce functions:
var map = function() { emit(this.k, this.v) }
var reduce = function(key, values) { return Array.sum(values) }
On documents that look like { "k" : 0, "v" : 1 }, the map-reduce defined by the above functions produces result documents that look like { "_id" : 0, "value" : 17 }. Define a finalize function to modify the final document:
var finalize = function (key, reducedValue) { return { "m" : true, "v" : reducedValue } }
Now modify reduce to handle an element of values that might be an object of the above form:
var reduce2 = function(key, values) {
var sum = 0;
for (var i = 0; i < values.length; i++) {
if (typeof values[i] == "object") { sum += values[i].v }
else { sum += values[i] }
}
return sum
}
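Wire the pieces together like this (a minimal sketch: "some_collection" is a placeholder for the source collection, and map is the simple map function defined above):
db.some_collection.mapReduce(map, reduce2, {
    finalize: finalize,                    // wraps every value written in this pass
    out: { reduce: "result_collection" }   // merge into the existing result collection
});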
Output looks like
{ "_id" : 0, "value" : { "m" : true, "v" : 14 } }
{ "_id" : 1, "value" : { "m" : true, "v" : 34 } }
{ "_id" : 2, "value" : { "m" : true, "v" : 8 } }
so you can tell what's been modified by checking value.m. Your further processing can set value.m back to false, so after each map-reduce you can see what hasn't been processed yet.
I need to compute a weighted average.
I coded it as below:
db.runCommand(
{ mapreduce : "<collecton>" ,
map: function ()
{
emit ({nkey: this.nkey}, {price: this.tags["31"], qty: this.tags["32"]});
},
reduce: function(key, vals)
{
var ret = {wavg:0};
var mul1 = 0.0;
var sum1 = 0.0;
for (var i=0;i<vals.length;i++)
{ mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
out: 'res2', verbose: true
}
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you notice, the third row of the output didn't actually go through the reduce function. That key occurs only once, so only one value was emitted for it. But I still want the reduce function to act on it so I get the weighted average. I can't just keep price and qty wherever a key occurs only once; I need the weighted average for that key as well.
Is there any way to achieve this ?
This is essentially how mapReduce works: the reducer is never called when a key has only one emitted value. But you can always alter such results with a finalize stage:
db.runCommand({
"mapreduce" : "<collecton>" ,
"map": function () {
emit (
{ "nkey": this.nkey},
{ "price": this.tags["31"], qty: this.tags["32"]}
);
},
"reduce": function(key, vals) {
var ret = { "wavg": 0 };
var mul1 = 0.0;
var sum1 = 0.0;
for ( var i=0; i<vals.length; i++ ) {
mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
"finalize": function(key,value) {
    if (value.hasOwnProperty("price")) {
        value.wavg = value.price;   // a single document's weighted average is just its price
        delete value["price"];
        delete value["qty"];        // drop qty as well so every key has the same output shape
    }
return value;
},
"out": 'res2',
"verbose": true
});
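With the finalize in place, the odd row is normalized so every key has the same output shape; for key3 the result document would become (derived from the price/qty values above, not re-run output):
{ "_id" : { "nkey" : "key3" }, "value" : { "wavg" : 1000 } }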
Alternatively, just sum in the reduce stage and do all the division in the finalize stage, if that suits your thinking better. But then you would need to do the "multiplication" part in the "mapper" for that to work.
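For reference, a minimal sketch of that alternative (the collection placeholder and tag fields are the same as above; "res3" is just a placeholder output name): multiply in the map, sum in the reduce, divide once in the finalize.
db.runCommand({
    "mapreduce" : "<collecton>",
    "map": function () {
        // do the multiplication up front so reduce only needs to add
        emit(
            { "nkey": this.nkey },
            { "total": this.tags["31"] * this.tags["32"], "qty": this.tags["32"] }
        );
    },
    "reduce": function(key, vals) {
        // output has the same shape as the input values, so re-reduce is safe
        var ret = { "total": 0, "qty": 0 };
        for ( var i=0; i<vals.length; i++ ) {
            ret.total += vals[i].total;
            ret.qty += vals[i].qty;
        }
        return ret;
    },
    "finalize": function(key, value) {
        // one division per key, after all reduction is finished
        return { "wavg": value.total / value.qty };
    },
    "out": "res3",
    "verbose": true
});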
I have a Collection with the following fields:
TestTable
{
"ID"
"Name"
"Ver"
"Serial"
"DateTime"
"FeatureID"
"FeatureName"
}
I want to have a map reduce function to get the count of the records in a particular year.
The map reduce function I wrote is:
map= function(){
year= Date.UTC(this.DateTime.getFullYear());
emit({year: year}, {count: 1});
}
reduce= function(key, values){
var count=0;
for(v in values){
count+= v['count'];
}
return {count: count};
}
Now the output should give the count of documents in each year. Is the map reduce function correct?
The result I got is:
> db.Years.find()
{ "_id" : { "year" : NaN }, "value" : { "count" : NaN } }
> db.Years.find().count()
1
Which is not what I expected.
Edit: here is one of the TestTable documents:
> db.TestTable.findOne()
{
"_id" : ObjectId("527c48e99000cf10bc2a1d82"),
"ID" : "LogID16587",
"Name" : "LogName15247",
"Ver" : "VersionID11",
"Serial" : "ProductID727",
"DateTime" : ISODate("1998-12-15T18:30:00Z"),
"FeatureID" : "FeatureID465",
"FeatureName" : "FeatureName 1460"
}
Thanks in advance.
Date.UTC requires both year and month parameters. So your map function should look like this instead:
map= function(){
year= Date.UTC(this.DateTime.getFullYear(), 0);
emit({year: year}, {count: 1});
}
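Note that Date.UTC(year, 0) returns the millisecond timestamp of January 1st of that year, so that number (not the bare year) is what ends up in the _id. If you would rather group on the year itself, a variant of the map (my suggestion, not required for the fix) would be:
map = function(){
    // emit the four-digit year directly as the key
    emit({year: this.DateTime.getFullYear()}, {count: 1});
}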
Also, don't use for...in to iterate over the elements of an array in your reduce method: it iterates over the indexes rather than the values, so v['count'] is undefined and the count ends up as NaN. Use a traditional for loop instead:
reduce= function(key, values){
var count=0;
for(var i=0; i<values.length; i++) {
count += values[i]['count'];
};
return {count: count};
}
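A minimal sketch of running it, using the TestTable and Years collection names from your question:
db.TestTable.mapReduce(map, reduce, { out: "Years" });
db.Years.find();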
I am trying to aggregate the total sum of packets in this document.
{
"_id" : ObjectId("51a6cd102769c63e65061bda"),
"capture" : "1369885967",
"packets" : {
"0" : "595",
"1" : "596",
"2" : "595",
"3" : "595",
...
}
}
The closest I can get is this:
db.collection.aggregate({ $match: { capture : "1369885967" } }, {$group: { _id:null, sum: {$sum:"$packets"}}});
However, it returns a sum of 0, which is obviously wrong.
{ "result" : [ { "_id" : null, "sum" : 0 } ], "ok" : 1 }
How do I get the sum of all the packets?
Since you have the values in an object instead of an array, you'll need to use mapReduce.
// Emit the values as integers
var mapFunction =
function() {
for (key in this.packets) {
emit(null, parseInt(this.packets[key]));
}
}
// Reduce to a simple sum
var reduceFunction =
function(key, values) {
return Array.sum(values);
}
> db.collection.mapReduce(mapFunction, reduceFunction, {out: {inline:1}})
{
"results" : [
{
"_id" : null,
"value" : 2381
}
],
"ok" : 1,
}
If at all possible, you should store the values as an array of a numeric type instead, since that gives you more options (i.e. the aggregation framework) and (unless the data set is large) probably performance benefits.
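For illustration, if the packets were stored as a numeric array instead, e.g. { "capture" : "1369885967", "packets" : [ 595, 596, 595, 595 ] } (my assumption, not your current schema), the original aggregation approach would work once you add an $unwind:
db.collection.aggregate(
    { $match: { capture: "1369885967" } },
    { $unwind: "$packets" },                            // one document per packet count
    { $group: { _id: null, sum: { $sum: "$packets" } } }
);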
If you don't know how many keys are in the packets subdocument, and since you also seem to be storing the counts as strings (why???), you will have to use mapReduce.
Something like:
m = function() {
    for (var f in this.packets) {
        emit(null, +this.packets[f]);   // unary + converts the string count to a number
    }
};
r = function(k, vals) {
    var sum = 0;
    vals.forEach(function(v) { sum += v; });
    return sum;
};
db.collection.mapReduce(m, r, {out: {inline: 1}, query: {your query condition here}});
I know I'm missing something with MapReduce in MongoDB. I'm trying to build a tag-frequency collection and I'm getting different results, even though the map and reduce functions seem to be the "same".
Example document (forget values 100, 45... I'm not using them):
{
...
tags: [['Rock', 100], ['Indie Pop', 45], ...]
}
Emitting a scalar value 1:
var map = function () {
if (this.tags) {
this.tags.forEach(function (tag) {
emit(tag[0], 1); // Emit just 1
});
}
};
var reduce = function (key, vals) { // Vals should be [1, ...]
return vals.length; // Count the length of the array
};
db.tracks.mapReduce(map, reduce, { out: 'mapreduce_out' });
db.mapreduce_out.find().sort({ value: -1 }).limit(3);
Output is:
{ "_id" : "rubyrigby1", "value" : 9 }
{ "_id" : "Dom", "value" : 7 }
{ "_id" : "Feel Better", "value" : 7 }
Emitting an object { count: 1 }:
var map = function () {
if (this.tags) {
this.tags.forEach(function (tag) {
emit(tag[0], { count: 1 }); // Emit an object
});
}
};
var reduce = function (key, vals) { // vals should be [{ count: 1 }, ...]
var count = 0;
vals.forEach(function (val) {
count += val.count; // Accumulate the count
});
return { count: count };
};
db.tracks.mapReduce(map, reduce, { out: 'mapreduce_out' });
db.mapreduce_out.find().sort({ 'value.count': -1 }).limit(3);
Result is different and appears to be "right":
{ "_id" : "rock", "value" : { "count" : 9472 } }
{ "_id" : "pop", "value" : { "count" : 7103 } }
{ "_id" : "electronic", "value" : { "count" : 5727 } }
What's wrong with the first approach?
Consider a collection of a thousand documents all with the tag 'tagname':
for (var i = 0; i < 1000; i++) {
db.collection.insert({tags: [['tagname']]});
}
If I write a proper mapReduce I should get the output {"_id": "tagname", "count": 1000}. But if I use your map and reduce functions I'll get a count of 101 instead of 1000.
The reason is, MongoDB calls your reduce function repeatedly with intermediate results, in order to avoid keeping too large a batch of results in memory. You can actually see this by putting a print statement in your reduce:
var reduce = function (key, vals) {
print(vals);
return vals.length; // Count the length of the array
};
The print output appears in the server log. The reduce function is called with the first 100 1's, and it returns 100. So far so good. Then MongoDB calls it again with the first reduce's output plus the next 100 1's:
reduce([100, 1, 1, ..., 1]) // 100 plus 100 more 1's
So now it returns 101, because that's the length of the array. But clearly it should return 200, the sum of the array. So to get a correct result, change your reduce function:
reduce = function (key, vals) {
var sum = 0;
vals.forEach(function(val) { sum += val; });
return sum;
}
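A quick way to check that a reduce function is safe for re-reduce (this check is my own illustration, not from the original answer): reducing everything in one pass must give the same result as reducing a partial batch first and feeding its output back in.
var ones = [1, 1, 1, 1];
var onePass = reduce('tagname', ones.concat(ones));                       // sums eight 1's -> 8
var twoPass = reduce('tagname', [reduce('tagname', ones)].concat(ones));  // 4 fed back in, plus four 1's -> 8
print(onePass === twoPass);  // true for the fixed reduce; false for the vals.length version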