mongodb: reduce function not invoked when there is only one result - mongodb

I need to compute a weighted average. I wrote the code below:
db.runCommand(
    { mapreduce : "<collecton>" ,
      map: function () {
          emit({nkey: this.nkey}, {price: this.tags["31"], qty: this.tags["32"]});
      },
      reduce: function(key, vals) {
          var ret = {wavg: 0};
          var mul1 = 0.0;
          var sum1 = 0.0;
          for (var i = 0; i < vals.length; i++) {
              mul1 += vals[i].price * vals[i].qty;
              sum1 += vals[i].qty;
          }
          ret.wavg = mul1 / sum1;
          return ret;
      },
      out: 'res2', verbose: true
    }
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you notice, in the final output (third row), the value never actually went through the reduce function. That key occurs only once, so only one value was emitted and MongoDB skipped the reduce step for it. But I still want the reduce logic applied to it to get the weighted average. I can't just fall back to the raw price and qty wherever a key occurs only once, because I need the weighted average for that key too.
Is there any way to achieve this?

This is essentially how mapReduce works: the reduce function is never called when a key has only a single emitted value. But you can always adjust such results with a finalize stage:
db.runCommand({
    "mapreduce" : "<collecton>" ,
    "map": function () {
        emit(
            { "nkey": this.nkey },
            { "price": this.tags["31"], "qty": this.tags["32"] }
        );
    },
    "reduce": function(key, vals) {
        var ret = { "wavg": 0 };
        var mul1 = 0.0;
        var sum1 = 0.0;
        for ( var i = 0; i < vals.length; i++ ) {
            mul1 += vals[i].price * vals[i].qty;
            sum1 += vals[i].qty;
        }
        ret.wavg = mul1 / sum1;
        return ret;
    },
    // finalize runs once for every key, including keys whose single
    // value never passed through reduce
    "finalize": function(key, value) {
        if (value.hasOwnProperty("price")) {
            // a single value: its weighted average is simply its price
            value.wavg = value.price;
            delete value["price"];
            delete value["qty"];
        }
        return value;
    },
    "out": 'res2',
    "verbose": true
});
Alternatively, just sum in the reduce stage and do all the division in the finalize stage, if that suits your thinking better. But then you would need to do the "multiplication" part in the "mapper" for that to work.
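For illustration, here is a minimal sketch of that alternative, using the same placeholder collection and tag fields as above (my reading of the suggestion, not code from the original answer):
db.runCommand({
    "mapreduce": "<collecton>",
    // do the multiplication up front in the mapper
    "map": function () {
        emit(
            { "nkey": this.nkey },
            { "mul": this.tags["31"] * this.tags["32"], "qty": this.tags["32"] }
        );
    },
    // the reducer only sums, so its output has the same shape as its
    // input and a key with a single value needs no special handling
    "reduce": function (key, vals) {
        var ret = { "mul": 0, "qty": 0 };
        for (var i = 0; i < vals.length; i++) {
            ret.mul += vals[i].mul;
            ret.qty += vals[i].qty;
        }
        return ret;
    },
    // all division happens exactly once per key, after reduction
    "finalize": function (key, value) {
        return { "wavg": value.mul / value.qty };
    },
    "out": "res2"
});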

Related

Mongodb arithmetic expression of elements inside an array

{
    "_id" : ObjectId("5a4d0712368ff072a56a66e8"),
    "a" : {
        "nodes" : [
            { "b" : 1 },
            {
                "c" : {
                    "nodes" : [
                        { "d" : 2 },
                        { "e" : 2 }
                    ]
                }
            }
        ]
    },
    "r" : 1
}
{
    "_id" : ObjectId("5a4d0712368ff072a56a66e9"),
    "a" : {
        "nodes" : [
            { "b" : 4 },
            {
                "c" : {
                    "nodes" : [
                        { "e" : 9 }
                    ]
                }
            }
        ]
    },
    "r" : 2
}
Above are the sample documents. I want to find the sum of a.nodes.b * a.nodes.c.nodes.d * a.nodes.c.nodes.e over all documents.
I have tried many pipelines, but I am not able to generalize the aggregation. Any help would be appreciated.
A nodes array can be present again at any level. If I get the solution for this, I will try to generalize the aggregation pipeline.
MongoDB version 3.2.
Expected calculation and output (the second document has no d, so its product counts as 0):
(1 * 2 * 2) + (4 * 0 * 9) = 4
I doubt it is possible to handle "nodes array can be present again at any level." with aggregation.
With map-reduce it could be something like this:
db.collection.mapReduce(
    function () {
        // recursively walk nested "nodes" arrays, multiplying every
        // numeric leaf and remembering which keys were seen
        let reducer = (result, node) => {
            Object.keys(node).forEach(key => {
                if (typeof node[key] === "object") {
                    if (node[key].nodes && Array.isArray(node[key].nodes)) {
                        result = node[key].nodes.reduce(reducer, result);
                    }
                } else {
                    result.product = result.product * node[key];
                    result.keys.add(key);
                }
            });
            return result;
        };
        let { product, keys } = this.a.nodes.reduce(reducer, { product: 1, keys: new Set() });
        emit(null, { product, keys: Array.from(keys) });
    },
    function (key, values) {
        // if a document is missing any key another document has,
        // its product contributes 0 to the total
        return values.reduce((total, item) => {
            let totalSum = total.sum, itemSum = item.product;
            for (let key of item.keys) if (!total.keys.includes(key)) totalSum = 0;
            for (let key of total.keys) if (!item.keys.includes(key)) itemSum = 0;
            return { sum: totalSum + itemSum, keys: Array.from(new Set([...total.keys, ...item.keys])) };
        }, { sum: 0, keys: [] }).sum;
    },
    { query: { "a.nodes": { $exists: true } }, out: { inline: 1 } }
);
The map function recursively multiplies all numeric leaf values and records the set of keys it saw; the reduce function compares those key sets, zeroes out any product that is missing a key present in another document, and sums the totals.
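For the two sample documents, the map stage emits something like this (a worked trace, not actual shell output):
// doc 1: product = 1 * 2 * 2 = 4,  keys seen = ["b", "d", "e"]
emit(null, { product: 4, keys: ["b", "d", "e"] });
// doc 2: product = 4 * 9 = 36,     keys seen = ["b", "e"] (no "d")
emit(null, { product: 36, keys: ["b", "e"] });
// reduce: the key sets differ, so doc 2's product is zeroed: 4 + 0 = 4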

How to detect the re-reduce stage in MongoDB map/reduce?

I use the following map/reduce setup to collect some data into an array:
map: function() { emit(this.key, [this.item]); },
reduce: function(key, values) {
    var items = [];
    values.forEach( function(value) { items = items.concat(value); } );
    return items;
},
out: {reduce: "result_collection"}
I want to improve the code and detect whether the resulting collection was changed during the re-reduce stage (when mongo invokes reduce with the current content of "result_collection").
In other words, how can I know that a document emitted by the map contains an "item" that does not yet exist in "result_collection" (under the same key, of course)?
This information would help at further processing stages, e.g. querying "result_collection" for the documents that were updated during the map/reduce run.
If you must do this, use a finalize function to adjust the value after all reduction is finished. You'll have to add more logic to the reduce function to handle the modified output.
I'll show you an example with the simple map-reduce defined by the following map and reduce functions:
var map = function() { emit(this.k, this.v) }
var reduce = function(key, values) { return Array.sum(values) }
On documents that look like { "k" : 0, "v" : 1 }, the map-reduce defined by the above functions produces result documents that look like { "_id" : 0, "value" : 17 }. Define a finalize function to modify the final document:
var finalize = function (key, reducedValue) { return { "m" : true, "v" : reducedValue } }
Now modify reduce to handle an element of values that might be an object of the above form:
var reduce2 = function(key, values) {
    var sum = 0;
    for (var i = 0; i < values.length; i++) {
        // a previously finalized value is an object { m: ..., v: ... };
        // a freshly emitted value is a plain number
        if (typeof values[i] == "object") { sum += values[i].v }
        else { sum += values[i] }
    }
    return sum
}
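Wiring the pieces together might look like this (collection and output names are assumptions for illustration):
db.collection.mapReduce(map, reduce2, {
    out: { reduce: "result_collection" },
    finalize: finalize
});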
Output looks like
{ "_id" : 0, "value" : { "m" : true, "v" : 14 } }
{ "_id" : 1, "value" : { "m" : true, "v" : 34 } }
{ "_id" : 2, "value" : { "m" : true, "v" : 8 } }
so you can tell what's been modified by checking value.m. Your further processing can then set value.m back to false, so after each map-reduce you can see what hasn't been processed yet.
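That follow-up pass could be a simple multi-update, e.g. (a sketch, assuming the output collection above):
// mark everything as seen; the next map-reduce run will set m back to
// true only on documents whose values were re-reduced or newly finalized
db.result_collection.update(
    {},
    { $set: { "value.m": false } },
    { multi: true }
);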

MongoDB map-reduce result not exact

I have a data structure in MongoDB as below:
{
    "_id" : ObjectId("523aab00045624a385e5f549"),
    "name" : "English Book 29",
    "SKU" : 1000549081,
    "price" : 249000,
    "image" : null,
    "category_id" : ObjectId("523a7802b50418baf38b4575"),
    "category_name" : "English Book",
    "details" : {
        "Title" : "Title 549081",
        "Binding" : 1,
        "Author" : "Author 0",
        "Publication data" : 0.5263832447608386,
        "Publisher name" : "Publisher name 14",
        "Number of page" : 90
    }
}
The Binding field of a book has two values: 0 means soft binding, 1 means hard binding. I wrote a map-reduce to compute statistics for each value.
var map = function()
{
    for(var key in this.details)
    {
        if(key == 'Binding')
        {
            emit({name: key}, {
                'data': [
                    {
                        name: this.details[key],
                        count: 1
                    }
                ]
            });
        }
    }
};
var reduce = function (key, values) {
    var reduced = {};
    for(var i in values)
    {
        var inter = values[i];
        for(var j in inter.data)
        {
            if(typeof(reduced[inter.data[j].name]) != "undefined")
            {
                reduced[inter.data[j].name] += inter.data[j].count;
            }
            else
            {
                reduced[inter.data[j].name] = 1;
            }
        }
    }
    return reduced;
};
When I run it on small data (50 records) the result is correct. But when I run it on the real data (192,000 records) the result is not. The result is as below:
{
    "_id" : { "name" : "Binding" },
    "value" : { "0" : 50, "1" : 50 }
}
I checked the stats returned when the map/reduce finished:
"counts" : {
"input" : 192000,
"emit" : 192000,
"reduce" : 1920,
"output" : 1
},
What is wrong with it? Any suggestion or explanation is welcome.
Thanks and best regards,
After researching map/reduce yesterday, I realized that emitted values are handed to reduce in batches of about 100, and the output of one reduce call is fed back into later reduce calls. So my code above was wrong because its reduce output did not have the same shape as the emitted values; it only happened to "SUM" correctly on a small data set.
Below that is my new code for Map-Reduce
var map = function ()
{
for(var key in this.details)
{
if(key == 'Binding')
{
var value = {};
value[this.details[key]] = 1;
emit(key, value);
}
}
}
var reduce = function (key, values)
{
var reduced = {};
for(var idx = 0; idx < values.length; idx++)
{
var inner = values[idx];
for (var j in inner)
{
if (typeof (reduced[j]) == 'undefined')
{
reduced[j] = 0;
}
reduced[j] += inner[j];
}
}
return reduced;
}
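The key point is that the reduce output now has the same shape as the emitted values, so re-reducing partial results still sums correctly. A quick sanity check in the shell (hypothetical partial results):
reduce("Binding", [ { "0" : 1 }, { "1" : 1 }, { "0" : 50, "1" : 49 } ]);
// => { "0" : 51, "1" : 50 } -- a previous reduce output folds back in cleanly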
I post it here for anyone who meets a similar situation. Thanks for reading.

MongoDB MapReduce, different results with the "same approach", what I'm missing?

I know I'm missing something with MapReduce in MongoDB. I'm trying to build a tag-frequency collection and I'm getting different results, even though the two map and reduce pairs seem to take the "same" approach.
Example document (forget values 100, 45... I'm not using them):
{
    ...
    tags: [['Rock', 100], ['Indie Pop', 45], ...]
}
Emitting a scalar value 1:
var map = function () {
    if (this.tags) {
        this.tags.forEach(function (tag) {
            emit(tag[0], 1); // Emit just 1
        });
    }
};
var reduce = function (key, vals) { // vals should be [1, ...]
    return vals.length; // Count the length of the array
};
db.tracks.mapReduce(map, reduce, { out: 'mapreduce_out' });
db.mapreduce_out.find().sort({ value: -1 }).limit(3);
Output is:
{ "_id" : "rubyrigby1", "value" : 9 }
{ "_id" : "Dom", "value" : 7 }
{ "_id" : "Feel Better", "value" : 7 }
Emitting an object { count: 1 }:
var map = function () {
    if (this.tags) {
        this.tags.forEach(function (tag) {
            emit(tag[0], { count: 1 }); // Emit an object
        });
    }
};
var reduce = function (key, vals) { // vals should be [{ count: 1 }, ...]
    var count = 0;
    vals.forEach(function (val) {
        count += val.count; // Accumulate
    });
    return { count: count };
};
db.tracks.mapReduce(map, reduce, { out: 'mapreduce_out' });
db.mapreduce_out.find().sort({ 'value.count': -1 }).limit(3);
Result is different and appears to be "right":
{ "_id" : "rock", "value" : { "count" : 9472 } }
{ "_id" : "pop", "value" : { "count" : 7103 } }
{ "_id" : "electronic", "value" : { "count" : 5727 } }
What's wrong with the first approach?
Consider a collection of a thousand documents all with the tag 'tagname':
for (var i = 0; i < 1000; i++) {
    db.collection.insert({ tags: [['tagname']] });
}
If I write a proper mapReduce I should get the output { "_id" : "tagname", "value" : 1000 }. But if I use your map and reduce functions I'll get a count of 101 instead of 1000.
The reason is, MongoDB calls your reduce function repeatedly with intermediate results, in order to avoid keeping too large a batch of results in memory. You can actually see this by putting a print statement in your reduce:
var reduce = function (key, vals) {
    print(vals);
    return vals.length; // Count the length of the array
};
The print output appears in the server log. The reduce function is called with the first 100 1's, and it returns 100. So far so good. Then MongoDB calls it again with the first reduce's output plus the next 100 1's:
reduce([100, 1, 1, ..., 1]) // 100 plus 100 more 1's
So now it returns 101, because that's the length of the array. But clearly it should return 200, the sum of the array. So to get a correct result, change your reduce function:
var reduce = function (key, vals) {
    var sum = 0;
    vals.forEach(function(val) { sum += val; });
    return sum;
}
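Equivalently, the mongo shell's built-in Array.sum helper (used in the original definition) keeps it to one line:
var reduce = function (key, vals) { return Array.sum(vals); };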

MongoDB - Geospatial Index with Aggregation

I've read in the docs that it is not possible to use a geospatial index with aggregation in MongoDB. Is there an alternative? I am attempting to run a query that grabs all activities within a certain radius, then groups/sorts them by the number of times each activity has occurred. Is there a way around this issue?
You can use map-reduce on a geo query. Here is an example based on geo_mapreduce.js (from the mongodb jstests):
// setup test collection
var act_date = new Date(2010, 6, 7); // JS months are 0-based: July 7, 2010
for (var i = 1; i <= 10; i++) {
    db.activity.insert( { "geo" : { "lat" : 32.68331909, "long" : 69.41610718 }, "date" : act_date, "activity" : 9 * i } );
    db.activity.insert( { "geo" : { "lat" : 35.01860809, "long" : 70.92027283 }, "date" : act_date, "activity" : 3 } );
    db.activity.insert( { "geo" : { "lat" : 31.11639023, "long" : 64.19970703 }, "date" : act_date, "activity" : 11 } );
    db.activity.insert( { "geo" : { "lat" : 32.64500046, "long" : 69.36251068 }, "date" : act_date, "activity" : 9 } );
    db.activity.insert( { "geo" : { "lat" : 33.23638916, "long" : 69.81360626 }, "date" : act_date, "activity" : 22 } );
    act_date.setDate(act_date.getDate() + 1);
}
db.activity.ensureIndex( { "geo" : "2d" } );
var center = [ 32.68, 69.41 ];
var radius = 10 / 111; // 10km; 1 arc degree ~= 111km
var geo_query = { "geo" : { '$within' : { '$center' : [ center, radius ] } } };
// map function
var m = function() {
    emit( this.date, { "activity" : this.activity } );
};
// reduce function
var r = function(key, values) {
    var total = 0;
    for ( var i = 0; i < values.length; i++ ) {
        total += values[i].activity;
    }
    return { "activity" : total };
};
// mapreduce with geo query
var res = db.activity.mapReduce( m, r, { out : { inline : 1 }, query : geo_query } );
// sort results, highest activity first
res.results.sort(function(a, b) { return b.value.activity - a.value.activity; });
for (var i = 0; i < res.results.length; i++) {
    print("Date: " + res.results[i]._id + " Activity: " + res.results[i].value.activity);
}