MongoDB Map - Reduce result not exactly - mongodb

I had data structure in MongoDB as below
{
"_id" : ObjectId("523aab00045624a385e5f549"),
"name" : "English Book 29",
"SKU" : 1000549081,
"price" : 249000,
"image" : null,
"category_id" : ObjectId("523a7802b50418baf38b4575"),
"category_name" : "English Book",
"details" : {
"Title" : "Title 549081",
"Binding" : 1,
"Author" : "Author 0",
"Publication data" : 0.5263832447608386,
"Publisher name" : "Publisher name 14",
"Number of page" : 90
}
}
Binding of book has 2 values:
0 that means soft binding, and 1 that means hard binding. I write Map Reduce to statistics for each values.
var map = function()
{
for(var key in this.details)
{
if(key == 'Binding')
{
emit({name: key}, {
'data':
[
{
name: this.details[key],
count: 1
}
]
});
}
}
};
var reduce = function (key, values) {
var reduced = {};
for(var i in values)
{
var inter = values[i];
for(var j in inter.data)
{
if(typeof(reduced[inter.data[j].name]) != "undefined")
{
reduced[inter.data[j].name] += inter.data[j].count;
}
else
{
reduced[inter.data[j].name] = 1;
}
}
}
return reduced;
};
When I run with small data (50 records) result return exactly. But when I run it with real data (192000 records) result return Not exactly. The result as below
{
"_id" : {
"name" : "Binding"
},
"value" : {
"0" : 50,
"1" : 50
}
}
I checked return data when Map/Reduce done, result as below
"counts" : {
"input" : 192000,
"emit" : 192000,
"reduce" : 1920,
"output" : 1
},
What wrong with it. Welcome any suggestion, explanation.
Thanks and best regards,

After researching about Map/Reduce yesterday, I realized that, "Emit" send 100 elements once, and "Reduce" perform on this data set. So my above code is wrong because it only "SUM" on small data set.
Below that is my new code for Map-Reduce
var map = function ()
{
for(var key in this.details)
{
if(key == 'Binding')
{
var value = {};
value[this.details[key]] = 1;
emit(key, value);
}
}
}
var reduce = function (key, values)
{
var reduced = {};
for(var idx = 0; idx < values.length; idx++)
{
var inner = values[idx];
for (var j in inner)
{
if (typeof (reduced[j]) == 'undefined')
{
reduced[j] = 0;
}
reduced[j] += inner[j];
}
}
return reduced;
}
I post here for anyone who meet similar situation. Thanks for reading.

Related

whether mongodb mapreduce function accepts calculated value of document as a mapper

I use MongoDB version 2.6.10. Below is the collection structure. I use MapReduce function to group the names of the user based on (created(excluding seconds), event_name.
{
"_id" : ObjectId("59c11d79078dc54153c36ee8"),
"event_name" : "notification",
"created" : ISODate("2017-09-19T13:36:57.252Z"),
"sender_name" : "nathan",
"user_name": "Ragul"",
}
{
"_id" : ObjectId("59c11d79078dc54153c36eeb"),
"event_name" : "notification",
"created" : ISODate("2017-09-19T13:36:57.772Z"),
"sender_name" : "parmesh",
"user_name": "Ram",
}
{
"_id" : ObjectId("59c11d7a078dc54153c36ef0"),
"event_name" : "notification",
"created" : ISODate("2017-09-19T13:36:58.554Z"),
"sender_name" : "nathan",
"user_name": "Ram",
}
{
"_id" : ObjectId("59c11d7a078dc54153c36ef1"),
"event_name" : "message",
"created" : ISODate("2017-09-19T13:36:58.577Z"),
"sender_name" : "nathan",
"user_name": "Ragul"",
}
Below is my query using MapReduce function. My question is whether we can use calculated date as a mapper. Help me with your suggestions
var mapfn = function(){
if (this.event_name == "message"){
name = this.recipient_name
}
else if ((this.event_name == "notification") && (this.other_status == true)){
name = this.sender_name
}
else if ((this.event_name == "notification") && (this.other_status == false)){
name = "You"
}
this.cre = {$subtract:[this.created,{$add:[{$multiply:[{$second:this.created},1000]},{$millisecond:this.created}]}]}
emit({"event_name": this.event_name, "created": this.cre}, name)
}
var redfun = function(key, value){
return Array.append(value)
}
db.getCollection('users').mapReduce(mapfn, redfun, {out: "example"}).find()
Here instead calculating the date using MongoDB expression, I tried to use javascript to eliminate the seconds and then I mapped then its worked.
var mapfn = function(){
if (this.event_name == "message"){
name = this.recipient_name
}
else if ((this.event_name == "notification") && (this.other_status == true)){
name = this.sender_name
}
else if ((this.event_name == "notification") && (this.other_status == false)){
name = "You"
}
this.created.setSeconds(0);
this.created.setMilliseconds(0);
emit({"event_name": this.event_name, "created": this.created}, name)
}
var redfun = function(key, value){
var names = value.join(",")
return names
}
db.users.mapReduce(mapfn, redfun, {out: "example"}).find()

MongoDB query to return documents that only have keys amongst a predefined set

The MongoDB query language allows filtering documents based on the existence or absence of a given field with the $exists operator.
Is there a way, with the MongoDB syntax, and given a set K of allowed fields, to exclude documents that have fields not in K from the results, but:
not knowing in advance which extra fields (outside K) can be encountered
not using JavaScript, that is, the $where operator?
Example:
{
"Some field" : "foo"
}
{
"Some field" : "bar",
"Some other field" : "foobar"
}
With the set K = [ "Some field" ], only the first document is to be returned.
Note how this is not to be confused with a projection, which would return both documents but removing the extra field.
I'm not sure if MongoDB do support such kind of operations out of box but you can achieve so with help of mapReduce.
Assuming your sample data set;
// Variable for map
var map = function () {
var isAcceptable = true;
Object.keys(this).forEach(function (key) {
if (key != "_id" && white_list.indexOf(key) == -1) {
isAcceptable = false;
}
});
if (isAcceptable == true) {
emit(1, this);
}
};
// Variable for reduce
var reduce = function (key, values) {
return values;
};
db.collection.mapReduce(
map,
reduce,
{
scope: {"white_list": ["Some field"]},
out: {"inline": 1}
}
);
Will return:
{
"results" : [
{
"_id" : 1,
"value" : {
"_id" : ObjectId("57cd7503e55de957c62fb9c8"),
"Some field" : "foo"
}
}
],
"timeMillis" : 13,
"counts" : {
"input" : 2,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
Desired result will be in results.values of returned document. However, keep in mind limitation of MongoDB mapReduce and maximum size of BSON document.
Given a set of known fields K, you can construct a query that takes the set as input and gives a query with the $exists operator along with the corresponding fields projection. Using an example, suppose you have the following documents in a test collection
db.test.insert({ "fieldX": "foo", "fieldY": "bar", "fieldZ": 1 })
db.test.insert({ "fieldX": "123", "fieldY": "bar", "fieldZ": 2 })
db.test.insert({ "fieldY": "abc", "fieldZ": 3 })
db.test.insert({ "fieldX": "xyz", "fieldZ": 4 })
db.test.insert({ "fieldZ": 5 })
Then you can construct a query Q and a projection P from an input set K as follows:
var K = [ "fieldX", "fieldZ" ];
var or = K.map(function(field) {
var obj = {};
obj[field] = { "$exists": true };
return obj;
});
var P = K.reduce(function(doc, field) {
doc[field] = 1;
return doc;
}, {} );
var Q = { "$or": or };
db.test.find(Q, P);
Sample Output:
/* 1 */
{
"_id" : ObjectId("57cd78322c241f5870c82b7d"),
"fieldX" : "foo",
"fieldZ" : 1
}
/* 2 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7e"),
"fieldX" : "123",
"fieldZ" : 2
}
/* 3 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7f"),
"fieldZ" : 3
}
/* 4 */
{
"_id" : ObjectId("57cd78332c241f5870c82b80"),
"fieldX" : "xyz",
"fieldZ" : 4
}
/* 5 */
{
"_id" : ObjectId("57cd78332c241f5870c82b81"),
"fieldZ" : 5
}

How to detect the re-reduce stage in MongoDB map/reduce?

I use the following map/reduce setup to collect some data into array:
map: function() { emit(this.key, [this.item]); },
reduce: function(key, values) {
var items = [];
values.forEach( function(value) {items.concat(value.item);} );
return items;
},
out: {reduce: "result_collection"}
I want to improve the code and detect if the resulting collection has been changed during the re-reduce stage (when mongo invokes reduce with the current content of the "result_collection").
In other words, how to know that any documents have been emitted by the Map contain "item" that does not exist in the "result_collection" yet (under the same key, of course)?
This information can help at some further processing stages e.g. query "result_collection" to get the documents that have been updated during the map/reduce stage.
If you must do this, use a finalize function to adjust the value after all reduction is finished. You'll have to add more logic to the reduce function to handle the modified output.
I'll show you an example with the simple map-reduce defined by the following map and reduce functions:
var map = function() { emit(this.k, this.v) }
var reduce = function(key, values) { return Array.sum(values) }
On documents that look like { "k" : 0, "v" : 1 }, the map-reduce defined by the above functions produces result documents that look like { "_id" : 0, "value" : 17 }. Define a finalize function to modify the final document:
var finalize = function (key, reducedValue) { return { "m" : true, "v" : reducedValue } }
Now modify reduce to handle an element of values that might be an object of the above form:
var reduce2 = function(key, values) {
var sum = 0;
for (var i = 0; i < values.length; i++) {
if (typeof values[i] == "object") { sum += values[i].v }
else { sum += values[i] }
}
return sum
}
Output looks like
{ "_id" : 0, "value" : { "m" : true, "v" : 14 } }
{ "_id" : 1, "value" : { "m" : true, "v" : 34 } }
{ "_id" : 2, "value" : { "m" : true, "v" : 8 } }
so you can tell what's been modified by value.m. Your further processing can set v.m to false so you'll see what hasn't been processed yet after each map-reduce.

mongodb: reduce function not action when only one result is there

I need to do the weighted average.
Did the coding as below
db.runCommand(
{ mapreduce : "<collecton>" ,
map: function ()
{
emit ({nkey: this.nkey}, {price: this.tags["31"], qty: this.tags["32"]});
},
reduce: function(key, vals)
{
var ret = {wavg:0};
var mul1 = 0.0;
var sum1 = 0.0;
for (var i=0;i<vals.length;i++)
{ mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
out: 'res2', verbose: true
}
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you notice, in the final reducer output(third row), it dint actually go thru the reduce functionality. The key occur only once, hence one result will be emitted. But I still want the reduce function to be acting on that to get the weighted average. I can't just go ahead with price and qty wherever I have only one occurence of the key, where I need weighted average for that key also.
Is there any way to achieve this ?
This is essentially how mapReduce works in that the reducer is never called when you only have one result. But you can always alter such results with a finalize stage:
db.runCommand({
"mapreduce" : "<collecton>" ,
"map": function () {
emit (
{ "nkey": this.nkey},
{ "price": this.tags["31"], qty: this.tags["32"]}
);
},
"reduce": function(key, vals) {
var ret = { "wavg": 0 };
var mul1 = 0.0;
var sum1 = 0.0;
for ( var i=0; i<vals.length; i++ ) {
mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
"finalize": function(key,value) {
if (value.hasOwnProperty("price") {
value.wavg = value.price;
delete value["price"];
}
return value;
},
"out": 'res2',
"verbose": true
});
Or otherwise alternately just sum your keys in the reduce stage and do all the division in the finalize stage if that suits you thinking better. But then you would need to do your "multiplication" part in the "mapper" for that to work.

MongoDB - Geospatial Index with Aggregation

I've read from the to docs that it is not possible to use a geospatial index on an aggregation with MongoDB. Is there an alternative to this? I am attempting to run a query take grab all activities within a certain radius, then group/sort them by the number of times that activity has occurred. Is there way around this issue?
You can use map-reduce on a geo query. Here is an example based on geo_mapreduce.js (from the mongodb jstests):
// setup test collection
var act_date = new Date(2010,06,07);
for (i = 1; i <= 10; i++) {
db.activity.insert( { "geo" : { "lat" : 32.68331909, "long" : 69.41610718 }, "date":act_date, "activity" : 9 * i } );
db.activity.insert( { "geo" : { "lat" : 35.01860809, "long" : 70.92027283 }, "date":act_date, "activity" : 3 } );
db.activity.insert( { "geo" : { "lat" : 31.11639023, "long" : 64.19970703 }, "date":act_date, "activity" : 11 } );
db.activity.insert( { "geo" : { "lat" : 32.64500046, "long" : 69.36251068 }, "date":act_date, "activity" : 9 } );
db.activity.insert( { "geo" : { "lat" : 33.23638916, "long" : 69.81360626 }, "date":act_date, "activity" : 22 } );
act_date.setDate(act_date.getDate() + 1);
}
db.activity.ensureIndex( { "geo" : "2d" } );
center = [ 32.68, 69.41 ];
radius = 10 / 111; // 10km; 1 arcdegree ~= 111km
geo_query = { geo : { '$within' : { '$center' : [ center, radius ] } } };
// map function
m = function() {
emit( this.date, { "activity" : this.activity } );
};
// reduce function
r = function(key, values) {
var total = 0;
for ( var i = 0; i < values.length; i++ ) {
total += values[i].activity;
}
return {"activity":total };
};
// mapreduce with geo query
res = db.activity.mapReduce( m, r, { out : { inline : 1 }, query : geo_query } );
// sort results
res.results.sort(function(a, b){return b.value.activity - a.value.activity})
for (var i=0; i < res.results.length; i++) {
print("Date: " + res.results[i]._id + " Activity: "
+ res.results[i].value.activity)
}