Incorrect response to mapReduce query in mongo-db - mongodb

I have 1000 user records in collecton, in which 459 document has gender male and remaining as female
//document structure
> db.user_details.find().pretty()
{
"_id" : ObjectId("557e610d626754910f0974a4"),
"id" : 0,
"name" : "Leanne Flinn",
"email" : "leanne.flinn#unilogic.com",
"work" : "Unilogic",
"dob" : "Fri Jun 11 1965 20:50:58 GMT+0530 (IST)",
"age" : 5,
"gender" : "female",
"salary" : 35696,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a5"),
"id" : 1,
"name" : "Edward Young",
"email" : "edward.young#solexis.com",
"work" : "Solexis",
"dob" : "Wed Feb 12 1941 16:45:53 GMT+0530 (IST)",
"age" : 1,
"gender" : "female",
"salary" : 72291,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a6"),
"id" : 2,
"name" : "Haydee Milligan",
"email" : "haydee.milligan#dalserve.com",
"work" : "Dalserve",
"dob" : "Tue Sep 13 1994 13:45:04 GMT+0530 (IST)",
"age" : 17,
"gender" : "male",
"salary" : 20026,
"hobbies" : "Papier-Mache"
}
{
"_id" : ObjectId("557e610d626754910f0974a7"),
"id" : 3,
"name" : "Lyle Keesee",
"email" : "lyle.keesee#terrasys.com",
"work" : "Terrasys",
"dob" : "Tue Apr 25 1922 13:39:46 GMT+0530 (IST)",
"age" : 79,
"gender" : "female",
"salary" : 48032,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a8"),
"id" : 4,
"name" : "Shea Mercer",
"email" : "shea.mercer#pancast.com",
"work" : "Pancast",
"dob" : "Mon Apr 08 1935 06:10:30 GMT+0530 (IST)",
"age" : 51,
"gender" : "male",
"salary" : 31511,
"hobbies" : "Acrobatics,Photography,Papier-Mache"
}
Number of users in each gender
> db.user_details.find({gender:'male'}).count()
459
>
> db.user_details.find({gender:'female'}).count()
541
> db.user_details.find({name:{$ne:null}}).count()
1000
> db.user_details.find({age:{$ne:null}}).count()
1000
Map reduce code
mapper = function(){
emit(this.gender, {name:this.name,age:this.age})
}
reducer = function(gender, users){
var res = 0;
users.forEach(function(user){
res = res + 1
})
return res;
}
db.user_details.mapReduce(mapper, reducer, {out: {inline:1}})
Why map reduce result has only 112 documents? It should contain 459 and 541 for male and female respectively, isn't it?
// Map reduce result
{
"results" : [
{
"_id" : "female",
"value" : 56
},
{
"_id" : "male",
"value" : 46
}
],
"timeMillis" : 45,
"counts" : {
"input" : 1000,
"emit" : 1000,
"reduce" : 20,
"output" : 2
},
"ok" : 1
}
Note : I know this is not a proper way to use map reduce, Actually i faced some more creepy problem in map reduce. Once i get solution to this question i could solve that

Your problem here is that you have missed one of the core concepts of how mapReduce works. The relevant documentation that explains this is found here:
MongoDB can invoke the reduce function more than once for the same key. In this case, the previous output from the reduce function for that key will become one of the input values to the next reduce function invocation for that key.
And then also a bit later:
the type of the return object must be identical to the type of the value emitted by the map function
What those two statements mean is you need to use the exact same signature issued from both the mapper and the reducer functions as the reduce process will indeed get called "multiple times".
This is how mapReduce deals with large data, but not necessarily processing all of the same values for a given "key" at once, but doing it in incremental "chunks":
There fore if all you want in the output is a "number" then all you "emit" is just a "number" as well:
db.collection.mapReduce(
function() {
emit(this.gender, this.age);
},
function(key,values) {
return Array.sum( values )
},
{ "out": { "inline": 1 } }
)
Or just "count" per type:
db.collection.mapReduce(
function() {
emit(this.gender, 1);
},
function(key,values) {
return Array.sum( values )
},
{ "out": { "inline": 1 } }
)
The point is "you need to put out the same as what you put in", as it will likely "go back in again". So whatever data you want to collect, the output structure for both mapper and reducer must be the same.

This is probably wrong.
users.forEach(function(user){
res = res + 1
})
Try this,
function(gender, users){
return Array.sum( users)
}

There is a mistake in the reduce function.
MONGODB reduce function can be called multiple times for the same KEY, so in your reduce code its getting overridden.
Also in map function you are emmitting the document of structure { user, age}, but in reduce function you are returning the count.
reduce = function(gender, doc) {
reducedVal = { user: 0, age: 0 };
for (var idx = 0; idx < doc.length; idx++) {
reducedVal.user += 1 ;
reducedVal.age += 1;
}
return reducedVal;
};
please check the below link as well:
http://thejackalofjavascript.com/mapreduce-in-mongodb/

This is a proper way to use map reduce(), for display gender-wise count of users
db.yourCollectionName.mapReduce(
function(){
emit(this.gender,1);
},
function(k,v){
return Array.sum(v);
},
{out:"genderCount"}
);
db.genderCount.find();

Related

MongoDB MapReduce, second argument of reduce function is multidimensional array

I tried to use mapReduce for my collection. Just for debug I returned vals value passed as second argument do reduce function, like this:
db.runCommand({
"mapreduce":"MyCollection",
"map":function() {
emit( {
country_code:this.cc,
partner:this.di,
registeredPeriod:Math.floor((this.ca - 1399240800)/604800)
},
{
count:Math.ceil((this.lla - this.ca)/86400)
});
},
"reduce":function(k, vals) {
return {
'count':vals
};
},
"query":{
"ca":{
"$gte":1399240800
},
"di":405,
"cc":"1"
},
"out":{
"inline":true
}
});
And I got result like this:
{
"results" : [
{
"_id" : {
"country_code" : "1",
"distribution" : 405,
"installationPeriod" : 0
},
"value" : {
"count" : [
{
"count" : 37
},
{
"count" : 38
}
]
}
},
{
"_id" : {
"country_code" : "1",
"distribution" : 405,
"installationPeriod" : 1
},
"value" : {
"count" : 36
}
},
{
"_id" : {
"country_code" : "1",
"distribution" : 405,
"installationPeriod" : 4
},
"value" : {
"count" : [
{
"count" : [
{
"count" : 16
},
{
"count" : 16
}
]
},
{
"count" : 15
}
]
}
}
],
"timeMillis" : 38,
"counts" : {
"input" : 130,
"emit" : 130,
"reduce" : 5,
"output" : 6
},
"ok" : 1
}
I really don't know why I got multidimensional array as second argument for my reduce function. I mean about this part of result:
{
"_id" : {
"country_code" : "1",
"distribution" : 405,
"installationPeriod" : 4
},
"value" : {
"count" : [
{
"count" : [ // <= Why is this multidimensional?
{
"count" : 16
}
Why is this multidimensional? And why key of embedded array is same like returned from reduce function?
The reason is because this is mapReduce works. From the documentation point:
MongoDB can invoke the reduce function more than once for the same key. In this case, the previous output from the reduce function for that key will become one of the input values to the next reduce function invocation for that key.
And a later point:
the type of the return object must be identical to the type of the value emitted by the map function to ensure that the following operations is true:
So even though you have not "changed the signature" as that documentation points to, you are still only processing n items at once in one reduce pass and then another n items in the next pass. What happens in the eventual processing of this is that the array that was returned in one fragment is combined with the array from another fragment.
So what happened is your reduce returns an array, but it is not "all" of the items you emitted for the key, just some of them. Then another reduce on the same "key" processes more items. Finally those two arrays (or probably more) are again sent to the reduce, in an attempt to actually "reduce" those items as is intended.
That is the general concept, so it is no surprise that when you are just pushing back the array then that is what you get.
Short version, mapReduce processes the ouput "keys" in chunks and not all at once. Better to learn that now before it becomes a problem for you later.

get undefined value in mongodb MapReduce

I tried to use twice MapReduce aggregation to get unique user number per month.
The first MR function work out a mr_buyer_payment collection, like this:
{ "_id" : { "u" : "01329f19-27b0-435b-9ca1-450984024a31", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 38, "count_pay" : 1 } }
{ "_id" : { "u" : "264dd104-b934-490b-988e-5822fd7970f6", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 4.99, "count_pay" : 1 } }
{ "_id" : { "u" : "27bb8f72-a13e-4676-862c-02f41fea1bc0", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 11.98, "count_pay" : 2 } }
The second MR function works well with small data set , but when query grows more than 100 records, it gets wrong result , some value is NaN.
The debug log shows some value in Reduce function like v.payment, v.count_user became undefine.
date:Sun Jun 30 2013 17:00:00 GMT-0700 (PDT) value:undefined / 162 / undefined
And the MR result info is wired:
{
"result" : "mr_buyer_all",
"timeMillis" : 29,
"counts" : {
"input" : 167,
"emit" : 167,
"reduce" : 6, // it should be 3, as same as "output" number
"output" : 3
},
"ok" : 1,
}
This is 2nd MR function:
db.mr_buyer_payment.mapReduce(
function(){
var key = this._id.tid;
var value = {
payment:this.value.payment,
count_pay:this.value.count_pay,
count_user:1
};
if (value.count_pay>0)
{
print("date:"+key+" u:"+this._id.u+"value:"+value.payment+" / "+value.count_pay+" / "+value.count_user);
emit(key,value);
}
},
function(key,values){
var result = {revenue:0,count_pay:0,user:0};
values.forEach(function(v){
if (!v.count_user)
{
print("date:"+key+" "+"value:"+v.payment+" / "+v.count_pay+" / "+v.count_user);
} else
{
result.revenue += v.payment;
result.count_pay += v.count_pay;
result.user += v.count_user;
}
});
return result;
},
{
out:{replace:"mr_buyer_all"}
}
)
The sub-document in Reduce function should use same format as one in Map function. So the solution is :
function(key,values){
// the following key must be as same as the object in map
var r = {payment:0,count_pay:0,count_user:0}
values.forEach(function(v){
r.payment += v.payment;
r.count_pay += v.count_pay;
r.count_user += v.count_user;
});
return r;
},

MongoDB MapReduce producing different results for each document

This is a follow-up from this question, where I tried to solve this problem with the aggregation framework. Unfortunately, I have to wait before being able to update this particular mongodb installation to a version that includes the aggregation framework, so have had to use MapReduce for this fairly simple pivot operation.
I have input data in the format below, with multiple daily dumps:
"_id" : "daily_dump_2013-05-23",
"authors_who_sold_books" : [
{
"id" : "Charles Dickens",
"original_stock" : 253,
"customers" : [
{
"time_bought" : 1368627290,
"customer_id" : 9715923
}
]
},
{
"id" : "JRR Tolkien",
"original_stock" : 24,
"customers" : [
{
"date_bought" : 1368540890,
"customer_id" : 9872345
},
{
"date_bought" : 1368537290,
"customer_id" : 9163893
}
]
}
]
}
I'm after output in the following format, that aggregates across all instances of each (unique) author across all daily dumps:
{
"_id" : "Charles Dickens",
"original_stock" : 253,
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
etc...
]
}
I have written this map function...
function map() {
for (var i in this.authors_who_sold_books)
{
author = this.authors_who_sold_books[i];
emit(author.id, {customers: author.customers, original_stock: author.original_stock, num_sold: 1});
}
}
...and this reduce function.
function reduce(key, values) {
sum = 0
for (i in values)
{
sum += values[i].customers.length
}
return {num_sold : sum};
}
However, this gives me the following output:
{
"_id" : "Charles Dickens",
"value" : {
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
],
"original_stock" : 253,
"num_sold" : 1
}
}
{ "_id" : "JRR Tolkien", "value" : { "num_sold" : 3 } }
{
"_id" : "JK Rowling",
"value" : {
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
],
"original_stock" : 183,
"num_sold" : 1
}
}
{ "_id" : "John Grisham", "value" : { "num_sold" : 2 } }
The even indexed documents have the customers and original_stock listed, but an incorrect sum of num_sold.
The odd indexed documents only have the num_sold listed, but it is the correct number.
Could anyone tell me what it is I'm missing, please?
Your problem is due to the fact that the format of the output of the reduce function should be identical to the format of the map function (see requirements for the reduce function for an explanation).
You need to change the code to something like the following to fix the problem, :
function map() {
for (var i in this.authors_who_sold_books)
{
author = this.authors_who_sold_books[i];
emit(author.id, {customers: author.customers, original_stock: author.original_stock, num_sold: author.customers.length});
}
}
function reduce(key, values) {
var result = {customers:[] , num_sold:0, original_stock: (values.length ? values[0].original_stock : 0)};
for (i in values)
{
result.num_sold += values[i].num_sold;
result.customers = result.customers.concat(values[i].customers);
}
return result;
}
I hope that helps.
Note : the change num_sold: author.customers.length in the map function. I think that's what you want

Mongodb map reduce trivial query

I have a below map:
var mapFunction = function() {
if(this.url.match(/http:\/\/test.com\/category\/.*?\/checkout/)) {
var key=this.em;
var value = {
url : 'checkout',
count : 1,
account_id:this.accId
}emit(key,value); };
if(this.url.match(/http:\/\/test.com\/landing/)) {
var key=this.em;
var value = {
url : 'landing',
count : 1,
account_id:this.accId
}emit(key,value); };
}
Then I have defined reduce something like below:
var reduceFunction = function (keys, values) {
var reducedValue = {count_checkout:0, count_landing:0};
for (var idx = 0; idx < values.length; idx++) {
if(values[idx].url=='checkout'){
reducedValue.count_checkout++;
}
else {
reducedValue.count_landing++;
}
}
return reducedValue;
}
Now, lets say I have only 1 record:
{
"_id" : ObjectId("516a7cff6dad5949ddf3f7b6"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:55:11.682Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
Now if I fire my map reduce like below:
db.test_views.mapReduce(mapFunction,reduceFunction,{out:{inline:1}})
The I get below result returned:
{
"_id" : "testing#test.com",
"value" : {
"url" : "checkout",
"count" : 1,
"account_id" : 123
}
}
So, its basically returning me the map. Now, if I go a add another document for this email id. Finally it becomes something like below.
{
"_id" : ObjectId("516a7cff6dad5949ddf3f7b6"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:55:11.682Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
{
"_id" : ObjectId("516a7e1b6dad5949ddf3f7b7"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:59:55.326Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
Then, I go again and fire the map reduce, it gives me proper results
{
"_id" : "testing#test.com",
"value" : {
"count_checkout" : 2,
"count_landing" : 0
}
}
Can anyone please help me out in understanding why it returns me a map for single document and doesn't do the counting in reduce.
Thanks for help.
-Lalit
Can anyone please help me out in understanding why it returns me a map for single document and doesn't do the counting in reduce.
The Reduce step combines documents with the same key into a single result document. If you only have one key in the data emitted by your Map function, the data is already "reduced" and the reduce() will not be called.
This is the expected behaviour of the MapReduce algorithm.
The reduce function should return the same type of value objects as the map function emits.
Like you've experienced, when there's a single value associated with a key - the reduce function will not be called at all .
From the MongoDB MapReduce Documentation:
Requirements for the reduce Function:
...
the type of the return object must be identical to the type of the value emitted by the map function to ensure that the following operations is true:
reduce(key, [ C, reduce(key, [ A, B ]) ] ) == reduce( key, [ C, A, B ] )

Merging two collections in MongoDB

I've been trying to use MapReduce in MongoDB to do what I think is a simple procedure. I don't know if this is the right approach, of if I should even be using MapReduce. I googled what keywords I thought of and tried to hit the docs where I thought I would have the most success - but nothing. Maybe I'm thinking too hard about this?
I have two collections: details and gpas
details is made up of a whole bunch of documents (3+ million). The studentid element can be repeated two times, one for each year, like the following:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640100"), "classes" : [1,17,19,21], "studentid" : "12345a", "year" : 1}
{ "_id" : ObjectId("4d76b7oij7s2d8372v640100"), "classes" : [2,12,19,22], "studentid" : "98765a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8372v640100"), "classes" : [32,91,101,217], "studentid" : "12345a", "year" : 2}
{ "_id" : ObjectId("4d76b7rty7s2d8372v640100"), "classes" : [1,11,18,22], "studentid" : "24680a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8856v640100"), "classes" : [32,99,110,215], "studentid" : "98765a", "year" : 2}
...
gpas has elements with the same studentid's from details. Only one entry per studentid, like this:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "overall" : 76, "subscore": 2}
...
In the end I want to have a collection with one row for each student in this format:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "classes_1": [1,17,19,21], "classes_2": [32,91,101,217], "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "classes_1": [2,12,19,22], "classes_2": [32,99,110,215], "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "classes_1": [1,11,18,22], "classes_2": [], "overall" : 76, "subscore": 2}
...
The way I was going to do this was by running MapReduce like this:
var mapDetails = function() {
emit(this.studentid, {studentid: this.studentid, classes: this.classes, year: this.year, overall: 0, subscore: 0});
};
var mapGpas = function() {
emit(this.studentid, {studentid: this.studentid, classes: [], year: 0, overall: this.overall, subscore: this.subscore});
};
var reduce = function(key, values) {
var outs = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0};
values.forEach(function(value) {
if (value.year == 0) {
outs.overall = value.overall;
outs.subscore = value.subscore;
}
else {
if (value.year == 1) {
outs.classes_1 = value.classes;
}
if (value.year == 2) {
outs.classes_2 = value.classes;
}
outs.studentid = value.studentid;
}
});
return outs;
};
res = db.details.mapReduce(mapDetails, reduce, {out: {reduce: 'joined'}})
res = db.gpas.mapReduce(mapGpas, reduce, {out: {reduce: 'joined'}})
But when I run it, this is my resulting collection:
{ "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 97, "subscore" : 1 } }
{ "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 85, "subscore" : 5 } }
{ "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } }
I'm missing the classes arrays.
Also, as an aside, how do I access the elements in resulting MapReduce value element? Does MapReduce always output to value or whatever else you name it?
This is similar to a question that was asked on the MongoDB-users Google Groups.
https://groups.google.com/group/mongodb-user/browse_thread/thread/60a8b683e2626ada?pli=1
The answer references an on-line tutorial which looks similar to your example:
http://tebros.com/2011/07/using-mongodb-mapreduce-to-join-2-collections/
For more information on MapReduce in MongoDB, please see the documentation:
http://www.mongodb.org/display/DOCS/MapReduce
Additionally, there is a useful step-by-step walkthrough of how a MapReduce operation works in the "Extras" Section of the MongoDB Cookbook article titled, "Finding Max And Min Values with Versioned Documents":
http://cookbook.mongodb.org/patterns/finding_max_and_min/
Forgive me if you have already read some of the referenced documents. I have included them for the benefit of other users who may be reading this post and new to using MapReduce in MongoDB
It is important that the outputs from the 'emit' statements in the Map functions match the outputs of the Reduce function. If there is only one document output by the Map function, the Reduce function might not be run at all, and then your output collection will have mismatched documents.
I have slightly modified your map statements to emit documents in the format of your desired output, with two separate "classes" arrays.
I have also reworked your reduce statement to add new classes to the classes_1 and classes_2 arrays, only if they do not already exist.
var mapDetails = function(){
var output = {studentid: this.studentid, classes_1: [], classes_2: [], year: this.year, overall: 0, subscore: 0}
if (this.year == 1) {
output.classes_1 = this.classes;
}
if (this.year == 2) {
output.classes_2 = this.classes;
}
emit(this.studentid, output);
};
var mapGpas = function() {
emit(this.studentid, {studentid: this.studentid, classes_1: [], classes_2: [], year: 0, overall: this.overall, subscore: this.subscore});
};
var r = function(key, values) {
var outs = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0};
values.forEach(function(v){
outs.studentid = v.studentid;
v.classes_1.forEach(function(class){if(outs.classes_1.indexOf(class)==-1){outs.classes_1.push(class)}})
v.classes_2.forEach(function(class){if(outs.classes_2.indexOf(class)==-1){outs.classes_2.push(class)}})
if (v.year == 0) {
outs.overall = v.overall;
outs.subscore = v.subscore;
}
});
return outs;
};
res = db.details.mapReduce(mapDetails, r, {out: {reduce: 'joined'}})
res = db.gpas.mapReduce(mapGpas, r, {out: {reduce: 'joined'}})
Running the two MapReduce operations results in the following collection, which matches your desired format:
> db.joined.find()
{ "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ 1, 17, 19, 21 ], "classes_2" : [ 32, 91, 101, 217 ], "overall" : 97, "subscore" : 1 } }
{ "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ 1, 11, 18, 22 ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } }
{ "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ 2, 12, 19, 22 ], "classes_2" : [ 32, 99, 110, 215 ], "overall" : 85, "subscore" : 5 } }
>
MapReduce always outputs documents in the form of {_id:"id", value:"value"}
There is more information available on working with sub-documents in the document titled, "Dot Notation (Reaching into Objects)":
http://www.mongodb.org/display/DOCS/Dot+Notation+%28Reaching+into+Objects%29
If you would like the output of MapReduce to appear in a different format, you will have to do that programmatically in your application.
Hopefully this will improve your understanding of MapReduce, and get you one step closer to producing your desired output collection. Good Luck!
You cannot use m/r for this since that is designed to only apply on one collection. Reading from more than one collection will break sharding compatibility and is therefore not allowed. You can do what you want with either the new aggregation framework (2.1+) or do this inside your application.