Mongodb map reduce trivial query - mongodb

I have a below map:
var mapFunction = function() {
if(this.url.match(/http:\/\/test.com\/category\/.*?\/checkout/)) {
var key=this.em;
var value = {
url : 'checkout',
count : 1,
account_id:this.accId
}emit(key,value); };
if(this.url.match(/http:\/\/test.com\/landing/)) {
var key=this.em;
var value = {
url : 'landing',
count : 1,
account_id:this.accId
}emit(key,value); };
}
Then I have defined reduce something like below:
var reduceFunction = function (keys, values) {
var reducedValue = {count_checkout:0, count_landing:0};
for (var idx = 0; idx < values.length; idx++) {
if(values[idx].url=='checkout'){
reducedValue.count_checkout++;
}
else {
reducedValue.count_landing++;
}
}
return reducedValue;
}
Now, lets say I have only 1 record:
{
"_id" : ObjectId("516a7cff6dad5949ddf3f7b6"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:55:11.682Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
Now if I fire my map reduce like below:
db.test_views.mapReduce(mapFunction,reduceFunction,{out:{inline:1}})
The I get below result returned:
{
"_id" : "testing#test.com",
"value" : {
"url" : "checkout",
"count" : 1,
"account_id" : 123
}
}
So, its basically returning me the map. Now, if I go a add another document for this email id. Finally it becomes something like below.
{
"_id" : ObjectId("516a7cff6dad5949ddf3f7b6"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:55:11.682Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
{
"_id" : ObjectId("516a7e1b6dad5949ddf3f7b7"),
"ip" : "1.2.3.4",
"accId" : 123,
"em" : "testing#test.com",
"pgLdTs" : ISODate("2013-04-11T18:30:00Z"),
"url" : "http://test.com/category/prr/checkout",
"domain" : "www.test.com",
"pgUdTs" : ISODate("2013-04-14T09:59:55.326Z"),
"title" : "Test",
"ua" : "Mozilla",
"res" : "1024*768",
"rfr" : "www.google.com"
}
Then, I go again and fire the map reduce, it gives me proper results
{
"_id" : "testing#test.com",
"value" : {
"count_checkout" : 2,
"count_landing" : 0
}
}
Can anyone please help me out in understanding why it returns me a map for single document and doesn't do the counting in reduce.
Thanks for help.
-Lalit

Can anyone please help me out in understanding why it returns me a map for single document and doesn't do the counting in reduce.
The Reduce step combines documents with the same key into a single result document. If you only have one key in the data emitted by your Map function, the data is already "reduced" and the reduce() will not be called.
This is the expected behaviour of the MapReduce algorithm.

The reduce function should return the same type of value objects as the map function emits.
Like you've experienced, when there's a single value associated with a key - the reduce function will not be called at all .
From the MongoDB MapReduce Documentation:
Requirements for the reduce Function:
...
the type of the return object must be identical to the type of the value emitted by the map function to ensure that the following operations is true:
reduce(key, [ C, reduce(key, [ A, B ]) ] ) == reduce( key, [ C, A, B ] )

Related

Insert document into mongodb from existing table

I am trying to write a query in mongo that will create a new table, loop through my data set, and insert the TopExecutiveTitle into the new table. I also would like it to keep count of each position and only insert a position into the table when it is new.
This is what I have so far. This code loops through my table and inserts the TopExectuiveTitle into a new table. However, it does not group them together and keep count. How do I write my query so that it will?
db.car.find().forEach( function (x) {
db.TopExecutiveTable.insert({Topexecutivetitle: x.Topexecutivetitle})
});
Here is a sample of a document in my database.
{
"_id" : ObjectId("5a22c8e562c2e489c5df70fa"),
"2016rank" : 1,
"Dealershipgroupname" : "AutoNation Inc.?",
"Address" : "200 S.W. 1st Ave.",
"City/State/Zip" : "Fort Lauderdale, FL 33301",
"Phone" : "(954) 769-7000",
"Companywebsite" : "www.autonation.com",
"Topexecutive" : "Mike Jackson",
"Topexecutivetitle" : "chairman & CEO",
"Totalnewretailunits" : "337,622",
"Totalusedunits" : "225,713",
"Totalfleetunits" : 3,
"Totalwholesaleunits" : "82,342",
"Total_units" : "649,415",
"Total_number_of _dealerships" : 260,
"Grouprevenuealldepartments*" : "$21,609,000,000",
"2015rank" : 1
}
The result I would like is something like this
"Topexecutivetitle" : "chairman & CEO"
"Count" : 3
"Topexecutivetitle" : "president"
"Count" : 7
}
To do this you need to use the aggregate function of mongo, something like this:
db.car.aggregate([
{
$group:{
_id:"$Topexecutivetitle",
count:{$sum:1}
}
},
{
$project:{
Topexecutivetitle:"$_id",
count:1,
_id:0
}
},
{
$out:"result"
}])
This will give you your desired output and store it into a new collection "result":
{
"_id" : "president",
"count" : 1.0
},
{
"_id" : "chairman & CEO",
"count" : 3.0
}

MongoDB MapReduce producing different results for each document

This is a follow-up from this question, where I tried to solve this problem with the aggregation framework. Unfortunately, I have to wait before being able to update this particular mongodb installation to a version that includes the aggregation framework, so have had to use MapReduce for this fairly simple pivot operation.
I have input data in the format below, with multiple daily dumps:
"_id" : "daily_dump_2013-05-23",
"authors_who_sold_books" : [
{
"id" : "Charles Dickens",
"original_stock" : 253,
"customers" : [
{
"time_bought" : 1368627290,
"customer_id" : 9715923
}
]
},
{
"id" : "JRR Tolkien",
"original_stock" : 24,
"customers" : [
{
"date_bought" : 1368540890,
"customer_id" : 9872345
},
{
"date_bought" : 1368537290,
"customer_id" : 9163893
}
]
}
]
}
I'm after output in the following format, that aggregates across all instances of each (unique) author across all daily dumps:
{
"_id" : "Charles Dickens",
"original_stock" : 253,
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
etc...
]
}
I have written this map function...
function map() {
for (var i in this.authors_who_sold_books)
{
author = this.authors_who_sold_books[i];
emit(author.id, {customers: author.customers, original_stock: author.original_stock, num_sold: 1});
}
}
...and this reduce function.
function reduce(key, values) {
sum = 0
for (i in values)
{
sum += values[i].customers.length
}
return {num_sold : sum};
}
However, this gives me the following output:
{
"_id" : "Charles Dickens",
"value" : {
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
],
"original_stock" : 253,
"num_sold" : 1
}
}
{ "_id" : "JRR Tolkien", "value" : { "num_sold" : 3 } }
{
"_id" : "JK Rowling",
"value" : {
"customers" : [
{
"date_bought" : 1368627290,
"customer_id" : 9715923
},
{
"date_bought" : 1368622358,
"customer_id" : 9876234
},
],
"original_stock" : 183,
"num_sold" : 1
}
}
{ "_id" : "John Grisham", "value" : { "num_sold" : 2 } }
The even indexed documents have the customers and original_stock listed, but an incorrect sum of num_sold.
The odd indexed documents only have the num_sold listed, but it is the correct number.
Could anyone tell me what it is I'm missing, please?
Your problem is due to the fact that the format of the output of the reduce function should be identical to the format of the map function (see requirements for the reduce function for an explanation).
You need to change the code to something like the following to fix the problem, :
function map() {
for (var i in this.authors_who_sold_books)
{
author = this.authors_who_sold_books[i];
emit(author.id, {customers: author.customers, original_stock: author.original_stock, num_sold: author.customers.length});
}
}
function reduce(key, values) {
var result = {customers:[] , num_sold:0, original_stock: (values.length ? values[0].original_stock : 0)};
for (i in values)
{
result.num_sold += values[i].num_sold;
result.customers = result.customers.concat(values[i].customers);
}
return result;
}
I hope that helps.
Note : the change num_sold: author.customers.length in the map function. I think that's what you want

Get all objects for each tag

I'm new in mongodbs mapreduce and for sure I have not completely understood it for now. And I have a problem, which I try to solve for few days without success.
I have a collection of let's say posts with a tags field. Now I want to mapreduce a new collection of tags. Where every tag have an array of all posts ids that have this one particular tag assigned.
one of my attempts to do this (which doesn't do this right)
m = function() {
for (var i in this.tags) {
emit(this.tags[i], {"ids" : [this._id]});
};
}
r = function(key, emits) {
var total = {ids : []}
for (var i in emits) {
emits[i].ids.forEach(function(id) {
total.ids.push(id);
}
}
return total;
};
I know, that I have to pivot the date some how around, but I just cant get my head wrapped around it.
I think you're missing a ")" in your reduce function to close the emits[i].ids.forEach(). Is this what you're trying to do?
r = function (key, values) {
var total = {ids:[]};
for (var i in values) {
values[i].ids.forEach(
function (id){
total.ids.push(id);
}
);
}
return total;
}
input
{_id:2, tags: ["dog", "Jenna"]}
{_id:1, tags: ["cat", "Jenna"]}
result:
{"results" : [
{"_id" : "Jenna",
"value" : {"ids" : [2,1]}
},
{"_id" : "cat",
"value" : {"ids" : [1]}
},
{"_id" : "dog",
"value" : {"ids" : [2]}
}
],
"timeMillis" : 1,
"counts" : {
"input" : 2,
"emit" : 4,
"reduce" : 1,
"output" : 3
},
"ok" : 1,
}

Mongodb Map/Reduce - Multiple Group By

I am trying to run a map/reduce function in mongodb where I group by 3 different fields contained in objects in my collection. I can get the map/reduce function to run, but all the emitted fields run together in the output collection. I'm not sure this is normal or not, but outputting the data for analysis takes more work to clean up. Is there a way to separate them, then use mongoexport?
Let me show you what I mean:
The fields I am trying to group by are the day, user ID (or uid) and destination.
I run these functions:
map = function() {
day = (this.created_at.getFullYear() + "-" + (this.created_at.getMonth()+1) + "-" + this.created_at.getDate());
emit({day: day, uid: this.uid, destination: this.destination}, {count:1});
}
/* Reduce Function */
reduce = function(key, values) {
var count = 0;
values.forEach(function(v) {
count += v['count'];
}
);
return {count: count};
}
/* Output Function */
db.events.mapReduce(map, reduce, {query: {destination: {$ne:null}}, out: "TMP"});
The output looks like this:
{ "_id" : { "day" : "2012-4-9", "uid" : "1234456", "destination" : "Home" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "2345678", "destination" : "Home" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "3456789", "destination" : "Login" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "4567890", "destination" : "Contact" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "5678901", "destination" : "Help" }, "value" : { "count" : 1 } }
When I attempt to use mongoexport, I can not separate day, uid, or destination by columns because the map combines the fields together.
What I would like to have would look like this:
{ { "day" : "2012-4-9" }, { "uid" : "1234456" }, { "destination" : "Home"}, { "count" : 1 } }
Is this even possible?
As an aside - I was able to make the output work by applying sed to the file and cleaning up the CSV. More work, but it worked. It would be ideal if I could get it out of mongodb in the correct format.
MapReduce only returns documents of the form {_id:some_id, value:some_value}
see: How to change the structure of MongoDB's map-reduce results?

MongoDB: _id Cannot Be An Array

I have a large dataset (about 1.1M documents) that I need to run mapreduce on.
The field to group on is an array named xref. Due to the size of the collection and the fact I'm doing this in a 32-bit environment, I'm trying to reduce the collection to another collection in a new database.
First, here's a data sample:
{ "_id" : ObjectId("4ec6d3aa61910ad451f12e01"),
"bii" : -32.9867,
"class" : 2456,
"decdeg" : -82.4856,
"lii" : 297.4896,
"name" : "HD 22237",
"radeg" : 50.3284,
"vmag" : 8,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
{ "_id" : ObjectId("4ec6d44661910ad451f78eba"),
"bii" : -32.9901,
"class" : 2450,
"decdeg" : -82.4781,
"decpm" : 0.013,
"lii" : 297.4807,
"name" : "PPM 376283",
"radeg" : 50.3543,
"rapm" : 0.0357,
"vmag" : 8.4,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
{ "_id" : ObjectId("4ec6d48a61910ad451feae04"),
"bii" : -32.9903,
"class" : 2450,
"decdeg" : -82.4779,
"decpm" : 0.027,
"hd_component" : 0,
"lii" : 297.4806,
"name" : "SAO 258336",
"radeg" : 50.3543,
"rapm" : 0.0355,
"vmag" : 8,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
Here are the map and reduce functions (right now I'm only lii and bii fields):
function map() {
try {
emit(this.xref, {lii:this.lii, bii:this.bii});
} catch(e) {
}
}
function reduce(key, values) {
var result = {xref:key, lii: 0.0, bii: 0.0};
try {
values.forEach(function(value) {
if (value.lii && value.bii) {
result.lii += value.lii;
result.bii += value.bii;
}
});
result.bii /= values.length;
result.lii /= values.length;
} catch(e) {
}
return result;
}
Unfortunately, running this eventually comes up with an error message:
db.catalog.mapReduce(map, reduce, {out:{replace:"catalog2", db:"astro2"}});
Wed Nov 23 10:12:25 uncaught exception: map reduce failed:{
"assertion" : "_id cannot be an array",
"assertionCode" : 10099,
"errmsg" : "db assertion failure",
"ok" : 0
The xref field IS an array, but all values are equal in that array. Is it trying to use that array as the id field in the new collections?
Yes it is not possible to set _id as an array, because it has a special behavior for indexing.
The key you emit by is used as _id in the output collection.
Potentially this could work only with an "inline" output mode if the result is small, since it wont go to a collection.
But ideally you would translate the array into a string (for example concat the values) and use that as _id, or make it a sub-object instead of an array.
Also note that the result of your reduce function should not include the key.
Just return {lii: .., bii: ..}