I have a collection that stores search query logs. Its two main attributes are user_id and search_query; user_id is null for a logged-out user. I am trying to run a MapReduce job to find the count and terms per user.
// Map: emit (user_id, search_query) for logged-in users only.
// Use != null so documents where user_id is missing (undefined) are
// skipped along with the explicit nulls written for logged-out users;
// the original !== null would emit an undefined key for such documents.
var map = function(){
  if (this.user_id != null) {
    emit(this.user_id, this.search_query);
  }
};
// Reduce: combine all search queries emitted for one user into a single
// comma-separated string.
// Fixed: the original returned Array.sum(queries + ","), which first
// coerces the array to a string and then tries to numerically sum it —
// producing garbage. join(",") is the intended concatenation.
var reduce = function(id, queries){
  return queries.join(",");
};
// Run the per-user query aggregation over a five-minute window,
// writing the result into the "mr2" collection.
var mrOptions = {
  query: {
    time: {
      $gte: ISODate("2013-10-26T14:40:00.000Z"),
      $lt: ISODate("2013-10-26T14:45:00.000Z")
    }
  },
  out: "mr2"
};
db.searchhistories.mapReduce(map, reduce, mrOptions);
throws the following exception
Wed Nov 27 06:00:07 uncaught exception: map reduce failed:{
"errmsg" : "exception: assertion src/mongo/db/commands/mr.cpp:760",
"code" : 0,
"ok" : 0
}
I looked at mr.cpp L#760 but could not gather any vital information. What could be causing this?
My Collection has values like
> db.searchhistories.find()
{ "_id" : ObjectId("5247a9e03815ef4a2a005d8b"), "results" : 82883, "response_time" : 0.86, "time" : ISODate("2013-09-29T04:17:36.768Z"), "type" : 0, "user_id" : null, "search_query" : "awareness campaign" }
{ "_id" : ObjectId("5247a9e0606c791838005cba"), "results" : 39545, "response_time" : 0.369, "time" : ISODate("2013-09-29T04:17:36.794Z"), "type" : 0, "user_id" : 34225174, "search_query" : "eficaz eficiencia efectividad" }
Looking at the docs I could see that this is not possible in the slave. It will work perfectly fine in the master though. If you still want to use the slave then you have to use the following syntax.
// Same time-window MapReduce, but returning results inline instead of
// writing to a collection — required when running against a secondary,
// which cannot accept writes.
var inlineOptions = {
  query: {
    time: {
      $gte: ISODate("2013-10-26T14:40:00.000Z"),
      $lt: ISODate("2013-10-26T14:45:00.000Z")
    }
  },
  out: { inline: 1 }
};
db.searchhistories.mapReduce(map, reduce, inlineOptions);
** Ensure that the output does not exceed the 16MB BSON document size limit when using the inline output option.
Related
Schema -
Database: yelp_camp,
Collection: campgrounds,
The collection contains four records.
ISSUE - Duplicate records exist in the collection, wish to delete all except the first one.
The database collection snapshot,
{ "_id" : ObjectId("5cc9729f48ec2b0add99866e"), "name" : "CampAliBaba101234", "image" : "https://photosforclass.com/download/flickr-2770447094", "__v" : 0 }
{ "_id" : ObjectId("5cc974b46e587f0b00855b0d"), "name" : "CampAliBaba101234", "image" : "https://photosforclass.com/download/flickr-2770447094", "__v" : 0 }
{ "_id" : ObjectId("5cc9facd11f218081b57f8f0"), "name" : "CampAliBaba101234", "image" : "https://photosforclass.com/download/flickr-2770447094", "__v" : 0 }
{ "_id" : ObjectId("5cca658c5ecabc0a7ff79e4e"), "name" : "CampAliBaba101234", "image" : "https://photosforclass.com/download/flickr-2770447094", "__v" : 0 }
Tried the below-mentioned Mongo query in the shell, but in vain:
// NOTE(review): ensureIndex is deprecated (use createIndex), and the
// dropDups option was removed in MongoDB 3.0 — on modern servers this
// does not drop duplicates; building a unique index over existing
// duplicate values simply fails with an E11000 duplicate key error.
db.campgrounds.ensureIndex({"name":1}, {unique:"true", dropDups:"true"})
Expected -
The duplicate records should be deleted and only one unique record should exist.
Actual -
The following error is returned,
{
"ok" : 0,
"errmsg" : "E11000 duplicate key error collection: yelp_camp.campgrounds index: image_1 dup key: { : \"https://photosforclass.com/download/flickr-2770447094\" }",
"code" : 11000,
"codeName" : "DuplicateKey"
}
Just try with this
// Delete every duplicate of "CampAliBaba101234", keeping only the first
// document the cursor returns.
// Fixed: the original nested two forEach loops and removed the OUTER
// document's _id (data._id) whenever the inner counter was > 0, which
// deletes every matching document — including the one that should
// survive. A single pass that skips the first match is sufficient.
var keepId = null;
db.getCollection('Test').find({
  "name": "CampAliBaba101234"
}).forEach((doc) => {
  if (keepId === null) {
    keepId = doc._id; // first match is kept
  } else {
    db.getCollection('Test').remove({ _id: doc._id });
  }
});
I am new to MongoDB; I am using MongoDB 2.4.4. When we try to pass more than 100 records to MapReduce, we get the following error:
com.mongodb.CommandFailureException: { "serverUsed" : "localhost/XXXX" , "errmsg" : "exception: TypeError: Cannot read property 'AAAA' of undefined\n at _funcs2:1:296\n at Array.forEach (native)\n at _funcs2 (_funcs2:1:252) near 'ssionEvent.AAAA>0)
Following is the MapReduceCommand being used:
// NOTE(review): as written this line is missing "new" and an assignment —
// the actual invocation appears later as: cmd = new MapReduceCommand(...).
MapReduceCommand(eventsCollection,map,reduce,null,MapReduceCommand.OutputType.INLINE, compoundquery);
I have identical records, but if the number of records is more than 100 it shows the above error; if it is fewer than 100 it processes successfully.
We are using following mongo options in our application.
"maxWaitTime=300"
"connectionsPerHost=100"
"threadsAllowedToBlockForConnectionMultiplier=1500"
"socketKeepAlive=true"
"connectTimeout=60000"
"socketTimeout=60000"
"autoConnectRetry=true"
The Details about Map,reduce and compound query are as follows:
// Map: emit each document keyed by a compound of Doc1 fields.
// Fixed: use short-circuit && — the original single & evaluates both
// operands, so this.Doc1.innerDoc1 was dereferenced even when Doc1 was
// undefined (TypeError inside the server-side JS). Also appended the
// missing closing '}' for the emitted function body.
String map = "function() {"+
" if (this.Doc1 !== undefined && this.Doc1.innerDoc1 !== undefined) { "+
"emit({field1:this.Doc1.innerDoc1.field1,field2:this.Doc1.field2,field3:this.Doc1.field3,field4:this.Doc1.field4,field5:this.Doc1.field5},this);}"+
"}";
String reduce = "function(keyMappedId, valuesPrices){"+
"var amtValue1=0;"+
"var processedObj=new Object();"+
"valuesPrices.forEach(function(doc){"+
if(doc.Doc1 !== undefined && doc.Doc1.AmtValue !== undefined && doc.Doc1.AmtValue>0){"+
"var amtNumber=new Number(doc.Doc1.AmtValue);"+
"amtValue1=amtNumber;}});"+
"if(amtValue1>0 && valuesPrices[0].Doc1 !== undefined){"+
"valuesPrices[0].Doc1.AmtValue=amtValue1; }"+
"processedObj=valuesPrices[0];"+
"return {aggregatedObject:processedObj};}";
// Build the query filter: only documents whose _id appears in idList.
DBObject compoundquery = QueryBuilder.start("_id").in(idList).get();
where idList will contain "_id" of the documents in the collection which i want to pass on to map reduce
// NOTE(review): MapReduceCommand's first argument should be the input
// DBCollection itself (eventsCollection), not the string "MyCollection" —
// TODO confirm against the driver version in use. Output is INLINE, so
// the entire result must fit in a single 16MB document.
MapReduceCommand cmd=null;
cmd = new MapReduceCommand("MyCollection",map,reduce,null,MapReduceCommand.OutputType.INLINE, compoundquery);
MapReduceOutput out = eventsCollection.mapReduce(cmd);
Adding the Error output as below: 103 records
{ "aggregatedObject" : { "aggregatedObject" : { "_id" : "Order_REC16904427" , "Status" : "PROCESSING" , "Doc1" : { "field1" : "ABCS57829" , "field2" : "XYZ" , "AmtValue" : 9000.0 , "field3" : "AAAABCD" , "Doc2" : { "Serial_No" : "1" , "field4" : "Order" , "field5" : "ABC_1234"} , "Aggregation_Status" : "Submitted"}}}
For 311 Records
{ "aggregatedObject" : { "aggregatedObject" : { "aggregatedObject" : { "aggregatedObject" : { "_id" : "Order_REC16904427" , "Status" : "PROCESSING" , "Doc1" : { "field1" : "ABCS57829" , "field2" : "XYZ" , "AmtValue" : 9000.0 , "field3" : "AAAABCD" , "Doc2" : { "Serial_No" : "1" , "field4" : "Order" , "field5" : "ABC_1234"} , "Aggregation_Status" : "Submitted"}}}}}
Basically, MongoDB supports no more than 100 levels of nesting for BSON documents.
(This discussion has same issue which you are facing. may be helpful I guess.)
I have collection with document structure :
{
'year' : 2014,
'month' : 1
}
I am executing the following operation :
// NOTE: $project cannot reference a field computed in the same stage:
// '$year100' below resolves against the stage's *input* document, where
// it does not exist, so $add receives a missing operand and 'result'
// comes out null. Compute year100 in a preceding stage instead.
db.collname.aggregate(
[
{
$project : {
'year100' : {$multiply : ["$year" , 100]},
'result' : { '$add' : ['$year100', '$month'] }
}
}
]
);
I get the following result :
{
"result" : [
{
"_id" : ObjectId("5563596c515a88832210f0e4"),
"year100" : 201400.0000000000000000,
"result" : null
},
}
Why is the $add operation returning null instead of the actual value? Please help.
MongoDB does not allow an arithmetic expression in a $project stage to reference a field computed in that same stage. Instead of one $project, use two separate $project stages, like this:
// Stage 1 computes year100 (carrying month through); stage 2 can then
// reference $year100 because it is now part of the stage's input.
db.collname.aggregate(
  { $project: { 'year100': { $multiply: ["$year", 100] }, "month": "$month" } },
  { "$project": { "year100": 1, "result": { "$add": ["$year100", "$month"] } } }
);
I am trying to run a map/reduce function in mongodb where I group by 3 different fields contained in objects in my collection. I can get the map/reduce function to run, but all the emitted fields run together in the output collection. I'm not sure this is normal or not, but outputting the data for analysis takes more work to clean up. Is there a way to separate them, then use mongoexport?
Let me show you what I mean:
The fields I am trying to group by are the day, user ID (or uid) and destination.
I run these functions:
// Map: key each event by (day, uid, destination) and emit a unit count.
// Fixed: 'day' and 'map' were implicit globals (a ReferenceError in
// strict mode and an accidental global otherwise) — declare both with var.
var map = function() {
  // getMonth() is 0-indexed, hence the +1
  var day = this.created_at.getFullYear() + "-" +
            (this.created_at.getMonth() + 1) + "-" +
            this.created_at.getDate();
  emit({day: day, uid: this.uid, destination: this.destination}, {count: 1});
};
/* Reduce Function */
// Sum the {count: n} values emitted for one (day, uid, destination) key.
// Fixed: 'reduce' was assigned as an implicit global — declare it with var.
var reduce = function(key, values) {
  var total = 0;
  values.forEach(function(v) {
    total += v.count;
  });
  return {count: total};
};
/* Output Function */
// Count events per (day, uid, destination): filter out documents with a
// null destination, then write the grouped counts to the "TMP" collection.
db.events.mapReduce(map, reduce, {query: {destination: {$ne:null}}, out: "TMP"});
The output looks like this:
{ "_id" : { "day" : "2012-4-9", "uid" : "1234456", "destination" : "Home" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "2345678", "destination" : "Home" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "3456789", "destination" : "Login" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "4567890", "destination" : "Contact" }, "value" : { "count" : 1 } }
{ "_id" : { "day" : "2012-4-9", "uid" : "5678901", "destination" : "Help" }, "value" : { "count" : 1 } }
When I attempt to use mongoexport, I can not separate day, uid, or destination by columns because the map combines the fields together.
What I would like to have would look like this:
{ { "day" : "2012-4-9" }, { "uid" : "1234456" }, { "destination" : "Home"}, { "count" : 1 } }
Is this even possible?
As an aside - I was able to make the output work by applying sed to the file and cleaning up the CSV. More work, but it worked. It would be ideal if I could get it out of mongodb in the correct format.
MapReduce only returns documents of the form {_id:some_id, value:some_value}
see: How to change the structure of MongoDB's map-reduce results?
I have a large dataset (about 1.1M documents) that I need to run mapreduce on.
The field to group on is an array named xref. Due to the size of the collection and the fact I'm doing this in a 32-bit environment, I'm trying to reduce the collection to another collection in a new database.
First, here's a data sample:
{ "_id" : ObjectId("4ec6d3aa61910ad451f12e01"),
"bii" : -32.9867,
"class" : 2456,
"decdeg" : -82.4856,
"lii" : 297.4896,
"name" : "HD 22237",
"radeg" : 50.3284,
"vmag" : 8,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
{ "_id" : ObjectId("4ec6d44661910ad451f78eba"),
"bii" : -32.9901,
"class" : 2450,
"decdeg" : -82.4781,
"decpm" : 0.013,
"lii" : 297.4807,
"name" : "PPM 376283",
"radeg" : 50.3543,
"rapm" : 0.0357,
"vmag" : 8.4,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
{ "_id" : ObjectId("4ec6d48a61910ad451feae04"),
"bii" : -32.9903,
"class" : 2450,
"decdeg" : -82.4779,
"decpm" : 0.027,
"hd_component" : 0,
"lii" : 297.4806,
"name" : "SAO 258336",
"radeg" : 50.3543,
"rapm" : 0.0355,
"vmag" : 8,
"xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336",
"CP-82 65","GC 4125" ] }
Here are the map and reduce functions (right now I'm only lii and bii fields):
function map() {
  try {
    // The emit key becomes _id in the output collection, and _id may not
    // be an array ("_id cannot be an array", assertion 10099). Join the
    // xref array into a single scalar string so the key is valid.
    emit(this.xref.join("|"), {lii: this.lii, bii: this.bii});
  } catch (e) {
    // best-effort: silently skip documents without a usable xref
  }
}
function reduce(key, values) {
  // Average lii/bii over all values emitted for one key.
  // The key is deliberately NOT echoed into the result: a reduce function
  // must return values of the same shape as the emitted values.
  // NOTE(review): dividing by values.length is only correct if reduce runs
  // exactly once per key; MongoDB may re-reduce partial results, which
  // would skew this average — carry an explicit count in the value if the
  // data volume can trigger re-reduce.
  var result = {lii: 0.0, bii: 0.0};
  try {
    values.forEach(function(value) {
      // != null (not truthiness) so legitimate 0.0 coordinates count too;
      // the original `value.lii && value.bii` dropped zero values.
      if (value.lii != null && value.bii != null) {
        result.lii += value.lii;
        result.bii += value.bii;
      }
    });
    result.lii /= values.length;
    result.bii /= values.length;
  } catch (e) {
    // best-effort: return whatever was accumulated
  }
  return result;
}
Unfortunately, running this eventually comes up with an error message:
// Fails with "_id cannot be an array": the emit key becomes _id of the
// output collection, and the map function above emits this.xref, an array.
db.catalog.mapReduce(map, reduce, {out:{replace:"catalog2", db:"astro2"}});
Wed Nov 23 10:12:25 uncaught exception: map reduce failed:{
"assertion" : "_id cannot be an array",
"assertionCode" : 10099,
"errmsg" : "db assertion failure",
"ok" : 0
The xref field IS an array, but all values are equal in that array. Is it trying to use that array as the id field in the new collections?
Yes it is not possible to set _id as an array, because it has a special behavior for indexing.
The key you emit by is used as _id in the output collection.
Potentially this could work only with an "inline" output mode if the result is small, since it won't go to a collection.
But ideally you would translate the array into a string (for example concat the values) and use that as _id, or make it a sub-object instead of an array.
Also note that the result of your reduce function should not include the key.
Just return {lii: .., bii: ..}