MongoDB/Mongoose: Can't put simplest MapReduce to work - mongodb

Hello all I'm trying to do is to get the count of each distinct departmentType:
fnMap = function() {
emit(this.departments.departmentType, {typeCount:1} );
}
fnReduce = function(key, values) {
var result = {typeCount: 0};
values.forEach(function(value) {
result.typeCount += value.brandCount;
});
return result;
};
var command = {
mapreduce : "clients",
query : {"departments.departmentType": {$exists: true}},
map : fnMap.toString(),
reduce : fnReduce.toString(),
//sort: {"departments.departmentType":1},
out: {inline: 1}
};
mongoose.connection.db.executeDbCommand(command, function(err, dbres) {
});
When executing the command, dbres.documents[0].results only contains 1 item with the total number of departmentTypes, instead of several items one for each departmentType with its count.
Any ideas what am I doing wrong?
Also, when I uncomment the SORT line, I get error "db assertion failure: could not create cursor over...", I believe the field name is written correctly.

Mongoose v3 has now a Model.mapreduce() function (see doc).
The full example shown is:
var o = {};
o.map = function () { emit(this.name, 1) }
o.reduce = function (k, vals) { return vals.length }
o.out = { replace: 'createdCollectionNameForResults' }
o.verbose = true;
User.mapReduce(o, function (err, model, stats) {
console.log('map reduce took %d ms', stats.processtime)
model.find().where('value').gt(10).exec(function (err, docs) {
console.log(docs);
});
})
The problem with count i believe is because in your fnReduce() function you are summit the results instead of displaying them in an array.

You can use:
db.clients.distinct("departments.departmentType")
That will give an array with all the distinct departmentType values.
There were two problems in your map/reduce. One is brandCount in reduce rather than typeCount. But more importantly, you are trying to emit once per document, when you need to emit once per department array element. Corrected (and slightly simplified) code:
> fnMap = function () {
this.departments.forEach(
function (d) {
emit(d.departmentType, 1);
}
);
}
> fnReduce = function (key, values) {
var result = 0;
values.forEach(
function (value) {result += value;});
return result;
}

Related

mongodb showing array of null when printing the outside of query

i am trying to push the resultant of the count to an array in mogodb query, while pushing it showing the array after that if print it outside of query it is showing empty array.
collection1 in db is like below
[{title:Home,
date:24-10-2016},
{title:Accesories,
date:13-02-2016}
]
my code
exports.listOfCategories=function(req,res){
collection1.find().exec(function (err, categories) {
if (err) {
return res.status(400).send({
message: errorHandler.getErrorMessage(err)
});
} else {
var categoryList = categories;
var catTitle;
var allCat = [];
// console.log(categoryList);
for (var i = 0; i < categoryList.length; i++) {
catTitle = categoryList[i].title;
contentCounts(catTitle);
function contentCounts(content, callback) {
var catName = new RegExp(content, 'i');
var mongoQuery = {
"ProCategory.title": catName
}
collection2.find(mongoQuery).count(function (err, count) {
generateContentArr(content, count)
});
}
function generateContentArr(content, count) {
allCat.push({
name: content,
count: count
});
console.log(JSON.stringify(allCat));
// Here it is showing the array what i pushed
}
}
console.log(JSON.stringify(allCat));
// Here it not showing the total array, it showing an empty array
res.json(allCat);
}
});
}
Thanks in advance
You are not waiting for the result of an async operation, in your case in the for loop you need to wait for the result of mongo operation, but as for loop is synchronous, you are just making calls to mongo but don't wait for the results, and print the empty array right after the loop.
I would suggest you to use promises instead of callbacks, I don't know which version of mongoose you are using but the last version have promise support for mongo methods like find and count. Here is an example for your case:
var Promise = require("bluebird");
function countByTitle(catTitle){
var mongoQuery = {"ProCategory.title": new RegExp(catTitle, 'i')}
return collection2.count(mongoQuery).then(function(count) {
return {
name: catTitle,
count: count
};
});
}
collection1.find().then(function (categories) {
var categoryList = categories;
var promises = [];
for (var i = 0; i < categoryList.length; i++) {
promises.push(countByTitle(categoryList[i].title));
}
return Promise.all(promises).then(results => {
console.log(JSON.stringify(results));
})
}).catch(function (err) {
//if there is any error while resolving the promises, this block will be called
return res.status(400).send({
message: errorHandler.getErrorMessage(err)
});
});

How to pass a test if expect fails

I have this code
it('This should pass anyway', function (done) {
testObj.testIt(regStr);
});
testObj
this.testIt = function (regStr) {
selector.count().then(function (orgCount) {
for (var curr = 0; curr < count; curr++) {
checkField(curr, regStr);
}
});
};
function checkField(curr, regStr) {
selector.get(curr).all(by.tagName('li')).get(0).getInnerHtml().then(function (text) {
expect(text).to.match(regStr, curr + '#ERR');
});
}
If one of these expects get a failure, test fails. How can i handle this? I mean - can i somehow count passed and failed expect()ations and return it? or, at least, dont let test break on first error.
I've tried try-catch, but nothing good happened.
it('This should pass anyway', function (done) {
try {
testObj.testIt(regStr);
} catch (e) {
console.log('#err' + e);
}
});
And then i wanted to use done(), but havent found any examples to do the similar. Can u please help me?
Sry for my english
UPD
You can return either null or a string from checkField(), join them up, and expect the array to be empty:
this.testIt = function (regStr) {
selector.count().then(function (orgCount) {
var errors = [];
for (var curr = 0; curr < orgCount; curr++) {
var e = checkField(curr, regStr);
if (e) { errors.push(e); }
}
assert.equal(0, errors.length, errors);
});
};
A cleaner approach would be to use map() to collect the data into an array:
var data = selector.map(function (elm) {
return elm.element(by.tagName('li')).getText();
});
expect(data).toEqual(["test1", "test2", "test3"]);

How should i update documents, each with different update data set, in mongodb collections

I have mongodb in which there is 3 huge collections say 'A', 'B' and 'C'
Each collection contains about 2 million documents.
There are certain properties for each of the document.
Each document need to be updated based on those values of certain properties, from which i can determine what should be the '$set' to that document.
currently i am using the same approach for each collection.
that to find all documents in batches. collection them in memory (which i think the culprit for the current approach), then one by one update them all.
For the first collection(that have similar data as in other collections), it takes 10 minutes to get completed. then the next two collections taking 2 hours approx to get the task done or mongodb client get crashed earlier.
There is something wrong and no desired in the current approach.
Model.collection.find({}).batchSize(BATCH).toArray(function(err, docs){
if(err || !docs || !docs.length)
return afterCompleteOneCollection(err);
var spec = function(index) {
if(index % 1000 === 0) console.log('at index : ' + index);
var toSet = { };
var toUnset = { };
var over = function(){
var afterOver = function(err){
if(err) return afterCompleteOneCollection(err);
if(index < docs.length - 1) spec(index+1);
else afterCompleteOneCollection(null);
};
var sb = Object.keys(toSet).length;
var ub = Object.keys(toUnset).length;
if(sb || ub) {
var all = {};
if(sb) all.$set = toSet;
if(ub) all.$unset = toUnset;
Model.collection.update({ _id : docs[index]._id }, all, {}, afterOver);
} else afterOver(null);
};
forEachOfDocument(docs[index], toSet, toUnset, over);
};
spec(0);
});
Is there any better solution for the same.?
The streaming approach from here http://mongodb.github.io/node-mongodb-native/api-generated/cursor.html#stream worked for me
This is what i am doing :
var stream = Model.collection.find().stream();
stream.on('data', function(data){
if(data){
var toSet = { };
var toUnset = { };
var over = function(){
var afterOver = function(err){
if(err) console.log(err);
};
var sb = Object.keys(toSet).length;
var ub = Object.keys(toUnset).length;
if(sb || ub) {
var all = {};
if(sb) all.$set = toSet;
if(ub) all.$unset = toUnset;
Model.collection.update({ _id : data._id }, all, {}, afterOver);
} else afterOver(null);
};
forEachOfDocument(data, toSet, toUnset, over);
}
});
stream.on('close', function() {
afterCompleteOneCollection();
});

Is it possible to Rename the output key in Mongo's MapReduce Result?

I am trying to perform an Inline mapreduce operation using pyMongo.
The code looks like this:
import pymongo
from bson.code import Code
con = pymongo.MongoClient()
map_func = Code("""
function() {
var all = this.members.concat(this.admins)
var group_id = this._id
all.forEach(function(_id) {
emit(_id, [group_id])
})
}
""")
reduce_func = Code("""
function(key, values) {
var ob = {};
ob[key] = [];
for (var i=0; i<values.length; i++) {
ob[key].push(values[i][0])
}
return ob
}
""")
finalize_func = Code("""
function(key, value) {
if (typeof(value.push) == "function") {
return value
} else {
return value[key]
}
}
""")
result = con.test.group.inline_map_reduce(
map_func,
reduce_func,
finalize=finalize_func)
import pprint; pprint.pprint(result)
Output for this operation is:
[{u'_id': u'135348133252952338363702',
u'value': [u'135457069105859781018098',
u'135661481520484615218098',
u'135391961249458761918098',
u'135758863859275369318098',
u'135156779012512657918098',
u'135285081801846289218098',
u'136040996346306049718098',
u'136237587535011048218098',
u'136862399436556328318098']},
{u'_id': u'136068596781820946163702',
u'value': [u'136068597966313224518098',
u'135156779012512657918098',
u'136415311739865096818098']}]
Is there any hook/operator by which I can rename the output field to any custom string example "group_id" instead of "values" ?
I have read Mongo's MapReduce documentation, but did not find any tip on how to accomplish this.
Using the MapReduce finalize hook described here, I reformatted my output to the field names I wanted:
db.collection.mapReduce(
mapFunction(),
reduceFunction(),
{
query: {params},
finalize: function(key, reduced_value) {
return {
my_field: key,
my_value: reduced_value
}
}
}
)
No, the reduced values are always in a field named value.

in mongo need to join two collections using Identity columns and emit the needed columns from both collections

I have book and author collection.in this name and works_written are the same value column respectively.so i tried the following script but it emit only first map values,second map values not emitted.
book = function() {
emit(this.id, {name: this.name,editions:this.editions});
}
author = function() {
emit(this.id, {name:this.name,works_written: this.works_writtten,});
}
r_b = function(k, values) {
var result = {};
values.forEach(function(value) {
var name;
for (name in value) {
if (value.hasOwnProperty(name)) {
result[name] = value[name];
}
}
});
return result;
};
r_a = function(k, values) {
var result = {};
values.forEach(function(value) {
var works_written;
for (works_written in value) {
if (value.hasOwnProperty(works_written)) {
result[works_written] = value[works_written];
}
}
});
return result;
};
res = db.book.mapReduce(book, r_ja, {out: {reduce: 'joined'}})
res = db.author.mapReduce(author, r_jp, {out: {reduce: 'joined'}})
can someone help me out?
From looking at your code, it seems like you have two collections, "book" and "author". Each book is structured as
{
id: <some id>,
name: <some name>,
editions: <comma-separated string of editions>
}
and each author is structured as
{
id: <some id>,
name: <some name>,
works_written: <comma-separated string of works written>
}
It would be more reasonable to store both works_written and editions as arrays rather than comma-separated lists each packed into an individual string. This would make iterating over the array possible.
Additionally, do you have multiple documents for each author and each book? If not, you do not need a mapreduce to do what you are attempting to do - a simple find() should work.
In case I have misinterpreted, what exactly are you attempting to do?