Mongodb - Map-Reduce - Complete data is not returned - mongodb

I am using Map Reduce. The problem is that if the no. input of documents is > 100, then I am not getting the expected no. of results but if the no. of input documents is <= 100, then I am getting the results as expected.
Sample output that I am getting:
{
"_id" : "5504",
"value" : [
ObjectId("51c921bae4b0f0f776b339d2"),
ObjectId("51b06b5be4b021e44bc69755")
]
}
Problem: If there are <= 100 documents for user (id:5504), then I am getting that many no. of ids in the output array but if the no. of documents >100, then I am getting very few ids in the output array. I got the above output when the no. of documents for this user was 101, but when it was 100, I got 100 ids. Why this strange behaviour and what's the solution for this?
Map Function:
db.system.js.save({
_id: "map1",
value: function () {
var value = {
"data": [{
"_id": this._id,
"creation_time": this.creation_time
}]
};
emit(this.user_id, value);
}
});
Reduce Function:
db.system.js.save({
_id: "reduce1",
value: function (key, values) {
var reducedValue = [];
for (var i = 0; i < values.length; i++) {
reducedValue.push({
"_id": values[i].data[0]._id,
"creation_time": values[i].data[0].creation_time
});
}
return {
data: reducedValue
};
}
});
Finalize Function:
db.system.js.save({
_id: "finalize1",
value: function (key, reducedValue) {
var a = reducedValue.data.sort(compare1);
var ids = [];
for (var i = 0; i < a.length; i++) {
ids.push(a[i]._id);
}
return ids;
}
});
Compare Function:
db.system.js.save({
_id: "compare1",
value: function (a, b) {
if (a.creation_time < b.creation_time) return 1;
if (a.creation_time > b.creation_time) return -1;
return 0;
}
});
MapReduce() call
db.notifications.mapReduce(map1, reduce1, {out: "notifications_result", query: {delivered:true, user_id:"5504"}, finalize: finalize1});

Since MongoDB could call reduce function many times, you must ensure Function Idempotence. A little modification on your reduce function solves the problem:
db.system.js.save({
_id: "reduce1",
value: function (key, values) {
var reducedValue = [];
for (var i = 0; i < values.length; i++) {
for(var j = 0; j < values[i].data.length; j++) {
reducedValue.push({
"_id": values[i].data[j]._id,
"creation_time": values[i].data[j].creation_time
});
}
}
return {
data: reducedValue
};
}
});
Note that now the values[i].dataarray is traversed too, because the return of other reduce1 calls are in the values array.

Related

How can I pass a variable in sort funtcion of mongobd?

I want to pass this name variable in sort function. I am getting the value of name in console.log but its not working in sort function.
var coldata = req.body.order[0].column;
var name = req.body.columns[coldata].data; // Want to pass this variable in sort()
var first = req.body.order[0].dir;
var last = req.body.order[0].dir;
var x,y;
if (first == 'asc'){
x = 1
}else{
x = -1;
}
if (last == 'asc'){
y = 1
}else{
y = -1;
}
var searchStr = req.body.search.value;
if(req.body.search.value)
{
var regex = new RegExp(req.body.search.value, "i")
searchStr = { $or: [{'firstname':regex },{'lastname': regex}] };
}
else
{
searchStr={};
}
console.log(req.body.search.value)
var recordsTotal = 0;
var recordsFiltered=0;
console.log(searchStr);
db.count({}, function(err, c) {
recordsTotal=c;
db.count(searchStr, function(err, c) {
recordsFiltered=c;
db.find(searchStr, 'firstname lastname',{'skip': Number( req.body.start), 'limit': Number(req.body.length) }, function (err, results) {
if (err) {
console.log('error while getting results'+err);
return;
}
var data = JSON.stringify({
"draw": req.body.draw,
"recordsFiltered": recordsFiltered,
"recordsTotal": recordsTotal,
"data": results
});
res.send(data);
}).sort({name:x});// Not getting value of name here
});
});
});
You can use an aggregation pipeline
const sort = {};
sort[name] = x
const pipeline = [
{ $match: searchStr },
{ $skip: Number( req.body.start) },
{ $limit: Number( req.body.length) },
{ $sort: sort }
];
db.aggregate(pipeline) ...

MongoDB - Many counts using an array

How to make many counts using an array as input in Mongoose, and return an array
I am trying to use the code below but it is not working, list2 is returning as empty.
list = ['Ann', 'Bob', 'John', 'Karl'];
list2 = [];
for(let i = 0; i < list.length; i++) {
Clients.count({name: list[i]}, function(err, doc){
list2.push(doc);
})
}
return list2
You could run an aggregation pipeline as follows:
list = ['Ann', 'Bob', 'John', 'Karl'];
list2 = [];
Clients.aggregate([
{ "$match": { "name": { "$in": list } } },
{
"$group": {
"_id": "$name",
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": null,
"list2": {
"$push": {
"name": "$_id",
"count": "$count"
}
}
}
}
]).exec(function(err, results) {
list2 = results[0].list2;
console.log(list2);
});
const async = require('async');
var list = ['Ann', 'Bob', 'John', 'Karl'];
async.map(list, function(item, callback) {
result = {};
Clients.count({name: item}, function(err, data) {
result[item] = data || 0;
return callback(null, result);
});
}, function(err, data) {
console.log(data);
});
Here's another way based on Med Lazhari's answer
const async = require('async');
var list = ['Ann', 'Bob', 'John', 'Karl'];
var counting = function (item, doneCallback) {
var query = Clients.count({name: item});
query.then(function (doc) {
return doneCallback(null, doc);
});
};
async.map(list, counting, function(err, data) {
console.log(data);
});

Sailsjs native with Mapreduce

I am working on sailsjs project, i just looking for suggestion to achieve the below output to make best performance with code samples.
My existing collection having this below document.
[{
"word" : "DAD",
"createdAt":"6/10/2016 7:25:59 AM",
"gamescore":1
},
{
"word" : "SAD",
"createdAt":"6/09/2016 7:25:59 AM",
"gamescore":1
},
{
"word" : "PAD",
"createdAt":"6/10/2016 8:25:59 AM",
"gamescore":1
}]
I need the below output which is something like this.
[{
"word" : "A",
"repeatedTimes" : "3",
"LatestRepeatedTime": "6/10/2016 8:25:59 AM"
},
{
"word" : "D",
"repeatedTimes" : "4",
"LatestRepeatedTime": "6/10/2016 8:25:59 AM"
},
{
"word" : "P",
"repeatedTimes" : "1",
"LatestRepeatedTime": "6/10/2016 8:25:59 AM"
},
{
"word" : "S",
"repeatedTimes" : "1",
"LatestRepeatedTime": "6/09/2016 8:25:59 AM"
}]
For the above scenario i implemented the below code to fetch, but it is not working at find query.
var m = function () {
var words = this.word;
if (words) {
for (var i = 0; i < words.length; i++) {
emit(words[i], 1);
}
}
}
var r = function (key, values) {
var count = 0;
values.forEach(function (v) {
count += v;
});
return count;
}
console.log(req.params.childid);
Activity.native(function (err, collection) {
console.log("hello");
collection.mapReduce(m, r, {
out: {merge: "words_count" + "_" + "575a4952bfb2ad01481e9060"}
}, function (err, result) {
Activity.getDB(function (err, db) {
var colname = "words_count" + "_" + "575a4952bfb2ad01481e9060";
var natCol = db.collection('words_count' + "_" + "575a4952bfb2ad01481e9060");
natCol.find({},..... **is not working**
natCol.count({}, function (err, docs) {
console.log(err);
console.log(docs);
res.ok(docs);
});
});
});
});
Answer:
natCol.aggregate([
{
$project:
{
_id: "$_id" ,
value:"$value"
}
}
], function(err, data){
console.log(data);
res.ok(data);
});
You could try the following
var m = function () {
if (this.word) {
for (var i = 0; i < this.word.length; i++) {
emit(this.word[i], {
"repeatedTimes": 1,
"LatestRepeatedTime": this.createdAt
});
}
}
};
var r = function (key, values) {
var obj = {};
values.forEach(function(value) {
printjson(value);
Object.keys(value).forEach(function(key) {
if (!obj.hasOwnProperty(key)) obj[key] = 0;
if (key === "repeatedTimes") obj[key] += value[key];
});
obj["LatestRepeatedTime"] = value["LatestRepeatedTime"];
});
return obj;
};
var opts = { out: {inline: 1} };
Activity.native(function (err, collection) {
collection.mapReduce(m, r, opts, function (err, result) {
console.log(err);
console.log(result);
res.ok(result);
});
});

How can I find documents in a collection based on array values

I tried adding a helper to return array with documents, which looks like:
documents = {
realPlayers: [],
subscribers: [
{playerId: },
{playerId: },
{playerId: }
],
privatGame:,
gameType:,
gameStatus: 'active'
}
I tried this, but it doesn't work (forEach too):
Template.myGames.helpers({
'myGames': function() {
let gamePul = [],
activeGames = Games.find({gameStatus: "active"});
for (var i = 0; i < activeGame.length; i++) {
if (activeGame[i].subscribers) {
for (var j = 0; j < activeGame[i].subscribers.length; j++) {
if (activeGame[i].subscribers[j].playerId = Meteor.userId()) gamePul.push(activeGame[i]);
}
}
};
return gamePul;
}
});
You're just trying to find the set of games the current user is a subscriber to right? You just need $elemMatch in your query:
Template.myGames.helpers({
myGames: function() {
return Games.find({ gameStatus: "active",
subscribers: { $elemMatch: { playerId: Meteor.userId() }}});
}
});
docs

Trying to get a count of each word in a MongoDB field is this a job for MapReduce?

I've got a collection with a bunch of body posts in it. For example:
posts = { { id: 0, body: "foo bar baz", otherstuff: {...} },
{ id: 1, body: "baz bar oof", otherstuff: {...} },
{ id: 2, body: "baz foo oof", otherstuff: {...} }
};
I'd like to figure out how to loop through each document in the collection and carry a count of each word in each post body.
post_word_frequency = { { foo: 2 },
{ bar: 2 },
{ baz: 3 },
{ oof: 2 },
};
I've never used MapReduce and I'm still really fresh to mongo, but I'm looking at the documentation on http://cookbook.mongodb.org/patterns/unique_items_map_reduce/
map = function() {
words = this.body.split(' ');
for (i in words) {
emit({ words[i] }, {count: 1});
}
};
reduce = function(key, values) {
var count = 0;
values.forEach(function(v) {
count += v['count'];
});
return {count: count};
};
db.posts.mapReduce(map, reduce, {out: post_word_frequency});
As a bit of an added difficulty, I'm doing it in node.js (with node-mongo-native, though am willing to switch to do the reduce query if there's an easier way).
var db = new Db('mydb', new Server('localhost', 27017, {}), {native_parser:false});
db.open(function(err, db){
db.collection('posts', function(err, col) {
db.col.mapReduce(map, reduce, {out: post_word_frequency});
});
});
So far, I'm having difficulty in that node's telling me ReferenceError: post_word_frequency is not defined (I tried creating it in the shell, but that still didn't help).
So has anyone done a mapreduce with node.js? Is this the wrong use for map reduce? maybe another way to do it? (perhaps just loop and upsert into another collection?)
Thanks for any feedback and advice! :)
EDIT Ryanos below was correct (thanks!) one thing that's missing from my MongoDB based solution was finding the collection and converting it to an array.
db.open(function(err, db){
db.collection('posts', function(err, col) {
col.find({}).toArray(function(err, posts){ // this line creates the 'posts' array as needed by the MAPreduce functions.
var words= _.flatten(_.map(posts, function(val) {
Theres a bug with {out: post_word_frequency} maybe you want {out: "post_word_frequency"} but it should work without this out variable.
Using underscore it can be simply done.
/*
[{"word": "foo", "count": 1}, ...]
*/
var words = _.flatten(_.map(posts, function(val) {
return _.map(val.body.split(" "), function(val) {
return {"word": val, "count": 1};
});
}));
/*
{
"foo": n, ...
}
*/
var count = _.reduce(words, function(memo, val) {
if (_.isNaN(++memo[val.word])) {
memo[val.word] = 1;
}
return memo;
}, {});
Live Example
_.reduce, _.map, _.isNaN, _.flatten