Update large collection - mongodb

Does anyone have a suggestion about how to update a field in each document in a large collection?
I use something like this:
MyModel.find().exec(function(err, data){
  if (err) {
    return console.log(err);
  }
  data.forEach(function(doc){
    doc.Field = doc.Field + 1;
    doc.save(function (err) {
      if (err) {
        console.error('ERROR!');
      }
    });
  });
});
But I get FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory.
Is there a way to process the above update in chunks or something like that?

You can use the async.eachLimit method of the async library to limit the number of concurrent save operations.
For example, to limit the saves to no more than 5 outstanding at a time:
var async = require('async');

MyModel.find().exec(function(err, data){
  if (err) {
    return console.log(err);
  }
  async.eachLimit(data, 5, function(doc, callback){
    doc.Field = doc.Field + 1;
    doc.save(function(err) {
      if (err) {
        console.error('ERROR!');
      }
      callback(err);
    });
  });
});
However, in this case it would be much more efficient to use a single update with the $inc operator and the multi: true option to increment each doc's Field value by 1.
MyModel.update({}, {$inc: {Field: 1}}, {multi: true}, function(err) { ... });
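If per-document logic really is required, note that the out-of-memory error comes from loading every document into memory at once with find(); streaming the query avoids that. A minimal sketch, assuming a Mongoose version that supports Query#cursor() and eachAsync() (4.5 or later):
var cursor = MyModel.find().cursor();
cursor.eachAsync(function (doc) {
  // Process one document at a time instead of holding the whole result set in memory.
  doc.Field = doc.Field + 1;
  return doc.save(); // returning the promise makes eachAsync wait for each save
}).then(function () {
  console.log('All documents updated');
}).catch(function (err) {
  console.error(err);
});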

You need more memory. Raise V8's heap limits with --max_new_space_size and/or --max_old_space_size, like this:
node --max-old-space-size=4096 server.js
Currently, by default V8 has a memory limit of 512MB on 32-bit systems and 1.4GB on 64-bit systems. The limit can be raised by setting --max_old_space_size to a maximum of ~1024 (~1GB) on 32-bit and ~4096 (~4GB) on 64-bit, but it is recommended that you split your single process into several workers if you are hitting memory limits.

Related

What is the best way to help limit MongoDB CPU usage?

We use MongoDB with Parse Server as our backend, and our application queries and saves data at a rate of about 5 requests/second.
Out of our three-tier stack (Nginx, Node.js, MongoDB), MongoDB takes the highest CPU hit, presumably because of these query and save operations. We host on Jelastic, so we keep adding CPU resources to the server, but MongoDB's CPU usage keeps climbing to match.
I think this can be attributed to the duplicate handling we implemented at the Node.js level: before saving we check whether a document with the same field value already exists, and after saving we do a dirty check for duplicate records and remove every duplicate except the oldest one.
The questions now are:
Would configuring a MongoDB replica set help reduce CPU usage?
What could be done at the MongoDB level to optimize it for the kind of workload described above?
Here's the code:
Parse.Cloud.beforeSave("ProcessedDocument", function(request, response) {
  var d = request.object;
  var documentId = d.get("documentId");
  var query = new Parse.Query("ProcessedDocument");
  query.equalTo("documentId", documentId);
  query.first({
    success: function(results) {
      //console.log('Results ' + results);
      if (results) {
        if (!request.object.isNew()) {
          response.success();
        } else {
          response.error({errorCode: 400, errorMsg: "Document already exist"});
        }
      } else {
        response.success();
      }
    },
    error: function(error) {
      response.success();
    }
  });
});

Parse.Cloud.afterSave("ProcessedDocument", function(request) {
  var query = new Parse.Query("ProcessedDocument");
  query.equalTo("documentId", request.object.get("documentId"));
  query.ascending("createdAt");
  query.find({
    success: function(results) {
      if (results && results.length > 1) {
        for (var i = (results.length - 1); i > 0; i--) {
          results[i].destroy();
        }
      } else {
        // No duplicates
      }
    },
    error: function(error) {
    }
  });
});
Here's the performance snapshot from MongoDB Compass:
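On the second question, one MongoDB-level change worth sketching (a suggestion of mine, not something from the original post) is a unique index on documentId. That pushes the duplicate check into the database itself, so the beforeSave query and the afterSave cleanup pass become unnecessary and duplicate inserts simply fail with a duplicate-key error:
// Run in the mongo shell against the collection backing the ProcessedDocument class.
// The collection name is an assumption; Parse Server normally names it after the class.
db.ProcessedDocument.createIndex({ documentId: 1 }, { unique: true });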

Mongodb.collection.count() is not accurate for collection.insertmany()

My code below is not giving me the correct number of rows. Basically, I am reading file data and storing it in a MongoDB collection; might this be related to asynchronous vs. synchronous operations? I would appreciate it if someone could point me to the right resource.
collection.insertMany(jsonArray);
db.collection('collection1').count(function(err, count) {
  if (err) throw err;
  console.log('Total Rows:' + count);
});
Total Rows: 3803
Now if I go to the MongoDB shell, it gives me the accurate number of rows.
Most probably you are trying to fetch the count before the insert operation is complete. So first wait for the data to be inserted, and after that run the count query. Hope this helps.
Try this:
collection.insertMany(jsonArray, function(err, res) {
  if (err) {
    throw err;
  } else {
    db.collection('collection1').count(function(err, count) {
      if (err) throw err;
      console.log('Total Rows:' + count);
    });
  }
});
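If the driver in use returns promises (MongoDB Node.js driver 3.x or later; that version is an assumption, not stated in the question), the same ordering can be expressed with async/await, which makes the dependency explicit:
// Sketch: wait for insertMany to finish before counting.
async function loadAndCount(db, jsonArray) {
  var col = db.collection('collection1');
  await col.insertMany(jsonArray);        // resolves once all documents are inserted
  var count = await col.countDocuments(); // count only after the insert has completed
  console.log('Total Rows:' + count);
}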

meteor how to manage async updates in a loop

I have this loop:
properties.forEach(function(property) {
  console.log("property: " + property);
  var upsertValues = {};
  upsertValues["ID"] = property.ID;
  Properties.upsert(upsertValues,
    {$set: property},
    function(err, nbr) {
      if (err)
        console.log(err);
      else
        console.log("upsert successful" + nbr);
    });
});

setTimeout(function () {
  Fiber(function() {
    Meteor.call("removeOldProperties", modification_date);
  }).run();
}, 30000);
Basically, it updates a batch of documents and, at the end, removes all the ones that have not been updated.
I had to use a timeout because, without it, the documents are removed before they are updated, since all the upsert calls are asynchronous.
Is there a better way to do this (without having to use the timeout)?
Thanks,
A couple of thoughts:
Upserts are fast, so there's no need for a callback.
Fiber is for the server.
I don't understand how your upsertValues was a valid query. Is it referring to the document _id? If so, the convention is to keep using the name _id; if not, I'd use a more descriptive name. Was this code actually functioning?
What remains:
var upsertsCompleted = 0;

properties.forEach(function(property) {
  Meteor.call("upsertProperties", property, function() {
    if (++upsertsCompleted === properties.length) {
      Meteor.call("removeOldProperties", modification_date);
    }
  });
});

Meteor.methods({
  upsertProperties: function (property) {
    return Properties.upsert(property.ID, {$set: property});
  }
});
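Another option, if native Promises are available in your Meteor version (an assumption on my part), is to promisify Meteor.call and wait for every upsert with Promise.all before removing the old documents:
// Sketch: run all upserts in parallel and remove stale documents only once all have finished.
function upsertOne(property) {
  return new Promise(function (resolve, reject) {
    Meteor.call("upsertProperties", property, function (err, res) {
      if (err) reject(err); else resolve(res);
    });
  });
}

Promise.all(properties.map(upsertOne))
  .then(function () {
    Meteor.call("removeOldProperties", modification_date);
  })
  .catch(function (err) {
    console.log(err);
  });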

MongoDB: get last few documents, then await tailable cursor

I want to get the last 5 documents from a MongoDB collection and then keep tailing it for new documents. Can this be done at all with one query, or do I really need two queries? If two, what's the best way to achieve this without adding extra fields?
While an answer in any language is fine, here's an example node.js code snippet of what I'm trying to achieve (error handling omitted; snippet edited based on the first answer to the question):
MongoClient.connect("mongodb://localhost:1338/mydb", function(err, db) {
  db.collection('mycollection', function(err, col) {
    col.count({}, function(err, total) {
      col.find({}, { tailable: true, awaitdata: true, timeout: false, skip: total - 5, limit: 5 }, function(err, cursor) {
        cursor.each(function(err, doc) {
          console.dir(doc); // print the document object to console
        });
      });
    });
  });
});
Problem: the above code prints all the documents starting from the first one and then waits for more; the skip and limit options have no effect.
Question: how can I easily get the 5 latest documents and then keep tailing for more? An example in any language is fine; it does not have to be node.js.
(Answer edited; it's useful to know that this does not work with these versions.)
If the collection were not tailable, you'd find out how many items there are with count and then use the skip option to skip the first count - 5 items.
This will NOT work; tailable and skip do not work together (MongoDB 2.4.6, node.js 0.10.18):
MongoClient.connect("mongodb://localhost:1338/mydb", function(err, db) {
  db.collection('mycollection', function(err, col) {
    col.count({}, function(err, total) {
      col.find({}, { tailable: true, awaitdata: true, timeout: false, skip: total - 5, limit: 5 }, function(err, cursor) {
        cursor.each(function(err, doc) {
          console.dir(doc);
        });
      });
    });
  });
});
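A two-query approach should work instead (this is a sketch of my own, not part of the original answer): first fetch the last 5 documents with a normal query sorted by $natural descending, then open a tailable cursor for anything newer. Using _id as the resume point is an assumption; it relies on default ObjectIds, which increase with insertion time.
MongoClient.connect("mongodb://localhost:1338/mydb", function(err, db) {
  var col = db.collection('mycollection');

  // 1st query: last 5 documents in insertion order (capped collections preserve natural order).
  col.find({}).sort({ $natural: -1 }).limit(5).toArray(function(err, latest) {
    latest.reverse(); // restore oldest-to-newest order
    latest.forEach(function(doc) {
      console.dir(doc);
    });

    // 2nd query: tail everything inserted after the newest document seen so far.
    var lastId = latest.length ? latest[latest.length - 1]._id : null;
    var filter = lastId ? { _id: { $gt: lastId } } : {};
    var cursor = col.find(filter, { tailable: true, awaitdata: true, timeout: false });
    cursor.each(function(err, doc) {
      if (doc) console.dir(doc);
    });
  });
});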

concurrency issues while upserting and then reading the data from mongodb using mongoose

Hi, I am trying to build an application which upserts data and then fetches it from MongoDB based on the user id. This approach works fine for a single user, but when I try hitting it for multiple users, say 25, the data fetched sometimes comes back null. Below is my upsert code:
collection.update({'USER_ID': passVal.ID},
  {'RESPONSE': Data}, { upsert: true }, function (err) {
    if (err) {
      console.log("Error in saving data");
    }
    var query = collection.findOne({'USER_ID': passVal.ID});
    query.select('RESPONSE');
    query.exec(function (err, data) {
      if (err) return handleError(err);
      console.log(data.RESPONSE);
    });
  });
In some cases I get an error because data is null, even though I have written the read code only inside the upsert callback. I am stuck here; any help regarding this would be much appreciated.
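One way to sidestep the race entirely (a sketch on my part, not from the original post, assuming collection is a Mongoose model as the snippet suggests) is to let the upsert return the resulting document atomically with findOneAndUpdate, so no separate read is needed:
collection.findOneAndUpdate(
  { USER_ID: passVal.ID },
  { $set: { RESPONSE: Data } },
  { upsert: true, new: true }, // "new: true" returns the document as it is after the update
  function (err, doc) {
    if (err) return handleError(err);
    console.log(doc.RESPONSE); // doc is the upserted/updated document
  }
);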