What is the best way to help limit MongoDB CPU usage? - mongodb

We use MongoDB with Parse-server as backend and our application queries and save data at a rate of 5 request/second.
And out of our 3-stack solution (Nginx, Node.js, MongoDB), MongoDB takes the highest CPU hit, perhaps due to the query and save operation, we are using Jelastic so what we do is to pump-up the CPU resources available for our server, but everytime MongoDB CPU usage is keeping up.
I think this can be attributed to the fact that we implemented a check if a document with the same field value exists before saving and after saving doing a dirty check of duplicate record and remove the last record that is a duplicate (just keeping the oldest one) at Node.js level.
The question now would be:
Would configuring MongoDB Replica set help to reduce CPU usage?
What could be done at MongoDB level to optimize it to be able to handle such process/request describe above?
Here's the code:
Parse.Cloud.beforeSave("ProcessedDocument", function(request, response) {
var d = request.object;
var documentId = d.get("documentId");
var query = new Parse.Query("ProcessedDocument");
query.equalTo("documentId", documentId);
query.first({
success: function(results) {
//console.log('Results ' + results);
if(results) {
if (!request.object.isNew()) {
response.success();
} else {
response.error({errorCode:400,errorMsg:"Document already exist"});
}
} else {
response.success();
}
},
error: function(error) {
response.success();
}
});
});
Parse.Cloud.afterSave("ProcessedDocument", function(request) {
var query = new Parse.Query("ProcessedDocument");
query.equalTo("documentId", request.object.get("documentId"));
query.ascending("createdAt");
query.find({
success:function(results) {
if (results && results.length > 1) {
for(var i = (results.length - 1); i > 0 ; i--) {
results[i].destroy();
}
}
else {
// No duplicates
}
},
error:function(error) {
}
});
});
Here's the performance snapshot from MongoDB Compass:

Related

Update with upsert, but only update if date field of document in db is less than updated document

I am having a bit of an issue trying to come up with the logic for this. So, what I want to do is:
Bulk update a bunch of posts to my remote MongoDB instance BUT
If update, only update if lastModified field on the remote collection is less than lastModified field in the same document that I am about to update/insert
Basically, I want to update my list of documents if they have been modified since the last time I updated them.
I can think of two brute force ways to do it...
First, querying my entire collection, trying to manually remove and replace the documents that match the criteria, add the new ones, and then mass insert everything back to the remote collection after deleting everything in remote.
Second, query each item and then deciding, if there is one in remote, if I want to update it or no. This seems like it would be very tasking when dealing with remote collections.
If relevant, I am working on a NodeJS environment, using the mondodb npm package for database operations.
You can use the bulkWrite API to carry out the updates based on the logic you specified as it handles this better.
For example, the following snippet shows how to go about this assuming you already have the data from the web service you need to update the remote collection with:
mongodb.connect(mongo_url, function(err, db) {
if(err) console.log(err);
else {
var mongo_remote_collection = db.collection("remote_collection_name");
/* data is from http call to an external service or ideally
place this within the service callback
*/
mongoUpsert(mongo_remote_collection, data, function() {
db.close();
})
}
})
function mongoUpsert(collection, data_array, cb) {
var ops = data_array.map(function(data) {
return {
"updateOne": {
"filter": {
"_id": data._id, // or any other filtering mechanism to identify a doc
"lastModified": { "$lt": data.lastModified }
},
"update": { "$set": data },
"upsert": true
}
};
});
collection.bulkWrite(ops, function(err, r) {
// do something with result
});
return cb(false);
}
If the data from the external service is huge then consider sending the writes to the server in batches of 500 which gives you a better performance as you are not sending every request to the server, just once in every 500 requests.
For bulk operations MongoDB imposes a default internal limit of 1000 operations per batch and so the choice of 500 documents is good in the sense that you have some control over the batch size rather than let MongoDB impose the default, i.e. for larger operations in the magnitude of > 1000 documents. So for the above case in the first approach one could just write all the array at once as this is small but the 500 choice is for larger arrays.
var ops = [],
counter = 0;
data_array.forEach(function(data) {
ops.push({
"updateOne": {
"filter": {
"_id": data._id,
"lastModified": { "$lt": data.lastModified }
},
"update": { "$set": data },
"upsert": true
}
});
counter++;
if (counter % 500 === 0) {
collection.bulkWrite(ops, function(err, r) {
// do something with result
});
ops = [];
}
})
if (counter % 500 != 0) {
collection.bulkWrite(ops, function(err, r) {
// do something with result
}
}

How do I Bulk Add a Random Field to each record in MongoDB

There are a number of questions and answers about randomly ordering results or randomly getting a single record. The answers recommend adding a random field, creating an index on that field, and then doing a random draw. It looks like:
db.myindex.find().forEach(function(doc) {
db.myindex.update({_id: doc._id}, {$set: {rand: Math.random()}})
})
This works great, but it takes several hours (lots and lots of data). It looks like is limited by write locking which makes sense since the update is happening for each record. How do I do this in bulk? I tried:
var bulk = db.myindex.initializeUnorderedBulkOp();
bulk.find({}).update( { $set: { rand: Math.random() } } );
bulk.execute();
But it sets the rand field to the same value for every record! How do I fix this?
Edit: By the way, the reason that I need to do this is because I get a huge bson file from someone else and I need to import it frequently, so can't wait multiple hours to get it updated.
Introduce a loop with the bulk operations sent to the server once per 1000 documents, or as many modifications as you can fit under the 64MB BSON limit:
var bulk = db.myindex.initializeOrderedBulkOp();
var counter = 0;
db.myindex.find().forEach(function(doc) {
bulk.find({ "_id": doc._id }).updateOne({
"$set": { "rand": Math.random() }
});
counter++;
if (counter % 1000 == 0) {
bulk.execute();
bulk = db.myindex.initializeOrderedBulkOp();
}
});
if (counter % 1000 != 0){
bulk.execute();
}
If the collection is just static data, and you're getting a BSON file from someone else, it might be quicker to stream the BSON file through a filter to generate a new BSON file that you can then import using mongoimport.
Here is one that I wrote using nodeJS that can process a BSON file at around 1GB/min.
var bson = require('bson');
var BSON = new bson.BSONPure.BSON();
var BSONStream = require('bson-stream');
var fs = require('fs');
var sb = require('stream-buffers');
var rs = fs.createReadStream('tweets.bson');
var ws = fs.createWriteStream('tweets_random.bson',{flags:'a'});
var writeBuffer = new sb.WritableStreamBuffer({
initialSize: (1024*1024),
incrementAmount: (10*1024)
});
rs.pipe(new BSONStream()).on('data',function(obj) {
obj.rand = Math.random();
writeBuffer.write(BSON.serialize(obj));
if(writeBuffer.size()>(1024*1024)) {
var size = writeBuffer.size();
ws.write(writeBuffer.getContents(),function() {
console.log("Wrote",size,"bytes");
console.log("Buffer has:",writeBuffer.size(),"bytes left");
});
}
});
It might go faster if you modify the buffer size/increment parameters.
This is of course assuming that you have the luxury of reimporting your data.

meteor how to manage async updates in a loop

I have this loop:
properties.forEach(function(property) {
console.log("property: " + property);
var upsertValues = {};
upsertValues["ID"] = property.ID;
Properties.upsert(upsertValues,
{$set: property},
function(err, nbr) {
if(err)
console.log(err);
else
console.log("upsert successful" + nbr);
});
});
setTimeout(function () {
Fiber(function() {
Meteor.call("removeOldProperties", modification_date);
}).run();
}, 30000)
})
Basically, it updates a bench of documents and at the end, it removes all the once who have not been updated.
I had to use a TimeOut because without that, I removes the documents before their update, as all the Meteor.upsert statements are async.
Is there a better way to do it (without having to use this timeout) ?
Thanks,
Couple thoughts:
upserts are fast, no need for a callback
Fiber is for the server
I don't understand how your upsertValues was a valid query. Is this referring to the document _id? If so, convention is to keep using the name _id, if not, I'd use a more descriptive name. Was this code functioning??
What remains:
var upsertsCompleted = 0;
properties.forEach(function(property) {
Meteor.call("upsertProperties", property, function() {
if (++upsertsCompleted === properties.length) {
Meteor.call("removeOldProperties", modification_date);
}
}
Meteor.methods({
upsertProperties: function (property) {
return Properties.upsert(property.ID, {$set: property});
}
});

mongodb move documents from one collection to another collection

How can documents be moved from one collection to another collection in MongoDB?? For example: I have lot of documents in collection A and I want to move all 1 month older documents to collection B (these 1 month older documents should not be in collection A).
Using aggregation we can do copy. But what I am trying to do is moving of documents.
What method can be used to move documents?
The bulk operations #markus-w-mahlberg showed (and #mark-mullin refined) are efficient but unsafe as written. If the bulkInsert fails, the bulkRemove will still continue. To make sure you don't lose any records when moving, use this instead:
function insertBatch(collection, documents) {
var bulkInsert = collection.initializeUnorderedBulkOp();
var insertedIds = [];
var id;
documents.forEach(function(doc) {
id = doc._id;
// Insert without raising an error for duplicates
bulkInsert.find({_id: id}).upsert().replaceOne(doc);
insertedIds.push(id);
});
bulkInsert.execute();
return insertedIds;
}
function deleteBatch(collection, documents) {
var bulkRemove = collection.initializeUnorderedBulkOp();
documents.forEach(function(doc) {
bulkRemove.find({_id: doc._id}).removeOne();
});
bulkRemove.execute();
}
function moveDocuments(sourceCollection, targetCollection, filter, batchSize) {
print("Moving " + sourceCollection.find(filter).count() + " documents from " + sourceCollection + " to " + targetCollection);
var count;
while ((count = sourceCollection.find(filter).count()) > 0) {
print(count + " documents remaining");
sourceDocs = sourceCollection.find(filter).limit(batchSize);
idsOfCopiedDocs = insertBatch(targetCollection, sourceDocs);
targetDocs = targetCollection.find({_id: {$in: idsOfCopiedDocs}});
deleteBatch(sourceCollection, targetDocs);
}
print("Done!")
}
Update 2
Please do NOT upvote this answer any more. As written #jasongarber's answer is better in any aspect.
Update
This answer by #jasongarber is a safer approach and should be used instead of mine.
Provided I got you right and you want to move all documents older than 1 month, and you use mongoDB 2.6, there is no reason not to use bulk operations, which are the most efficient way of doing multiple operations I am aware of:
> var bulkInsert = db.target.initializeUnorderedBulkOp()
> var bulkRemove = db.source.initializeUnorderedBulkOp()
> var date = new Date()
> date.setMonth(date.getMonth() -1)
> db.source.find({"yourDateField":{$lt: date}}).forEach(
function(doc){
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
}
)
> bulkInsert.execute()
> bulkRemove.execute()
This should be pretty fast and it has the advantage that in case something goes wrong during the bulk insert, the original data still exists.
Edit
In order to prevent too much memory to be utilized, you can execute the bulk operation on every x docs processed:
> var bulkInsert = db.target.initializeUnorderedBulkOp()
> var bulkRemove = db.source.initializeUnorderedBulkOp()
> var x = 10000
> var counter = 0
> var date = new Date()
> date.setMonth(date.getMonth() -1)
> db.source.find({"yourDateField":{$lt: date}}).forEach(
function(doc){
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
counter ++
if( counter % x == 0){
bulkInsert.execute()
bulkRemove.execute()
bulkInsert = db.target.initializeUnorderedBulkOp()
bulkRemove = db.source.initializeUnorderedBulkOp()
}
}
)
> bulkInsert.execute()
> bulkRemove.execute()
Insert and remove:
var documentsToMove = db.collectionA.find({});
documentsToMove.forEach(function(doc) {
db.collectionB.insert(doc);
db.collectionA.remove(doc);
});
note: this method might be quite slow for large collections or collections holding large documents.
$out is use to create the new collection with data , so use $out
db.oldCollection.aggregate([{$out : "newCollection"}])
then use drop
db.oldCollection.drop()
you can use range query to get data from sourceCollection and keep the cursor data in variable and loop on it and insert to target collection:
var doc = db.sourceCollection.find({
"Timestamp":{
$gte:ISODate("2014-09-01T00:00:00Z"),
$lt:ISODate("2014-10-01T00:00:00Z")
}
});
doc.forEach(function(doc){
db.targetCollection.insert(doc);
})
Hope so it helps!!
First option (Using mongo dump)
1.Get a dump from collection
mongodump -d db -c source_collection
2.Restore from collection
mongorestore -d db -c target_collection dir=dump/db_name/source_collection.bson
Second Option
Running aggregate
db.getCollection('source_collection').aggregate([ { $match: {"emailAddress" : "apitester#mailinator.com"} }, { $out: "target_collection" } ])
Third Option (Slowest)
Running a through for loop
db.getCollection('source_collection').find().forEach(function(docs){ db.getCollection('target_collection').insert(docs); }) print("Rolleback Completed!");
May be from the performance point of view it's better to remove a lot of documents using one command(especially if you have indexes for query part) rather than deleting them one-by-one.
For example:
db.source.find({$gte: start, $lt: end}).forEach(function(doc){
db.target.insert(doc);
});
db.source.remove({$gte: start, $lt: end});
This is a restatement of #Markus W Mahlberg
Returning the favor - as a function
function moveDocuments(sourceCollection,targetCollection,filter) {
var bulkInsert = targetCollection.initializeUnorderedBulkOp();
var bulkRemove = sourceCollection.initializeUnorderedBulkOp();
sourceCollection.find(filter)
.forEach(function(doc) {
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
}
)
bulkInsert.execute();
bulkRemove.execute();
}
An example use
var x = {dsid:{$exists: true}};
moveDocuments(db.pictures,db.artifacts,x)
to move all documents that have top level element dsid from the pictures to the artifacts collection
Here's an update to #jasongarber's answer which uses the more recent mongo 'bulkWrite' operation (Read docs here), and also keeps the whole process asynchronous so you can run it as part of a wider script which depends on its' completion.
async function moveDocuments (sourceCollection, targetCollection, filter) {
const sourceDocs = await sourceCollection.find(filter)
console.log(`Moving ${await sourceDocs.count()} documents from ${sourceCollection.collectionName} to ${targetCollection.collectionName}`)
const idsOfCopiedDocs = await insertDocuments(targetCollection, sourceDocs)
const targetDocs = await targetCollection.find({_id: {$in: idsOfCopiedDocs}})
await deleteDocuments(sourceCollection, targetDocs)
console.log('Done!')
}
async function insertDocuments (collection, documents) {
const insertedIds = []
const bulkWrites = []
await documents.forEach(doc => {
const {_id} = doc
insertedIds.push(_id)
bulkWrites.push({
replaceOne: {
filter: {_id},
replacement: doc,
upsert: true,
},
})
})
if (bulkWrites.length) await collection.bulkWrite(bulkWrites, {ordered: false})
return insertedIds
}
async function deleteDocuments (collection, documents) {
const bulkWrites = []
await documents.forEach(({_id}) => {
bulkWrites.push({
deleteOne: {
filter: {_id},
},
})
})
if (bulkWrites.length) await collection.bulkWrite(bulkWrites, {ordered: false})
}
From MongoDB 3.0 up, you can use the copyTo command with the following syntax:
db.source_collection.copyTo("target_collection")
Then you can use the drop command to remove the old collection:
db.source_collection.drop()
I do like the response from #markus-w-mahlberg, however at times, I have seen the need to keep it a bit simpler for people. As such I have a couple of functions that are below. You could naturally wrap thing here with bulk operators as he did, but this code works with new and old Mongo systems equally.
function parseNS(ns){
//Expects we are forcing people to not violate the rules and not doing "foodb.foocollection.month.day.year" if they do they need to use an array.
if (ns instanceof Array){
database = ns[0];
collection = ns[1];
}
else{
tNS = ns.split(".");
if (tNS.length > 2){
print('ERROR: NS had more than 1 period in it, please pass as an [ "dbname","coll.name.with.dots"] !');
return false;
}
database = tNS[0];
collection = tNS[1];
}
return {database: database,collection: collection};
}
function insertFromCollection( sourceNS, destNS, query, batchSize, pauseMS){
//Parse and check namespaces
srcNS = parseNS(sourceNS);
destNS = parseNS(destNS);
if ( srcNS == false || destNS == false){return false;}
batchBucket = new Array();
totalToProcess = db.getDB(srcNS.database).getCollection(srcNS.collection).find(query,{_id:1}).count();
currentCount = 0;
print("Processed "+currentCount+"/"+totalToProcess+"...");
db.getDB(srcNS.database).getCollection(srcNS.collection).find(query).addOption(DBQuery.Option.noTimeout).forEach(function(doc){
batchBucket.push(doc);
if ( batchBucket.length > batchSize){
db.getDB(destNS.database).getCollection(destNS.collection)insert(batchBucket);
currentCount += batchBucket.length;
batchBucket = [];
sleep (pauseMS);
print("Processed "+currentCount+"/"+totalToProcess+"...");
}
}
print("Completed");
}
/** Example Usage:
insertFromCollection("foo.bar","foo2.bar",{"type":"archive"},1000,20);
You could obviously add a db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).remove(query,true)
If you wanted to also remove the records after they are copied to the new location. The code can easily be built like that to make it restartable.
I had 2297 collection for 15 million of documents but some collection was empty.
Using only copyTo the script failed, but with this script optimization:
db.getCollectionNames().forEach(function(collname) {
var c = db.getCollection(collname).count();
if(c!==0){
db.getCollection(collname).copyTo('master-collection');
print('Copied collection ' + collname);
}
});
all works fine for me.
NB: copyTo is deprecated because it block the read/write operation: so I think is fine if you know that the database is not usable during this operation.
In my case for each didn't work. So I had to make some changes.
var kittySchema = new mongoose.Schema({
name: String
});
var Kitten = mongoose.model('Kitten', kittySchema);
var catSchema = new mongoose.Schema({
name: String
});
var Cat = mongoose.model('Cat', catSchema);
This is Model for both the collection
`function Recursion(){
Kitten.findOne().lean().exec(function(error, results){
if(!error){
var objectResponse = results;
var RequiredId = objectResponse._id;
delete objectResponse._id;
var swap = new Cat(objectResponse);
swap.save(function (err) {
if (err) {
return err;
}
else {
console.log("SUCCESSFULL");
Kitten.deleteOne({ _id: RequiredId }, function(err) {
if (!err) {
console.log('notification!');
}
else {
return err;
}
});
Recursion();
}
});
}
if (err) {
console.log("No object found");
// return err;
}
})
}`
I planned to arhieve 1000 records at a time using bulkinsert and bulkdelete methods of pymongo.
For both source and target
create mongodb objects to connect to the database.
instantiate the bulk objects. Note: I created a backup of bulk objects too. This will help me to rollback the insertion or removal when an error occurs.
example:
For source
// replace this with mongodb object creation logic
source_db_obj = db_help.create_db_obj(source_db, source_col)
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
For target
// replace this with mogodb object creation logic
target_db_obj = db_help.create_db_obj(target_db, target_col)
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
Obtain the source records that matches the filter criteria
source_find_results = source_db_obj.find(filter)
Loop through the source records
create target and source bulk operations
Append archived_at field with the current datetime to the target collection
//replace this with the logic to obtain the UTCtime.
doc['archived_at'] = db_help.getUTCTime()
target_bulk.insert(document)
source_bulk.remove(document)
for rollback in case of any errors or exceptions, create target_bulk_bak and source_bulk_bak operations.
target_bulk_bak.find({'_id':doc['_id']}).remove_one()
source_bulk_bak.insert(doc)
//remove the extra column
doc.pop('archieved_at', None)
When the record count to 1000, execute the target - bulk insertion and source - bulk removal. Note: this method takes target_bulk and source_bulk objects for execution.
execute_bulk_insert_remove(source_bulk, target_bulk)
When exception occurs, execute the target_bulk_bak removal and source_bulk_bak inesertions. This would rollback the changes. Since mongodb doesn't have rollback, I came up with this hack
execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)
Finally re-initialize the source and target bulk and bulk_bak objects. This is necessary because you can use them only once.
Complete code
def execute_bulk_insert_remove(source_bulk, target_bulk):
try:
target_bulk.execute()
source_bulk.execute()
except BulkWriteError as bwe:
raise Exception(
"could not archive document, reason: {}".format(bwe.details))
def archive_bulk_immediate(filter, source_db, source_col, target_db, target_col):
"""
filter: filter criteria for backup
source_db: source database name
source_col: source collection name
target_db: target database name
target_col: target collection name
"""
count = 0
bulk_count = 1000
source_db_obj = db_help.create_db_obj(source_db, source_col)
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
target_db_obj = db_help.create_db_obj(target_db, target_col)
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
source_find_results = source_db_obj.find(filter)
start = datetime.now()
for doc in source_find_results:
doc['archived_at'] = db_help.getUTCTime()
target_bulk.insert(doc)
source_bulk.find({'_id': doc['_id']}).remove_one()
target_bulk_bak.find({'_id': doc['_id']}).remove_one()
doc.pop('archieved_at', None)
source_bulk_bak.insert(doc)
count += 1
if count % 1000 == 0:
logger.info("count: {}".format(count))
try:
execute_bulk_insert_remove(source_bulk, target_bulk)
except BulkWriteError as bwe:
execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)
logger.info("Bulk Write Error: {}".format(bwe.details))
raise
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
end = datetime.now()
logger.info("archived {} documents to {} in ms.".format(
count, target_col, (end - start)))

Is there a way to perform a "dry run" of an update operation?

I am in the process of changing the schema for one of my MongoDB collections. (I had been storing dates as strings, and now my application stores them as ISODates; I need to go back and change all of the old records to use ISODates as well.) I think I know how to do this using an update, but since this operation will affect tens of thousands of records I'm hesitant to issue an operation that I'm not 100% sure will work. Is there any way to do a "dry run" of an update that will show me, for a small number of records, the original record and how it would be changed?
Edit: I ended up using the approach of adding a new field to each record, and then (after verifying that the data was right) renaming that field to match the original. It looked like this:
db.events.find({timestamp: {$type: 2}})
.forEach( function (e) {
e.newTimestamp = new ISODate(e.timestamp);
db.events.save(e);
} )
db.events.update({},
{$rename: {'newTimestamp': 'timestamp'}},
{multi: true})
By the way, that method for converting the string times to ISODates was what ended up working. (I got the idea from this SO answer.)
My advice would be to add the ISODate as a new field. Once confirmed that all looks good you could then unset the the string date.
Create a test environment with your database structure. Copy a handful of records to it. Problem solved. Not the solution you were looking for, I'm sure. But, I believe, this is the exact circumstances that a 'test environment' should be used for.
Select ID of particular records that you would like to monitor. place in the update {_id:{$in:[<your monitored id>]}}
Another option which depends of the amount of overhead it will cause you -
You can consider writing a script, that performs the find operation, add printouts or run in debug while the save operation is commented out. Once you've gained confidence you can apply the save operation.
var changesLog = [];
var errorsLog = [];
events.find({timestamp: {$type: 2}}, function (err, events) {
if (err) {
debugger;
throw err;
} else {
for (var i = 0; i < events.length; i++) {
console.log('events' + i +"/"+(candidates.length-1));
var currentEvent = events[i];
var shouldUpdateCandidateData = false;
currentEvent.timestamp = new ISODate(currentEvent.timestamp);
var change = currentEvent._id;
changesLog.push(change);
// // ** Dry Run **
// currentEvent.save(function (err) {
// if (err) {
// debugger;
// errorsLog.push(currentEvent._id + ", " + currentEvent.timeStamp + ', ' + err);
// throw err;
// }
// });
}
console.log('Done');
console.log('Changes:');
console.log(changesLog);
console.log('Errors:');
console.log(errorsLog);
return;
}
});
db.collection.find({"_manager": { $exists: true, $ne: null }}).forEach(
function(doc){
doc['_managers']=[doc._manager]; // String --> List
delete doc['_manager']; // Remove "_managers" key-value pair
printjson(doc); // Debug by output the doc result
//db.teams.save(doc); // Save all the changes into doc data
}
)
In my case the collection contain _manager and I would like to change it to _managers list. I have tested it in my local working as expected.
In the several latest versions of MongoDB (at least starting with 4.2), you could do that using a transaction.
const { MongoClient } = require('mongodb')
async function main({ dryRun }) {
const client = new MongoClient('mongodb://127.0.0.1:27017', {
maxPoolSize: 1
})
const pool = await client.connect()
const db = pool.db('someDB')
const session = pool.startSession()
session.startTransaction()
try {
const filter = { id: 'some-id' }
const update = { $rename: { 'newTimestamp': 'timestamp' } }
// This is the important bit
const options = { session: session }
await db.collection('someCollection').updateMany(
filter,
update,
options // using session
)
const afterUpdate = db.collection('someCollection')
.find(
filter,
options // using session
)
.toArray()
console.debug('updated documents', afterUpdate)
if (dryRun) {
// This will roll back any changes made within the session
await session.abortTransaction()
} else {
await session.commitTransaction()
}
} finally {
await session.endSession()
await pool.close()
}
}
const _ = main({ dryRun: true })