MongoDB Atlas trigger - execution time limit exceeded

I'm testing a trigger on MongoDB Atlas which runs a Realm function that adds an object to an Algolia index upon insertion into the MongoDB collection. In my case the record gets uploaded to the Algolia index successfully, but the function doesn't stop there and ends up exceeding the time limit.
The docs mention that
Function runtime is limited to 120 seconds
and that's why the function times out.
Here is my Realm function
exports = function(changeEvent) {
  const algoliasearch = require('algoliasearch');
  const client = algoliasearch(context.values.get('algolia_app'), context.values.get('algolia_key'));
  const index = client.initIndex("movies");
  changeEvent.fullDocument.objectID = changeEvent.fullDocument._id;
  delete changeEvent.fullDocument._id;
  index.saveObject(changeEvent.fullDocument)
    .then(({objectID}) => {
      console.log('successfully inserted: ', objectID);
    })
    .catch(err => {
      console.log(err);
    });
};
Here is the result I get in the logs
Logs:
[
"successfully inserted: 61cf0a79c577393620dd8c80"
]
Error:
execution time limit exceeded
I even tried adding return statements after the console.log calls, but I still hit the same issue.
What am I doing wrong?

Apparently this was fixed by the MongoDB team in early March, as seen in https://www.mongodb.com/community/forums/t/extremely-slow-execution-of-an-external-dependency-function/16919/27.
I tested with the code below and it worked perfectly, without any timeouts this time.
I made the function async. According to the logs, it didn't even take 1 second to perform the indexing.
exports = async function(changeEvent) {
  const algoliasearch = require('algoliasearch');
  const client = algoliasearch(context.values.get('algolia_app'), context.values.get('algolia_key'));
  const index = client.initIndex("movies");
  changeEvent.fullDocument.objectID = changeEvent.fullDocument._id;
  delete changeEvent.fullDocument._id;
  try {
    const result = await index.saveObject(changeEvent.fullDocument);
    console.log(Date.now(), 'successfully updated: ', result);
  } catch (e) {
    console.error(e);
  }
};
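Separately from the platform fix mentioned above, it may also be worth noting that the original version never returns the saveObject promise, so the Functions runtime has nothing to await. A minimal sketch of an equivalent fix without async/await (same assumptions about the algolia_app and algolia_key values) would be to return the promise chain:
exports = function(changeEvent) {
  const algoliasearch = require('algoliasearch');
  const client = algoliasearch(context.values.get('algolia_app'), context.values.get('algolia_key'));
  const index = client.initIndex("movies");
  changeEvent.fullDocument.objectID = changeEvent.fullDocument._id;
  delete changeEvent.fullDocument._id;
  // Returning the promise lets the function resolve once saveObject settles.
  return index.saveObject(changeEvent.fullDocument)
    .then(({ objectID }) => console.log('successfully inserted: ', objectID))
    .catch(err => console.log(err));
};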

Related

Do Mongo transactions take a snapshot for read operations?

Imagine the following scenario:
Start a session
Start a transaction for that session
Run a read on Document A
A different session updates Document A (during execution)
Write Document B based on the original read of Document A
Commit the transaction
End the session
Will the update on Document A be atomic between the read and the write, or is there a concurrency problem? I understand a transaction takes a snapshot of all write operations, but I'm not sure what happens on the reading side.
await session.withTransaction(async () => {
  const coll1 = client.db('mydb1').collection('foo');
  const coll2 = client.db('mydb2').collection('bar');
  const docA = await coll1.findOne({ abc: 1 }, { session });
  // docA is deleted by another session at this point
  if (docA) {
    // Does this run on an outdated condition?
    await coll2.insertOne({ xyz: 999 }, { session });
  }
}, transactionOptions)

Download large sets of documents from MongoDB using Meteor methods

I am trying to export all the documents from a collection (which is about 12 MB) using a Meteor method, but it almost always crashes the app or never returns the results.
I am considering uploading the documents to S3 and then sending a download link to the client; however, that seems like an unnecessary network hop and will make the process even longer.
Is there a better way to get large sets of data from server to client?
Here is an example of that code; it is very simple.
'downloadUserActions': () => {
  if (Roles.userIsInRole(Meteor.userId(), ['admin'])) {
    const userData = userActions.find({}).fetch();
    return userData
  }
}
Thanks.
You can use an approach where you split the request into multiple ones:
get the document count
until the document count is completely fetched:
get the current count of already fetched docs
fetch the next batch of docs, skipping the ones already fetched
For this you need the skip option in the Mongo query in order to skip the already fetched docs.
Code example
const limit = 250

Meteor.methods({
  // get the max amount of docs
  getCount () {
    return userActions.find().count()
  },
  // get the next block of docs
  // from: skip to: skip + limit
  // example: skip = 1000, limit = 500 is
  // from: 1000 to: 1500
  downloadUserActions (skip) {
    this.unblock()
    return userActions.find({}, { skip, limit }).fetch()
  }
})
Client:
// wrap the Meteor.call into a promise
const asyncCall = (name, args) => new Promise((resolve, reject) => {
  Meteor.call(name, args, (err, res) => {
    if (err) {
      return reject(err)
    }
    return resolve(res)
  })
})

const asyncTimeout = ms => new Promise(resolve => setTimeout(() => resolve(), ms))

const fetchAllDocs = async (destination) => {
  const maxDocs = await asyncCall('getCount')
  let loadedDocs = 0
  while (loadedDocs < maxDocs) {
    const docs = await asyncCall('downloadUserActions', loadedDocs)
    docs.forEach(doc => {
      // think about using upsert to fix multiple docs issues
      destination.insert(doc)
    })
    // increase counter (skip value)
    loadedDocs = destination.find().count()
    // wait 10ms for next request, increase if the server needs more time
    await asyncTimeout(10)
  }
  return destination
}
Use it with a local Mongo Collection on the client:
await fetchAllDocs(new Mongo.Collection(null))
After the function completes, all docs are stored in this local collection.
Play with the limit and the timeout (milliseconds) values in order to find a sweet spot between user experience and server performance.
Additional improvements
The code does not authenticate or validate requests. This is up to you!
Also, you might think about adding a failsafe mechanism in case the while loop never completes due to some unintended errors; see the sketch below.
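One possible shape for such a failsafe (a rough sketch reusing the helpers above; the maxAttempts limit is arbitrary) is to cap the number of loop iterations:
const fetchAllDocsSafe = async (destination, maxAttempts = 1000) => {
  const maxDocs = await asyncCall('getCount')
  let loadedDocs = 0
  let attempts = 0
  while (loadedDocs < maxDocs) {
    // bail out if the loop never converges, e.g. the server keeps returning empty batches
    if (++attempts > maxAttempts) {
      throw new Error('fetchAllDocsSafe aborted after ' + maxAttempts + ' attempts')
    }
    const docs = await asyncCall('downloadUserActions', loadedDocs)
    docs.forEach(doc => destination.insert(doc))
    loadedDocs = destination.find().count()
    await asyncTimeout(10)
  }
  return destination
}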
Further readings
https://docs.meteor.com/api/methods.html#DDPCommon-MethodInvocation-unblock
https://docs.meteor.com/api/collections.html#Mongo-Collection
https://docs.meteor.com/api/collections.html#Mongo-Collection-find

MongoDB bulkWrite multiple updateOne vs updateMany

I have cases where I build bulkWrite operations in which some documents share the same update object. Is there any performance benefit to merging the filters and sending one updateMany with those filters, instead of multiple updateOnes in the same bulkWrite?
It's obviously better to use updateMany over multiple updateOnes when using the normal methods, but with bulkWrite, since it's a single command, are there any significant gains from preferring one over the other?
Example:
I have 200K documents that I need to update, with only 10 unique status values across all 200K documents, so my options are:
Solutions:
A) Send one single bulkWrite with 10 updateMany operations, each of which will affect 20K documents.
B) Send one single bulkWrite with 200K updateOne operations, each holding its own filter and status.
As @AlexBlex noted, I have to look out for accidentally updating more than one document with the same filter. In my case I use _id as my filter, so accidentally updating other documents is not a concern, but it is definitely something to watch out for when considering the updateMany option.
Thanks @AlexBlex.
Short answer:
Using updateMany is at least twice as fast, but it might accidentally update more documents than you intended; keep reading to learn how to avoid this and gain the performance benefits.
Long answer:
We ran the following experiment to find the answer; these are the steps:
Create a bankaccounts MongoDB collection where each document contains only one field (balance).
Insert 1 million documents into the bankaccounts collection.
Randomize the in-memory order of all 1 million documents, to avoid any possible optimizations by the database from ids being updated in the same sequence they were inserted, simulating a real-world scenario.
Build the write operations for bulkWrite from the documents, each with a random balance between 0 and 100.
Execute the bulkWrite.
Log the time the bulkWrite took.
The variation between experiments lies in the 4th step.
In one variation we build an array consisting of 1 million updateOne operations, each updateOne having a filter for a single document and its respective update object.
In the second variation, we build 100 updateMany operations, each including a filter for 10K document ids and their respective update.
Results:
updateMany with multiple document ids is about 2.4 times (243%) as fast as multiple updateOnes. This cannot be used everywhere though; please read "The risk" section to learn when it should be used.
Details:
We ran the script 5 times for each variation; the detailed results are as follows:
With updateOne: 51.28 seconds on average.
With updateMany: 21.04 seconds on average.
The risk:
As many people have already pointed out, updateMany is not a direct substitute for updateOne, since it can incorrectly update multiple documents when the intention was to update only one.
This approach is only valid when you're filtering on a unique field such as _id (or any other unique field); if the filter depends on fields that are not unique, multiple documents may be updated and the results will not be equivalent.
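As a minimal sketch of that safe pattern (the docs array and status field here are illustrative, not part of the benchmark below): group the documents by the value they should receive, and filter each updateMany by _id only.
// `docs` is assumed to be an array of { _id, newStatus } pairs.
function buildSafeUpdateManyOps (docs) {
  const idsByStatus = new Map();
  for (const doc of docs) {
    const ids = idsByStatus.get(doc.newStatus) || [];
    ids.push(doc._id);
    idsByStatus.set(doc.newStatus, ids);
  }
  // One updateMany per distinct status, filtered by the unique _id field only.
  return Array.from(idsByStatus, ([status, ids]) => ({
    updateMany: {
      filter: { _id: { $in: ids } },
      update: { $set: { status } }
    }
  }));
}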
65831219.js
// 65831219.js
'use strict';

const mongoose = require('mongoose');
const { Schema } = mongoose;

const DOCUMENTS_COUNT = 1_000_000;
const UPDATE_MANY_OPERATIONS_COUNT = 100;
const MINIMUM_BALANCE = 0;
const MAXIMUM_BALANCE = 100;
const SAMPLES_COUNT = 10;

const bankAccountSchema = new Schema({
  balance: { type: Number }
});
const BankAccount = mongoose.model('BankAccount', bankAccountSchema);

mainRunner().catch(console.error);

async function mainRunner () {
  for (let i = 0; i < SAMPLES_COUNT; i++) {
    await runOneCycle(buildUpdateManyWriteOperations).catch(console.error);
    await runOneCycle(buildUpdateOneWriteOperations).catch(console.error);
    console.log('-'.repeat(80));
  }
  process.exit(0);
}

/**
 *
 * @param {buildUpdateManyWriteOperations|buildUpdateOneWriteOperations} buildBulkWrite
 */
async function runOneCycle (buildBulkWrite) {
  await mongoose.connect('mongodb://localhost:27017/test', {
    useNewUrlParser: true,
    useUnifiedTopology: true
  });
  await mongoose.connection.dropDatabase();
  const { accounts } = await createAccounts({ accountsCount: DOCUMENTS_COUNT });
  const { writeOperations } = buildBulkWrite({ accounts });
  const writeStartedAt = Date.now();
  await BankAccount.bulkWrite(writeOperations);
  const writeEndedAt = Date.now();
  console.log(`Write operations took ${(writeEndedAt - writeStartedAt) / 1000} seconds with \`${buildBulkWrite.name}\`.`);
}

async function createAccounts ({ accountsCount }) {
  const rawAccounts = Array.from({ length: accountsCount }, () => ({ balance: getRandomInteger(MINIMUM_BALANCE, MAXIMUM_BALANCE) }));
  const accounts = await BankAccount.insertMany(rawAccounts);
  return { accounts };
}

function buildUpdateOneWriteOperations ({ accounts }) {
  const writeOperations = shuffleArray(accounts).map((account) => ({
    updateOne: {
      filter: { _id: account._id },
      update: { balance: getRandomInteger(MINIMUM_BALANCE, MAXIMUM_BALANCE) }
    }
  }));
  return { writeOperations };
}

function buildUpdateManyWriteOperations ({ accounts }) {
  shuffleArray(accounts);
  const accountsChunks = chunkArray(accounts, accounts.length / UPDATE_MANY_OPERATIONS_COUNT);
  const writeOperations = accountsChunks.map((accountsChunk) => ({
    updateMany: {
      filter: { _id: { $in: accountsChunk.map(account => account._id) } },
      update: { balance: getRandomInteger(MINIMUM_BALANCE, MAXIMUM_BALANCE) }
    }
  }));
  return { writeOperations };
}

function getRandomInteger (min = 0, max = 1) {
  min = Math.ceil(min);
  max = Math.floor(max);
  return min + Math.floor(Math.random() * (max - min + 1));
}

function shuffleArray (array) {
  let currentIndex = array.length;
  let temporaryValue;
  let randomIndex;
  // While there remain elements to shuffle...
  while (0 !== currentIndex) {
    // Pick a remaining element...
    randomIndex = Math.floor(Math.random() * currentIndex);
    currentIndex -= 1;
    // And swap it with the current element.
    temporaryValue = array[currentIndex];
    array[currentIndex] = array[randomIndex];
    array[randomIndex] = temporaryValue;
  }
  return array;
}

function chunkArray (array, sizeOfTheChunkedArray) {
  const chunked = [];
  for (const element of array) {
    const last = chunked[chunked.length - 1];
    if (!last || last.length === sizeOfTheChunkedArray) {
      chunked.push([element]);
    } else {
      last.push(element);
    }
  }
  return chunked;
}
Output
$ node 65831219.js
Write operations took 20.803 seconds with `buildUpdateManyWriteOperations`.
Write operations took 50.84 seconds with `buildUpdateOneWriteOperations`.
----------------------------------------------------------------------------------------------------
Tests were run using MongoDB version 4.0.4.
At a high level, if you have the same update object, then you can use updateMany rather than bulkWrite.
Reason:
bulkWrite is designed to send multiple different commands to the server, as mentioned in the documentation.
If you have the same update object, updateMany is best suited.
Performance:
If you have 10K update commands in a bulkWrite, they will be executed in batches internally, which may impact the execution time.
Exact lines from the reference about batching:
Each group of operations can have at most 1000 operations. If a group exceeds this limit, MongoDB will divide the group into smaller groups of 1000 or less. For example, if the bulk operations list consists of 2000 insert operations, MongoDB creates 2 groups, each with 1000 operations.
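For reference, a minimal sketch of the two shapes being compared (the collection, field names, and ids are hypothetical; db is assumed to be a connected Db instance):
async function example (db, id1, id2) {
  // Same update object for every matching document: a single updateMany call is enough.
  await db.collection('orders').updateMany(
    { status: 'pending' },
    { $set: { status: 'processed' } }
  );
  // Different update per document: bulkWrite batches many updateOne commands into one call.
  await db.collection('orders').bulkWrite([
    { updateOne: { filter: { _id: id1 }, update: { $set: { status: 'processed' } } } },
    { updateOne: { filter: { _id: id2 }, update: { $set: { status: 'cancelled' } } } }
  ]);
}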
Thanks @Alex

Is it good practice to perform all database actions over one single connection to MongoDB?

I'm using only a single connection to the MongoDB database in a Node-based project: first declaring a "db" variable and then performing all database-related CRUD operations on that single variable/connection.
Is this good practice, or do I need to create multiple connections? What would the consequences be?
Following is a rough structure:
var db;

MongoClient.connect(url, (err, database) => {
  db = database;
  one(db);
});

function one(db) {
  // doing something with db
  two(db);
}

function two(db) {
  // doing something with db
  three(db);
  five(db);
  six(db);
}

function three(db) {
  // doing something with db
  four(db);
  seven(db);
}
and so on....
It is alright to use the same connection to perform all of your queries. Remember that the MongoDB driver for Node.js is asynchronous: it sends the queries to the mongod server and continues executing your code without waiting for the results. When the server responds with the query results, the driver calls your callback function. Therefore all of the heavy workload is on the mongod server, not on your Node app.
Check out the script below, which demonstrates this. You can see that everything is done asynchronously and the Node app can continue with its flow of execution.
var MongoClient = require('mongodb').MongoClient;

function testDb(db) {
  var documents = [];
  for (var i = 0; i < 100000; i++)
    documents.push({test: 'just testing', exp: [1,2,3]});

  var col = db.collection('cart');

  console.log('insert the 1st one!');
  col.insertMany(documents, {w:1, j:1}, function(err, results) {
    console.log('we inserted the 1st documents');
  });

  console.log('fetch the 2nd one!');
  col.find({}).toArray(function(err, results) {
    console.log('we got the 2nd result' || err);
  });

  console.log('fetch the 3rd one!');
  col.find({}).toArray(function(err, results) {
    console.log('we got the 3rd results' || err);
  });

  console.log('fetch the 4th one!');
  col.find({}).toArray(function(err, results) {
    console.log('we got the 4th results' || err);
  });

  console.log('No more fetches or inserts!');
  console.log('-----------------------------------------');
  console.log('Starting to do some other work!');
  console.log('-----------------------------------------');

  var t = [];
  for (var i = 0; i < 100000; i++)
    t.push(i);

  console.log('-----------------------------------------');
  console.log('Done with the extra work!');
  console.log('-----------------------------------------');
}

MongoClient.connect('mongodb://localhost:27017/test', function(err, db) {
  testDb(db);
});
This is the output after running that node program:
$ node test.js
insert the 1st one!
fetch the 2nd one!
fetch the 3rd one!
fetch the 4th one!
No more fetches or inserts!
-----------------------------------------
Starting to do some other work!
-----------------------------------------
-----------------------------------------
Done with the extra work!
-----------------------------------------
we got the 4th results
we got the 3rd results
we got the 2nd result
we inserted the 1st documents
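If it helps, here is a minimal sketch of the single-connection pattern with a recent (4.x+) Node.js driver; the connection string and database name are placeholders. The client is created once and shared, and the driver manages an internal connection pool behind it:
// db.js - connect once, share the handle everywhere
const { MongoClient } = require('mongodb');

const client = new MongoClient('mongodb://localhost:27017');
let db;

async function getDb () {
  if (!db) {
    await client.connect(); // call once at startup; the driver pools connections internally
    db = client.db('test');
  }
  return db;
}

module.exports = { getDb };

// usage elsewhere:
// const { getDb } = require('./db');
// const docs = await (await getDb()).collection('cart').find({}).toArray();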

Is there a way to perform a "dry run" of an update operation?

I am in the process of changing the schema for one of my MongoDB collections. (I had been storing dates as strings, and now my application stores them as ISODates; I need to go back and change all of the old records to use ISODates as well.) I think I know how to do this using an update, but since this operation will affect tens of thousands of records I'm hesitant to issue an operation that I'm not 100% sure will work. Is there any way to do a "dry run" of an update that will show me, for a small number of records, the original record and how it would be changed?
Edit: I ended up using the approach of adding a new field to each record, and then (after verifying that the data was right) renaming that field to match the original. It looked like this:
db.events.find({timestamp: {$type: 2}})
  .forEach(function (e) {
    e.newTimestamp = new ISODate(e.timestamp);
    db.events.save(e);
  })

db.events.update({},
  {$rename: {'newTimestamp': 'timestamp'}},
  {multi: true})
By the way, that method for converting the string times to ISODates was what ended up working. (I got the idea from this SO answer.)
My advice would be to add the ISODate as a new field. Once you've confirmed that all looks good, you can then unset the string date.
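A rough sketch of that two-step suggestion, reusing the events collection from the question (the isoTimestamp field name is just illustrative):
// Step 1: add the converted date as a new field, leaving the original string untouched.
db.events.find({timestamp: {$type: 2}}).forEach(function (e) {
  e.isoTimestamp = new ISODate(e.timestamp);
  db.events.save(e);
});
// Step 2, only after verifying isoTimestamp looks right: drop the old string field.
db.events.update({}, {$unset: {timestamp: ""}}, {multi: true});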
Create a test environment with your database structure. Copy a handful of records to it. Problem solved. Not the solution you were looking for, I'm sure. But, I believe, these are the exact circumstances that a 'test environment' should be used for.
Select the IDs of particular records that you would like to monitor and place them in the update: {_id: {$in: [<your monitored id>]}}
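A small sketch of that idea, reusing the rename update from the question (the ids are placeholders): scope the update to the monitored documents, then inspect them.
// Apply the update only to a few hand-picked documents...
db.events.update(
  {_id: {$in: [ObjectId("<monitored id 1>"), ObjectId("<monitored id 2>")]}},
  {$rename: {'newTimestamp': 'timestamp'}},
  {multi: true}
);
// ...then check how they turned out.
db.events.find({_id: {$in: [ObjectId("<monitored id 1>"), ObjectId("<monitored id 2>")]}}).pretty();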
Another option, which depends on the amount of overhead it will cause you:
You can consider writing a script that performs the find operation and adds printouts, or runs in a debugger, while the save operation is commented out. Once you've gained confidence, you can enable the save operation.
var changesLog = [];
var errorsLog = [];

events.find({timestamp: {$type: 2}}, function (err, events) {
  if (err) {
    debugger;
    throw err;
  } else {
    for (var i = 0; i < events.length; i++) {
      console.log('events' + i + "/" + (events.length - 1));
      var currentEvent = events[i];
      currentEvent.timestamp = new ISODate(currentEvent.timestamp);
      var change = currentEvent._id;
      changesLog.push(change);
      // // ** Dry Run **
      // currentEvent.save(function (err) {
      //   if (err) {
      //     debugger;
      //     errorsLog.push(currentEvent._id + ", " + currentEvent.timeStamp + ', ' + err);
      //     throw err;
      //   }
      // });
    }
    console.log('Done');
    console.log('Changes:');
    console.log(changesLog);
    console.log('Errors:');
    console.log(errorsLog);
    return;
  }
});
db.collection.find({"_manager": { $exists: true, $ne: null }}).forEach(
  function(doc) {
    doc['_managers'] = [doc._manager]; // String --> List
    delete doc['_manager'];            // Remove the old "_manager" key-value pair
    printjson(doc);                    // Debug by printing the resulting doc
    //db.teams.save(doc);              // Uncomment to save the changes to the doc
  }
)
In my case the collection contains _manager and I would like to change it to a _managers list. I have tested it locally and it works as expected.
In recent versions of MongoDB (at least starting with 4.2), you can do this using a transaction (note that multi-document transactions require a replica set or sharded cluster).
const { MongoClient } = require('mongodb')

async function main({ dryRun }) {
  const client = new MongoClient('mongodb://127.0.0.1:27017', {
    maxPoolSize: 1
  })
  const pool = await client.connect()
  const db = pool.db('someDB')
  const session = pool.startSession()
  session.startTransaction()
  try {
    const filter = { id: 'some-id' }
    const update = { $rename: { 'newTimestamp': 'timestamp' } }
    // This is the important bit
    const options = { session: session }

    await db.collection('someCollection').updateMany(
      filter,
      update,
      options // using session
    )

    const afterUpdate = await db.collection('someCollection')
      .find(
        filter,
        options // using session
      )
      .toArray()
    console.debug('updated documents', afterUpdate)

    if (dryRun) {
      // This will roll back any changes made within the session
      await session.abortTransaction()
    } else {
      await session.commitTransaction()
    }
  } finally {
    await session.endSession()
    await pool.close()
  }
}

const _ = main({ dryRun: true })