What is mongo $mergeCursors? - mongodb

We see a lot of slow queries in mongo logs like below (with pipeline op mergeCursors). We have a sharded mongo with 2 shards with only primaries. What is the mergeCursors command? Please let me know if any other information is required.
{
"_id" : ObjectId("5571b739f65f7e64bb806362"),
"op" : "command",
"ns" : "mongrel.$cmd",
"command" : {
"aggregate" : "collection1",
"pipeline" : [
{
"$mergeCursors" : [
{
"host" : "endpoint:27005",
"id" : NumberLong(82775337156)
}
]
}
]
},
"keyUpdates" : 0,
"numYield" : 0,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(12),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(2),
"w" : NumberLong(2680)
}
},
"responseLength" : 12312,
"millis" : 6142,
"execStats" : {},
"ts" : ISODate("2015-06-05T12:35:40.801Z"),
"client" : "10.167.212.83",
"allUsers" : [],
"user" : ""
}

I was recently reading this post (http://dbattish.tumblr.com/post/108652372056/joins-in-mongodb) which seems to say that it is an internal aggregate command to merge queries across shards.

Related

MongoDB performing slow read-queries under load

I'm running a MongoDB on my VPS. Two instances of the same python bot are constantly reading and writing documents. Essentially, they continuously repeat the following steps infinitely:
Get a document using find_and_modify() (no i cannot use find() here)
process data fetched in step 1 (takes 10-60 seconds)
update document using find_and_modify() (no i cannot use update() here)
This worked flawlessly until I deployed a REST API web server to view the results in the browser etc. Now whenever the bot instances and the web server are running at the same time, the bots slow down by minutes and a request to my web server takes 2 minutes. When only the bots or only the web server is running, each queries the database within milliseconds.
Before I think about upgrading my VPS' RAM and CPU, I would like to understand what exactly causes this. I assume the problem lies in the database being overwhelmed with the amount of requests from the 3 clients (2 bot instances, 1 web server).
I found the command currentOp(), but I'm having issues understanding its return.
> db.currentOp({"secs_running": {"$gte": 5}})
{
"inprog" : [
{
"type" : "op",
"host" : "localhost:27017",
"desc" : "conn1555",
"connectionId" : 1555,
"client" : "127.0.0.1:37750",
"clientMetadata" : {
"driver" : {
"name" : "PyMongo",
"version" : "3.11.3"
},
"os" : {
"type" : "Linux",
"name" : "Linux",
"architecture" : "x86_64",
"version" : "5.4.0-71-generic"
},
"platform" : "CPython 3.8.5.final.0"
},
"active" : true,
"currentOpTime" : "2021-04-27T13:37:56.261+00:00",
"opid" : 3151480,
"lsid" : {
"id" : UUID("71d36512-c5a3-4fe9-b3f9-31fd23e86413"),
"uid" : BinData(0,"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
},
"secs_running" : NumberLong(123),
"microsecs_running" : NumberLong(123430156),
"op" : "command",
"ns" : "nxmain.websites",
"command" : {
"aggregate" : "websites",
"pipeline" : [
{
"$match" : {
}
},
{
"$group" : {
"_id" : 1,
"n" : {
"$sum" : 1
}
}
}
],
"cursor" : {
},
"lsid" : {
"id" : UUID("71d36512-c5a3-4fe9-b3f9-31fd23e86413")
},
"$db" : "nxmain",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"planSummary" : "COLLSCAN",
"numYields" : 934,
"locks" : {
"ReplicationStateTransition" : "w",
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(936)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(2)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
},
{
"type" : "op",
"host" : "localhost:27017",
"desc" : "conn1535",
"connectionId" : 1535,
"client" : "127.0.0.1:36886",
"clientMetadata" : {
"driver" : {
"name" : "PyMongo",
"version" : "3.11.3"
},
"os" : {
"type" : "Linux",
"name" : "Linux",
"architecture" : "x86_64",
"version" : "5.4.0-71-generic"
},
"platform" : "CPython 3.8.5.final.0"
},
"active" : true,
"currentOpTime" : "2021-04-27T13:37:56.261+00:00",
"opid" : 3152992,
"secs_running" : NumberLong(7),
"microsecs_running" : NumberLong(7503765),
"op" : "command",
"ns" : "admin.$cmd",
"command" : {
"ismaster" : 1,
"topologyVersion" : {
"processId" : ObjectId("60845741af792cc59a636f20"),
"counter" : NumberLong(0)
},
"maxAwaitTimeMS" : 10000,
"$db" : "admin",
"$readPreference" : {
"mode" : "primary"
}
},
"numYields" : 0,
"waitingForLatch" : {
"timestamp" : ISODate("2021-04-27T13:37:48.858Z"),
"captureName" : "AnonymousLatch"
},
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
}
],
"ok" : 1
}
As you can see there are two operations in progress. The one with secs_running: 123 is the web server, the other one is one of the bot instances, which takes about 10-20 seconds when the web server is running in parallel.
What could be the cause for my problem and how can I solve it? Thank you in advance.

MongoDB stuck creating index with "Index Build: draining writes received during build" message

I have 4 rows in the test collection:
{ "_id" : ObjectId("5f4ce50e19b13337216dd477"), "test" : 1 }
{ "_id" : ObjectId("5f4ce50e19b13337216dd478"), "test" : 2 }
{ "_id" : ObjectId("5f4ce50e19b13337216dd479"), "test" : 3 }
{ "_id" : ObjectId("5f4ce50e19b13337216dd47a"), "test" : 4 }
After running db.test.createIndex({test:1},{background:1}); to create an index, it just hangs. It was hanging for at least a few hours. Here is what I found in the db.currentOp() about this operation:
{
"type" : "op",
"host" : "HOSTNAME:27017",
"desc" : "IndexBuildsCoordinatorMongod-13",
"active" : true,
"currentOpTime" : "2020-08-31T12:11:13.159+00:00",
"opid" : 8721867,
"secs_running" : NumberLong(20),
"microsecs_running" : NumberLong(20888590),
"op" : "command",
"ns" : "test.test",
"command" : {
"createIndexes" : "test",
"indexes" : [
{
"v" : 2,
"key" : {
"test" : 1
},
"name" : "test_1",
"background" : 1
}
],
"lsid" : {
"id" : UUID("07b43083-8ab9-4bcb-8768-919a3f27655f")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1598875647, 409),
"signature" : {
"hash" : BinData(0,"+/YcdPyQriT8RL1LtFUhxe2BtCE="),
"keyId" : NumberLong("6861636045532823556")
}
},
"$db" : "test"
},
"msg" : "Index Build: draining writes received during build",
"numYields" : 0,
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(6)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(4)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(4)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(3),
"W" : NumberLong(1)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(4)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
"acquireCount" : NumberLong(3),
"timeAcquiringMicros" : NumberLong(1)
}
}
This "Index Build: draining writes received during build" message makes no sense since there were no reads/writes to the test collection during index creation.
Also, index creation hangs only in a non-empty collection. The index is created successfully in an empty collection.
What might be an issue in this case? I'm out of ideas.
Finally figured it out with the help of MongoDB team.
The node can't communicate with itself so it will hang trying to commit the index build. This was the reason. Adding keyfile fixed the issue:
rm -f mongo.keyfile
openssl rand -base64 756 > mongo.keyfile
chmod 400 mongo.keyfile
bin/mongod --config mongo.conf --keyFile mongo.keyfile
Here are the links to the MongoDB's Jira issues which cover this subject:
https://jira.mongodb.org/browse/SERVER-50665 and
https://jira.mongodb.org/browse/SERVER-48516

mongodb find query getmore operation hangs

We are running MongoDB 3.0.2 on Linux.
The "getmore" operation for a find query periodically hangs.
The operation is visible in currentOp(), and one item I cannot explain is that all the acquireCount values in lockStats continue to grow while the query hangs (for Global, MMAPV1Journal, Database, and Collection).
This is the operation in question, at this point running for more than 1000 secs, returned by db.currentOp() :
{
"desc" : "conn60",
"threadId" : "0x2a99ee0",
"connectionId" : 60,
"opid" : 67792,
"active" : true,
"secs_running" : 1098,
"microsecs_running" : NumberLong(1098289543),
"op" : "getmore",
"ns" : "dbName.collectionName",
"query" : {
"d" : {
"$gt" : ISODate("2016-03-13T18:00:00.261Z"),
"$lt" : ISODate("2016-03-14T22:45:17.718Z")
},
"cc" : "US",
"dc" : {
"$in" : [
"26",
"31",
"17",
"29",
"35"
]
},
"pr" : {
"$gte" : 4
}
},
"client" : "10.0.0.111:33670",
"numYields" : 317557,
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(635114)
}
},
"MMAPV1Journal" : {
"acquireCount" : {
"r" : NumberLong(317557)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(317557)
}
},
"Collection" : {
"acquireCount" : {
"R" : NumberLong(317557)
}
}
}
}
Any insights would be much appreciated ! Thank you in advance.

execStats is always empty in MongoDB "aggregate" commands profiling results

I am trying to profile the performance of an aggregation pipeline, specifically checking whether indices are used, how many objects are scanned, etc.
I'm setting the DB to full profiling:
db.setProfilingLevel(2)
But then in the db's 'system.profile' collection, in the result record for the aggregation command, the execStats is always empty.
Here is the full result for the command:
{
"op" : "command",
"ns" : "mydb.$cmd",
"command" : {
"aggregate" : "mycolection",
"pipeline" : [{
"$match" : {
"date" : {
"$gte" : "2013-11-26"
}
}
}, {
"$sort" : {
"user_id" : 1
}
}, {
"$project" : {
"user_id" : 1,
"_id" : 0
}
}, {
"$group" : {
"_id" : "$user_id",
"agg_val" : {
"$sum" : 1
}
}
}],
"allowDiskUse" : true
},
"keyUpdates" : 0,
"numYield" : 16,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(3143653),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(140),
"w" : NumberLong(3)
}
},
"responseLength" : 4990,
"millis" : 3237,
"execStats" : { },
"ts" : ISODate("2014-11-26T16:20:59.576Z"),
"client" : "127.0.0.1",
"allUsers" : [],
"user" : ""
}
Support for execStats in the aggregation command was added in mongo 3.4.

Aggregation framework performance on a 10M collection

I have a collection of 10M documents, that is a pre-aggregation of daily events.
A simple $group took more than 8s, is this performance normal ?
Some data from the profiler:
{
"op" : "command",
"ns" : "analytics.$cmd",
"command" : {
"aggregate" : "aggregation",
"pipeline" : [
{
"$group" : {
"_id" : "",
"hits" : {
"$sum" : "$hits"
}
}
}
]
},
"ntoreturn" : 1,
"keyUpdates" : 0,
"numYield" : 15,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(17169805),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(8582619),
"w" : NumberLong(294)
}
},
"responseLength" : 78,
"millis" : 8594,
"ts" : ISODate("2013-12-04T15:57:38.217Z"),
"client" : "127.0.0.1",
"allUsers" : [ ],
"user" : ""
}
Here is one single document
{
"_id" : ObjectId("529e21ee67e807418500daeb"),
"date" : ISODate("2012-09-19T00:00:00Z"),
"hits" : 1,
"infos" : {
"sourceValue" : NumberLong(1),
"eventType" : "createUser",
"sourceType" : "user",
"instance" : "xxx",
"targetType" : "user",
"targetValue" : NumberLong(15)
}
}