MongoDB sharded cluster error 14037

I'm creating a sharded cluster following the official tutorial, using three configuration servers, three servers in the shard replica set, and a mongos client, but when I try to create a collection with
db.createCollection("XYZ")
I get
/* 1 */
{
"ok" : 0.0,
"errmsg" : "can't create user databases on a --configsvr instance",
"code" : 14037,
"codeName" : "Location14037"
}
My server status is
/* 1 */
{
"host" : "mongo-1",
"version" : "3.4.1",
"process" : "mongos",
"pid" : NumberLong(1),
"uptime" : 16325.0,
"uptimeMillis" : NumberLong(16324905),
"uptimeEstimate" : NumberLong(16324),
"localTime" : ISODate("2017-01-26T02:04:32.110Z"),
"asserts" : {
"regular" : 0,
"warning" : 0,
"msg" : 0,
"user" : 0,
"rollovers" : 0
},
"connections" : {
"current" : 4,
"available" : 419426,
"totalCreated" : 23
},
"extra_info" : {
"note" : "fields vary by platform",
"page_faults" : 0
},
"network" : {
"bytesIn" : NumberLong(70779),
"bytesOut" : NumberLong(106181),
"physicalBytesIn" : NumberLong(70779),
"physicalBytesOut" : NumberLong(106181),
"numRequests" : NumberLong(1865)
},
"opcounters" : {
"insert" : 0,
"query" : 54,
"update" : 0,
"delete" : 0,
"getmore" : 0,
"command" : 864
},
"sharding" : {
"configsvrConnectionString" : "production/10.7.0.28:27019,10.7.0.29:27019,10.7.0.30:27019",
"lastSeenConfigServerOpTime" : {
"ts" : Timestamp(6379728405545353, 1),
"t" : NumberLong(2)
}
},
"tcmalloc" : {
"generic" : {
"current_allocated_bytes" : 2719976,
"heap_size" : 6291456
},
"tcmalloc" : {
"pageheap_free_bytes" : 167936,
"pageheap_unmapped_bytes" : 0,
"max_total_thread_cache_bytes" : 1045430272,
"current_total_thread_cache_bytes" : 777824,
"total_free_bytes" : 3403544,
"central_cache_free_bytes" : 194040,
"transfer_cache_free_bytes" : 2431680,
"thread_cache_free_bytes" : 777824,
"aggressive_memory_decommit" : 0,
"formattedString" : "------------------------------------------------\nMALLOC: 2719976 ( 2.6 MiB) Bytes in use by application\nMALLOC: + 167936 ( 0.2 MiB) Bytes in page heap freelist\nMALLOC: + 194040 ( 0.2 MiB) Bytes in central cache freelist\nMALLOC: + 2431680 ( 2.3 MiB) Bytes in transfer cache freelist\nMALLOC: + 777824 ( 0.7 MiB) Bytes in thread cache freelists\nMALLOC: + 1171648 ( 1.1 MiB) Bytes in malloc metadata\nMALLOC: ------------\nMALLOC: = 7463104 ( 7.1 MiB) Actual memory used (physical + swap)\nMALLOC: + 0 ( 0.0 MiB) Bytes released to OS (aka unmapped)\nMALLOC: ------------\nMALLOC: = 7463104 ( 7.1 MiB) Virtual address space used\nMALLOC:\nMALLOC: 508 Spans in use\nMALLOC: 24 Thread heaps in use\nMALLOC: 4096 Tcmalloc page size\n------------------------------------------------\nCall ReleaseFreeMemory() to release freelist memory to the OS (via madvise()).\nBytes released to the OS take up virtual address space but no physical memory.\n"
}
},
"mem" : {
"bits" : 64,
"resident" : 28,
"virtual" : 228,
"supported" : true
},
"metrics" : {
"cursor" : {
"timedOut" : NumberLong(0),
"open" : {
"multiTarget" : NumberLong(0),
"singleTarget" : NumberLong(0),
"pinned" : NumberLong(0),
"total" : NumberLong(0)
}
},
"commands" : {
"addShard" : {
"failed" : NumberLong(0),
"total" : NumberLong(3)
},
"aggregate" : {
"failed" : NumberLong(0),
"total" : NumberLong(12)
},
"buildInfo" : {
"failed" : NumberLong(0),
"total" : NumberLong(14)
},
"create" : {
"failed" : NumberLong(9),
"total" : NumberLong(9)
},
"enableSharding" : {
"failed" : NumberLong(0),
"total" : NumberLong(1)
},
"find" : {
"failed" : NumberLong(0),
"total" : NumberLong(54)
},
"grantRolesToUser" : {
"failed" : NumberLong(7),
"total" : NumberLong(10)
},
"isMaster" : {
"failed" : NumberLong(0),
"total" : NumberLong(48)
},
"listCollections" : {
"failed" : NumberLong(0),
"total" : NumberLong(19)
},
"ping" : {
"failed" : NumberLong(0),
"total" : NumberLong(618)
},
"replSetGetStatus" : {
"failed" : NumberLong(14),
"total" : NumberLong(14)
},
"revokeRolesFromUser" : {
"failed" : NumberLong(0),
"total" : NumberLong(1)
},
"saslContinue" : {
"failed" : NumberLong(0),
"total" : NumberLong(62)
},
"saslStart" : {
"failed" : NumberLong(0),
"total" : NumberLong(31)
},
"serverStatus" : {
"failed" : NumberLong(0),
"total" : NumberLong(2)
},
"usersInfo" : {
"failed" : NumberLong(0),
"total" : NumberLong(8)
},
"whatsmyuri" : {
"failed" : NumberLong(0),
"total" : NumberLong(12)
}
}
},
"ok" : 1.0
}
And the sharding status:
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("58891625f1d4d70889a9787b")
}
shards:
{ "_id" : "production", "host" : "production/10.7.0.14:27018,10.7.0.16:27018,10.7.0.9:27018", "state" : 1 }
active mongoses:
"3.4.1" : 1
balancer:
Currently enabled: yes
Currently running: yes
Balancer lock taken at Wed Jan 25 2017 17:20:29 GMT-0400 (VET) by ConfigServer:Balancer
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
{ "_id" : "base", "primary" : "production", "partitioned" : true }
What am I doing wrong?
Thanks in advance.

Answering my own question, and the issue turned out to be really simple. Because the config servers are themselves a replica set, the name of that replica set and the name of the shard replica set must be unique. In my case they were not (both were named "production"), and that caused the error.
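A quick way to verify this from the shell, where "cfgRepl" is just a hypothetical example of a distinct config server replica set name:

// On a config server member (e.g. 10.7.0.28:27019):
rs.status().set    // should be the config replica set's own name, e.g. "cfgRepl", not "production"
// On a shard member (e.g. 10.7.0.14:27018):
rs.status().set    // "production"
// If both tiers report the same set name, mongos can end up sending shard
// traffic to the config servers, which then refuse to create user databases
// and return error 14037.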

Related

MongoDB: restore replica set after Kubernetes scaling down

I had configured a replica set correctly.
After scaling down the MongoDB Kubernetes pods, the replica set turned out to be in an invalid state:
> rs.status();
{
"ok" : 0,
"errmsg" : "Our replica set config is invalid or we are not a member of it",
"code" : 93,
"codeName" : "InvalidReplicaSetConfig"
}
My configuration is:
> rs.config();
{
"_id" : "rs0",
"version" : 3,
"term" : 2,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 0,
"host" : "mongors-0.mongors-service.hes-all.svc:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 1,
"host" : "mongors-1.mongors-service.hes-all.svc:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("626fb63f211511c4dcf938ac")
}
}
The configuration details seem right, but when I run rs.initiate() or rs.reconfig(cfg):
> rs.reconfig(config);
{
"topologyVersion" : {
"processId" : ObjectId("6347bdffe3c3303e6f325b9a"),
"counter" : NumberLong(1)
},
"ok" : 0,
"errmsg" : "New config is rejected :: caused by :: replSetReconfig should only be run on a writable PRIMARY. Current state REMOVED;",
"code" : 10107,
"codeName" : "NotWritablePrimary"
}
> rs.initiate();
{
"ok" : 0,
"errmsg" : "already initialized",
"code" : 23,
"codeName" : "AlreadyInitialized"
}
Any ideas?
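(For reference, one direction sometimes tried when a member reports state REMOVED is a forced reconfig from a node that still holds the data; a minimal sketch, assuming the member hostnames in the config still resolve to the surviving pods:)

> cfg = rs.conf()
// fix cfg.members[i].host here first if the pod hostnames changed after scaling
> rs.reconfig(cfg, { force: true })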

MongoDB performing slow read-queries under load

I'm running MongoDB on my VPS. Two instances of the same Python bot are constantly reading and writing documents. Essentially, they continuously repeat the following steps:
Get a document using find_and_modify() (no, I cannot use find() here)
Process the data fetched in step 1 (takes 10-60 seconds)
Update the document using find_and_modify() (no, I cannot use update() here)
This worked flawlessly until I deployed a REST API web server to view the results in the browser, etc. Now, whenever the bot instances and the web server are running at the same time, the bots slow down by minutes and a request to my web server takes 2 minutes. When only the bots or only the web server is running, both query the database within milliseconds.
Before I think about upgrading my VPS's RAM and CPU, I would like to understand what exactly causes this. I assume the problem lies in the database being overwhelmed by the amount of requests from the 3 clients (2 bot instances, 1 web server).
I found the currentOp() command, but I'm having trouble understanding its output.
> db.currentOp({"secs_running": {"$gte": 5}})
{
"inprog" : [
{
"type" : "op",
"host" : "localhost:27017",
"desc" : "conn1555",
"connectionId" : 1555,
"client" : "127.0.0.1:37750",
"clientMetadata" : {
"driver" : {
"name" : "PyMongo",
"version" : "3.11.3"
},
"os" : {
"type" : "Linux",
"name" : "Linux",
"architecture" : "x86_64",
"version" : "5.4.0-71-generic"
},
"platform" : "CPython 3.8.5.final.0"
},
"active" : true,
"currentOpTime" : "2021-04-27T13:37:56.261+00:00",
"opid" : 3151480,
"lsid" : {
"id" : UUID("71d36512-c5a3-4fe9-b3f9-31fd23e86413"),
"uid" : BinData(0,"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
},
"secs_running" : NumberLong(123),
"microsecs_running" : NumberLong(123430156),
"op" : "command",
"ns" : "nxmain.websites",
"command" : {
"aggregate" : "websites",
"pipeline" : [
{
"$match" : {
}
},
{
"$group" : {
"_id" : 1,
"n" : {
"$sum" : 1
}
}
}
],
"cursor" : {
},
"lsid" : {
"id" : UUID("71d36512-c5a3-4fe9-b3f9-31fd23e86413")
},
"$db" : "nxmain",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"planSummary" : "COLLSCAN",
"numYields" : 934,
"locks" : {
"ReplicationStateTransition" : "w",
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(936)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(936)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(2)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
},
{
"type" : "op",
"host" : "localhost:27017",
"desc" : "conn1535",
"connectionId" : 1535,
"client" : "127.0.0.1:36886",
"clientMetadata" : {
"driver" : {
"name" : "PyMongo",
"version" : "3.11.3"
},
"os" : {
"type" : "Linux",
"name" : "Linux",
"architecture" : "x86_64",
"version" : "5.4.0-71-generic"
},
"platform" : "CPython 3.8.5.final.0"
},
"active" : true,
"currentOpTime" : "2021-04-27T13:37:56.261+00:00",
"opid" : 3152992,
"secs_running" : NumberLong(7),
"microsecs_running" : NumberLong(7503765),
"op" : "command",
"ns" : "admin.$cmd",
"command" : {
"ismaster" : 1,
"topologyVersion" : {
"processId" : ObjectId("60845741af792cc59a636f20"),
"counter" : NumberLong(0)
},
"maxAwaitTimeMS" : 10000,
"$db" : "admin",
"$readPreference" : {
"mode" : "primary"
}
},
"numYields" : 0,
"waitingForLatch" : {
"timestamp" : ISODate("2021-04-27T13:37:48.858Z"),
"captureName" : "AnonymousLatch"
},
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
}
],
"ok" : 1
}
As you can see, there are two operations in progress. The one with secs_running: 123 is the web server; the other one is one of the bot instances, which takes about 10-20 seconds when the web server is running in parallel.
What could be the cause of my problem, and how can I solve it? Thank you in advance.
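(For what it's worth, the 123-second operation shown above is the web server's count pipeline, an aggregate with an empty $match and a $group that sums 1, and its planSummary is COLLSCAN, i.e. a full scan of nxmain.websites. If the web server only needs the total number of documents, a metadata-based count is a much cheaper sketch, assuming an approximate figure is acceptable:)

> use nxmain
> db.websites.estimatedDocumentCount()    // reads collection metadata instead of scanning every document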

MongoDB query to get CPU usage

Using MongoDB, I know that I can use the command
db.serverStatus()
which will return a lot of information about the current mongo instance, including memory information:
"mem" : {
"bits" : 64,
"resident" : 4303,
"virtual" : 7390,
...
}
Is there anything similar, or anything in this output that I may be missing, that will also report CPU usage details?
i.e.
"cpu" : {
"usr" : 32,
"wa" : 16,
"id" : 52
}
You could try the top command and check whether its output gives you the necessary information. Switch to the admin database and issue:
db.runCommand( { top: 1 } )
{
"totals" : {
"note" : "all times in microseconds",
"Orders.orders" : {
"total" : {
"time" : 107211,
"count" : 56406
},
"readLock" : {
"time" : 107205,
"count" : 56405
},
"writeLock" : {
"time" : 6,
"count" : 1
},
"queries" : {
"time" : 105,
"count" : 1
},
"getmore" : {
"time" : 0,
"count" : 0
},
"insert" : {
"time" : 0,
"count" : 0
},
"update" : {
"time" : 0,
"count" : 0
},
"remove" : {
"time" : 0,
"count" : 0
},
"commands" : {
"time" : 0,
"count" : 0
}
},.... rest clipped as it gives per-database stats
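A small usage sketch for pulling a single collection's totals out of that output from the shell (collection name taken from the sample above):

> use admin
> var t = db.runCommand({ top: 1 })
> t.totals["Orders.orders"].total    // { "time" : ..., "count" : ... }, times in microseconds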

Aggregation framework performance on a 10M collection

I have a collection of 10M documents that is a pre-aggregation of daily events.
A simple $group took more than 8s. Is this performance normal?
Some data from the profiler:
{
"op" : "command",
"ns" : "analytics.$cmd",
"command" : {
"aggregate" : "aggregation",
"pipeline" : [
{
"$group" : {
"_id" : "",
"hits" : {
"$sum" : "$hits"
}
}
}
]
},
"ntoreturn" : 1,
"keyUpdates" : 0,
"numYield" : 15,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(17169805),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(8582619),
"w" : NumberLong(294)
}
},
"responseLength" : 78,
"millis" : 8594,
"ts" : ISODate("2013-12-04T15:57:38.217Z"),
"client" : "127.0.0.1",
"allUsers" : [ ],
"user" : ""
}
Here is a single document:
{
"_id" : ObjectId("529e21ee67e807418500daeb"),
"date" : ISODate("2012-09-19T00:00:00Z"),
"hits" : 1,
"infos" : {
"sourceValue" : NumberLong(1),
"eventType" : "createUser",
"sourceType" : "user",
"instance" : "xxx",
"targetType" : "user",
"targetValue" : NumberLong(15)
}
}
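(For context, the profiled command corresponds to the first shell call below; with no $match stage it has to examine every document in analytics.aggregation. The second call is only a sketch of narrowing the work with a date range, assuming an index on date exists, e.g. created with db.aggregation.ensureIndex({ date: 1 }):)

// pipeline as profiled: touches every document in the collection
db.aggregation.aggregate([
  { $group : { _id : "", hits : { $sum : "$hits" } } }
])
// sketch: restrict the documents first, then group
db.aggregation.aggregate([
  { $match : { date : { $gte : ISODate("2012-09-01T00:00:00Z"), $lt : ISODate("2012-10-01T00:00:00Z") } } },
  { $group : { _id : "", hits : { $sum : "$hits" } } }
])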

High lockTime and high page_faults values in serverStatus output

We are using MongoDB for our collections.
These are the stats from my development server.
My concern is that there is a high lockTime and also a high page_faults value on this server.
Please tell me how we can resolve this.
PRIMARY> db.serverStatus()
{
"host" : "ubsc-aio:27018",
"version" : "2.0.4",
"process" : "mongod",
"uptime" : 3278692,
"uptimeEstimate" : 3098346,
"localTime" : ISODate("2013-08-30T10:55:06.997Z"),
"globalLock" : {
"totalTime" : 3278692551767,
"lockTime" : 139516930214,
"ratio" : 0.0425526114483682,
"currentQueue" : {
"total" : 0,
"readers" : 0,
"writers" : 0
},
"activeClients" : {
"total" : 1,
"readers" : 1,
"writers" : 0
}
},
"mem" : {
"bits" : 64,
"resident" : 2009,
"virtual" : 23455,
"supported" : true,
"mapped" : 11420,
"mappedWithJournal" : 22840
},
"connections" : {
"current" : 162,
"available" : 7838
},
"extra_info" : {
"note" : "fields vary by platform",
"heap_usage_bytes" : 3645040,
"page_faults" : 4147570
},
"indexCounters" : {
"btree" : {
"accesses" : 902898,
"hits" : 901095,
"misses" : 1803,
"resets" : 0,
"missRatio" : 0.0019969033046922245
}
},
"backgroundFlushing" : {
"flushes" : 54639,
"total_ms" : 36709498,
"average_ms" : 671.8552316111203,
"last_ms" : 81,
"last_finished" : ISODate("2013-08-30T10:54:43.013Z")
},
"cursors" : {
"totalOpen" : 1,
"clientCursors_size" : 1,
"timedOut" : 7
},
"network" : {
"bytesIn" : 77779294441,
"bytesOut" : 314231714161,
"numRequests" : 189861092
},
"repl" : {
"setName" : "at",
"ismaster" : true,
"secondary" : false,
"hosts" : [
"localhost:27018",
"localhost:27017"
],
"arbiters" : [
"localhost:27019"
],
"primary" : "localhost:27018",
"me" : "localhost:27018"
},
"opcounters" : {
"insert" : 303294,
"query" : 133717078,
"update" : 59123588,
"delete" : 234256,
"getmore" : 48037783,
"command" : 125805489
},
"asserts" : {
"regular" : 0,
"warning" : 0,
"msg" : 0,
"user" : 16576,
"rollovers" : 0
},
"writeBacksQueued" : false,
"dur" : {
"commits" : 28,
"journaledMB" : 0.08192,
"writeToDataFilesMB" : 0.116123,
"compression" : 0.6743163821345669,
"commitsInWriteLock" : 0,
"earlyCommits" : 0,
"timeMs" : {
"dt" : 3000,
"prepLogBuffer" : 0,
"writeToJournal" : 25,
"writeToDataFiles" : 2,
"remapPrivateView" : 1
}
},
"ok" : 1
}
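A quick way to recompute that ratio in the shell from the same fields (taken from the globalLock section above):

PRIMARY> var s = db.serverStatus()
PRIMARY> s.globalLock.lockTime / s.globalLock.totalTime
// 139516930214 / 3278692551767 ≈ 0.0426, i.e. roughly 4.3% of the total uptime
// has been spent holding the global write lock, matching the "ratio" field above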