Error code 18 AuthenticationFailed when adding a Secondary to a MongoDB replica set

I received the error below:
{"t":{"$date":"2022-12-24T11:00:54.895+00:00"},"s":"I", "c":"NETWORK", "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"{Replset_name}","host":"{VPS_IP}:27019","error":{"code":18,"codeName":"AuthenticationFailed","errmsg":"Authentication failed."},"action":{"dropConnections":false,"requestImmediateCheck
This error appears in mongod.log when I add a Secondary to an existing MongoDB replica set whose members run in Docker containers on two different server machines.
My replica set structure includes the following:
Primary on VPS1_DO:container1 (active) (same overlay network)
Secondary1 on VPS1_DO:container2 (active) (same overlay network)
Secondary2 on VPS2_Azure:container1 (error)
Details from rs.status():
"members" : [
{
"_id" : 0,
"name" : "10.5.0.11(staticIP-container1):27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 258904,
"optime" : {
"ts" : Timestamp(1672026076, 1),
"t" : NumberLong(67)
},
"optimeDurable" : {
"ts" : Timestamp(1672026076, 1),
"t" : NumberLong(67)
},
"optimeDate" : ISODate("2022-12-26T03:41:16Z"),
"optimeDurableDate" : ISODate("2022-12-26T03:41:16Z"),
"lastAppliedWallTime" : ISODate("2022-12-26T03:41:16.739Z"),
"lastDurableWallTime" : ISODate("2022-12-26T03:41:16.739Z"),
"lastHeartbeat" : ISODate("2022-12-26T03:41:17.962Z"),
"lastHeartbeatRecv" : ISODate("2022-12-26T03:41:18.521Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "10.5.0.12:27017",
"syncSourceId" : 1,
"infoMessage" : "",
"configVersion" : 17,
"configTerm" : 67
},
{
"_id" : 1,
"name" : "10.5.0.12(staticIP-container2):27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 589529,
"optime" : {
"ts" : Timestamp(1672026076, 1),
"t" : NumberLong(67)
},
"optimeDate" : ISODate("2022-12-26T03:41:16Z"),
"lastAppliedWallTime" : ISODate("2022-12-26T03:41:16.739Z"),
"lastDurableWallTime" : ISODate("2022-12-26T03:41:16.739Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1671767185, 1),
"electionDate" : ISODate("2022-12-23T03:46:25Z"),
"configVersion" : 17,
"configTerm" : 67,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 2,
"name" : "178.128.xx.xxx(IP-VPS):27019",
"health" : 0,
"state" : 6,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastAppliedWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastDurableWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-12-26T03:41:17.427Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"authenticated" : false,
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1,
"configTerm" : -1
}
],
I have already verified the following:
Same MongoDB version (5.0.6)
Same mongod.conf file (same replSet name)
Same keyFile and chmod 400 (read-only)
The hosts can ping each other (ICMP inbound is open)
Many thanks!
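One quick check that separates a network problem from a keyFile/auth problem is to open a plain connection to the new member from mongosh on the primary. This is only a sketch; the IP and port are the (redacted) values from the log above:
// Mongo() throws if the port is not reachable at all; the unauthenticated hello
// command is allowed even with auth enabled, so if both calls succeed the most
// likely culprit is the keyFile itself (it must be byte-identical on every member,
// chmod 400, and owned by the user that runs mongod inside each container).
const conn = new Mongo("178.128.xx.xxx:27019");
printjson(conn.getDB("admin").runCommand({ hello: 1 }));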

Related

Mongo cannot write to primary

I have a 3-node MongoDB replica set with 1 primary and 2 secondaries. One of my secondary nodes is down, but the primary and the other secondary are still up. The problem is that I now cannot write to the primary. What could be the reason, given that the primary is still up and read operations are working? I am using a connection string like
"mongodb://custom-pvc-mongodb-0.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017/admin"
Below is my rs.status():
{
"set" : "custom-pvc-mongodb",
"date" : ISODate("2022-12-07T12:04:12.543Z"),
"myState" : 1,
"term" : NumberLong(3),
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"votingMembersCount" : 3,
"writableVotingMembersCount" : 3,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1670301216, 1),
"t" : NumberLong(3)
},
"lastCommittedWallTime" : ISODate("2022-12-06T04:33:36.252Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1670301216, 1),
"t" : NumberLong(3)
},
"appliedOpTime" : {
"ts" : Timestamp(1670414650, 1),
"t" : NumberLong(3)
},
"durableOpTime" : {
"ts" : Timestamp(1670414650, 1),
"t" : NumberLong(3)
},
"lastAppliedWallTime" : ISODate("2022-12-07T12:04:10.025Z"),
"lastDurableWallTime" : ISODate("2022-12-07T12:04:10.025Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1670301216, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "electionTimeout",
"lastElectionDate" : ISODate("2022-11-25T07:35:27.387Z"),
"electionTerm" : NumberLong(3),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1669361650, 1),
"t" : NumberLong(1)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 1,
"electionTimeoutMillis" : NumberLong(10000),
"numCatchUpOps" : NumberLong(0),
"newTermStartDate" : ISODate("2022-11-25T07:35:27.410Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2022-11-25T07:35:28.101Z")
},
"members" : [
{
"_id" : 0,
"name" : "custom-pvc-mongodb-0.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1052969,
"optime" : {
"ts" : Timestamp(1670414650, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2022-12-07T12:04:10Z"),
"lastAppliedWallTime" : ISODate("2022-12-07T12:04:10.025Z"),
"lastDurableWallTime" : ISODate("2022-12-07T12:04:10.025Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1669361727, 1),
"electionDate" : ISODate("2022-11-25T07:35:27Z"),
"configVersion" : 1,
"configTerm" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "custom-pvc-mongodb-1.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastAppliedWallTime" : ISODate("2022-12-06T04:33:36.252Z"),
"lastDurableWallTime" : ISODate("2022-12-06T04:33:36.252Z"),
"lastHeartbeat" : ISODate("2022-12-07T12:04:11.496Z"),
"lastHeartbeatRecv" : ISODate("2022-12-06T04:33:41.424Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to custom-pvc-mongodb-1.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017 :: caused by :: Could not find address for custom-pvc-mongodb-1.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017: SocketException: Host not found (authoritative)",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 1,
"configTerm" : 3
},
{
"_id" : 2,
"name" : "custom-pvc-mongodb-2.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1052905,
"optime" : {
"ts" : Timestamp(1670301216, 1),
"t" : NumberLong(3)
},
"optimeDurable" : {
"ts" : Timestamp(1670301216, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2022-12-06T04:33:36Z"),
"optimeDurableDate" : ISODate("2022-12-06T04:33:36Z"),
"lastAppliedWallTime" : ISODate("2022-12-06T04:33:36.252Z"),
"lastDurableWallTime" : ISODate("2022-12-06T04:33:36.252Z"),
"lastHeartbeat" : ISODate("2022-12-07T12:04:12.099Z"),
"lastHeartbeatRecv" : ISODate("2022-12-06T17:34:23.114Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 1,
"configTerm" : 3
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1670414650, 1),
"signature" : {
"hash" : BinData(0,"kmVIlznkue1ffD6Se8Ztbzc22j0="),
"keyId" : NumberLong("7169853219500195844")
}
},
"operationTime" : Timestamp(1670414650, 1)
}
You need to connect to the entire replica set, not to a single node. The connection string would be:
mongodb://custom-pvc-mongodb-0.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017,custom-pvc-mongodb-1.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017,custom-pvc-mongodb-2.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017/admin?replicaSet=custom-pvc-mongodb
On connection, the driver automatically connects to the PRIMARY member, and switchover to a new PRIMARY is handled automatically. You may also add &readPreference=primaryPreferred.
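For reference, the same connection expressed in mongosh (just a sketch; the hosts are the three members from rs.status() above):
// The full seed list plus replicaSet lets the client discover and follow
// whichever member is currently PRIMARY.
const uri = "mongodb://custom-pvc-mongodb-0.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017," +
            "custom-pvc-mongodb-1.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017," +
            "custom-pvc-mongodb-2.custom-pvc-mongodb-svc.mongo-3033.svc.cluster.local:27017/admin" +
            "?replicaSet=custom-pvc-mongodb&readPreference=primaryPreferred";
const conn = new Mongo(uri);
conn.getDB("admin").runCommand({ hello: 1 });   // isWritablePrimary: true once a PRIMARY is found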

How to replicate data between original replicaset and new replicaset

I have a MongoDB replica set installed on my original host (k8s cluster 1) and I have just installed a new MongoDB replica set on the new host (k8s cluster 2).
My goal is to replicate the data from the original host to the new host, so that I can shut down the old host.
There is a blog post I am trying to follow to achieve this, but I am having some trouble (reference: https://mschmitt.org/blog/mongodb-migration-replicaset/).
original_host: aadfad22ca65e4ff09de37179f961d5b-<redacted>.us-east-2.elb.amazonaws.com:27017
new_host: ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017
ATTEMPT
I tried to step down the new host with rs.stepDown({force:true}), but it fails:
rs.stepDown({force:true})
{
"ok" : 0,
"errmsg" : "No electable secondaries caught up as of 2022-05-24T08:50:24.809+00:00. Please use the replSetStepDown command with the argument {force: true} to force node to step down.",
"code" : 262,
"codeName" : "ExceededTimeLimit",
"$clusterTime" : {
"clusterTime" : Timestamp(1653382215, 1),
"signature" : {
"hash" : BinData(0,"qugjVF4xVS8+MNYlCgkK+0/Jt1o="),
"keyId" : NumberLong("7100922576402120709")
}
},
"operationTime" : Timestamp(1653382211, 1)
}
I tried to add the new_host as a replica set member to the original replica set using rs.add( { host: "ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017", priority: 0, votes: 0 } ). However, it fails with "replica set IDs do not match":
{
"_id" : 2,
"name" : "ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastAppliedWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastDurableWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-05-24T08:52:21.889Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "replica set IDs do not match, ours: 628c90e7c07f5017faff8b75; remote node's: 628b8b76ab0cdc7f9158b23b",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1,
"configTerm" : -1
}
Original replica set rs.status():
rs0:PRIMARY> rs.status()
{
"set" : "rs0",
"date" : ISODate("2022-05-24T08:45:48.912Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 1,
"votingMembersCount" : 2,
"writableVotingMembersCount" : 1,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1653381945, 1),
"t" : NumberLong(2)
},
"lastCommittedWallTime" : ISODate("2022-05-24T08:45:45.660Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1653381945, 1),
"t" : NumberLong(2)
},
"appliedOpTime" : {
"ts" : Timestamp(1653381945, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1653381945, 1),
"t" : NumberLong(2)
},
"lastAppliedWallTime" : ISODate("2022-05-24T08:45:45.660Z"),
"lastDurableWallTime" : ISODate("2022-05-24T08:45:45.660Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1653381945, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "electionTimeout",
"lastElectionDate" : ISODate("2022-05-24T08:01:45.606Z"),
"electionTerm" : NumberLong(2),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1653379303, 15),
"t" : NumberLong(1)
},
"numVotesNeeded" : 1,
"priorityAtElection" : 5,
"electionTimeoutMillis" : NumberLong(10000),
"newTermStartDate" : ISODate("2022-05-24T08:01:45.609Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2022-05-24T08:01:45.611Z")
},
"members" : [
{
"_id" : 0,
"name" : "aadfad22ca65e4ff09de37179f961d5b-<redacted>.us-east-2.elb.amazonaws.com:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 2644,
"optime" : {
"ts" : Timestamp(1653381945, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2022-05-24T08:45:45Z"),
"lastAppliedWallTime" : ISODate("2022-05-24T08:45:45.660Z"),
"lastDurableWallTime" : ISODate("2022-05-24T08:45:45.660Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1653379305, 1),
"electionDate" : ISODate("2022-05-24T08:01:45Z"),
"configVersion" : 7,
"configTerm" : 2,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "mongodb-staging-arbiter-0.mongodb-staging-arbiter-headless.staging.svc.cluster.local:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 2627,
"lastHeartbeat" : ISODate("2022-05-24T08:45:47.161Z"),
"lastHeartbeatRecv" : ISODate("2022-05-24T08:45:47.763Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 7,
"configTerm" : 2
},
{
"_id" : 2,
"name" : "ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastAppliedWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastDurableWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-05-24T08:45:47.280Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "replica set IDs do not match, ours: 628c90e7c07f5017faff8b75; remote node's: 628b8b76ab0cdc7f9158b23b",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1,
"configTerm" : -1
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1653381945, 1),
"signature" : {
"hash" : BinData(0,"1GVq5OEAV3YzQmhPHuzQPVwC7+c="),
"keyId" : NumberLong("7101210034268274693")
}
},
"operationTime" : Timestamp(1653381945, 1)
}
New replica set rs.status():
{
"set" : "rs0",
"date" : ISODate("2022-05-24T08:45:52.621Z"),
"myState" : 1,
"term" : NumberLong(6),
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 1,
"votingMembersCount" : 2,
"writableVotingMembersCount" : 1,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1653381951, 1),
"t" : NumberLong(6)
},
"lastCommittedWallTime" : ISODate("2022-05-24T08:45:51.410Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1653381951, 1),
"t" : NumberLong(6)
},
"appliedOpTime" : {
"ts" : Timestamp(1653381951, 1),
"t" : NumberLong(6)
},
"durableOpTime" : {
"ts" : Timestamp(1653381951, 1),
"t" : NumberLong(6)
},
"lastAppliedWallTime" : ISODate("2022-05-24T08:45:51.410Z"),
"lastDurableWallTime" : ISODate("2022-05-24T08:45:51.410Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1653381921, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "electionTimeout",
"lastElectionDate" : ISODate("2022-05-24T08:42:31.402Z"),
"electionTerm" : NumberLong(6),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1653381732, 1),
"t" : NumberLong(5)
},
"numVotesNeeded" : 1,
"priorityAtElection" : 5,
"electionTimeoutMillis" : NumberLong(10000),
"newTermStartDate" : ISODate("2022-05-24T08:42:31.405Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2022-05-24T08:42:31.407Z")
},
"members" : [
{
"_id" : 0,
"name" : "ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 204,
"optime" : {
"ts" : Timestamp(1653381951, 1),
"t" : NumberLong(6)
},
"optimeDate" : ISODate("2022-05-24T08:45:51Z"),
"lastAppliedWallTime" : ISODate("2022-05-24T08:45:51.410Z"),
"lastDurableWallTime" : ISODate("2022-05-24T08:45:51.410Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1653381751, 1),
"electionDate" : ISODate("2022-05-24T08:42:31Z"),
"configVersion" : 4,
"configTerm" : 6,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "mongodb-prod-arbiter-0.mongodb-prod-arbiter-headless.mongodb.svc.cluster.local:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 187,
"lastHeartbeat" : ISODate("2022-05-24T08:45:50.898Z"),
"lastHeartbeatRecv" : ISODate("2022-05-24T08:45:52.493Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 4,
"configTerm" : 6
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1653381951, 1),
"signature" : {
"hash" : BinData(0,"XYfuUbFoIwFfLZx3FqPOAD+CU44="),
"keyId" : NumberLong("7100922576402120709")
}
},
"operationTime" : Timestamp(1653381951, 1)
}
Drop all data from new hosts, i.e. stop mongod, delete all files from dbPath and start again.
Then simply add the new host as a new member, i.e. rs.add("ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017"). Once you have added it, an initial sync runs. Check progress with rs.status(); it may take some time.
When the new members are in state SECONDARY and rs.printSecondaryReplicationInfo() shows
rs.printSecondaryReplicationInfo()
source: "aadfad22ca65e4ff09de37179f961d5b-<redacted>.us-east-2.elb.amazonaws.com:27017"
syncedTo: Tue May 24 2022 11:23:23 GMT+0200 (CEST)
0 secs (0 hrs) behind the primary
(1-2 sec behind the primary is also normal), then you can remove the old host from the replica set with rs.remove('aadfad22ca65e4ff09de37179f961d5b-<redacted>.us-east-2.elb.amazonaws.com:27017').
Finally, you can stop mongod on the old hosts and remove everything from there.
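Condensed into mongosh commands on the original PRIMARY, the sequence above looks roughly like this (hostnames copied from the question, redactions kept):
rs.add("ab775d626d81742478af2744923e2ec6-<redacted>.us-east-2.elb.amazonaws.com:27017")
rs.status().members.map(m => ({ host: m.name, state: m.stateStr }))   // wait until the new member reports SECONDARY
rs.printSecondaryReplicationInfo()                                    // wait until it is ~0 secs behind the primary
rs.remove("aadfad22ca65e4ff09de37179f961d5b-<redacted>.us-east-2.elb.amazonaws.com:27017")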

Skewed Read Load on Mongo Replica Set

I have set up a MongoDB replica set with one primary and two secondaries. The problem I am facing is that reads from application servers, which connect using the replica-set connection URL, invariably go to only one secondary, causing a huge skew in read load between the two secondaries.
Due to this skew, I am constrained for resources on one server while resources on the other are wasted.
rs.status()
{
"set" : "rs0",
"date" : ISODate("2020-09-08T19:39:20.394Z"),
"myState" : 1,
"term" : NumberLong(16),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1599593958, 2042),
"t" : NumberLong(16)
},
"lastCommittedWallTime" : ISODate("2020-09-08T19:39:18.908Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1599593958, 2042),
"t" : NumberLong(16)
},
"readConcernMajorityWallTime" : ISODate("2020-09-08T19:39:18.908Z"),
"appliedOpTime" : {
"ts" : Timestamp(1599593959, 1176),
"t" : NumberLong(16)
},
"durableOpTime" : {
"ts" : Timestamp(1599593958, 2042),
"t" : NumberLong(16)
},
"lastAppliedWallTime" : ISODate("2020-09-08T19:39:19.138Z"),
"lastDurableWallTime" : ISODate("2020-09-08T19:39:18.908Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1599593936, 300),
"lastStableCheckpointTimestamp" : Timestamp(1599593936, 300),
"electionCandidateMetrics" : {
"lastElectionReason" : "priorityTakeover",
"lastElectionDate" : ISODate("2020-08-11T17:18:08.040Z"),
"electionTerm" : NumberLong(16),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(1597166288, 246),
"t" : NumberLong(15)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1597166288, 246),
"t" : NumberLong(15)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 2,
"electionTimeoutMillis" : NumberLong(10000),
"priorPrimaryMemberId" : 5,
"targetCatchupOpTime" : {
"ts" : Timestamp(1597166288, 394),
"t" : NumberLong(15)
},
"numCatchUpOps" : NumberLong(148),
"newTermStartDate" : ISODate("2020-08-11T17:18:08.074Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2020-08-11T17:18:10.782Z")
},
"members" : [
{
"_id" : 3,
"name" : "1.1.1.1:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 2427845,
"optime" : {
"ts" : Timestamp(1599593959, 1176),
"t" : NumberLong(16)
},
"optimeDate" : ISODate("2020-09-08T19:39:19Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1597166288, 383),
"electionDate" : ISODate("2020-08-11T17:18:08Z"),
"configVersion" : 32,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 5,
"name" : "3.3.3.3:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 3672,
"optime" : {
"ts" : Timestamp(1599593954, 3378),
"t" : NumberLong(16)
},
"optimeDurable" : {
"ts" : Timestamp(1599593954, 3378),
"t" : NumberLong(16)
},
"optimeDate" : ISODate("2020-09-08T19:39:14Z"),
"optimeDurableDate" : ISODate("2020-09-08T19:39:14Z"),
"lastHeartbeat" : ISODate("2020-09-08T19:39:19.238Z"),
"lastHeartbeatRecv" : ISODate("2020-09-08T19:39:20.261Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "1.1.1.1:27017",
"syncSourceHost" : "1.1.1.1:27017",
"syncSourceId" : 3,
"infoMessage" : "",
"configVersion" : 32
},
{
"_id" : 6,
"name" : "2.2.2.2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 3341,
"optime" : {
"ts" : Timestamp(1599593957, 2190),
"t" : NumberLong(16)
},
"optimeDurable" : {
"ts" : Timestamp(1599593957, 2190),
"t" : NumberLong(16)
},
"optimeDate" : ISODate("2020-09-08T19:39:17Z"),
"optimeDurableDate" : ISODate("2020-09-08T19:39:17Z"),
"lastHeartbeat" : ISODate("2020-09-08T19:39:18.751Z"),
"lastHeartbeatRecv" : ISODate("2020-09-08T19:39:20.078Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "1.1.1.1:27017",
"syncSourceHost" : "1.1.1.1:27017",
"syncSourceId" : 3,
"infoMessage" : "",
"configVersion" : 32
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1599593959, 1329),
"signature" : {
"hash" : BinData(0,"dfdfdggjhkljoj+mvY8="),
"keyId" : NumberLong("897987897897987")
}
},
"operationTime" : Timestamp(1599593959, 1176)
}
Please help me here. Is this something that is normally expected from a MongoDB replica set?
Many thanks in advance :)
The server selection algorithm for secondaries in a replica set is described in the MongoDB documentation on read preference and server selection.
You can play with the local threshold and max staleness parameters; if you increase both sufficiently, you should see a roughly even load distribution between the secondaries, assuming a compliant driver.
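Expressed as connection-string options, those two knobs would look something like this (member addresses are from the question; the database name and the values are only illustrative):
// localThresholdMS widens the latency window of members considered "nearest";
// maxStalenessSeconds bounds how far behind a secondary may lag and still be selected.
const uri = "mongodb://1.1.1.1:27017,2.2.2.2:27017,3.3.3.3:27017/test" +
            "?replicaSet=rs0" +
            "&readPreference=secondaryPreferred" +
            "&localThresholdMS=200" +
            "&maxStalenessSeconds=120";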

Replica Arbiter Set

I need to change my arbiter because AWS is retiring my EC2 instance ("Retiring: This instance is scheduled for retirement...").
So I need to replace my arbiter server. I created a new server and set it up with everything that exists on the old one, since I had already saved an AMI for it.
To change the replica set members, I used the JavaScript splice method mentioned in the official MongoDB documentation.
Store the configuration in a new variable:
cfg = rs.conf()
Remove the member from the variable:
cfg.members.splice(2,1) // remove the third member, which is no longer in use and needs to be replaced
Then overwrite the replica set configuration document with the following command:
rs.reconfig(cfg)
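The new arbiter then still has to be added back. A minimal sketch, with a placeholder hostname (not from the post):
rs.addArb("mynewarbiterhostname:port")   // add the replacement arbiter
rs.conf().members.map(m => ({ host: m.host, arbiterOnly: m.arbiterOnly }))   // verify membership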
myreplicaSetName:ARBITER> rs.status()
{
"set" : "myreplicaSetName",
"date" : ISODate("2019-12-10T06:49:35.584Z"),
"myState" : 7,
"term" : NumberLong(3),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"appliedOpTime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"durableOpTime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
}
},
"members" : [
{
"_id" : 0,
"name" : "myhostname:port",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 120,
"optime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"optimeDurable" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2019-12-10T06:49:33Z"),
"optimeDurableDate" : ISODate("2019-12-10T06:49:33Z"),
"lastHeartbeat" : ISODate("2019-12-10T06:49:33.593Z"),
"lastHeartbeatRecv" : ISODate("2019-12-10T06:49:35.267Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1554185040, 1),
"electionDate" : ISODate("2019-04-02T06:04:00Z"),
"configVersion" : 15
},
{
"_id" : 1,
"name" : "my2ndhostname:port",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 120,
"optime" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"optimeDurable" : {
"ts" : Timestamp(1575960573, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2019-12-10T06:49:33Z"),
"optimeDurableDate" : ISODate("2019-12-10T06:49:33Z"),
"lastHeartbeat" : ISODate("2019-12-10T06:49:33.592Z"),
"lastHeartbeatRecv" : ISODate("2019-12-10T06:49:34.289Z"),
"pingMs" : NumberLong(1),
"lastHeartbeatMessage" : "",
"syncingTo" : "myhostname:port",
"syncSourceHost" : "myhostname:port",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 15
},
{
"_id" : 2,
"name" : "my3rdhostname:port",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 312,
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"maintenanceMode" : 1,
"infoMessage" : "could not find member to sync from",
"configVersion" : 15,
"self" : true,
"lastHeartbeatMessage" : ""
}
],
"ok" : 1
}
Everything went fine with the reconfig. At first the new member showed as OTHER when I added it as an arbiter, and when I log in to MongoDB on the arbiter server it now shows ARBITER, so the new arbiter is set up successfully. But why does it show
"infoMessage" : "could not find member to sync from",

MongoDB replica set not electing new primary after primary is manually stopped

I have a MongoDB 4.0.1 replica set with 4 nodes, set up with the following configuration:
var cfg = {
"_id": "rs0",
"version": 1,
"members": [
{
"_id": 0,
"host": "mongo-1:27017",
"priority": 3
},
{
"_id": 1,
"host": "mongo-2:27017",
"priority": 2
},
{
"_id": 2,
"host": "mongo-3:27017",
"priority": 1
},
{
"_id": 3,
"host": "mongo-4:27017",
"arbiterOnly": true
}
]
};
rs.initiate(cfg, { force: true });
rs.reconfig(cfg, { force: true });
rs.status();
It's all on localhost and connecting works fine. I'm basically testing that, after bringing down the primary mongo-1, the remaining nodes elect a new primary (within 10-12 seconds), ideally mongo-2 since it has the higher priority.
But if I take down the container mongo-1, the other 3 nodes just keep trying to connect to mongo-1 indefinitely. I've even waited over 10 minutes, but all the logs show are attempted connections and host-unreachable errors.
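While mongo-1 is down, the view of a surviving member can be captured like this (only a diagnostic sketch, run in mongosh against mongo-2):
const m2 = new Mongo("mongo-2:27017")
const st = m2.getDB("admin").runCommand({ replSetGetStatus: 1 })
st.members.map(m => ({ host: m.name, state: m.stateStr, msg: m.lastHeartbeatMessage }))
// A healthy failover should show mongo-2 or mongo-3 reaching PRIMARY within
// roughly electionTimeoutMillis (10 seconds by default).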
If anyone has any ideas on what I might be missing, or a config option that I'm not setting, please feel free to comment or suggest a solution.
Thanks!
EDIT
Here is my output from rs.status():
rs0:PRIMARY> rs.status()
{
"set" : "rs0",
"date" : ISODate("2018-10-27T00:47:23.582Z"),
"myState" : 1,
"term" : NumberLong(4),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"appliedOpTime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"durableOpTime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1540601162, 1),
"members" : [
{
"_id" : 0,
"name" : "mongo-1:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 227,
"optime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2018-10-27T00:47:15Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "could not find member to sync from",
"electionTime" : Timestamp(1540601173, 1),
"electionDate" : ISODate("2018-10-27T00:46:13Z"),
"configVersion" : 1,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "mongo-2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 31,
"optime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"optimeDurable" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2018-10-27T00:47:15Z"),
"optimeDurableDate" : ISODate("2018-10-27T00:47:15Z"),
"lastHeartbeat" : ISODate("2018-10-27T00:47:22.085Z"),
"lastHeartbeatRecv" : ISODate("2018-10-27T00:47:22.295Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "mongo-1:27017",
"syncSourceHost" : "mongo-1:27017",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 1
},
{
"_id" : 2,
"name" : "mongo-3:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 225,
"optime" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"optimeDurable" : {
"ts" : Timestamp(1540601235, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2018-10-27T00:47:15Z"),
"optimeDurableDate" : ISODate("2018-10-27T00:47:15Z"),
"lastHeartbeat" : ISODate("2018-10-27T00:47:21.677Z"),
"lastHeartbeatRecv" : ISODate("2018-10-27T00:47:22.491Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "mongo-1:27017",
"syncSourceHost" : "mongo-1:27017",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 1
},
{
"_id" : 3,
"name" : "mongo-4:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 225,
"lastHeartbeat" : ISODate("2018-10-27T00:47:21.657Z"),
"lastHeartbeatRecv" : ISODate("2018-10-27T00:47:21.804Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 1
}
],
"ok" : 1,
"operationTime" : Timestamp(1540601235, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1540601235, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}