Connecting to MongoDB behind an AWS NLB: sporadic timeouts
For better or worse, this is how I have things set up now:
mongo client connects to:
-> AWS Internal Network (TCP) Load balancer
-> Traefik (reverse proxy)
-> mongo (from docker image mongo:4.4.17)
(This setup was a quick hack to get MongoDB in place. I've done a lot of reading while debugging this issue and have learned my setup maybe isn't the best, but I've found nothing to suggest it shouldn't work.)
Issue:
Connections will work until they don't. My 'test' is a simple Node script that opens a db connection, executes db.find(), and logs the result. It works a few times, then hangs. The mongodb logs suggest the client reaches its timeout and disconnects. Other than the client-side timeout, I see no errors to explain what mongo was doing for 30s.
If I make requests directly to Traefik or to mongo, this issue goes away entirely, so it must be related to the request going through the NLB.
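To narrow this down further, a raw TCP probe of each hop (bypassing the driver entirely) can show whether the stall happens at the socket level. This is a minimal sketch; the hosts and ports are the ones from the test script below, and the NLB hostname is a placeholder:

// tcp-probe.js - time how long a bare TCP connect to each hop takes.
const net = require("net");
const hops = [
  { name: "mongo direct", host: "10.20.11.28", port: 3204 },
  { name: "traefik", host: "10.20.21.43", port: 3204 },
  { name: "nlb", host: "ninja-internal-network-1234567.elb.us-west-2.amazonaws.com", port: 3204 },
];
for (const hop of hops) {
  const start = Date.now();
  const sock = net.connect(hop.port, hop.host, () => {
    console.log(hop.name + ": connected in " + (Date.now() - start) + "ms");
    sock.end();
  });
  sock.setTimeout(5000, () => {
    console.log(hop.name + ": no response after " + (Date.now() - start) + "ms");
    sock.destroy();
  });
  sock.on("error", (err) => console.log(hop.name + ": " + err.message));
}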
Some Logs:
mongodb logs (level 0):
Test script (worked, got a valid response and exited cleanly):
{"t":{"$date":"2022-11-08T18:46:15.848+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.21.43:57206","connectionId":13,"connectionCount":3}}
{"t":{"$date":"2022-11-08T18:46:15.853+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn13","msg":"client metadata","attr":{"remote":"10.20.21.43:57206","client":"conn13","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:46:15.862+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.31.176:42852","connectionId":14,"connectionCount":4}}
{"t":{"$date":"2022-11-08T18:46:15.862+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn14","msg":"client metadata","attr":{"remote":"10.20.31.176:42852","client":"conn14","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:46:15.878+00:00"},"s":"I", "c":"ACCESS", "id":20250, "ctx":"conn14","msg":"Authentication succeeded","attr":{"mechanism":"SCRAM-SHA-256","speculative":true,"principalName":"user","authenticationDatabase":"tracker","remote":"10.20.31.176:42852","extraInfo":{}}}
{"t":{"$date":"2022-11-08T18:46:15.892+00:00"},"s":"I", "c":"NETWORK", "id":22944, "ctx":"conn13","msg":"Connection ended","attr":{"remote":"10.20.21.43:57206","connectionId":13,"connectionCount":3}}
{"t":{"$date":"2022-11-08T18:46:15.894+00:00"},"s":"I", "c":"NETWORK", "id":22944, "ctx":"conn14","msg":"Connection ended","attr":{"remote":"10.20.31.176:42852","connectionId":14,"connectionCount":2}}
The exact same test script, run a few seconds later, timed out after 30 seconds:
{"t":{"$date":"2022-11-08T18:46:37.143+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.21.43:49370","connectionId":15,"connectionCount":3}}
{"t":{"$date":"2022-11-08T18:46:37.147+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn15","msg":"client metadata","attr":{"remote":"10.20.21.43:49370","client":"conn15","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:46:43.980+00:00"},"s":"I", "c":"STORAGE", "id":22430, "ctx":"WTCheckpointThread","msg":"WiredTiger message","attr":{"message":"[1667933203:980496][1:0x7fc8a881e700], WT_SESSION.checkpoint: [WT_VERB_CHECKPOINT_PROGRESS] saving checkpoint snapshot min: 8, snapshot max: 8 snapshot count: 0, oldest timestamp: (0, 0) , meta checkpoint timestamp: (0, 0) base write gen: 195129"}}
{"t":{"$date":"2022-11-08T18:46:47.658+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.31.176:42300","connectionId":16,"connectionCount":4}}
{"t":{"$date":"2022-11-08T18:46:47.658+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn16","msg":"client metadata","attr":{"remote":"10.20.31.176:42300","client":"conn16","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:47:07.163+00:00"},"s":"I", "c":"-", "id":20883, "ctx":"conn15","msg":"Interrupted operation as its client disconnected","attr":{"opId":2510}}
{"t":{"$date":"2022-11-08T18:47:07.163+00:00"},"s":"I", "c":"NETWORK", "id":22944, "ctx":"conn15","msg":"Connection ended","attr":{"remote":"10.20.21.43:49370","connectionId":15,"connectionCount":3}}
{"t":{"$date":"2022-11-08T18:47:07.163+00:00"},"s":"I", "c":"NETWORK", "id":22944, "ctx":"conn16","msg":"Connection ended","attr":{"remote":"10.20.31.176:42300","connectionId":16,"connectionCount":2}}
Same test script - Log Level 3:
Unsuccessful (timeout at 30s)
{"t":{"$date":"2022-11-08T18:56:29.522+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.31.176:57422","connectionId":2,"connectionCount":2}}
{"t":{"$date":"2022-11-08T18:56:29.523+00:00"},"s":"D3", "c":"EXECUTOR", "id":22983, "ctx":"listener","msg":"Starting new executor thread in passthrough mode"}
{"t":{"$date":"2022-11-08T18:56:29.526+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn2","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"ismaster":true,"client":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"},"compression":[],"helloOk":true,"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:29.526+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn2","msg":"client metadata","attr":{"remote":"10.20.31.176:57422","client":"conn2","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:56:29.526+00:00"},"s":"D3", "c":"NETWORK", "id":22934, "ctx":"conn2","msg":"Starting server-side compression negotiation"}
{"t":{"$date":"2022-11-08T18:56:29.527+00:00"},"s":"D3", "c":"NETWORK", "id":22936, "ctx":"conn2","msg":"No compressors provided"}
{"t":{"$date":"2022-11-08T18:56:29.527+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn2","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"ismaster":true,"client":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"},"compression":[],"helloOk":true,"$db":"admin"},"numYields":0,"reslen":329,"locks":{},"protocol":"op_query","durationMillis":0}}
{"t":{"$date":"2022-11-08T18:56:29.527+00:00"},"s":"D1", "c":"QUERY", "id":22790, "ctx":"conn2","msg":"Received interrupt request for unknown op","attr":{"opId":317}}
{"t":{"$date":"2022-11-08T18:56:29.527+00:00"},"s":"D2", "c":"QUERY", "id":22783, "ctx":"conn2","msg":"Ops known during interrupt","attr":{"ops":[]}}
{"t":{"$date":"2022-11-08T18:56:30.034+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn2","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:30.034+00:00"},"s":"D3", "c":"FTDC", "id":23904, "ctx":"conn2","msg":"Using maxAwaitTimeMS for awaitable isMaster protocol."}
{"t":{"$date":"2022-11-08T18:56:36.026+00:00"},"s":"D2", "c":"WRITE", "id":21976, "ctx":"abortExpiredTransactions","msg":"Scanning sessions","attr":{"sessionCount":0}}
{"t":{"$date":"2022-11-08T18:56:40.037+00:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"10.20.31.176:57284","connectionId":3,"connectionCount":3}}
{"t":{"$date":"2022-11-08T18:56:40.037+00:00"},"s":"D3", "c":"EXECUTOR", "id":22983, "ctx":"listener","msg":"Starting new executor thread in passthrough mode"}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn3","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"ismaster":true,"client":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"},"compression":[],"helloOk":true,"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn3","msg":"client metadata","attr":{"remote":"10.20.31.176:57284","client":"conn3","doc":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"}}}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"D3", "c":"NETWORK", "id":22934, "ctx":"conn3","msg":"Starting server-side compression negotiation"}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"D3", "c":"NETWORK", "id":22936, "ctx":"conn3","msg":"No compressors provided"}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn3","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"ismaster":true,"client":{"driver":{"name":"nodejs","version":"3.7.3"},"os":{"type":"Linux","name":"linux","architecture":"x64","version":"5.10.147-133.644.amzn2.x86_64"},"platform":"'Node.js v19.0.0, LE (unified)"},"compression":[],"helloOk":true,"$db":"admin"},"numYields":0,"reslen":329,"locks":{},"protocol":"op_query","durationMillis":0}}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"D1", "c":"QUERY", "id":22790, "ctx":"conn3","msg":"Received interrupt request for unknown op","attr":{"opId":452}}
{"t":{"$date":"2022-11-08T18:56:40.038+00:00"},"s":"D2", "c":"QUERY", "id":22783, "ctx":"conn3","msg":"Ops known during interrupt","attr":{"ops":[]}}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"D3", "c":"NETWORK", "id":22934, "ctx":"conn2","msg":"Starting server-side compression negotiation"}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"D3", "c":"NETWORK", "id":22935, "ctx":"conn2","msg":"Compression negotiation not requested by client"}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"D3", "c":"FTDC", "id":23905, "ctx":"conn2","msg":"Using exhaust for isMaster or hello protocol"}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn2","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"},"numYields":0,"reslen":323,"locks":{},"protocol":"op_msg","durationMillis":0}}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"D1", "c":"QUERY", "id":22790, "ctx":"conn2","msg":"Received interrupt request for unknown op","attr":{"opId":326}}
{"t":{"$date":"2022-11-08T18:56:40.044+00:00"},"s":"D2", "c":"QUERY", "id":22783, "ctx":"conn2","msg":"Ops known during interrupt","attr":{"ops":[]}}
{"t":{"$date":"2022-11-08T18:56:40.045+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn2","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:40.045+00:00"},"s":"D3", "c":"FTDC", "id":23904, "ctx":"conn2","msg":"Using maxAwaitTimeMS for awaitable isMaster protocol."}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn3","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"ismaster":1,"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"D3", "c":"NETWORK", "id":22934, "ctx":"conn3","msg":"Starting server-side compression negotiation"}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"D3", "c":"NETWORK", "id":22935, "ctx":"conn3","msg":"Compression negotiation not requested by client"}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn3","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"ismaster":1,"$db":"admin"},"numYields":0,"reslen":304,"locks":{},"protocol":"op_msg","durationMillis":0}}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"D1", "c":"QUERY", "id":22790, "ctx":"conn3","msg":"Received interrupt request for unknown op","attr":{"opId":579}}
{"t":{"$date":"2022-11-08T18:56:50.043+00:00"},"s":"D2", "c":"QUERY", "id":22783, "ctx":"conn3","msg":"Ops known during interrupt","attr":{"ops":[]}}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D3", "c":"NETWORK", "id":22934, "ctx":"conn2","msg":"Starting server-side compression negotiation"}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D3", "c":"NETWORK", "id":22935, "ctx":"conn2","msg":"Compression negotiation not requested by client"}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D3", "c":"FTDC", "id":23905, "ctx":"conn2","msg":"Using exhaust for isMaster or hello protocol"}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn2","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"},"numYields":0,"reslen":323,"locks":{},"protocol":"op_msg","durationMillis":0}}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D1", "c":"QUERY", "id":22790, "ctx":"conn2","msg":"Received interrupt request for unknown op","attr":{"opId":453}}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D2", "c":"QUERY", "id":22783, "ctx":"conn2","msg":"Ops known during interrupt","attr":{"ops":[]}}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D2", "c":"COMMAND", "id":21965, "ctx":"conn2","msg":"About to run the command","attr":{"db":"admin","commandArgs":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"}}}
{"t":{"$date":"2022-11-08T18:56:50.055+00:00"},"s":"D3", "c":"FTDC", "id":23904, "ctx":"conn2","msg":"Using maxAwaitTimeMS for awaitable isMaster protocol."}
{"t":{"$date":"2022-11-08T18:56:54.149+00:00"},"s":"D1", "c":"EXECUTOR", "id":23106, "ctx":"AuthorizationManager-0","msg":"Reaping this thread","attr":{"nextThreadRetirementDate":{"$date":"2022-11-08T18:57:24.149Z"}}}
{"t":{"$date":"2022-11-08T18:56:54.149+00:00"},"s":"D1", "c":"EXECUTOR", "id":23105, "ctx":"AuthorizationManager-0","msg":"Shutting down thread","attr":{"threadName":"AuthorizationManager-0","poolName":"AuthorizationManager"}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"I", "c":"-", "id":20883, "ctx":"conn2","msg":"Interrupted operation as its client disconnected","attr":{"opId":580}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"D3", "c":"-", "id":4892201, "ctx":"conn2","msg":"Internal assertion","attr":{"error":{"code":279,"codeName":"ClientDisconnect","errmsg":"operation was interrupted"},"location":"{fileName:\"src/mongo/util/interruptible.h\", line:398, functionName:\"operator()\"}"}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"D2", "c":"NETWORK", "id":22986, "ctx":"conn3","msg":"Session from remote encountered a network error during SourceMessage","attr":{"remote":"10.20.31.176:57284","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Connection closed by peer"}}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"I", "c":"NETWORK", "id":22944, "ctx":"conn3","msg":"Connection ended","attr":{"remote":"10.20.31.176:57284","connectionId":3,"connectionCount":2}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"D1", "c":"COMMAND", "id":21962, "ctx":"conn2","msg":"Assertion while executing command","attr":{"command":"hello","db":"admin","commandArgs":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"},"error":"ClientDisconnect: operation was interrupted"}}
{"t":{"$date":"2022-11-08T18:56:59.543+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn2","msg":"Slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"hello":true,"helloOk":true,"maxAwaitTimeMS":10000,"topologyVersion":{"processId":{"$oid":"636aa6433c5de5c55d30c876"},"counter":0},"$db":"admin"},"numYields":0,"ok":0,"errMsg":"operation was interrupted","errName":"ClientDisconnect","errCode":279,"reslen":117,"locks":{},"protocol":"op_msg","durationMillis":0}}
Test Script:
const mongodb = require("mongodb");

let url;
//url = 'mongodb://user:pass@10.20.11.28:3204/tracker';
//url = 'mongodb://user:pass@10.20.21.43:3204/tracker';
//url = 'mongodb://user:pass@10.20.31.176:3204/tracker';
url = 'mongodb://user:pass@ninja-internal-network-1234567.elb.us-west-2.amazonaws.com:3204/tracker';
console.log(url);

// Driver 3.x options; with useUnifiedTopology the legacy autoReconnect option is ignored.
const dbclient = new mongodb.MongoClient(url, {
  ssl: false,
  poolSize: 16,
  keepAlive: true,
  retryWrites: false,
  useNewUrlParser: true,
  useUnifiedTopology: true,
});

dbclient.connect(function (err) {
  if (err) {
    console.error("connection error:", err);
    return; // bail out instead of using a dead client
  }
  const db = dbclient.db('tracker');
  db.collection('visitors', function (err, collection) {
    collection.find({ aid: 7 }).toArray(function (err, items) {
      console.log(items.length);
      console.log(items[0]);
      dbclient.close();
    });
  });
});
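For what it's worth, 30 seconds is the unified topology's default serverSelectionTimeoutMS in the 3.x Node driver, which would explain the clean 30s hang before the client gives up. Below is a hedged variant of the connection options for diagnosis; the option names are standard driver 3.x options, but the values are guesses to surface the failure faster, not a confirmed fix:

const dbclient = new mongodb.MongoClient(url, {
  ssl: false,
  poolSize: 16,
  retryWrites: false,
  useNewUrlParser: true,
  useUnifiedTopology: true,
  serverSelectionTimeoutMS: 5000, // default is 30000; fail fast instead of hanging 30s
  connectTimeoutMS: 5000,         // TCP connect timeout
  socketTimeoutMS: 10000,         // abort reads/writes that stall
  keepAlive: true,
  keepAliveInitialDelay: 30000,   // start TCP keepalive probes after 30s idle
});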