How to publish batched messages to a pubsub topic with retrying requests - google-cloud-firestore

I've read all of this doc: https://cloud.google.com/pubsub/docs/publisher. There are 3 examples:
Publishing to topic
Publishing with batch mode
Publishing with retrying requests
I want to combine examples 2 and 3 into a single one, so that publishing with batch mode works with retrying requests. How can I do this?

The pubsub_v1.PublisherClient constructor accepts both settings. By passing the two optional parameters batch_settings and client_config you can configure batch mode together with retrying requests.
from google.cloud import pubsub_v1

publisher_client = pubsub_v1.PublisherClient(
    # Optional batch parameter
    batch_settings=pubsub_v1.types.BatchSettings(
        max_bytes=1024,  # One kilobyte
        max_latency=1,   # One second
    ),
    # Optional retrying parameter
    client_config={
        "interfaces": {
            "google.pubsub.v1.Publisher": {
                "retry_params": {
                    "messaging": {
                        'total_timeout_millis': 650000,  # default: 600000
                    }
                }
            }
        }
    },
    # Optional regional endpoint, e.g. "us-east1-pubsub.googleapis.com:443"
    client_options={
        "api_endpoint": REGIONAL_ENDPOINT
    }
)
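For completeness, here is a minimal publishing sketch on top of that client. PROJECT_ID, TOPIC_NAME and the message payloads are placeholders added for illustration, not part of the original example:
# Publish a few messages with the batched, retry-configured client above.
topic_path = publisher_client.topic_path(PROJECT_ID, TOPIC_NAME)

futures = []
for i in range(10):
    data = "message {}".format(i).encode("utf-8")
    # Messages are buffered until max_bytes or max_latency is hit and are then
    # sent as a single batched request, retried according to client_config.
    futures.append(publisher_client.publish(topic_path, data))

# Block until Pub/Sub has acknowledged every message in the batch.
for future in futures:
    print(future.result())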

Related

node-rdkafka - debug set to all but I only see broker transport failure

I am trying to connect to a Kafka server. Authentication is based on GSSAPI. It fails with:
/opt/app-root/src/server/node_modules/node-rdkafka/lib/error.js:411
return new LibrdKafkaError(e);
^
Error: broker transport failure
at Function.createLibrdkafkaError (/opt/app-root/src/server/node_modules/node-rdkafka/lib/error.js:411:10)
at /opt/app-root/src/server/node_modules/node-rdkafka/lib/client.js:350:28
This is my test_kafka.js:
const Kafka = require('node-rdkafka');

const kafkaConf = {
  'group.id': 'espdev2',
  'enable.auto.commit': true,
  'metadata.broker.list': 'br01',
  'security.protocol': 'SASL_SSL',
  'sasl.kerberos.service.name': 'kafka',
  'sasl.kerberos.keytab': 'svc_esp_kafka_nonprod.keytab',
  'sasl.kerberos.principal': 'svc_esp_kafka_nonprod@INT.LOCAL',
  'debug': 'all',
  'enable.ssl.certificate.verification': true,
  //'ssl.certificate.location': 'some-root-ca.cer',
  'ssl.ca.location': 'some-root-ca.cer',
  //'ssl.key.location': 'svc_esp_kafka_nonprod.keytab',
};

const topics = 'hello1';
console.log(Kafka.features);

let readStream = new Kafka.KafkaConsumer.createReadStream(kafkaConf, { "auto.offset.reset": "earliest" }, { topics })

readStream.on('data', function (message) {
  const messageString = message.value.toString();
  console.log(`Consumed message on Stream: ${messageString}`);
});
You can look at this issue for the explanation of this error:
https://github.com/edenhill/librdkafka/issues/1987
Taken from @edenhill:
As a general rule for librdkafka-based clients: given that the cluster and client are correctly configured, all errors can be ignored as they are most likely temporary and librdkafka will attempt to recover automatically. In this specific case; if a group coordinator request fails it will be retried (using any broker in state Up) within 500ms. The current assignment and group membership will not be affected, if a new coordinator is found before the missing heartbeats times out the membership (session.timeout.ms).
Auto offset commits will be stalled until a new coordinator is found. In a future version we'll extend the error type to include a severity, allowing applications to happily ignore non-terminal errors. At this time an application should consider all errors informational, and not terminal.

How to publish a basic message to RabbitMQ exchange using op-rabbit

I've been trying to get a very simple app working to publish messages to a RabbitMQ exchange using the Scala op-rabbit library to no avail.
I'm clearly doing something wrong, but the docs are very limited regarding message publishing.
I can get the actor to connect to RabbitMQ. However, upon publishing a message, it doesn't appear in Rabbit.
Here is the code I'm using to publish the message:
object RmqPublisher extends App {
  val actorSystem = ActorSystem("my-actor")

  private lazy val config: Config = ConfigFactory.load()

  val rabbitControl: ActorRef =
    actorSystem.actorOf(Props {
      new RabbitControl(
        ConnectionParams.fromConfig(config.getConfig("op-rabbit.rabbit"))
      )
    })

  rabbitControl ! Message.exchange("Test", "amq.direct", "my_routing_key")
}
Here is my config:
op-rabbit {
  topic-exchange-name = amq.direct
  channel-dispatcher = "op-rabbit.default-channel-dispatcher"
  default-channel-dispatcher {
    # Dispatcher is the name of the event-based dispatcher
    type = Dispatcher
    # What kind of ExecutionService to use
    executor = "fork-join-executor"
    # Configuration for the fork join pool
    fork-join-executor {
      # Min number of threads to cap factor-based parallelism number to
      parallelism-min = 2
      # Parallelism (threads) ... ceil(available processors * factor)
      parallelism-factor = 2.0
      # Max number of threads to cap factor-based parallelism number to
      parallelism-max = 4
    }
    # Throughput defines the maximum number of messages to be
    # processed per actor before the thread jumps to the next actor.
    # Set to 1 for as fair as possible.
    throughput = 100
  }
  rabbit {
    exchange-name = "amq.direct"
    routing-keys = "my_routing_key"
    virtual-host = "/"
    hosts = ["localhost"]
    username = "guest"
    password = "guest"
    port = 5672
    ssl = false
    connection-timeout = "5s"
    max-tps = 1000
  }
}
The logs suggest it is connected successfully as can be seen below:
[INFO] [08/04/2020 21:49:39.219] [my-actor-op-rabbit.default-channel-dispatcher-5] [akka://my-actor/user/$a/connection/$a] akka://my-actor/user/$a/connection/$a connected
[INFO] [08/04/2020 21:49:39.223] [my-actor-akka.actor.default-dispatcher-4] [akka://my-actor/user/$a/connection] akka://my-actor/user/$a/connection connected to amqp://guest@{localhost:5672}:5672//
[INFO] [08/04/2020 21:49:39.230] [my-actor-akka.actor.default-dispatcher-2] [akka://my-actor/user/$a/connection/confirmed-publisher-channel] akka://my-actor/user/$a/connection/confirmed-publisher-channel connected
Any ideas what I'm doing wrong?

Will PubSub forward a message to the dead letter topic after delivery_attempt exceeds max_delivery_attempts?

My subscriber looks like this:
import json

from google.cloud import pubsub_v1
from google.cloud.pubsub_v1.types import DeadLetterPolicy

subscriber = pubsub_v1.SubscriberClient()

dead_letter_policy = DeadLetterPolicy(
    # Must be the full topic resource name, e.g. projects/<project>/topics/dead_letter_topic
    dead_letter_topic='dead_letter_topic',
    max_delivery_attempts=5,
)

topic_path = subscriber.topic_path(PROJECT, TOPIC)
subscription_path = subscriber.subscription_path(PROJECT, SUBSCRIPTION)
subscriber.create_subscription(subscription_path, topic_path, dead_letter_policy=dead_letter_policy)

def callback(message):
    print("Received message: {}".format(message))
    print('Attempted:', message.delivery_attempt, 'times')
    data = message.data.decode('utf-8')
    data_d = json.loads(data)
    if data_d["name"] == "some_file.json":
        message.nack()
    else:
        message.ack()

future = subscriber.subscribe(subscription_path, callback=callback)
future.result()
The received message looks like this:
Received message: Message {
  data: b'{\n "kind": "storage#object",\n "id": "...'
  attributes: {
    "bucketId": "sample_bucket",
    ...
  }
}
Attempted: 12 times
Clearly it was attempted more than 5 times, so why can I still pull this message from the PubSub topic?
Here is the subscription info:
ackDeadlineSeconds: 10
deadLetterPolicy:
  deadLetterTopic: projects/someproject/topics/dead_letter
  maxDeliveryAttempts: 5
expirationPolicy:
  ttl: 2678400s
messageRetentionDuration: 604800s
name: projects/someproject/subscriptions/new_sub
pushConfig: {}
topic: projects/someproject/topics/pubsub_sample
Typically, this happens if you haven't given Pub/Sub permission to publish to your dead letter topic or subscribe to your subscription. You need to ensure you have run the following:
PUBSUB_SERVICE_ACCOUNT="service-${PROJECT_NUMBER}@gcp-sa-pubsub.iam.gserviceaccount.com"

gcloud pubsub topics add-iam-policy-binding <dead letter topic> \
  --member="serviceAccount:${PUBSUB_SERVICE_ACCOUNT}" \
  --role='roles/pubsub.publisher'

gcloud pubsub subscriptions add-iam-policy-binding <subscription with dead letter queue> \
  --member="serviceAccount:${PUBSUB_SERVICE_ACCOUNT}" \
  --role='roles/pubsub.subscriber'
If writing to the dead letter queue topic fails, then Cloud Pub/Sub will continue to deliver the message to your subscriber.
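If you want to double-check the subscription from code, here is a minimal sketch, assuming the same v1.x google-cloud-pubsub client used in the question (PROJECT and SUBSCRIPTION are placeholders):
from google.cloud import pubsub_v1

# Fetch the subscription and print its dead letter policy.
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(PROJECT, SUBSCRIPTION)
subscription = subscriber.get_subscription(subscription_path)
print(subscription.dead_letter_policy)
# If messages still arrive with delivery_attempt > max_delivery_attempts,
# the Pub/Sub service account is most likely missing the roles granted above.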

Remove trailing bits from hex pyModBus

I want to build a function that sends a request from Modbus to serial in hex. I more or less have a working function, but I have two issues.
Issue 1
[b'\x06', b'\x1c', b'\x00!', b'\r', b'\x1e', b'\x1d\xd3', b'\r', b'\n', b'\x1e', b'\x1d']
I can't remove this part, b'\r', b'\n', using the .split('\r \n') method, since it's not a string.
Issue 2
When getting a value from holding register 40 (33) and trying to use the .to_bytes() method, I keep getting b'\x00!', b'\r' when I'm expecting b'\x21'.
r = client.read_holding_registers(40)
re = r.registers[0]
req = re.to_bytes(2, 'big')
My functions to generate my request and to send it through pyserial:
def scanned_code():
    code = client.read_holding_registers(0)
    # code2 = client.re
    r = code.registers[0]
    return r


def send_request(data):
    """ Takes input from create_request() and sends data to serial port"""
    try:
        for i in range(len(data)):
            serial_client.write(data[i])
            # serial_client.writelines(data[i])
    except:
        print('could not send the packet <<<--------------------')


def create_request(job):
    """ Request type is 33 looks for job
    [06]
    [1c]
    req=33[0d][0a]
    job=30925[0d][0a][1e]
    [1d]
    """
    r = client.read_holding_registers(40)
    re = r.registers[0]
    req = re.to_bytes(2, 'big')
    num = job.to_bytes(2, 'big')
    data = [
        b'\x06',
        b'\x1C',
        req,
        b'\x0D',
        b'\x1E',
        num,
        b'\x0D',
        b'\x0A',
        b'\x1E',
        b'\x1D'
    ]
    print(data)
    # return the framed request so send_request() can write it
    return data


while True:
    # verify order_trigger() is True.
    while order_trigger() != False:
        print('inside while loop')
        # set flag coil back to 0
        reset_trigger()
        # get Job no.
        job = scanned_code()
        # check for JOB No. dif. than 0
        if job != 0:
            print(scanned_code())
            send_request(create_request(job))
            # send job request to host to get job data
            # send_request()
            # if TRUE send job request by serial to DVI client
            # get job request data
            # translate job request data to modbus
            # send data to plc
        else:
            print(' no scanned code')
            break
        time.sleep(INTERNAL_SLEEP_TIME)
    print('outside loop')
    time.sleep(EXTERNAL_SLEEP_TIME)
As an additional question: is this the proper way of doing things?

airflow http callback sensor

Our airflow implementation sends out http requests to get services to do tasks. We want those services to let airflow know when they complete their task, so we are sending a callback url to the service which they will call when their task is complete. I can't seem to find a callback sensor, however. How do people handle this normally?
There is no such thing as a callback or webhook sensor in Airflow. The definition of a sensor, taken from the documentation, follows:
Sensors are a certain type of operator that will keep running until a certain criterion is met. Examples include a specific file landing in HDFS or S3, a partition appearing in Hive, or a specific time of the day. Sensors are derived from BaseSensorOperator and run a poke method at a specified poke_interval until it returns True.
This means that a sensor is an operator that performs polling behavior on external systems. In that sense, your external services should have a way of keeping state for each executed task - either internally or externally - so that a polling sensor can check on that state.
This way you can use for example the airflow.operators.HttpSensor that polls an HTTP endpoint until a condition is met. Or even better, write your own custom sensor that gives you the opportunity to do more complex processing and keep state.
Otherwise, if the service outputs data in a storage system you can use a sensor that polls a database for example. I believe you get the idea.
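For illustration, a minimal HttpSensor sketch could look like the following; the connection id, endpoint and response check are placeholders for whatever status API your services expose (note that the import path for HttpSensor varies between Airflow versions):
from datetime import datetime

from airflow import DAG
from airflow.sensors.http_sensor import HttpSensor

dag = DAG('poll_external_service', start_date=datetime(2020, 1, 1), schedule_interval=None)

# Poll GET <my_service host>/tasks/1234/status every 30 seconds
# until the response reports that the task is done.
wait_for_task = HttpSensor(
    task_id='wait_for_task',
    http_conn_id='my_service',      # placeholder HTTP connection
    endpoint='tasks/1234/status',   # placeholder status endpoint
    response_check=lambda response: response.json().get('state') == 'done',
    poke_interval=30,
    timeout=60 * 60,
    dag=dag,
)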
I'm attaching a custom operator example that I've written for integrating with the Apache Livy API. The sensor does two things: a) submits a Spark job through the REST API and b) waits for the job to be completed.
The operator extends SimpleHttpOperator and at the same time implements a sensor-style poke method, thus combining both functionalities.
import json
from datetime import datetime
from time import sleep

from airflow.exceptions import AirflowException, AirflowSensorTimeout
from airflow.hooks.http_hook import HttpHook
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.utils.decorators import apply_defaults


class LivyBatchOperator(SimpleHttpOperator):
    """
    Submits a new Spark batch job through
    the Apache Livy REST API.
    """
    template_fields = ('args',)
    ui_color = '#f4a460'

    @apply_defaults
    def __init__(self,
                 name,
                 className,
                 file,
                 executorMemory='1g',
                 driverMemory='512m',
                 driverCores=1,
                 executorCores=1,
                 numExecutors=1,
                 args=[],
                 conf={},
                 timeout=120,
                 http_conn_id='apache_livy',
                 *arguments, **kwargs):
        """
        If xcom_push is True, response of an HTTP request will also
        be pushed to an XCom.
        """
        super(LivyBatchOperator, self).__init__(
            endpoint='batches', *arguments, **kwargs)
        self.http_conn_id = http_conn_id
        self.method = 'POST'
        self.endpoint = 'batches'
        self.name = name
        self.className = className
        self.file = file
        self.executorMemory = executorMemory
        self.driverMemory = driverMemory
        self.driverCores = driverCores
        self.executorCores = executorCores
        self.numExecutors = numExecutors
        self.args = args
        self.conf = conf
        self.timeout = timeout
        self.poke_interval = 10

    def execute(self, context):
        """
        Executes the task
        """
        payload = {
            "name": self.name,
            "className": self.className,
            "executorMemory": self.executorMemory,
            "driverMemory": self.driverMemory,
            "driverCores": self.driverCores,
            "executorCores": self.executorCores,
            "numExecutors": self.numExecutors,
            "file": self.file,
            "args": self.args,
            "conf": self.conf
        }
        print(payload)
        headers = {
            'X-Requested-By': 'airflow',
            'Content-Type': 'application/json'
        }
        http = HttpHook(self.method, http_conn_id=self.http_conn_id)
        self.log.info("Submitting batch through Apache Livy API")
        response = http.run(self.endpoint,
                            json.dumps(payload),
                            headers,
                            self.extra_options)
        # parse the JSON response
        obj = json.loads(response.content)
        # get the new batch Id
        self.batch_id = obj['id']
        self.log.info('Batch successfully submitted with Id %s', self.batch_id)
        # start polling the batch status
        started_at = datetime.utcnow()
        while not self.poke(context):
            if (datetime.utcnow() - started_at).total_seconds() > self.timeout:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
            sleep(self.poke_interval)
        self.log.info("Batch %s has finished", self.batch_id)

    def poke(self, context):
        '''
        Polls the Livy API for the status of the submitted batch.
        '''
        http = HttpHook(method='GET', http_conn_id=self.http_conn_id)
        self.log.info("Calling Apache Livy API to get batch status")
        # call the API endpoint
        endpoint = 'batches/' + str(self.batch_id)
        response = http.run(endpoint)
        # parse the JSON response
        obj = json.loads(response.content)
        # get the current state of the batch
        state = obj['state']
        # check the batch state
        if (state == 'starting') or (state == 'running'):
            # if state is 'starting' or 'running'
            # signal a new polling cycle
            self.log.info('Batch %s has not finished yet (%s)',
                          self.batch_id, state)
            return False
        elif state == 'success':
            # if state is 'success' exit
            return True
        else:
            # for all other states
            # raise an exception and
            # terminate the task
            raise AirflowException(
                'Batch ' + str(self.batch_id) + ' failed (' + state + ')')
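For reference, wiring this operator into a DAG might look roughly like this; the connection id, jar location and class name below are placeholders of my own, not values from the Livy setup described above:
from datetime import datetime

from airflow import DAG

dag = DAG('spark_batch', start_date=datetime(2020, 1, 1), schedule_interval=None)

# 'apache_livy' must be an HTTP connection pointing at the Livy server.
submit_job = LivyBatchOperator(
    task_id='submit_spark_job',
    name='my-batch',                        # placeholder batch name
    className='com.example.MySparkJob',     # placeholder main class
    file='hdfs:///jobs/my-spark-job.jar',   # placeholder jar location
    args=['--date', '{{ ds }}'],
    timeout=1800,
    http_conn_id='apache_livy',
    dag=dag,
)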
Hope this will help you a bit.