I have a script to test at-least-once consumption.
The producer:
import json
import random
import time

from confluent_kafka import Producer

import config

p = Producer({'bootstrap.servers': ','.join(config.KAFKA_HOST)})
total_count = 0
c = 0
try:
    for i in range(20000):
        num = random.randint(1, 1000000)
        total_count += num
        a = {'t': num, 'time': time.time()}
        p.produce('test-topic-vv', json.dumps(a))
        c += 1
        if c % 100 == 0:
            p.flush()
finally:
    p.flush()
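As an aside, a hedged sketch of how the producer could attach a per-message delivery callback so failed produces are not silent; on_delivery is my own helper name, while produce(callback=...) and poll() are standard confluent_kafka API:

def on_delivery(err, msg):
    # invoked from poll()/flush() for every produced message
    if err is not None:
        print('delivery failed: {}'.format(err))

p.produce('test-topic-vv', json.dumps(a), callback=on_delivery)
p.poll(0)  # serve pending delivery callbacks without blocking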
The consumer:
import json
import random
import sys

from confluent_kafka import Consumer, TopicPartition

import config

c = Consumer({
    'bootstrap.servers': ','.join(config.KAFKA_HOST),
    'group.id': 'test-topic-consumer-group',
    'auto.offset.reset': 'earliest',
    'enable.auto.offset.store': False,
    'enable.auto.commit': True,
})
topic = 'test-topic-vv'

def test_for_seek():
    try:
        pp = []
        pp.append(TopicPartition(topic, partition=8))
        c.assign(pp)
        while True:
            msgs = c.consume(num_messages=10, timeout=10)
            if not msgs:
                print('no data, waiting')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} partition {} offset {}'.format(
                    msg.value().decode('utf-8'), t1, o1))
            break
    finally:
        c.close()

def test_for_run():
    try:
        c.subscribe([topic])
        total_count = 0
        map_par = {}
        while True:
            msgs = c.consume(num_messages=10, timeout=5)
            if not msgs:
                print('no data, waiting')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            deald = []
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} partition {} offset {}'.format(
                    msg.value().decode('utf-8'), t1, o1))
                if random.randint(1, 100) == 9:
                    # simulate a processing failure; this message must be retried
                    print('deal failed, will retry msg offset {} partition {}'.format(
                        msg.offset(), msg.partition()))
                    break
                else:
                    total_count += json.loads(msg.value())['t']
                    # processing succeeded: offsets on each partition must be contiguous
                    if t1 in map_par:
                        if map_par[t1] + 1 != o1:
                            raise Exception(
                                'partition {} expected offset {} but got offset {}'.format(
                                    t1, map_par[t1] + 1, o1))
                    map_par[t1] = o1
                    c.store_offsets(msg)
                    deald.append(msg)
            group_partition = {}
            for msg in msgs:
                if msg in deald:
                    continue
                partition = msg.partition()
                offset = msg.offset()
                if partition in group_partition:
                    group_partition[partition] = min(group_partition[partition], offset)
                else:
                    group_partition[partition] = offset
            # seek each affected partition back to its first unprocessed offset
            for k, v in group_partition.items():
                c.seek(TopicPartition(topic, partition=k, offset=v))
                print('deal failed, seeking back to offset {} partition {}'.format(v, k))
    finally:
        c.close()

if sys.argv[1] == 'test_for_seek':
    test_for_seek()
else:
    test_for_run()
The topic test-topic-vv has 9 partitions.
First I ran the producer to add some messages to the topic, then consumed them, but I got an exception:
Screenshot: https://user-images.githubusercontent.com/12459874/194990350-8cd13128-f3fa-4a86-a93e-771af45f93f0.png
The latest message's offset on partition 8 should have been 7382, but the consumer got 7391.
I then ran test_for_seek to check the offset actually recorded for the consumer group; it was indeed 7382:
Screenshot: https://user-images.githubusercontent.com/12459874/194990593-9b8431d0-ce07-4122-800d-f9b3c129f5f3.png
I also checked the broker's record of the group offset:
Screenshot: https://user-images.githubusercontent.com/12459874/194990684-9d8ad773-a569-4cee-9d4c-0a898e8f8922.png
It was also 7382.
So what happens to the consumer when seek is used to manage offsets? I hope someone can help me work out this problem.
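For reference, a minimal sanity check I can add after each seek; seek_and_verify is a hypothetical helper of my own, and I am not certain position() reflects a seek immediately in every client version:

def seek_and_verify(consumer, tp):
    # seek, then read back the fetch position to confirm it actually moved
    consumer.seek(tp)
    pos = consumer.position([tp])[0]
    if pos.offset != tp.offset:
        print('position is {} after seeking to {}'.format(pos.offset, tp.offset))

# usage inside test_for_run, replacing the plain c.seek call:
# seek_and_verify(c, TopicPartition(topic, partition=k, offset=v))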
Checklist information:
confluent_kafka.version()==1.9.2
confluent_kafka.libversion()==1.9.2
Operating system: Ubuntu 16.04
Python 3.8
Kafka 2.11-1.1.1
Related
I have created an approximate time synchronizer to sync stereo camera images with GPS. The time synchronizer never enters the callback. I have already tried playing around with the slop and queue_size parameters.
import sys

import cv2
import message_filters
import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image, NavSatFix

class sorghum_gps:
    def __init__(self, arg1):
        self.bridge = CvBridge()
        self.image_sub0 = message_filters.Subscriber("/cam0/image_raw", Image)
        self.image_sub1 = message_filters.Subscriber("/cam1/image_raw", Image)
        self.gps_sub = message_filters.Subscriber("/fix", NavSatFix)
        # queue_size=5, slop=1 second
        self.ts = message_filters.ApproximateTimeSynchronizer(
            [self.image_sub0, self.image_sub1, self.gps_sub], 5, 1)
        self.ts.registerCallback(self.callback0)

    def callback0(self, data1, data2, data3):
        try:
            cv_image0 = self.bridge.imgmsg_to_cv2(data1, "bgr8")
            cv_image1 = self.bridge.imgmsg_to_cv2(data2, "bgr8")
            print('got_data')
        except CvBridgeError as e:
            print(e)

    def call1(self, data):
        print(data)

def main(args):
    print(args[1], 'RUNNING')
    rospy.init_node('node_gps', anonymous=True)
    gp = sorghum_gps(args)
    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main(sys.argv)
Here are the details of the bag file I am trying to sync:

types:   sensor_msgs/Image     [060021388200f6f0f447d0fcd9c64743]
         sensor_msgs/NavSatFix [2d3a8cd499b9b4a0249fb98fd05cfa48]
topics:  /fix             7748 msgs : sensor_msgs/NavSatFix
         /cam0/image_raw  1942 msgs : sensor_msgs/Image
         /cam1/image_raw  1942 msgs : sensor_msgs/Image
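One thing worth ruling out before tuning further: ApproximateTimeSynchronizer matches on header.stamp, so if any of the three publishers leaves the stamp at zero, the callback never fires. A small diagnostic sketch (my own addition, not from the question) that prints the stamps on each topic:

import rospy
from sensor_msgs.msg import Image, NavSatFix

def dump_stamp(topic):
    # print each message's header.stamp so zero or wildly mismatched stamps are visible
    def cb(msg):
        rospy.loginfo("%s stamp=%s", topic, msg.header.stamp.to_sec())
    return cb

rospy.init_node('stamp_check', anonymous=True)
rospy.Subscriber("/cam0/image_raw", Image, dump_stamp("/cam0/image_raw"))
rospy.Subscriber("/cam1/image_raw", Image, dump_stamp("/cam1/image_raw"))
rospy.Subscriber("/fix", NavSatFix, dump_stamp("/fix"))
rospy.spin()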
Kafka Client: 0.11.0.0-cp1
Kafka Broker:
During a Kafka broker rolling restart, our application lost some messages while sending to the broker. I believe a rolling restart should not cause any message loss. These are the producer settings we are using (asynchronous send(), without a callback or future):
val acksConfig: String = "all",
val retriesConfig: Int = Int.MAX_VALUE,
val retriesBackOffConfig: Int = 1000,
val batchSize: Int = 32768,
val lingerTime: Int = 1,
val maxBlockTime: Int = Int.MAX_VALUE,
val requestTimeOut: Int = 420000,
val bufferMemory: Int = 33_554_432,
val compressionType: String = "gzip",
val keySerializer: Class<StringSerializer> = StringSerializer::class.java,
val valueSerializer: Class<ByteArraySerializer> = ByteArraySerializer::class.java
I am seeing these exceptions in the logs:
2019-03-19 17:30:59,224 [org.apache.kafka.clients.producer.internals.Sender] [kafka-producer-network-thread | producer-1] (Sender.java:511) WARN org.apache.kafka.clients.producer.internals.Sender - Got error produce response with correlation id 1105790 on topic-partition catapult_on_entitlement_updates_prod-67, retrying (2147483643 attempts left). Error: NOT_LEADER_FOR_PARTITION
But the log says retry attempts are left, so I am curious why it didn't retry. Does anyone have any idea?
Two things to note:
What is the replication factor of the topic you are producing to, and what is the required min.insync.replicas?
What do you mean by "the producer lost some messages"? If the producer cannot successfully produce to min.insync.replicas brokers, it will throw an exception and fail (for synchronous production). It is up to the producer/client to retry in case of failure (synchronous or asynchronous production).
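To make the second point concrete: with an asynchronous send you only learn that retries were exhausted or a batch was dropped if you attach a callback. A minimal sketch with kafka-python (the question uses the Java client, whose send() accepts an equivalent Callback, but the idea is the same; the broker address and topic are placeholders):

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers=['broker1:9092'],  # placeholder address
    acks='all',
    retries=2147483647,
)

def on_send_error(exc):
    # without this errback, an async send that ultimately fails is silent
    print('delivery failed: {}'.format(exc))

future = producer.send('some-topic', b'payload')  # placeholder topic
future.add_errback(on_send_error)
producer.flush()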
I have been seeing many Kafka consumer rebalances even when the thread is consuming nothing. I would expect the consumer not to rebalance in this scenario.
Here is the sample code:
import argparse
import logging
import sys
import time

from kafka import KafkaConsumer
from kafka.consumer.subscription_state import ConsumerRebalanceListener
from kafka.coordinator.assignors.range import RangePartitionAssignor

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

def get_config(args):
    config = {
        'bootstrap_servers': args.host,
        'group_id': args.group,
        'key_deserializer': lambda msg: msg,
        'value_deserializer': lambda msg: msg,
        'partition_assignment_strategy': [RangePartitionAssignor],
        'max_poll_records': int(args.records),
        'auto_offset_reset': args.offset,
        # 'max_poll_interval_ms': 300000,
        # 'connections_max_idle_ms': 8 * 60 * 1000
    }
    return config

def start_consumer(args):
    config = get_config(args)
    consumer = KafkaConsumer(**config)
    consumer.subscribe([args.topic], listener=RepartitionListener())
    for record in consumer:
        print(record.offset, record.partition)
        time.sleep(int(args.delay) / 1000.0)

class RepartitionListener(ConsumerRebalanceListener):
    def __init__(self):
        pass

    def on_partitions_revoked(self, revoked):
        print("partition revoked")
        for tp in revoked:
            try:
                print("[{}] revoked topic = {} partition = {}".format(
                    time.strftime("%c"), tp.topic, tp.partition))
                partition_key = "{}_{}".format(tp.topic, str(tp.partition))
            except Exception as e:
                print("Got exception partition_key = {} {}".format(tp, e))

    def on_partitions_assigned(self, assigned):
        pass

def main():
    parser = argparse.ArgumentParser(
        description='Tool to test consumer group with delay')
    named_args = parser.add_argument_group('named arguments')
    named_args.add_argument('-g', '--group', help='group id for the consumer',
                            required=True)
    named_args.add_argument('-r', '--records', help='num records to consume',
                            required=True)
    named_args.add_argument('-k', '--topic', help='kafka topic', required=True)
    named_args.add_argument('-d', '--delay', help='add process delay in ms',
                            required=True)
    named_args.add_argument('-s', '--host', help='Kafka host format host:port',
                            required=False)
    parser.add_argument('-o', '--offset',
                        default='latest',
                        help='offset to read from earliest/latest')
    args = parser.parse_args()
    print(args)
    start_consumer(args)

if __name__ == "__main__":
    main()
How can I avoid triggering these rebalances? From the logs I can see the heartbeat failing, but I expected heartbeats to continue even if no messages arrive for a period longer than session.timeout.ms.
2019-02-27 20:39:43,281 - kafka.coordinator - WARNING - Heartbeat session expired, marking coordinator dead
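If the processing delay (the --delay sleep) stalls the poll loop for longer than the group coordinator tolerates, kafka-python drops out of the group and triggers exactly this kind of rebalance. A sketch of settings to experiment with; the values are guesses rather than known-good numbers, and max_poll_interval_ms requires kafka-python >= 1.4:

from kafka import KafkaConsumer

config = {
    'bootstrap_servers': 'host:9092',   # placeholder
    'group_id': 'test-group',           # placeholder
    'session_timeout_ms': 30000,        # window for heartbeat liveness
    'heartbeat_interval_ms': 10000,     # keep well below session_timeout_ms
    'max_poll_interval_ms': 600000,     # max allowed gap between poll() calls
}
consumer = KafkaConsumer(**config)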
I want to publish to a Kafka topic but am unable to do so; the program halts.
I am getting this error:
KafkaTimeoutError: Failed to update metadata after 60.0 secs.
# entity_dict and producer are defined elsewhere in the script
def saveResults(response):
    entities_tweet = response["entities"]
    for entity in entities_tweet:
        try:
            for i in entity_dict:
                for j in entity_dict[i]:
                    if entity["text"] in j:
                        entity["tweet"] = response["tweet"]
                        entity["tweetId"] = response["tweetId"]
                        entity["timeStamp"] = response["timeStamp"]
                        # entity["userProfile"] = response["userProfile"]
                        # note: bytes(entity) on a dict only works on Python 2;
                        # on Python 3 use json.dumps(entity).encode('utf-8') instead
                        future = producer.send('argentina-iceland-june-16-watson', bytes(entity))
                        print("Published.")
                    else:
                        print("All ignored.")
                        future = producer.send('argentina-iceland-june-16-watson', bytes(entity))
                        print("Published")
        except Exception as e:
            print(e)
        finally:
            producer.flush()
However, this is working:
from kafka import KafkaProducer
from kafka.errors import KafkaError
producer = KafkaProducer(bootstrap_servers=['broker1:1234'])
# Asynchronous by default
future = producer.send('my-topic', b'raw_bytes')
It looks like you're using an incorrect bootstrap server; it should be broker1:9092 (the default Kafka listener port) instead of broker1:1234...
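One way to see the failure immediately, instead of halting for the full metadata timeout, is to block on the returned future with a short timeout. A sketch; the broker address and topic are placeholders:

from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(
    bootstrap_servers=['broker1:9092'],  # placeholder; must be a reachable broker
    max_block_ms=10000,                  # fail metadata updates after 10 s, not 60 s
)

future = producer.send('my-topic', b'raw_bytes')
try:
    metadata = future.get(timeout=10)    # raises if the send was not acknowledged
    print(metadata.topic, metadata.partition, metadata.offset)
except KafkaError as e:
    print('send failed:', e)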
This is the code I am currently using, but I would like it to be able to send serialized data using pickle. I have been tinkering with it for a few hours now with no luck. Perhaps someone with better networking experience can offer quick insight, so I don't have to rewrite everything; I think it's confusing enough as it is.
import select

def send(sdef, data, slen):
    sdef.setblocking(0)
    # send the payload length as a zero-padded, slen-byte ASCII header
    sdef.sendall(str(len(str(data))).encode("utf-8").zfill(slen))
    sdef.sendall(str(data).encode("utf-8"))

def receive(sdef, slen):
    sdef.setblocking(0)
    ready = select.select([sdef], [], [], 60)
    if ready[0]:
        data = int(sdef.recv(slen))  # receive length
        # print("To receive: " + str(data))
    else:
        raise RuntimeError("Socket timeout")
    chunks = []
    bytes_recd = 0
    while bytes_recd < data:
        ready = select.select([sdef], [], [], 60)
        if ready[0]:
            chunk = sdef.recv(min(data - bytes_recd, 2048))
            if chunk == b'':
                raise RuntimeError("Socket connection broken")
            chunks.append(chunk)
            bytes_recd = bytes_recd + len(chunk)
        else:
            raise RuntimeError("Socket timeout")
    segments = b''.join(chunks).decode("utf-8")
    # print("Received segments: " + str(segments))
    return segments
Simplest implementation:
import pickle
import select

def send(sdef, data, slen):
    sdef.setblocking(0)
    # str(pickle.dumps(data)) sends the repr of the pickled bytes, e.g. "b'\x80\x03...'"
    sdef.sendall(str(len(str(pickle.dumps(data)))).encode("utf-8").zfill(slen))
    sdef.sendall(str(pickle.dumps(data)).encode("utf-8"))

def receive(sdef, slen):
    sdef.setblocking(0)
    ready = select.select([sdef], [], [], 60)
    if ready[0]:
        data = int(sdef.recv(slen))  # receive length
        # print("To receive: " + str(data))
    else:
        raise RuntimeError("Socket timeout")
    chunks = []
    bytes_recd = 0
    while bytes_recd < data:
        ready = select.select([sdef], [], [], 60)
        if ready[0]:
            chunk = sdef.recv(min(data - bytes_recd, 2048))
            if chunk == b'':
                raise RuntimeError("Socket connection broken")
            chunks.append(chunk)
            bytes_recd = bytes_recd + len(chunk)
        else:
            raise RuntimeError("Socket timeout")
    segments = b''.join(chunks).decode("utf-8")
    # print("Received segments: " + str(segments))
    return segments
and I cannot deserialize "segments" anymore, because it is a string.
This is the way to get it back: since the sender transmits the repr of the pickled bytes, ast.literal_eval turns that string back into a bytes object that pickle can load:
pickle.loads(ast.literal_eval(segments))
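As an alternative sketch (my own suggestion, not from the thread): sending the pickled bytes directly, with a fixed 4-byte length prefix, avoids the str()/repr round trip entirely, so the receiver can call pickle.loads on the raw payload. Only unpickle data from trusted peers, since pickle can execute arbitrary code:

import pickle
import struct

def send_obj(sock, obj):
    payload = pickle.dumps(obj)
    sock.sendall(struct.pack(">I", len(payload)))  # 4-byte big-endian length prefix
    sock.sendall(payload)

def recv_exact(sock, n):
    # read exactly n bytes or raise
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(min(n - len(buf), 2048))
        if chunk == b'':
            raise RuntimeError("Socket connection broken")
        buf += chunk
    return buf

def recv_obj(sock):
    (length,) = struct.unpack(">I", recv_exact(sock, 4))
    return pickle.loads(recv_exact(sock, length))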