I want to publish in Kafka topic
I am unable to do so, the program halts.
I am getting this error:
KafkaTimeoutError: Failed to update metadata after 60.0 secs.
def saveResults(response):
    entities_tweet = response["entities"]
    for entity in entities_tweet:
        try:
            for i in entity_dict:
                for j in entity_dict[i]:
                    if entity["text"] in j:
                        entity["tweet"] = response["tweet"]
                        entity["tweetId"] = response["tweetId"]
                        entity["timeStamp"] = response["timeStamp"]
                        # entity["userProfile"] = response["userProfile"]
                        future = producer.send('argentina-iceland-june-16-watson', bytes(entity))
                        print("Published.")
                    else:
                        print("All ignored.")
                        future = producer.send('argentina-iceland-june-16-watson', bytes(entity))
                        print("Published")
        except Exception as e:
            print(e)
        finally:
            producer.flush()
However, this is working:
from kafka import KafkaProducer
from kafka.errors import KafkaError
producer = KafkaProducer(bootstrap_servers=['broker1:1234'])
# Asynchronous by default
future = producer.send('my-topic', b'raw_bytes')
It looks like you're using an incorrect bootstrap server; it should be broker1:9092 instead of broker1:1234...
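A minimal sketch of the corrected setup with kafka-python (assuming the broker listens on the default port 9092; adjust the host/port to your cluster), using get() so the metadata/timeout error surfaces immediately:

from kafka import KafkaProducer
from kafka.errors import KafkaTimeoutError

# Point the producer at the port the broker actually listens on (9092 by default).
producer = KafkaProducer(bootstrap_servers=['broker1:9092'])

try:
    # send() returns a future; get() blocks and raises KafkaTimeoutError
    # if the topic metadata cannot be fetched in time.
    producer.send('argentina-iceland-june-16-watson', b'test-message').get(timeout=10)
    print("Published.")
except KafkaTimeoutError as e:
    print(e)
finally:
    producer.flush()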
I have a script to test at-least-once consumption.
The producer:
import json
import random
import time
from confluent_kafka import Producer
import config

p = Producer({'bootstrap.servers': ','.join(config.KAFKA_HOST)})

total_count = 0
c = 0
try:
    for i in range(20000):
        num = random.randint(1, 1000000)
        total_count += num
        a = {'t': num, 'time': time.time()}
        p.produce('test-topic-vv', json.dumps(a))
        c += 1
        if c % 100 == 0:
            p.flush()
finally:
    p.flush()
The consumer:
import json
import random
import sys
from confluent_kafka import Consumer, TopicPartition
import config

c = Consumer({
    'bootstrap.servers': ','.join(config.KAFKA_HOST),
    'group.id': 'test-topic-consumer-group',
    'auto.offset.reset': 'earliest',
    'enable.auto.offset.store': False,
    'enable.auto.commit': True,
})

topic = 'test-topic-vv'

def test_for_seek():
    try:
        pp = []
        pp.append(TopicPartition(topic, partition=8))
        c.assign(pp)
        while True:
            msgs = c.consume(num_messages=10, timeout=10)
            if not msgs:
                print('no data and wait')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} par {} offset {}'.format(msg.value().decode('utf-8'), t1, o1))
            break
    finally:
        c.close()

def test_for_run():
    try:
        c.subscribe([topic])
        total_count = 0
        map_par = {}
        while True:
            msgs = c.consume(num_messages=10, timeout=5)
            if not msgs:
                print('no data and wait')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            deald = []
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} par {} offset {}'.format(msg.value().decode('utf-8'), t1, o1))
                if random.randint(1, 100) == 9:
                    # test for deal failed then retry again
                    print('deal failed will retry msg offset {} partition {}'.format(msg.offset(), msg.partition()))
                    break
                else:
                    total_count += json.loads(msg.value())['t']
                    # test for deal success
                    if t1 in map_par:
                        if map_par[t1] + 1 != o1:
                            raise Exception('deal partition {} except last offset {} current offset {}'.format(t1, map_par[t1], o1))
                    map_par[t1] = o1
                    c.store_offsets(msg)
                    deald.append(msg)
            group_partition = {}
            for msg in msgs:
                if msg in deald:
                    continue
                partition = msg.partition()
                offset = msg.offset()
                if partition in group_partition:
                    group_partition[partition] = min(group_partition[partition], offset)
                else:
                    group_partition[partition] = offset
            # seek to deal failed partition offset
            for k, v in group_partition.items():
                c.seek(TopicPartition(topic, partition=k, offset=v))
                print('deal failed will set msg offset {} partition {}'.format(v, k))
    finally:
        c.close()

if sys.argv[1] == 'test_for_seek':
    test_for_seek()
else:
    test_for_run()
The topic test-topic-vv has 9 partitions.
First I ran the producer to add some messages to the topic, then consumed them, but I got an exception:
screenshot https://user-images.githubusercontent.com/12459874/194990350-8cd13128-f3fa-4a86-a93e-771af45f93f0.png
The latest message's offset for partition 8 should be 7382, but I got 7391.
I then ran test_for_seek to check the consumer group's actual recorded offset; it was indeed 7382.
screenshot https://user-images.githubusercontent.com/12459874/194990593-9b8431d0-ce07-4122-800d-f9b3c129f5f3.png
I also checked the broker's group offset record:
screenshot https://user-images.githubusercontent.com/12459874/194990684-9d8ad773-a569-4cee-9d4c-0a898e8f8922.png
It was also 7382.
So what happens to the consumer when seek is used to manage offsets? I hope someone can help me figure out this problem.
Environment information:
confluent_kafka.version() == 1.9.2
confluent_kafka.libversion() == 1.9.2
Operating system: Ubuntu 16.04
Python 3.8
Kafka 2.11-1.1.1
Kafka Client : 0.11.0.0-cp1
Kafka Broker :
During a Kafka broker rolling restart, our application lost some messages while sending to the broker. I believe there should not be any message loss during a rolling restart. These are the producer settings we are using (the producer uses asynchronous send() without a callback/future):
val acksConfig: String = "all",
val retriesConfig: Int = Int.MAX_VALUE,
val retriesBackOffConfig: Int = 1000,
val batchSize: Int = 32768,
val lingerTime: Int = 1,
val maxBlockTime: Int = Int.MAX_VALUE,
val requestTimeOut: Int = 420000,
val bufferMemory: Int = 33_554_432,
val compressionType: String = "gzip",
val keySerializer: Class<StringSerializer> = StringSerializer::class.java,
val valueSerializer: Class<ByteArraySerializer> = ByteArraySerializer::class.java
I am seeing these exceptions in the logs
2019-03-19 17:30:59,224 [org.apache.kafka.clients.producer.internals.Sender] [kafka-producer-network-thread | producer-1] (Sender.java:511) WARN org.apache.kafka.clients.producer.internals.Sender - Got error produce response with correlation id 1105790 on topic-partition catapult_on_entitlement_updates_prod-67, retrying (2147483643 attempts left). Error: NOT_LEADER_FOR_PARTITION
But the log says there are retry attempts left, so I am curious why it didn't retry. Does anyone have any idea?
Two things to note:
What is the replication factor of the topic you are producing to, and what is its required min.insync.replicas?
What do you mean by "producer lost some messages"? If the producer cannot successfully produce to min.insync.replicas brokers, it will throw an exception and fail (for synchronous production). It is up to the producer/client to retry in case of failure (whether production is synchronous or asynchronous).
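For illustration, here is a minimal kafka-python sketch (the original settings are Kotlin/Java; the broker address and topic name here are placeholders) showing how a callback or a blocking get() makes send failures visible so the client can retry:

from kafka import KafkaProducer
from kafka.errors import KafkaError

# acks='all' makes the broker wait for min.insync.replicas acknowledgements.
producer = KafkaProducer(bootstrap_servers=['broker1:9092'],
                         acks='all',
                         retries=2147483647)

def on_send_error(exc):
    # Runs when a send ultimately fails after the client's internal retries;
    # this is where the application must log/buffer the record and retry itself.
    print('send failed:', exc)

future = producer.send('my-topic', b'payload')
future.add_errback(on_send_error)

try:
    # Alternatively, block to surface the failure synchronously.
    metadata = future.get(timeout=30)
except KafkaError as exc:
    print('send failed synchronously:', exc)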
I have been seeing many Kafka consumer rebalances even when the thread is consuming nothing. I would expect the consumer not to rebalance in this scenario.
Here is the sample code.
import argparse
from kafka.coordinator.assignors.range import RangePartitionAssignor
from kafka import KafkaConsumer
import time
import sys
import logging
from kafka.consumer.subscription_state import ConsumerRebalanceListener

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


def get_config(args):
    config = {
        'bootstrap_servers': args.host,
        'group_id': args.group,
        'key_deserializer': lambda msg: msg,
        'value_deserializer': lambda msg: msg,
        'partition_assignment_strategy': [RangePartitionAssignor],
        'max_poll_records': int(args.records),
        'auto_offset_reset': args.offset,
        # 'max_poll_interval_ms': 300000,
        # 'connections_max_idle_ms': 8 * 60 * 1000
    }
    return config


def start_consumer(args):
    config = get_config(args)
    consumer = KafkaConsumer(**config)
    consumer.subscribe([args.topic], listener=RepartitionListener())
    for record in consumer:
        print(record.offset, record.partition)
        time.sleep(int(args.delay) / 1000.0)


class RepartitionListener(ConsumerRebalanceListener):
    def __init__(self):
        pass

    def on_partitions_revoked(self, revoked):
        print("partition revoked")
        for tp in revoked:
            try:
                print("[{}] revoked topic = {} partition = {}".format(
                    time.strftime("%c"), tp.topic, tp.partition))
                partition_key = "{}_{}".format(tp.topic, str(tp.partition))
            except Exception as e:
                print("Got exception partition_key = {} {}".format(tp, e))

    def on_partitions_assigned(self, assigned):
        pass


def main():
    parser = argparse.ArgumentParser(
        description='Tool to test consumer group with delay')
    named_args = parser.add_argument_group('named arguments')
    named_args.add_argument('-g', '--group', help='group id for the consumer',
                            required=True)
    named_args.add_argument('-r', '--records', help='num records to consume',
                            required=True)
    named_args.add_argument('-k', '--topic', help='kafka topic', required=True)
    named_args.add_argument('-d', '--delay', help='add process delay in ms', required=True)
    named_args.add_argument('-s', '--host', help='Kafka host format host:port', required=False)
    parser.add_argument('-o', '--offset',
                        default='latest',
                        help='offset to read from earliest/latest')
    args = parser.parse_args()
    print(args)
    start_consumer(args)


if __name__ == "__main__":
    main()
How can I avoid triggering the rebalance? From the logs I can see the heartbeat is failing, but I expected heartbeats to continue even when there are no messages for a period longer than session.timeout.ms.
2019-02-27 20:39:43,281 - kafka.coordinator - WARNING - Heartbeat session expired, marking coordinator dead
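For context, the timeouts that govern this behaviour can be set explicitly when creating the consumer; a minimal kafka-python sketch (the topic name, broker address, and values here are illustrative, not a recommendation):

from kafka import KafkaConsumer

# Heartbeats are sent from a background thread as long as poll() is called
# often enough; session_timeout_ms and max_poll_interval_ms bound how long
# the group coordinator waits before declaring the member dead and rebalancing.
consumer = KafkaConsumer(
    'my-topic',                      # placeholder topic name
    bootstrap_servers='host:9092',   # placeholder broker address
    group_id='test-group',
    session_timeout_ms=10000,        # heartbeat-based liveness window
    heartbeat_interval_ms=3000,      # how often heartbeats are sent
    max_poll_interval_ms=300000,     # max time allowed between poll() calls
)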
SimpleConsumer has been deprecated in Kafka, with org.apache.kafka.clients.consumer.KafkaConsumer being the replacement. However, it doesn't have a send(...) function. How can I rewrite the code below using the new KafkaConsumer?
import scala.concurrent.duration._
import kafka.api.TopicMetadataRequest
import kafka.consumer.SimpleConsumer
....
val consumer = new SimpleConsumer(
  host = "127.0.0.1",
  port = 9092,
  soTimeout = 2.seconds.toMillis.toInt,
  bufferSize = 1024,
  clientId = "health-check")

// this will fail if Kafka is unavailable
consumer.send(new TopicMetadataRequest(Nil, 1))
You can get topic metadata with .partitionsFor and .listTopics
There is no direct replacement for that method; it depends on what you want to do.
If you need info on all partitions, there is a method for that in the new API: consumer.partitionsFor(topic).
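For example, a health check along the same lines using kafka-python (the broker address and topic name are placeholders; the Java/Scala KafkaConsumer exposes the equivalent partitionsFor and listTopics methods):

from kafka import KafkaConsumer
from kafka.errors import KafkaError

consumer = None
try:
    # Connecting and requesting metadata fails if Kafka is unavailable.
    consumer = KafkaConsumer(bootstrap_servers='127.0.0.1:9092',
                             client_id='health-check')
    print(consumer.topics())                          # all topic names
    print(consumer.partitions_for_topic('my-topic'))  # partition ids, or None if the topic is missing
except KafkaError as e:
    print('Kafka unavailable:', e)
finally:
    if consumer is not None:
        consumer.close()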
I've made a local server using Flask and MongoDB which works great on Windows, but when I moved my code to the Raspberry Pi I got an error, and I couldn't figure out why it occurs.
The code I'm using:
1) The Flask server:
from flask import Flask
from flask import jsonify
from flask import request
import pymongo
import time
import datetime
import json

app = Flask(__name__)

client = pymongo.MongoClient("localhost", 27017)
db = client['mqtt-db']
obs_collection = db['mqtt-collection']


@app.route("/obs")
def obs():
    data_str = request.args.get("data")
    print(data_str)
    data = json.loads(data_str)
    print(data)
    data["date"] = datetime.datetime.now()
    obs_collection.save(data)
    return "success"


@app.route("/get_obs")
def get_obs():
    res = []
    for row in obs_collection.find():
        del row['_id']
        res.append(row)
    return jsonify(res)


@app.route("/delete_all")
def delete_all():
    res = obs_collection.delete_many({})
    return jsonify({"deleted": res.deleted_count})


if __name__ == "__main__":
    app.run(host="0.0.0.0", debug=True)
2) The script for inserting messages into the DB, using the MQTT protocol:
import paho.mqtt.client as mqtt
import pymongo
import json
import datetime

topic = "sensor"
host = "10.0.0.6"

client = pymongo.MongoClient("localhost", 27017)
db = client['mqtt-db']
mqtt_collection = db['mqtt-collection']


# The callback for when the client receives a CONNACK response from the server.
def on_connect(client, userdata, flags, rc):
    print("Connected with result code " + str(rc))
    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    client.subscribe(topic)


# The callback for when a PUBLISH message is received from the server.
def on_message(client, userdata, msg):
    data_str = str(msg.payload)
    data = json.loads(data_str)
    print(data_str)
    print(data)
    data["date"] = datetime.datetime.now()
    mqtt_collection.save(data)
    print(msg.topic + " " + str(msg.payload))


client = mqtt.Client()
client.on_connect = on_connect
client.on_message = on_message
client.connect(host, 1883, 60)

# Blocking call that processes network traffic, dispatches callbacks and
# handles reconnecting.
# Other loop*() functions are available that give a threaded interface and a
# manual interface.
client.loop_forever()
The error occurs when I try to retrieve data from the server using the "get_obs" function.
The error is: "ValueError: dictionary update sequence element #0 has length 4; 2 is required".
I appreciate your help.
As @davidism suggested, the solution was to update to the latest version of Flask.
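For reference, a minimal sketch of why the upgrade helps (assuming a recent Flask version, where jsonify accepts a top-level list; older releases built a dict from the arguments, which can produce exactly this ValueError; the route name below is hypothetical):

from flask import Flask, jsonify

app = Flask(__name__)

@app.route("/get_obs_example")   # illustrative route, not from the original app
def get_obs_example():
    # On recent Flask versions a list can be passed to jsonify directly.
    res = [{"temp": 21.5}, {"temp": 22.0}]
    return jsonify(res)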