Kafka consumer poll with offset 0 not returning messages - apache-kafka

I am using spring-kafka to poll messages. When I use the annotation for the consumer and set the offset to 0, it sees all messages from the earliest. But when I try to use an injected ConsumerFactory to create a consumer on my own, poll returns only a few messages or none at all. Is there some other config I need in order to be able to pull messages? The poll timeout is already set to 10 seconds.
@Component
public class GenericConsumer {

    private static final Logger logger = LoggerFactory.getLogger(GenericConsumer.class);

    @Autowired
    ConsumerFactory<String, Record> consumerFactory;

    public ConsumerRecords<String, Record> poll(String topic, String group) {
        logger.info("---------- Polling kafka records from topic " + topic + " group " + group);
        Consumer<String, Record> consumer = consumerFactory.createConsumer(group, "");
        consumer.subscribe(Arrays.asList(topic));
        // need to make a dummy poll before we can seek
        consumer.poll(1000);
        consumer.seekToBeginning(consumer.assignment());
        ConsumerRecords<String, Record> records;
        records = consumer.poll(10000);
        logger.info("------------ Total " + records.count() + " records polled");
        consumer.close();
        return records;
    }
}

It works fine for me; this was with Boot 2.0.5 and Spring Kafka 2.1.10 ...
@SpringBootApplication
public class So52284259Application implements ConsumerAwareRebalanceListener {

    private static final Logger logger = LoggerFactory.getLogger(So52284259Application.class);

    public static void main(String[] args) {
        SpringApplication.run(So52284259Application.class, args);
    }

    @Bean
    public ApplicationRunner runner(KafkaTemplate<String, String> template, GenericConsumer consumer) {
        return args -> {
//            for (int i = 0; i < 1000; i++) { // load up the topic on first run
//                template.send("so52284259", "foo" + i);
//            }
            consumer.poll("so52284259", "generic");
        };
    }

    @KafkaListener(id = "listener", topics = "so52284259")
    public void listen(String in) {
        if ("foo999".equals(in)) {
            logger.info("@KafkaListener: " + in);
        }
    }

    @Override
    public void onPartitionsAssigned(Consumer<?, ?> consumer, Collection<TopicPartition> partitions) {
        consumer.seekToBeginning(partitions);
    }

    @Bean
    public NewTopic topic() {
        return new NewTopic("so52284259", 1, (short) 1);
    }
}
@Component
class GenericConsumer {

    private static final Logger logger = LoggerFactory.getLogger(GenericConsumer.class);

    @Autowired
    ConsumerFactory<String, String> consumerFactory;

    public void poll(String topic, String group) {
        logger.info("---------- Polling kafka records from topic " + topic + " group " + group);
        Consumer<String, String> consumer = consumerFactory.createConsumer(group, "");
        consumer.subscribe(Arrays.asList(topic));
        // need to make a dummy poll before we can seek
        consumer.poll(1000);
        consumer.seekToBeginning(consumer.assignment());
        ConsumerRecords<String, String> records;
        boolean done = false;
        while (!done) {
            records = consumer.poll(10000);
            logger.info("------------ Total " + records.count() + " records polled");
            Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
            while (iterator.hasNext()) {
                String value = iterator.next().value();
                if ("foo999".equals(value)) {
                    logger.info("Consumer: " + value);
                    done = true;
                }
            }
        }
        consumer.close();
    }
}
and
2018-09-12 09:35:25.929 INFO 61390 --- [ main] com.example.GenericConsumer : ------------ Total 500 records polled
2018-09-12 09:35:25.931 INFO 61390 --- [ main] com.example.GenericConsumer : ------------ Total 500 records polled
2018-09-12 09:35:25.932 INFO 61390 --- [ main] com.example.GenericConsumer : Consumer: foo999
2018-09-12 09:35:25.942 INFO 61390 --- [ listener-0-C-1] com.example.So52284259Application : @KafkaListener: foo999
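Note the key difference from the code in the question: a single poll() returns at most max.poll.records records (500 by default, which matches the "Total 500 records polled" lines above), so one poll after the seek will not drain a larger topic; the working version keeps polling in a loop until it finds what it needs. If a single poll really must return more, the cap can be raised when building the factory. A minimal sketch, assuming Boot's KafkaProperties is available as in the answer; the bean and the value 2000 are only illustrative:

@Bean
public ConsumerFactory<String, String> consumerFactory(KafkaProperties kafkaProperties) {
    // start from Boot's consumer settings and raise the per-poll cap
    Map<String, Object> props = new HashMap<>(kafkaProperties.buildConsumerProperties());
    props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 2000);          // default is 500
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    return new DefaultKafkaConsumerFactory<>(props);
}

Even with a larger cap, the polling loop above remains the reliable pattern, since one poll() is still bounded by the fetch sizes and the poll timeout.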

Related

Creating a new consumer when consumer stops due to an error in Reactor Kafka

I am working on an application where I have multiple consumers, one for each topic partition, so there is concurrency in reading from the topic. I followed this link to ensure that a consumer gets created again if the existing consumer stops; .repeat() will create the new consumer. I have been trying to test this scenario.
Below is my code along with the test:
@Bean
public ReceiverOptions<String, String> kafkaReceiverOptions(String topic, KafkaProperties kafkaProperties) {
    ReceiverOptions<String, String> basicReceiverOptions = ReceiverOptions.create(kafkaProperties.buildConsumerProperties());
    return basicReceiverOptions.subscription(Collections.singletonList(topic))
            .addAssignListener(receiverPartitions -> log.debug("onPartitionAssigned {}", receiverPartitions))
            .addRevokeListener(receiverPartitions -> log.debug("onPartitionsRevoked {}", receiverPartitions));
}

@Bean
public ReactiveKafkaConsumerTemplate<String, String> kafkaConsumerTemplate(ReceiverOptions<String, String> kafkaReceiverOptions) {
    return new ReactiveKafkaConsumerTemplate<String, String>(kafkaReceiverOptions);
}

@Bean
public ReactiveKafkaProducerTemplate<String, List<Object>> kafkaProducerTemplate(
        KafkaProperties properties) {
    Map<String, Object> props = properties.buildProducerProperties();
    return new ReactiveKafkaProducerTemplate<String, List<Object>>(SenderOptions.create(props));
}

public void run(String... args) {
    for (int i = 0; i < topicPartitionsCount; i++) {
        readWrite(destinationTopic).subscribe();
    }
}
}

public Flux<String> readWrite(String destTopic) {
    AtomicBoolean repeatConsumer = new AtomicBoolean(false);
    return kafkaConsumerTemplate
            .receiveAutoAck()
            .doOnNext(consumerRecord -> log.debug("received key={}, value={} from topic={}, offset={}",
                    consumerRecord.key(),
                    consumerRecord.value(),
                    consumerRecord.topic(),
                    consumerRecord.offset())
            )
            //.doOnNext(consumerRecord -> log.info("Record received from partition {} in thread {}", consumerRecord.partition(), Thread.currentThread().getName()))
            .doOnNext(s -> sendToKafka(s, destinationTopic))
            .map(ConsumerRecord::value)
            .doOnNext(record -> log.debug("successfully consumed {}={}", Metric[].class.getSimpleName(), record))
            .doOnError(exception -> log.debug("Error occurred while processing the message, attempting retry. Error message: {}", exception.getMessage()))
            .retryWhen(Retry.backoff(Integer.parseInt(retryAttempts), Duration.ofSeconds(Integer.parseInt(retryAttemptsDelay))).transientErrors(true))
            .onErrorContinue((exception, errorConsumerRecord) -> {
                ReceiverRecordException recordException = (ReceiverRecordException) exception;
                log.debug("Retries exhausted for : {}", recordException);
                recordException.getRecord().receiverOffset().acknowledge();
                repeatConsumer.set(true);
            })
            .repeat(repeatConsumer::get); // will create a new consumer if the existing consumer stops
}

public class ReceiverRecordException extends RuntimeException {

    private final ReceiverRecord record;

    ReceiverRecordException(ReceiverRecord record, Throwable t) {
        super(t);
        this.record = record;
    }

    public ReceiverRecord getRecord() {
        return this.record;
    }
}
Test:
@Test
public void readWriteCreatesNewConsumerWhenCurrentConsumerStops() {
    AtomicInteger recordNumber = new AtomicInteger(0);
    Mockito
            .when(reactiveKafkaConsumerTemplate.receiveAutoAck())
            .thenReturn(
                    Flux.create(consumerRecordFluxSink -> {
                        if (recordNumber.getAndIncrement() < 5) {
                            consumerRecordFluxSink.error(new RuntimeException("Kafka down"));
                        } else {
                            consumerRecordFluxSink.next(createConsumerRecord(validMessage));
                            consumerRecordFluxSink.complete();
                        }
                    })
            );
    Flux<String> actual = service.readWrite();
    StepVerifier.create(actual)
            .verifyComplete();
}
When I run the test, I get the record retry exception - onError(reactor.core.Exceptions$RetryExhaustedException: Retries exhausted: 3/3 in a row (3 total)))
My understanding was that onErrorContinue would catch the exception and then continue with the next records, but it looks like it is throwing an exception instead.
Since it is throwing an exception, how does repeat() work?
I would really appreciate it if someone could help me understand how to test this scenario.
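On the "how does repeat() work" part: Flux.repeat(BooleanSupplier) resubscribes to the source only after it completes normally, and only if the supplier returns true at that moment; it never fires on an error signal. A minimal, self-contained sketch of that behaviour, unrelated to Kafka (all names are illustrative):

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import reactor.core.publisher.Flux;

public class RepeatDemo {

    public static void main(String[] args) {
        AtomicBoolean repeatAgain = new AtomicBoolean(true);
        AtomicInteger subscriptions = new AtomicInteger();

        Flux.defer(() -> {
                    // defer() runs this supplier again on every resubscription
                    int n = subscriptions.incrementAndGet();
                    if (n >= 3) {
                        repeatAgain.set(false); // stop after the third subscription
                    }
                    return Flux.just("subscription-" + n);
                })
                // evaluated only after the source COMPLETES; an error bypasses it
                .repeat(repeatAgain::get)
                .subscribe(System.out::println);  // prints subscription-1 .. subscription-3
    }
}

So, in general, for .repeat(repeatConsumer::get) to resubscribe, the upstream has to reach onComplete with the flag already set to true; an error that propagates terminates the sequence before repeat is consulted.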

How do I configure Kafka to not consume from where it left off?

I have a Kafka consumer in Golang. I don't want to consume from where I left off last time, but only the current messages. How can I do it?
You can set enable.auto.commit to false and auto.offset.reset to latest for your consumer group id. This means Kafka will not be automatically committing your offsets.
With auto commit disabled, your consumer group's progress will not be saved (unless you commit manually). So whenever the consumer is restarted, for whatever reason, it does not find its saved progress and resets to the latest offset.
Set a new group.id for your consumer.
Then use auto.offset.reset to define the behavior of this new consumer group; in your case: latest.
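The question is about Go, but the relevant settings are plain Kafka client configs; a minimal sketch in Java, to match the rest of this thread (the bootstrap server and the generated group id are only illustrative):

import java.util.Properties;
import java.util.UUID;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class LatestOnlyConsumerConfig {

    public static KafkaConsumer<String, String> buildConsumer() {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // a fresh group id means there are no previously committed offsets to resume from
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "fresh-group-" + UUID.randomUUID());
        // with no committed offset, start from the latest records only
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        // do not save progress, so every restart again falls back to "latest"
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        return new KafkaConsumer<>(props);
    }
}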
The Apache Kafka consumer API provides a method called kafkaConsumer.seekToEnd() which can be used to ignore the existing messages and consume only messages published after the consumer has started, without changing the current group ID of the consumer.
Below is the implementation of the same. The program takes 3 arguments: topic name, group ID and starting offset (0 to start from the beginning, -1 to receive only messages published after the consumer has started, -2 to leave the committed offset alone, and any other value to consume from that specific offset).
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;

import java.util.*;

public class Consumer {

    private static Scanner in;

    public static void main(String[] argv) throws Exception {
        if (argv.length != 3) {
            System.err.printf("Usage: %s <topicName> <groupId> <startingOffset>\n",
                    Consumer.class.getSimpleName());
            System.exit(-1);
        }
        in = new Scanner(System.in);
        String topicName = argv[0];
        String groupId = argv[1];
        final long startingOffset = Long.parseLong(argv[2]);
        ConsumerThread consumerThread = new ConsumerThread(topicName, groupId, startingOffset);
        consumerThread.start();
        String line = "";
        while (!line.equals("exit")) {
            line = in.next();
        }
        consumerThread.getKafkaConsumer().wakeup();
        System.out.println("Stopping consumer .....");
        consumerThread.join();
    }

    private static class ConsumerThread extends Thread {

        private String topicName;
        private String groupId;
        private long startingOffset;
        private KafkaConsumer<String, String> kafkaConsumer;

        public ConsumerThread(String topicName, String groupId, long startingOffset) {
            this.topicName = topicName;
            this.groupId = groupId;
            this.startingOffset = startingOffset;
        }

        public void run() {
            Properties configProperties = new Properties();
            configProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            configProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer");
            configProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
            configProperties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
            configProperties.put(ConsumerConfig.CLIENT_ID_CONFIG, "offset123");
            configProperties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
            configProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

            // Figure out where to start processing messages from
            kafkaConsumer = new KafkaConsumer<String, String>(configProperties);
            kafkaConsumer.subscribe(Arrays.asList(topicName), new ConsumerRebalanceListener() {
                public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                    System.out.printf("%s topic-partitions are revoked from this consumer\n", Arrays.toString(partitions.toArray()));
                }

                public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                    System.out.printf("%s topic-partitions are assigned to this consumer\n", Arrays.toString(partitions.toArray()));
                    Iterator<TopicPartition> topicPartitionIterator = partitions.iterator();
                    while (topicPartitionIterator.hasNext()) {
                        TopicPartition topicPartition = topicPartitionIterator.next();
                        System.out.println("Current offset is " + kafkaConsumer.position(topicPartition) + " committed offset is ->" + kafkaConsumer.committed(topicPartition));
                        if (startingOffset == -2) {
                            System.out.println("Leaving it alone");
                        } else if (startingOffset == 0) {
                            System.out.println("Setting offset to beginning");
                            kafkaConsumer.seekToBeginning(topicPartition);
                        } else if (startingOffset == -1) {
                            System.out.println("Setting it to the end");
                            kafkaConsumer.seekToEnd(topicPartition);
                        } else {
                            System.out.println("Resetting offset to " + startingOffset);
                            kafkaConsumer.seek(topicPartition, startingOffset);
                        }
                    }
                }
            });

            // Start processing messages
            try {
                while (true) {
                    ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
                    for (ConsumerRecord<String, String> record : records) {
                        System.out.println(record.value());
                    }
                    if (startingOffset == -2)
                        kafkaConsumer.commitSync();
                }
            } catch (WakeupException ex) {
                System.out.println("Exception caught " + ex.getMessage());
            } finally {
                kafkaConsumer.close();
                System.out.println("After closing KafkaConsumer");
            }
        }

        public KafkaConsumer<String, String> getKafkaConsumer() {
            return this.kafkaConsumer;
        }
    }
}

How to read the Header values in the Batch listener error handling scenario

I am trying to handle the exception at the listener:
@KafkaListener(id = PropertiesUtil.ID,
        topics = "#{'${kafka.consumer.topic}'}",
        groupId = "${kafka.consumer.group.id.config}",
        containerFactory = "containerFactory",
        errorHandler = "errorHandler")
public void receiveEvents(@Payload List<ConsumerRecord<String, String>> recordList,
        Acknowledgment acknowledgment) {
    try {
        log.info("Consuming the batch of size {} from kafka topic {}", recordList.size(),
                recordList.get(0).topic());
        processEvent(recordList);
        incrementOffset(acknowledgment);
    } catch (Exception exception) {
        throwOrHandleExceptions(exception, recordList, acknowledgment);
        .........
    }
}
The Kafka container config:
@Bean
public KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, String>>
        containerFactory() {
    ConcurrentKafkaListenerContainerFactory<String, String> factory =
            new ConcurrentKafkaListenerContainerFactory<>();
    factory.setConcurrency(this.numberOfConsumers);
    factory.getContainerProperties().setAckOnError(false);
    factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
    factory.setConsumerFactory(getConsumerFactory());
    factory.setBatchListener(true);
    return factory;
}
}
The listener error handler implementation:
@Bean
public ConsumerAwareListenerErrorHandler errorHandler() {
    return (m, e, c) -> {
        MessageHeaders headers = m.getHeaders();
        List<String> topics = headers.get(KafkaHeaders.RECEIVED_TOPIC, List.class);
        List<Integer> partitions = headers.get(KafkaHeaders.RECEIVED_PARTITION_ID, List.class);
        List<Long> offsets = headers.get(KafkaHeaders.OFFSET, List.class);
        Map<TopicPartition, Long> offsetsToReset = new HashMap<>();
        for (int i = 0; i < topics.size(); i++) {
            int index = i;
            offsetsToReset.compute(new TopicPartition(topics.get(i), partitions.get(i)),
                    (k, v) -> v == null ? offsets.get(index) : Math.min(v, offsets.get(index)));
        }
        ...
    };
}
When I run the same without batch processing, I am able to fetch the partition, topic and offset values, but when I enable batch processing and try to test it, I get only two values inside the headers, i.e. id and timestamp, and the other values are not set. Am I missing anything here?
What version are you using? I just tested it with Boot 2.2.4 (SK 2.3.5) and it works fine...
@SpringBootApplication
public class So60152179Application {

    public static void main(String[] args) {
        SpringApplication.run(So60152179Application.class, args);
    }

    @KafkaListener(id = "so60152179", topics = "so60152179", errorHandler = "eh")
    public void listen(List<String> in) {
        throw new RuntimeException("foo");
    }

    @Bean
    public ConsumerAwareListenerErrorHandler eh() {
        return (m, e, c) -> {
            System.out.println(m);
            return null;
        };
    }

    @Bean
    public ApplicationRunner runner(KafkaTemplate<String, String> template) {
        return args -> {
            template.send("so60152179", "foo");
        };
    }

    @Bean
    public NewTopic topic() {
        return TopicBuilder.name("so60152179").partitions(1).replicas(1).build();
    }
}
spring.kafka.listener.type=batch
spring.kafka.consumer.auto-offset-reset=earliest
GenericMessage [payload=[foo], headers={kafka_offset=[0], kafka_nativeHeaders=[RecordHeaders(headers = [], isReadOnly = false)], kafka_consumer=org.apache.kafka.clients.consumer.KafkaConsumer#2f2e787f, kafka_timestampType=[CREATE_TIME], kafka_receivedMessageKey=[null], kafka_receivedPartitionId=[0], kafka_receivedTopic=[so60152179], kafka_receivedTimestamp=[1581351585253], kafka_groupId=so60152179}]
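As the GenericMessage output above shows, in batch mode each positional Kafka header (kafka_offset, kafka_receivedPartitionId, kafka_receivedTopic, ...) is a List with one element per record in the batch. A minimal sketch of walking those parallel lists inside the error handler (variable names are only illustrative, combining the handler from the question with the working setup above):

@Bean
public ConsumerAwareListenerErrorHandler eh() {
    return (message, exception, consumer) -> {
        MessageHeaders headers = message.getHeaders();
        // in batch mode these are parallel lists, one element per record in the failed batch
        List<String> topics = headers.get(KafkaHeaders.RECEIVED_TOPIC, List.class);
        List<Integer> partitions = headers.get(KafkaHeaders.RECEIVED_PARTITION_ID, List.class);
        List<Long> offsets = headers.get(KafkaHeaders.OFFSET, List.class);
        for (int i = 0; i < offsets.size(); i++) {
            System.out.println(topics.get(i) + "-" + partitions.get(i) + "@" + offsets.get(i));
        }
        return null;
    };
}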

Kafka + Spring Batch Listener Flush Batch

Using Kafka Broker: 1.0.1
spring-kafka: 2.1.6.RELEASE
I'm using a batched consumer with the following settings:
// Other settings are not shown..
props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "100");
I use spring listener in the following way:
@KafkaListener(topics = "${topics}", groupId = "${consumer.group.id}")
public void receive(final List<String> data,
        @Header(KafkaHeaders.RECEIVED_PARTITION_ID) final List<Integer> partitions,
        @Header(KafkaHeaders.RECEIVED_TOPIC) Set<String> topics,
        @Header(KafkaHeaders.OFFSET) final List<Long> offsets) { // ......code... }
I always find that a few messages remain in the batch and are not received in my listener. It appears that if the remaining messages are fewer than the batch size, they aren't consumed (maybe they are held in memory and not published to my listener). Is there any way to have a setting to auto-flush the batch after a time interval so as to avoid the messages not being flushed?
What's the best way to deal with this kind of situation with a batch consumer?
I just ran a test without any problems...
@SpringBootApplication
public class So50370851Application {

    public static void main(String[] args) {
        SpringApplication.run(So50370851Application.class, args);
    }

    @Bean
    public ApplicationRunner runner(KafkaTemplate<String, String> template) {
        return args -> {
            for (int i = 0; i < 230; i++) {
                template.send("so50370851", "foo" + i);
            }
        };
    }

    @KafkaListener(id = "foo", topics = "so50370851")
    public void listen(List<String> in) {
        System.out.println(in.size());
    }

    @Bean
    public NewTopic topic() {
        return new NewTopic("so50370851", 1, (short) 1);
    }
}
and
spring.kafka.consumer.auto-offset-reset=earliest
spring.kafka.consumer.enable-auto-commit=false
spring.kafka.consumer.max-poll-records=100
spring.kafka.listener.type=batch
and
100
100
30
Also, the debug logs show after a while that it is polling and fetching 0 records (and this gets repeated over and over).
That implies the problem is on the sending side.
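If the consumer keeps polling and fetching 0 records while some messages never arrive, one common sending-side cause is producer batching: records can sit in the producer's buffer until the batch fills or linger.ms expires. A minimal sketch, assuming the sender uses KafkaTemplate as in the test above (the bean name producerRunner is only illustrative), that forces the buffer out:

@Bean
public ApplicationRunner producerRunner(KafkaTemplate<String, String> template) {
    return args -> {
        for (int i = 0; i < 230; i++) {
            template.send("so50370851", "foo" + i);
        }
        // push out anything still sitting in the producer's batch buffer
        template.flush();
    };
}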

KAFKA + FLINK 1.1.2 consumer group not working as expected

I tried to connect to one topic with 3 partitions, with 3 FlinkKafkaConsumer09 instances consuming from that one topic and using the Kafka consumer group property as below:
props.setProperty("group.id", "myGroup");
props.setProperty("auto.offset.reset", "latest");
But still all 3 consumers receive all the data. According to the consumer group concept, each record should be delivered to only one consumer inside the consumer group.
It works fine with a normal Java consumer. Is this an issue with FlinkKafkaConsumer09?
This issue can be solved by writing your own Flink consumer.
Steps: 1. You have to pass the partition as a property to the Flink consumer.
Note: with this approach you have one consumer per partition.
public class YourConsumer<T> extends FlinkKafkaConsumerBase<T> {

    public static final long DEFAULT_POLL_TIMEOUT = 100L;

    private final long pollTimeout;

    // NOTE: further declarations from the original FlinkKafkaConsumer09 (the properties and
    // deserializer fields, LOG, KEY_POLL_TIMEOUT, KEY_DISABLE_METRICS, ...) are omitted in this snippet.

    public YourConsumer(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
        this(Collections.singletonList(topic), valueDeserializer, props);
    }

    public YourConsumer(String topic, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this(Collections.singletonList(topic), deserializer, props);
    }

    public YourConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
        this(topics, new KeyedDeserializationSchemaWrapper<>(deserializer), props);
    }

    public YourConsumer(List<String> topics, KeyedDeserializationSchema<T> deserializer, Properties props) {
        super(topics, deserializer);
        this.properties = checkNotNull(props, "props");
        setDeserializer(this.properties);
        // configure the polling timeout
        try {
            if (properties.containsKey(KEY_POLL_TIMEOUT)) {
                this.pollTimeout = Long.parseLong(properties.getProperty(KEY_POLL_TIMEOUT));
            } else {
                this.pollTimeout = DEFAULT_POLL_TIMEOUT;
            }
        }
        catch (Exception e) {
            throw new IllegalArgumentException("Cannot parse poll timeout for '" + KEY_POLL_TIMEOUT + '\'', e);
        }
    }

    @Override
    protected AbstractFetcher<T, ?> createFetcher(
            SourceContext<T> sourceContext,
            List<KafkaTopicPartition> thisSubtaskPartitions,
            SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
            SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
            StreamingRuntimeContext runtimeContext) throws Exception {

        boolean useMetrics = !Boolean.valueOf(properties.getProperty(KEY_DISABLE_METRICS, "false"));

        return new Kafka09Fetcher<>(sourceContext, thisSubtaskPartitions,
                watermarksPeriodic, watermarksPunctuated,
                runtimeContext, deserializer,
                properties, pollTimeout, useMetrics);
    }

    @Override
    protected List<KafkaTopicPartition> getKafkaPartitions(List<String> topics) {
        // read the partitions that belong to the listed topics
        final List<KafkaTopicPartition> partitions = new ArrayList<>();
        // the single partition index this consumer instance should own
        int partition = Integer.parseInt(this.properties.getProperty("partitions"));

        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(this.properties)) {
            for (final String topic : topics) {
                // get partitions for each topic
                List<PartitionInfo> partitionsForTopic = consumer.partitionsFor(topic);
                // for non existing topics, the list might be null.
                if (partitionsForTopic != null) {
                    partitions.addAll(convertToFlinkKafkaTopicPartition(partitionsForTopic, partition));
                }
            }
        }

        if (partitions.isEmpty()) {
            throw new RuntimeException("Unable to retrieve any partitions for the requested topics " + topics);
        }

        // we now have a list of partitions which is the same for all parallel consumer instances.
        LOG.info("Got {} partitions from these topics: {}", partitions.size(), topics);

        if (LOG.isInfoEnabled()) {
            logPartitionInfo(LOG, partitions);
        }

        return partitions;
    }

    private static List<KafkaTopicPartition> convertToFlinkKafkaTopicPartition(List<PartitionInfo> partitions, int partition) {
        checkNotNull(partitions);
        List<KafkaTopicPartition> ret = new ArrayList<>(partitions.size());
        // keep only the single configured partition instead of all of them
        //for (PartitionInfo pi : partitions) {
        ret.add(new KafkaTopicPartition(partitions.get(partition).topic(), partitions.get(partition).partition()));
        // }
        return ret;
    }

    private static void setDeserializer(Properties props) {
        final String deSerName = ByteArrayDeserializer.class.getCanonicalName();

        Object keyDeSer = props.get(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        Object valDeSer = props.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);

        if (keyDeSer != null && !keyDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured key DeSerializer ({})", ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        }
        if (valDeSer != null && !valDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured value DeSerializer ({})", ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
        }

        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deSerName);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deSerName);
    }
}
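A short usage sketch of the approach above, assuming Flink 1.1.x package locations; the topic name and the "partitions" value are only illustrative (the custom consumer reads the "partitions" property and pins each instance to that single partition index):

import java.util.Properties;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

public class YourConsumerUsage {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "myGroup");
        props.setProperty("auto.offset.reset", "latest");
        // the custom consumer above reads this property and pins itself to that one partition
        props.setProperty("partitions", "0");

        DataStream<String> stream = env.addSource(
                new YourConsumer<>("myTopic", new SimpleStringSchema(), props));
        stream.print();

        env.execute("single-partition-consumer");
    }
}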