kafka fetch records by timestamp, consumer loop - apache-kafka

I am using Kafka 0.10.2.1 cluster. I am using the Kafka's offsetForTimes API to seek to a particular offset and would like to breakout of the loop when i have reached the end timestamp.
My code is like this:
//package kafka.ex.test;
import java.util.*;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
public class ConsumerGroup {
public static OffsetAndTimestamp fetchOffsetByTime( KafkaConsumer<Long, String> consumer , TopicPartition partition , long startTime){
Map<TopicPartition, Long> query = new HashMap<>();
query.put(
partition,
startTime);
final Map<TopicPartition, OffsetAndTimestamp> offsetResult = consumer.offsetsForTimes(query);
if( offsetResult == null || offsetResult.isEmpty() ) {
System.out.println(" No Offset to Fetch ");
System.out.println(" Offset Size "+offsetResult.size());
return null;
}
final OffsetAndTimestamp offsetTimestamp = offsetResult.get(partition);
if(offsetTimestamp == null ){
System.out.println("No Offset Found for partition : "+partition.partition());
}
return offsetTimestamp;
}
public static KafkaConsumer<Long, String> assignOffsetToConsumer( KafkaConsumer<Long, String> consumer, String topic , long startTime ){
final List<PartitionInfo> partitionInfoList = consumer.partitionsFor(topic);
System.out.println("Number of Partitions : "+partitionInfoList.size());
final List<TopicPartition> topicPartitions = new ArrayList<>();
for (PartitionInfo pInfo : partitionInfoList) {
TopicPartition partition = new TopicPartition(topic, pInfo.partition());
topicPartitions.add(partition);
}
consumer.assign(topicPartitions);
for(TopicPartition partition : topicPartitions ){
OffsetAndTimestamp offSetTs = fetchOffsetByTime(consumer, partition, startTime);
if( offSetTs == null ){
System.out.println("No Offset Found for partition : " + partition.partition());
consumer.seekToEnd(Arrays.asList(partition));
}else {
System.out.println(" Offset Found for partition : " +offSetTs.offset()+" " +partition.partition());
System.out.println("FETCH offset success"+
" Offset " + offSetTs.offset() +
" offSetTs " + offSetTs);
consumer.seek(partition, offSetTs.offset());
}
}
return consumer;
}
public static void main(String[] args) throws Exception {
String topic = args[0].toString();
String group = args[1].toString();
long start_time_Stamp = Long.parseLong( args[3].toString());
String bootstrapServers = args[2].toString();
long end_time_Stamp = Long.parseLong( args[4].toString());
Properties props = new Properties();
boolean reachedEnd = false;
props.put("bootstrap.servers", bootstrapServers);
props.put("group.id", group);
props.put("enable.auto.commit", "true");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<Long, String> consumer = new KafkaConsumer<Long, String>(props);
assignOffsetToConsumer(consumer, topic, start_time_Stamp);
System.out.println("Subscribed to topic " + topic);
int i = 0;
int arr[] = {0,0,0,0,0};
while (true) {
ConsumerRecords<Long, String> records = consumer.poll(6000);
int count= 0;
long lasttimestamp = 0;
long lastOffset = 0;
for (ConsumerRecord<Long, String> record : records) {
count++;
if(arr[record.partition()] == 0){
arr[record.partition()] =1;
}
if (record.timestamp() >= end_time_Stamp) {
reachedEnd = true;
break;
}
System.out.println("record=>"+" offset="
+record.offset()
+ " timestamp="+record.timestamp()
+ " :"+record);
System.out.println("recordcount = "+count+" bitmap"+Arrays.toString(arr));
}
if (reachedEnd) break;
if (records == null || records.isEmpty()) break; // dont wait for records
}
}
}
I face the following problems:
consumer.poll fails even for 1000 millisecond. I had to poll a few times in loop if i use 1000 millisecond. I have an extremely large value now. But having already, seeked to the relevant offsets within a partition, how to reliably set the poll timeout so that data is returned immediately?
My observations is that when data is returned it is not always from all partitions. Even when data is returned from all partitions not all records are returned. The amount of records that are in the topic is more than 1000. But the amount of records that are actually fetched and printed in loop is less(~200). Is there any issue with the current usage of my Kafka APIs?
how to reliably break out of the loop having obtained all the data between start and end timestamp and not prematurely?

Amount of records fetched per poll depends on your consumer config
You are breaking the loop when one of the partitions reaches the endtimestamp , which is not what you want . You should check that all the partitions are seeked to end before exiting poll loop
Poll call is an async call and fetch requests and responses are per node , so you may or may not get all the responses in a poll depending on the broker response time

Related

Uncommitted event is not received in the next poll

I have a consumer with max.poll.records set to 1 and enable.auto.commit set to false for the manual offset control. However even when I am not calling commitSync, the subsequent poll is returning next event. Here are the details, I produced 4 events onto a topic, in consumer I am not committing for the third event I am skipping commitSync, I was expecting the third event to be returned in the next poll but fourth event has been returned. I am puzzled how evet 3 has been committed.
private static void pauseAndResume() {
int retryDelay = 5; // seconds
SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss");
SimpleProducer.produce(4); //(produces Event1, Event2, Event3, Event4)
Properties properties = new Properties();
String topicName = "output-topic";
properties.put("bootstrap.servers", "localhost:29092");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("group.id", "test-group");
properties.put("max.poll.records", 1);
properties.put("enable.auto.commit", false);
KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<String, String>(properties);
List<String> topics = new ArrayList<String>();
topics.add(topicName);
kafkaConsumer.subscribe(topics);
Collection<TopicPartition> topicPartitions = new ArrayList<TopicPartition>();
PartitionInfo partitionInfo = kafkaConsumer.partitionsFor(topicName).get(0);
topicPartitions.add(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()));
int eventsCount = 0;
try {
Date pausedAt = new Date();
while (true) {;
if (!kafkaConsumer.paused().isEmpty()) {
if ((new Date().getTime() - pausedAt.getTime()) / 1000 % 60 >= retryDelay) {
System.out.println("Resuming Consumer..." + sdf.format(new Date()));
kafkaConsumer.resume(topicPartitions);
}
}
ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
System.out.println(eventsCount + ":" + record.value());
if (record.value().equals("Event3")) {
System.out.println("consumer is pausing...... for about " + retryDelay + " seconds " + sdf.format(new Date()));
kafkaConsumer.pause(topicPartitions);
pausedAt = new Date();
break;
}else {
kafkaConsumer.commitSync();
}
}
}
} catch (Exception e) {
System.out.println(e.getMessage());
} finally {
kafkaConsumer.close();
}
}
The link KafkaConsumer<K,V> doesn't tell how to stop offset advancing ):
I think some smart internals detected indefinite poll of Event3 and returned Event4 instead
As per my research (google and Kafka forums) I expect the Event3 to replayed as it was not committed, but it's not happening, request someone to point me in the right direction.
Many Thanks
I figured out a workaround to explicitly seek on the topic partition
//In this use case we are consuming from single topic which has only one partition
kafkaConsumer.seek(topicPartitions.iterator().next(), record.offset());

How do I set in Kafka to not consume from where it left?

I have a Kafka consumer in Golang. I don't want to consume from where I left last time, but rather current message. How can I do it?
You can set enable.auto.commit to false and auto.offset.reset to latest for your consumer group id. This means kafka will not be automatically committing your offsets.
With auto commit disabled, your consumer group progress would not be saved (unless you do manually). So whenever the consumer is restarted for whatever reason, it does not find its progress saved and resets to the latest offset.
set a new group.id to your consumer.
Then use auto.offset.reset to define the behavior of this new consumer group, in you case: latest
Apache kafka consumer api provides a method called kafkaConsumer.seekToEnd() which can be used to ignore the existing messages and only consume messages published after the consumer has been started without changing the current group ID of the consumer.
Below is the implementation of the same. The program takes 3 arguments : topic name, group ID and offset range (0 to start from beginning, - 1 to receive messages after consumer has started, other than 0 or - 1 will imply to to consumer to consume from that offset)
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import java.util.*;
public class Consumer {
private static Scanner in;
public static void main(String[] argv)throws Exception{
if (argv.length != 3) {
System.err.printf("Usage: %s <topicName> <groupId> <startingOffset>\n",
Consumer.class.getSimpleName());
System.exit(-1);
}
in = new Scanner(System.in);
String topicName = argv[0];
String groupId = argv[1];
final long startingOffset = Long.parseLong(argv[2]);
ConsumerThread consumerThread = new ConsumerThread(topicName,groupId,startingOffset);
consumerThread.start();
String line = "";
while (!line.equals("exit")) {
line = in.next();
}
consumerThread.getKafkaConsumer().wakeup();
System.out.println("Stopping consumer .....");
consumerThread.join();
}
private static class ConsumerThread extends Thread{
private String topicName;
private String groupId;
private long startingOffset;
private KafkaConsumer<String,String> kafkaConsumer;
public ConsumerThread(String topicName, String groupId, long startingOffset){
this.topicName = topicName;
this.groupId = groupId;
this.startingOffset=startingOffset;
}
public void run() {
Properties configProperties = new Properties();
configProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
configProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer");
configProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
configProperties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
configProperties.put(ConsumerConfig.CLIENT_ID_CONFIG, "offset123");
configProperties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,false);
configProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
//Figure out where to start processing messages from
kafkaConsumer = new KafkaConsumer<String, String>(configProperties);
kafkaConsumer.subscribe(Arrays.asList(topicName), new ConsumerRebalanceListener() {
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
System.out.printf("%s topic-partitions are revoked from this consumer\n", Arrays.toString(partitions.toArray()));
}
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
System.out.printf("%s topic-partitions are assigned to this consumer\n", Arrays.toString(partitions.toArray()));
Iterator<TopicPartition> topicPartitionIterator = partitions.iterator();
while(topicPartitionIterator.hasNext()){
TopicPartition topicPartition = topicPartitionIterator.next();
System.out.println("Current offset is " + kafkaConsumer.position(topicPartition) + " committed offset is ->" + kafkaConsumer.committed(topicPartition) );
if(startingOffset == -2) {
System.out.println("Leaving it alone");
}else if(startingOffset ==0){
System.out.println("Setting offset to begining");
kafkaConsumer.seekToBeginning(topicPartition);
}else if(startingOffset == -1){
System.out.println("Setting it to the end ");
kafkaConsumer.seekToEnd(topicPartition);
}else {
System.out.println("Resetting offset to " + startingOffset);
kafkaConsumer.seek(topicPartition, startingOffset);
}
}
}
});
//Start processing messages
try {
while (true) {
ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.value());
}
if(startingOffset == -2)
kafkaConsumer.commitSync();
}
}catch(WakeupException ex){
System.out.println("Exception caught " + ex.getMessage());
}finally{
kafkaConsumer.close();
System.out.println("After closing KafkaConsumer");
}
}
public KafkaConsumer<String,String> getKafkaConsumer(){
return this.kafkaConsumer;
}
}
}

How to read all the records in a Kafka topic

I am using kafka : kafka_2.12-2.1.0, spring kafka on client side and have got stuck with an issue.
I need to load an in-memory map by reading all the existing messages within a kafka topic. I did this by starting a new consumer (with a unique consumer group id and setting the offset to earliest). Then I iterate over the consumer (poll method) to get all messages and stop when the consumer records become empty.
But I noticed that, when I start polling, the first few iterations return consumer records as empty and then it starts returning the actual records. Now this breaks my logic as our code thinks there are no records in the topic.
I have tried few other ways (like using offsets number) but haven't been able to come up with any solution, apart from keeping another record somewhere which tells me how many messages there are in the topic which needs to be read before I stop.
Any idea's please ?
To my understanding, what you are trying to achieve is to have a map constructed in your application based on the values that are already in a specific Topic.
For this task, instead of manually polling the topic, you can use Ktable in Kafka Streams DSL which will automatically construct a readable key-value store which is fault tolerant, replication enabled and automatically filled with new values.
You can do this simply by calling groupByKey on a stream and then using the aggregate.
KStreamBuilder builder = new KStreamBuilder();
KStream<String, Long> myKStream = builder.stream(Serdes.String(), Serdes.Long(), "topic_name");
KTable<String, Long> totalCount = myKStream.groupByKey().aggregate(this::initializer, this::aggregator);
(The actual code may vary depending on the kafka version, your configurations, etc..)
Read more about Kafka Stream concepts here
Then I iterate over the consumer (poll method) to get all messages and stop when the consumer records become empty
Kafka is a message streaming platform. Any data you stream is being updated continuously and you probably should not use it in a way that you expect the consuming to stop after a certain number of messages. How will you handle if a new message comes in after you stop the consumer?
Also the reason you are getting null records maybe probably related to records being in different partitions, etc..
What is your specific use case here?, There might be a good way to do it with the Kafka semantics itself.
You have to use 2 consumers one to load the offsets and another one to read all the records.
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
public class KafkaRecordReader {
static final Map<String, Object> props = new HashMap<>();
static {
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
props.put(ConsumerConfig.CLIENT_ID_CONFIG, "sample-client");
}
public static void main(String[] args) {
final Map<TopicPartition, OffsetInfo> partitionOffsetInfos = getOffsets(Arrays.asList("world, sample"));
final List<ConsumerRecord<byte[], byte[]>> records = readRecords(partitionOffsetInfos);
System.out.println(partitionOffsetInfos);
System.out.println("Read : " + records.size() + " records");
}
private static List<ConsumerRecord<byte[], byte[]>> readRecords(final Map<TopicPartition, OffsetInfo> offsetInfos) {
final Properties readerProps = new Properties();
readerProps.putAll(props);
readerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, "record-reader");
final Map<TopicPartition, Boolean> partitionToReadStatusMap = new HashMap<>();
offsetInfos.forEach((tp, offsetInfo) -> {
partitionToReadStatusMap.put(tp, offsetInfo.beginOffset == offsetInfo.endOffset);
});
final List<ConsumerRecord<byte[], byte[]>> cachedRecords = new ArrayList<>();
try (final KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(readerProps)) {
consumer.assign(offsetInfos.keySet());
for (final Map.Entry<TopicPartition, OffsetInfo> entry : offsetInfos.entrySet()) {
consumer.seek(entry.getKey(), entry.getValue().beginOffset);
}
boolean close = false;
while (!close) {
final ConsumerRecords<byte[], byte[]> consumerRecords = consumer.poll(Duration.ofMillis(100));
for (final ConsumerRecord<byte[], byte[]> record : consumerRecords) {
cachedRecords.add(record);
final TopicPartition currentTp = new TopicPartition(record.topic(), record.partition());
if (record.offset() + 1 == offsetInfos.get(currentTp).endOffset) {
partitionToReadStatusMap.put(currentTp, true);
}
}
boolean done = true;
for (final Map.Entry<TopicPartition, Boolean> entry : partitionToReadStatusMap.entrySet()) {
done &= entry.getValue();
}
close = done;
}
}
return cachedRecords;
}
private static Map<TopicPartition, OffsetInfo> getOffsets(final List<String> topics) {
final Properties offsetReaderProps = new Properties();
offsetReaderProps.putAll(props);
offsetReaderProps.put(ConsumerConfig.CLIENT_ID_CONFIG, "offset-reader");
final Map<TopicPartition, OffsetInfo> partitionOffsetInfo = new HashMap<>();
try (final KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(offsetReaderProps)) {
final List<PartitionInfo> partitionInfos = new ArrayList<>();
topics.forEach(topic -> partitionInfos.addAll(consumer.partitionsFor("sample")));
final Set<TopicPartition> topicPartitions = partitionInfos
.stream()
.map(x -> new TopicPartition(x.topic(), x.partition()))
.collect(Collectors.toSet());
consumer.assign(topicPartitions);
final Map<TopicPartition, Long> beginningOffsets = consumer.beginningOffsets(topicPartitions);
final Map<TopicPartition, Long> endOffsets = consumer.endOffsets(topicPartitions);
for (final TopicPartition tp : topicPartitions) {
partitionOffsetInfo.put(tp, new OffsetInfo(beginningOffsets.get(tp), endOffsets.get(tp)));
}
}
return partitionOffsetInfo;
}
private static class OffsetInfo {
private final long beginOffset;
private final long endOffset;
private OffsetInfo(long beginOffset, long endOffset) {
this.beginOffset = beginOffset;
this.endOffset = endOffset;
}
#Override
public String toString() {
return "OffsetInfo{" +
"beginOffset=" + beginOffset +
", endOffset=" + endOffset +
'}';
}
#Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
OffsetInfo that = (OffsetInfo) o;
return beginOffset == that.beginOffset &&
endOffset == that.endOffset;
}
#Override
public int hashCode() {
return Objects.hash(beginOffset, endOffset);
}
}
}
Adding to the above answer from #arshad, the reason you are not getting the records is because you have already read them. See this answer here using earliest or latest does not matter on the consumer after you have a committed offset for the partition
I would use a seek to the beginning or the particular offset if you knew the starting offset.

Kafka consumerRebalanceListener not working

I am using Kafka 2 and trying to commitParition inside rebalance listener and its failing with below Exception.
org.apache.kafka.clients.consumer.CommitFailedException: Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member. This means that the time between subsequent calls to poll() was longer than the configured max.poll.interval.ms, which typically implies that the poll loop is spending too much time message processing. You can address this either by increasing the session timeout or by reducing the maximum size of batches returned in poll() with max.poll.records.
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.sendOffsetCommitRequest(ConsumerCoordinator.java:798)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.commitOffsetsSync(ConsumerCoordinator.java:681)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1416)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1377)
at basics.KafkaConsumerExample$1.commitOffsets(KafkaConsumerExample.java:74)
at basics.KafkaConsumerExample$1.onPartitionsRevoked(KafkaConsumerExample.java:61)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.onJoinPrepare(ConsumerCoordinator.java:465)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.joinGroupIfNeeded(AbstractCoordinator.java:408)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:352)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:337)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:333)
at org.apache.kafka.clients.consumer.KafkaConsumer.updateAssignmentMetadataIfNeeded(KafkaConsumer.java:1218)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1175)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1154)
at basics.KafkaConsumerExample.run(KafkaConsumerExample.java:97)
at basics.KafkaConsumerExample.main(KafkaConsumerExample.java:305)
Code :-
public void runConsumerWithRebalanceListener() throws Exception {
final KafkaConsumer<byte[], byte[]> consumer = createConsumer();
final TestConsumerRebalanceListener rebalanceListener = new TestConsumerRebalanceListener(consumer);
consumer.subscribe(Collections.singletonList(SIMPLE_CONSUMER_TEST_TOPIC), rebalanceListener);
while (true) {
final ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(100));
for (final ConsumerRecord<byte[], byte[]> record : records) {
Thread.sleep(1000);
System.out.printf("Received Message topic =%s, partition =%s, offset = %d, key = %s, value = %s\n", record.topic(), record.partition(),
record.offset(), record.key(), record.value());
rebalanceListener.addOffset(record.topic(), record.partition(), record.offset());
}
}
}
RebalanceListener Code :-
private static class TestConsumerRebalanceListener implements ConsumerRebalanceListener {
final List<Future<Boolean>> futures = new ArrayList<>();
private final KafkaConsumer<byte[], byte[]> consumer;
private final Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();
public TestConsumerRebalanceListener(final KafkaConsumer<byte[], byte[]> consumer) {
this.consumer = consumer;
}
#Override
public void onPartitionsRevoked(final Collection<TopicPartition> partitions) {
System.out.println(" Called onPartitionsRevoked with partitions: " + partitions);
if(!futures.isEmpty())
futures.get(0).cancel(true);
consumer.commitSync(currentOffsets);
currentOffsets.clear();
}
public void addOffset(final String topic, final int partition, final long offset) {
currentOffsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset));
}
#Override
public void onPartitionsAssigned(final Collection<TopicPartition> partitions) {
System.out.println("Called onPartitionsAssigned with partitions: " + partitions);
}
}
Setting:-
auto.commit.offset=true
max.poll.records = 100 // Waiting for 1 sec for each msg
max.poll.interval.ms = 60000
So, rebalance will happen as 100 records processing will take more than 60secs of max poll records interval ms. So, rebalancing is happening as expected, but commitSync inside onRevoke fails.
ConsumerRebalanceListenre works for rebalance on new consumer or when consumer dies.

How to know when record is committed in Kafka?

In case of integration testing, I send a record into Kafka, and I would like to know when it will be processed and committed, and then do my assertions (instead of using a Thread.sleep)...
Here is my try :
public void sendRecordAndWaitUntilItsNotConsumed(ProducerRecord<String, String> record)
throws ExecutionException, InterruptedException {
RecordMetadata recordMetadata = producer.send(record).get();
TopicPartition topicPartition = new TopicPartition(recordMetadata.topic(),
recordMetadata.partition());
try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerConfig)) {
consumer.assign(Collections.singletonList(topicPartition));
long recordOffset = recordMetadata.offset();
long currentOffset = getCurrentOffset(consumer, topicPartition);
while (currentOffset <= recordOffset) {
currentOffset = getCurrentOffset(consumer, topicPartition);
LOGGER.info("Waiting for message to be consumed - Current Offset = " + currentOffset
+ " - Record Offset = " + recordOffset);
}
}
}
private long getCurrentOffset(KafkaConsumer<String, String> consumer,
TopicPartition topicPartition) {
consumer.seekToEnd(Collections.emptyList());
return consumer.position(topicPartition);
}
But it doesn't work. Indeed, I disabled the commit of the consumer, and it doesn't loop on Waiting for message to be consumed...
It works using KafkaConsumer#committed instead of KafkaConsumer#position.
private void sendRecordAndWaitUntilItsNotConsumed(ProducerRecord<String, String> record) throws InterruptedException, ExecutionException {
RecordMetadata recordMetadata = producer.send(record).get();
TopicPartition topicPartition = new TopicPartition(recordMetadata.topic(),
recordMetadata.partition());
consumer.assign(Collections.singletonList(topicPartition));
long recordOffset = recordMetadata.offset();
long currentOffset = getCurrentOffset(topicPartition);
while (currentOffset < recordOffset) {
currentOffset = getCurrentOffset(topicPartition);
LOGGER.info("Waiting for message to be consumed - Current Offset = " + currentOffset
+ " - Record Offset = " + recordOffset);
TimeUnit.MILLISECONDS.sleep(200);
}
}
private long getCurrentOffset(TopicPartition topicPartition) {
OffsetAndMetadata offsetAndMetadata = consumer.committed(topicPartition);
return offsetAndMetadata != null ? offsetAndMetadata.offset() - 1 : -1;
}