How do I configure Kafka to not consume from where it left off? - apache-kafka

I have a Kafka consumer in Golang. I don't want to consume from where I left off last time, but only the current (latest) messages. How can I do that?

You can set enable.auto.commit to false and auto.offset.reset to latest for your consumer group. This means Kafka will not automatically commit your offsets.
With auto commit disabled, your consumer group's progress is not saved (unless you commit manually). So whenever the consumer is restarted, for whatever reason, it finds no saved progress and resets to the latest offset.
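A minimal sketch of that configuration with the plain Java consumer (the broker address, group id and topic handling are placeholders; the same two settings exist under equivalent names in the Go clients):

import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class LatestOnlyConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder broker
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-group");                // placeholder group id
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");         // never save progress
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");         // no saved offset -> start at the end
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // subscribe and poll as usual; on every restart the group has no committed
            // offsets, so the consumer starts from the latest offset of each partition
        }
    }
}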

Set a new group.id for your consumer.
Then use auto.offset.reset to define the behavior of this new consumer group; in your case: latest.
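A minimal sketch of this approach, using a randomly generated group id so that no committed offsets can ever be found on startup (the group id prefix is only an illustration):

import java.util.Properties;
import java.util.UUID;
import org.apache.kafka.clients.consumer.ConsumerConfig;

public class FreshGroupProperties {
    static Properties build() {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // a brand-new group id has no committed offsets ...
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-app-" + UUID.randomUUID());
        // ... so auto.offset.reset decides where to start: "latest" skips everything already in the topic
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        return props;
    }
}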

The Apache Kafka consumer API provides a method called kafkaConsumer.seekToEnd() which can be used to ignore existing messages and only consume messages published after the consumer has started, without changing the consumer's group ID.
Below is an implementation of this. The program takes 3 arguments: topic name, group ID and starting offset (0 to start from the beginning, -1 to receive only messages published after the consumer has started; any other value tells the consumer to consume from that offset).
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import java.util.*;

public class Consumer {
    private static Scanner in;

    public static void main(String[] argv) throws Exception {
        if (argv.length != 3) {
            System.err.printf("Usage: %s <topicName> <groupId> <startingOffset>\n",
                    Consumer.class.getSimpleName());
            System.exit(-1);
        }
        in = new Scanner(System.in);
        String topicName = argv[0];
        String groupId = argv[1];
        final long startingOffset = Long.parseLong(argv[2]);

        ConsumerThread consumerThread = new ConsumerThread(topicName, groupId, startingOffset);
        consumerThread.start();
        String line = "";
        while (!line.equals("exit")) {
            line = in.next();
        }
        consumerThread.getKafkaConsumer().wakeup();
        System.out.println("Stopping consumer .....");
        consumerThread.join();
    }

    private static class ConsumerThread extends Thread {
        private String topicName;
        private String groupId;
        private long startingOffset;
        private KafkaConsumer<String, String> kafkaConsumer;

        public ConsumerThread(String topicName, String groupId, long startingOffset) {
            this.topicName = topicName;
            this.groupId = groupId;
            this.startingOffset = startingOffset;
        }

        public void run() {
            Properties configProperties = new Properties();
            configProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            configProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
            configProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
            configProperties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
            configProperties.put(ConsumerConfig.CLIENT_ID_CONFIG, "offset123");
            configProperties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
            configProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

            // Figure out where to start processing messages from
            kafkaConsumer = new KafkaConsumer<String, String>(configProperties);
            kafkaConsumer.subscribe(Arrays.asList(topicName), new ConsumerRebalanceListener() {
                public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                    System.out.printf("%s topic-partitions are revoked from this consumer\n", Arrays.toString(partitions.toArray()));
                }

                public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                    System.out.printf("%s topic-partitions are assigned to this consumer\n", Arrays.toString(partitions.toArray()));
                    Iterator<TopicPartition> topicPartitionIterator = partitions.iterator();
                    while (topicPartitionIterator.hasNext()) {
                        TopicPartition topicPartition = topicPartitionIterator.next();
                        System.out.println("Current offset is " + kafkaConsumer.position(topicPartition) + " committed offset is ->" + kafkaConsumer.committed(topicPartition));
                        // decide where to start based on the startingOffset argument
                        if (startingOffset == -2) {
                            System.out.println("Leaving it alone");
                        } else if (startingOffset == 0) {
                            System.out.println("Setting offset to beginning");
                            kafkaConsumer.seekToBeginning(Arrays.asList(topicPartition));
                        } else if (startingOffset == -1) {
                            System.out.println("Setting it to the end");
                            kafkaConsumer.seekToEnd(Arrays.asList(topicPartition));
                        } else {
                            System.out.println("Resetting offset to " + startingOffset);
                            kafkaConsumer.seek(topicPartition, startingOffset);
                        }
                    }
                }
            });

            // Start processing messages
            try {
                while (true) {
                    ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
                    for (ConsumerRecord<String, String> record : records) {
                        System.out.println(record.value());
                    }
                    if (startingOffset == -2)
                        kafkaConsumer.commitSync();
                }
            } catch (WakeupException ex) {
                System.out.println("Exception caught " + ex.getMessage());
            } finally {
                kafkaConsumer.close();
                System.out.println("After closing KafkaConsumer");
            }
        }

        public KafkaConsumer<String, String> getKafkaConsumer() {
            return this.kafkaConsumer;
        }
    }
}

Related

Kafka consumerRebalanceListener not working

I am using Kafka 2 and trying to commit partition offsets inside the rebalance listener, and it is failing with the below exception.
org.apache.kafka.clients.consumer.CommitFailedException: Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member. This means that the time between subsequent calls to poll() was longer than the configured max.poll.interval.ms, which typically implies that the poll loop is spending too much time message processing. You can address this either by increasing the session timeout or by reducing the maximum size of batches returned in poll() with max.poll.records.
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.sendOffsetCommitRequest(ConsumerCoordinator.java:798)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.commitOffsetsSync(ConsumerCoordinator.java:681)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1416)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1377)
at basics.KafkaConsumerExample$1.commitOffsets(KafkaConsumerExample.java:74)
at basics.KafkaConsumerExample$1.onPartitionsRevoked(KafkaConsumerExample.java:61)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.onJoinPrepare(ConsumerCoordinator.java:465)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.joinGroupIfNeeded(AbstractCoordinator.java:408)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:352)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:337)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:333)
at org.apache.kafka.clients.consumer.KafkaConsumer.updateAssignmentMetadataIfNeeded(KafkaConsumer.java:1218)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1175)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1154)
at basics.KafkaConsumerExample.run(KafkaConsumerExample.java:97)
at basics.KafkaConsumerExample.main(KafkaConsumerExample.java:305)
Code :-
public void runConsumerWithRebalanceListener() throws Exception {
    final KafkaConsumer<byte[], byte[]> consumer = createConsumer();
    final TestConsumerRebalanceListener rebalanceListener = new TestConsumerRebalanceListener(consumer);
    consumer.subscribe(Collections.singletonList(SIMPLE_CONSUMER_TEST_TOPIC), rebalanceListener);
    while (true) {
        final ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(100));
        for (final ConsumerRecord<byte[], byte[]> record : records) {
            Thread.sleep(1000);
            System.out.printf("Received Message topic =%s, partition =%s, offset = %d, key = %s, value = %s\n", record.topic(), record.partition(),
                    record.offset(), record.key(), record.value());
            rebalanceListener.addOffset(record.topic(), record.partition(), record.offset());
        }
    }
}
RebalanceListener Code :-
private static class TestConsumerRebalanceListener implements ConsumerRebalanceListener {
    final List<Future<Boolean>> futures = new ArrayList<>();
    private final KafkaConsumer<byte[], byte[]> consumer;
    private final Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    public TestConsumerRebalanceListener(final KafkaConsumer<byte[], byte[]> consumer) {
        this.consumer = consumer;
    }

    @Override
    public void onPartitionsRevoked(final Collection<TopicPartition> partitions) {
        System.out.println(" Called onPartitionsRevoked with partitions: " + partitions);
        if (!futures.isEmpty())
            futures.get(0).cancel(true);
        consumer.commitSync(currentOffsets);
        currentOffsets.clear();
    }

    public void addOffset(final String topic, final int partition, final long offset) {
        currentOffsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset));
    }

    @Override
    public void onPartitionsAssigned(final Collection<TopicPartition> partitions) {
        System.out.println("Called onPartitionsAssigned with partitions: " + partitions);
    }
}
Setting:-
auto.commit.offset=true
max.poll.records = 100 // Waiting for 1 sec for each msg
max.poll.interval.ms = 60000
So, a rebalance will happen because processing 100 records at 1 second each takes about 100 seconds, which exceeds the 60-second max.poll.interval.ms. Rebalancing is therefore happening as expected, but the commitSync inside onPartitionsRevoked fails.
ConsumerRebalanceListener works for rebalances when a new consumer joins or when a consumer dies.
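As the exception message itself suggests, the usual remedy is to make each batch fit inside max.poll.interval.ms, either by fetching fewer records per poll or by allowing more time between polls. A minimal sketch of the two knobs (the values are only illustrative):

import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;

public class PollIntervalTuning {
    static Properties tuned(Properties props) {
        // Option 1: fetch fewer records per poll, so one second per record stays well under the interval
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "10");
        // Option 2: allow more time between polls than the worst-case batch
        // (100 records x 1 s each is roughly 100 s, so anything comfortably above that)
        props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, "120000");
        return props;
    }
}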

kafka fetch records by timestamp, consumer loop

I am using a Kafka 0.10.2.1 cluster. I am using Kafka's offsetsForTimes API to seek to a particular offset and would like to break out of the loop when I have reached the end timestamp.
My code is like this:
//package kafka.ex.test;
import java.util.*;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
public class ConsumerGroup {
public static OffsetAndTimestamp fetchOffsetByTime( KafkaConsumer<Long, String> consumer , TopicPartition partition , long startTime){
Map<TopicPartition, Long> query = new HashMap<>();
query.put(
partition,
startTime);
final Map<TopicPartition, OffsetAndTimestamp> offsetResult = consumer.offsetsForTimes(query);
if( offsetResult == null || offsetResult.isEmpty() ) {
System.out.println(" No Offset to Fetch ");
System.out.println(" Offset Size "+offsetResult.size());
return null;
}
final OffsetAndTimestamp offsetTimestamp = offsetResult.get(partition);
if(offsetTimestamp == null ){
System.out.println("No Offset Found for partition : "+partition.partition());
}
return offsetTimestamp;
}
public static KafkaConsumer<Long, String> assignOffsetToConsumer( KafkaConsumer<Long, String> consumer, String topic , long startTime ){
final List<PartitionInfo> partitionInfoList = consumer.partitionsFor(topic);
System.out.println("Number of Partitions : "+partitionInfoList.size());
final List<TopicPartition> topicPartitions = new ArrayList<>();
for (PartitionInfo pInfo : partitionInfoList) {
TopicPartition partition = new TopicPartition(topic, pInfo.partition());
topicPartitions.add(partition);
}
consumer.assign(topicPartitions);
for(TopicPartition partition : topicPartitions ){
OffsetAndTimestamp offSetTs = fetchOffsetByTime(consumer, partition, startTime);
if( offSetTs == null ){
System.out.println("No Offset Found for partition : " + partition.partition());
consumer.seekToEnd(Arrays.asList(partition));
}else {
System.out.println(" Offset Found for partition : " +offSetTs.offset()+" " +partition.partition());
System.out.println("FETCH offset success"+
" Offset " + offSetTs.offset() +
" offSetTs " + offSetTs);
consumer.seek(partition, offSetTs.offset());
}
}
return consumer;
}
public static void main(String[] args) throws Exception {
String topic = args[0].toString();
String group = args[1].toString();
long start_time_Stamp = Long.parseLong( args[3].toString());
String bootstrapServers = args[2].toString();
long end_time_Stamp = Long.parseLong( args[4].toString());
Properties props = new Properties();
boolean reachedEnd = false;
props.put("bootstrap.servers", bootstrapServers);
props.put("group.id", group);
props.put("enable.auto.commit", "true");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<Long, String> consumer = new KafkaConsumer<Long, String>(props);
assignOffsetToConsumer(consumer, topic, start_time_Stamp);
System.out.println("Subscribed to topic " + topic);
int i = 0;
int arr[] = {0,0,0,0,0};
while (true) {
ConsumerRecords<Long, String> records = consumer.poll(6000);
int count= 0;
long lasttimestamp = 0;
long lastOffset = 0;
for (ConsumerRecord<Long, String> record : records) {
count++;
if(arr[record.partition()] == 0){
arr[record.partition()] =1;
}
if (record.timestamp() >= end_time_Stamp) {
reachedEnd = true;
break;
}
System.out.println("record=>"+" offset="
+record.offset()
+ " timestamp="+record.timestamp()
+ " :"+record);
System.out.println("recordcount = "+count+" bitmap"+Arrays.toString(arr));
}
if (reachedEnd) break;
if (records == null || records.isEmpty()) break; // dont wait for records
}
}
}
I face the following problems:
consumer.poll fails even with a 1000 millisecond timeout. I had to poll a few times in a loop when I used 1000 milliseconds; I now use an extremely large value. But having already seeked to the relevant offsets within a partition, how can I reliably set the poll timeout so that data is returned immediately?
My observation is that when data is returned it is not always from all partitions. Even when data is returned from all partitions, not all records are returned. The number of records in the topic is more than 1000, but the number of records actually fetched and printed in the loop is less (~200). Is there any issue with my current usage of the Kafka APIs?
How can I reliably break out of the loop once I have obtained all the data between the start and end timestamps, and not prematurely?
The number of records fetched per poll depends on your consumer config.
You are breaking out of the loop when one of the partitions reaches the end timestamp, which is not what you want. You should check that all of the partitions have reached the end timestamp before exiting the poll loop.
poll() is an asynchronous call and fetch requests and responses are per node, so you may or may not get all the responses in a single poll, depending on the broker response time.
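A minimal sketch of that idea: track which partitions have passed the end timestamp and only stop once every assigned partition is done (the helper below is hypothetical and only illustrates the bookkeeping, not the full program above):

import java.util.HashSet;
import java.util.Set;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class TimestampBoundedRead {
    // Consume until every assigned partition has produced a record at or past endTimestamp.
    static void readUntil(KafkaConsumer<Long, String> consumer, long endTimestamp) {
        Set<TopicPartition> finished = new HashSet<>();
        while (finished.size() < consumer.assignment().size()) {
            ConsumerRecords<Long, String> records = consumer.poll(1000);
            for (ConsumerRecord<Long, String> record : records) {
                TopicPartition tp = new TopicPartition(record.topic(), record.partition());
                if (record.timestamp() >= endTimestamp) {
                    finished.add(tp);          // this partition is done; keep reading the others
                } else if (!finished.contains(tp)) {
                    System.out.println("offset=" + record.offset() + " ts=" + record.timestamp());
                }
            }
            // Note: a partition with no records past endTimestamp would keep this loop running;
            // a real implementation would also need an idle-poll or wall-clock cutoff.
        }
    }
}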

Kafka Consumer committing manually based on a condition.

A @KafkaListener consumer is committing once a specific condition is met. Let us say a topic gets the following data from a producer:
"Message 0" at offset[0]
"Message 1" at offset[1]
They are received at the consumer and committed with the help of acknowledgement.acknowledge().
Then the below messages come to the topic:
"Message 2" at offset[2]
"Message 3" at offset[3]
The consumer which is running receives the above data. Here the condition fails and the above offsets are not committed.
Even if new data arrives on the topic, "Message 2" and "Message 3" should still be picked up by a consumer from the same consumer group, as they were not committed. But this is not happening; the consumer picks up the new message.
When I restart my consumer I get back Message 2 and Message 3. This should have happened while the consumers were running.
The code is as follows:
KafkaConsumerConfig file
@Configuration
@EnableKafka
public class KafkaConsumerConfig {

    @Bean
    KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, String>> kafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        factory.setConcurrency(3);
        factory.setBatchListener(true);
        factory.getContainerProperties().setAckMode(AbstractMessageListenerContainer.AckMode.MANUAL_IMMEDIATE);
        factory.getContainerProperties().setSyncCommits(true);
        return factory;
    }

    @Bean
    public ConsumerFactory<String, String> consumerFactory() {
        return new DefaultKafkaConsumerFactory<>(consumerConfigs());
    }

    @Bean
    public Map<String, Object> consumerConfigs() {
        Map<String, Object> propsMap = new HashMap<>();
        propsMap.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        propsMap.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        propsMap.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "100");
        propsMap.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "15000");
        propsMap.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        propsMap.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        propsMap.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");
        propsMap.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        propsMap.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "1");
        return propsMap;
    }

    @Bean
    public Listener listener() {
        return new Listener();
    }
}
Listener class
public class Listener {
    public CountDownLatch countDownLatch0 = new CountDownLatch(3);
    private Logger LOGGER = LoggerFactory.getLogger(Listener.class);
    static int count0 = 0;

    @KafkaListener(topics = "abcdefghi", group = "group1", containerFactory = "kafkaListenerContainerFactory")
    public void listenPartition0(String data, @Header(KafkaHeaders.RECEIVED_PARTITION_ID) List<Integer> partitions,
            @Header(KafkaHeaders.OFFSET) List<Long> offsets, Acknowledgment acknowledgment) throws InterruptedException {
        count0 = count0 + 1;
        LOGGER.info("start consumer 0");
        LOGGER.info("received message via consumer 0='{}' with partition-offset='{}'", data, partitions + "-" + offsets);
        if (count0 % 2 == 0)
            acknowledgment.acknowledge();
        LOGGER.info("end of consumer 0");
    }
}
How can I achieve my desired result?
That's correct. The offset is a number which is pretty easy to keep track of in memory on the consumer instance. We only need committed offsets for newly arriving consumers in the group for the same partitions. That's why it works as expected when you restart the application or when a rebalance happens for the group.
To make it work as you would like, you should consider implementing ConsumerSeekAware in your listener and calling ConsumerSeekCallback.seek() for the offset you would like to start consuming from on the next poll cycle.
http://docs.spring.io/spring-kafka/docs/2.0.0.M2/reference/html/_reference.html#seek:
public class Listener implements ConsumerSeekAware {

    private final ThreadLocal<ConsumerSeekCallback> seekCallBack = new ThreadLocal<>();

    @Override
    public void registerSeekCallback(ConsumerSeekCallback callback) {
        this.seekCallBack.set(callback);
    }

    @KafkaListener()
    public void listen(...) {
        this.seekCallBack.get().seek(topic, partition, 0);
    }
}

Understanding kafka zookeeper auto reset

I am still having doubts about kafka ZOOKEPER_AUTO_RESET. I have seen a lot of questions asked in this regard. Kindly excuse me if this is a duplicate query.
I have a high-level Java consumer which keeps on consuming.
I have multiple topics and all topics have a single partition.
My concern is the below.
I started consumerkafka.jar with the consumer group name "ncdev1" and ZOOKEPER_AUTO_RESET = smallest. I could observe that the init offset is set to -1. Then I stopped/started the jar after some time. At this time, it picks up the latest offset assigned to the consumer group (ncdev1), i.e. 36. I restarted again after some time, and then the init offset is set to 39, which is the latest value.
Then I changed the group name to ZOOKEPER_GROUP_ID = ncdev2 and restarted the jar file. This time, again, the offset is set to -1. In further restarts, it jumped to the latest value, i.e. 39.
Then I set ZOOKEPER_AUTO_RESET=largest and ZOOKEPER_GROUP_ID = ncdev3.
Then I tried restarting the jar file with the group name ncdev3. There is no difference in the way it picks up the offset when it restarts. That is, it picks 39 when it restarts, which is the same as with the previous configuration.
Any idea why it is not picking up the offset from the beginning? Is there any other configuration to be done to make it read from the beginning? (My understanding of largest and smallest comes from What determines Kafka consumer offset?)
Thanks in Advance
Code added:
public class ConsumerForKafka {
private final ConsumerConnector consumer;
private final String topic;
private ExecutorService executor;
ServerSocket soketToWrite;
Socket s_Accept ;
OutputStream s1out ;
DataOutputStream dos;
static boolean logEnabled ;
static File fileName;
private static final Logger logger = Logger.getLogger(ConsumerForKafka.class);
public ConsumerForKafka(String a_zookeeper, String a_groupId, String a_topic,String session_timeout,String auto_reset,String a_commitEnable) {
consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
createConsumerConfig(a_zookeeper, a_groupId,session_timeout,auto_reset,a_commitEnable));
this.topic =a_topic;
}
public void run(int a_numThreads,String a_zookeeper, String a_topic) throws InterruptedException, IOException {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(a_numThreads));
Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
Socket socks = new Socket(socketURL,socketPort);
//****
String keeper = a_zookeeper;
String topic = a_topic;
long millis = new java.util.Date().getTime();
//****
PrintWriter outWriter = new PrintWriter(socks.getOutputStream(), true);
List<KafkaStream<byte[], byte[]>> streams = null;
// now create an object to consume the messages
//
int threadNumber = 0;
// System.out.println("going to forTopic value is "+topic);
boolean keepRunningThread =false;
boolean chcek = false;
logger.info("logged");
BufferedWriter bw = null;
FileWriter fw = null;
if(logEnabled){
fw = new FileWriter(fileName, true);
bw = new BufferedWriter(fw);
}
for (;;) {
streams = consumerMap.get(topic);
keepRunningThread =true;
for (final KafkaStream stream : streams) {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while(keepRunningThread)
{
try{
if (it.hasNext()){
if(logEnabled){
String data = new String(it.next().message())+""+"\n";
bw.write(data);
bw.flush();
outWriter.print(data);
outWriter.flush();
consumer.commitOffsets();
logger.info("Explicit commit ......");
}else{
outWriter.print(new String(it.next().message())+""+"\n");
outWriter.flush();
}
}
// logger.info("running");
} catch(ConsumerTimeoutException ex) {
keepRunningThread =false;
break;
}catch(NullPointerException npe ){
keepRunningThread =true;
npe.printStackTrace();
}catch(IllegalStateException ile){
keepRunningThread =true;
ile.printStackTrace();
}
}
}
}
}
private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId,String session_timeout,String auto_reset,String commitEnable) {
Properties props = new Properties();
props.put("zookeeper.connect", a_zookeeper);
props.put("group.id", a_groupId);
props.put("zookeeper.session.timeout.ms", session_timeout);
props.put("zookeeper.sync.time.ms", "2000");
props.put("auto.offset.reset", auto_reset);
props.put("auto.commit.interval.ms", "60000");
props.put("consumer.timeout.ms", "30");
props.put("auto.commit.enable",commitEnable);
//props.put("rebalance.max.retries", "4");
return new ConsumerConfig(props);
}
public static void main(String[] args) throws InterruptedException {
String zooKeeper = PropertyUtils.getProperty("ZOOKEEPER_URL_PORT");
String groupId = PropertyUtils.getProperty("ZOOKEPER_GROUP_ID");
String session_timeout = PropertyUtils.getProperty("ZOOKEPER_SESSION_TIMOUT_MS"); //6400
String auto_reset = PropertyUtils.getProperty("ZOOKEPER_AUTO_RESET"); //smallest
String enableLogging = PropertyUtils.getProperty("ENABLE_LOG");
String directoryPath = PropertyUtils.getProperty("LOG_DIRECTORY");
String log4jpath = PropertyUtils.getProperty("LOG_DIR");
String commitEnable = PropertyUtils.getProperty("ZOOKEPER_COMMIT"); //false
PropertyConfigurator.configure(log4jpath);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
try {
Socket socks = new Socket(socketURL,socketPort);
boolean connected = socks.isConnected() && !socks.isClosed();
if(connected){
//System.out.println("Able to connect ");
}else{
logger.info("Not able to conenct to socket ..Exiting...");
System.exit(0);
}
} catch (UnknownHostException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch(java.net.ConnectException cne){
logger.info("Not able to conenct to socket ..Exitring...");
System.exit(0);
}
catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// String zooKeeper = args[0];
// String groupId = args[1];
String topic = args[0];
int threads = 1;
logEnabled = Boolean.parseBoolean(enableLogging);
if(logEnabled)
createDirectory(topic,directoryPath);
ConsumerForKafka example = new ConsumerForKafka(zooKeeper, groupId, topic, session_timeout,auto_reset,commitEnable);
try {
example.run(threads,zooKeeper,topic);
} catch(java.net.ConnectException cne){
cne.printStackTrace();
System.exit(0);
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private static void createDirectory(String topic,String d_Path) {
try{
File file = new File(d_Path);
if (!file.exists()) {
if (file.mkdir()) {
logger.info("Directory Created" +file.getPath());
} else {
logger.info("Directory Creation failed");
}
}
fileName = new File(d_Path + topic + ".log");
if (!fileName.exists()) {
fileName.createNewFile();
}
}catch(IOException IOE){
//logger.info("IOException occured during Directory or During File creation ");
}
}
}
After rereading your post carefully, I think what you ran into is expected behavior.
I started the consumerkafka.jar with consumer group name as “ncdev1” and ZOOKEPER_AUTO_RESET = smallest . Could observe that init offset is set as -1. Then I stop/started the jar after sometime. At this time, it picks the latest offset assigned to the consumer group (ncdev1) ie 36.
auto.offset.reset only applies when there is no initial offset or if an offset is out of range. Since you only have 36 messages in the log, it's possible for the consumer group to read all those records very quickly; that's why you see the consumer group always pick up the latest offset every time it is restarted.
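If the goal really is to re-read the topic from the beginning, the simplest option with the old high-level consumer is to start with a group id that has no committed offsets in ZooKeeper, so that auto.offset.reset=smallest actually takes effect. A minimal sketch of that, assuming the 0.8.x-style consumer used in the question (the group id prefix is only an example):

import java.util.Properties;
import java.util.UUID;
import kafka.consumer.ConsumerConfig;

public class FromBeginningConfig {
    static ConsumerConfig buildConfig(String zookeeperConnect) {
        Properties props = new Properties();
        props.put("zookeeper.connect", zookeeperConnect);
        // a group id with no committed offsets in ZooKeeper, so auto.offset.reset takes effect
        props.put("group.id", "replay-" + UUID.randomUUID());
        // with no committed offset, start from the earliest available message
        props.put("auto.offset.reset", "smallest");
        // avoid committing offsets so a re-run starts from the beginning again
        props.put("auto.commit.enable", "false");
        return new ConsumerConfig(props);
    }
}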

KAFKA + FLINK 1.1.2 consumer group not working as expected

I tried to connect to one topic with 3 partitions, with 3 FlinkKafkaConsumer09 instances consuming from that topic and using the Kafka consumer group property as below.
props.setProperty("group.id", "myGroup");
props.setProperty("auto.offset.reset", "latest");
But still all 3 consumers receive all the data. According to the consumer group concept, each record should be delivered to only one consumer inside the consumer group.
It works fine with a normal Java consumer. Is this an issue with FlinkKafkaConsumer09?
This issue can be solved by writing your own FlinkConsumer.
Steps: 1. You have to pass the partition as a property to the Flink consumer.
Issue: with this approach you have one consumer per partition.
public class YourConsumer<T> extends FlinkKafkaConsumerBase<T> {

    public static final long DEFAULT_POLL_TIMEOUT = 100L;

    private final Properties properties;
    private final long pollTimeout;

    public YourConsumer(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
        this(Collections.singletonList(topic), valueDeserializer, props);
    }

    public YourConsumer(String topic, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this(Collections.singletonList(topic), deserializer, props);
    }

    public YourConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
        this(topics, new KeyedDeserializationSchemaWrapper<>(deserializer), props);
    }

    public YourConsumer(List<String> topics, KeyedDeserializationSchema<T> deserializer, Properties props) {
        super(topics, deserializer);
        this.properties = checkNotNull(props, "props");
        setDeserializer(this.properties);
        // configure the polling timeout
        try {
            if (properties.containsKey(KEY_POLL_TIMEOUT)) {
                this.pollTimeout = Long.parseLong(properties.getProperty(KEY_POLL_TIMEOUT));
            } else {
                this.pollTimeout = DEFAULT_POLL_TIMEOUT;
            }
        } catch (Exception e) {
            throw new IllegalArgumentException("Cannot parse poll timeout for '" + KEY_POLL_TIMEOUT + '\'', e);
        }
    }

    @Override
    protected AbstractFetcher<T, ?> createFetcher(
            SourceContext<T> sourceContext,
            List<KafkaTopicPartition> thisSubtaskPartitions,
            SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
            SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
            StreamingRuntimeContext runtimeContext) throws Exception {
        boolean useMetrics = !Boolean.valueOf(properties.getProperty(KEY_DISABLE_METRICS, "false"));
        return new Kafka09Fetcher<>(sourceContext, thisSubtaskPartitions,
                watermarksPeriodic, watermarksPunctuated,
                runtimeContext, deserializer,
                properties, pollTimeout, useMetrics);
    }

    @Override
    protected List<KafkaTopicPartition> getKafkaPartitions(List<String> topics) {
        // read the partitions that belong to the listed topics
        final List<KafkaTopicPartition> partitions = new ArrayList<>();
        // the single partition index this consumer instance should read, passed in as a property
        int partition = Integer.parseInt(this.properties.getProperty("partitions"));
        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(this.properties)) {
            for (final String topic : topics) {
                // get partitions for each topic
                List<PartitionInfo> partitionsForTopic = consumer.partitionsFor(topic);
                // for non existing topics, the list might be null.
                if (partitionsForTopic != null) {
                    partitions.addAll(convertToFlinkKafkaTopicPartition(partitionsForTopic, partition));
                }
            }
        }
        if (partitions.isEmpty()) {
            throw new RuntimeException("Unable to retrieve any partitions for the requested topics " + topics);
        }
        // we now have a list of partitions which is the same for all parallel consumer instances.
        LOG.info("Got {} partitions from these topics: {}", partitions.size(), topics);
        if (LOG.isInfoEnabled()) {
            logPartitionInfo(LOG, partitions);
        }
        return partitions;
    }

    private static List<KafkaTopicPartition> convertToFlinkKafkaTopicPartition(List<PartitionInfo> partitions, int partition) {
        checkNotNull(partitions);
        List<KafkaTopicPartition> ret = new ArrayList<>(partitions.size());
        // keep only the requested partition instead of all of them
        ret.add(new KafkaTopicPartition(partitions.get(partition).topic(), partitions.get(partition).partition()));
        return ret;
    }

    private static void setDeserializer(Properties props) {
        final String deSerName = ByteArrayDeserializer.class.getCanonicalName();
        Object keyDeSer = props.get(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        Object valDeSer = props.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
        if (keyDeSer != null && !keyDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured key DeSerializer ({})", ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        }
        if (valDeSer != null && !valDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured value DeSerializer ({})", ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
        }
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deSerName);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deSerName);
    }
}
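A hypothetical usage sketch of this approach, assuming each Flink job instance is told which partition index to read via the "partitions" property (class and property names follow the answer above; topic, group and import paths are illustrative and version-dependent):

import java.util.Properties;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

public class SinglePartitionJob {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "myGroup");
        props.setProperty("auto.offset.reset", "latest");
        props.setProperty("partitions", "0");  // this instance reads only partition 0

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(new YourConsumer<>("myTopic", new SimpleStringSchema(), props))
           .print();
        env.execute("single-partition consumer");
    }
}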