How to check whether multiple Kafka topics exist via a Kafka client (consumer or producer) [duplicate] - scala

I am trying to create a topic in Kafka 0.8.2 using:
AdminUtils.createTopic(zkClient, myTopic, 2, 1, properties);
If I run the code more than once locally for testing, this fails because the topic was already created. Is there a way to check whether the topic exists before creating it? The TopicCommand API doesn't seem to return anything for listTopics or describeTopic.

You can use AdminClient from kafka-clients version 0.11.0.0 onwards.
Sample code:
// Classes used here come from org.apache.kafka.clients.admin and java.util.
Properties config = new Properties();
config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9091");
AdminClient admin = AdminClient.create(config);
ListTopicsResult listTopics = admin.listTopics();
Set<String> names = listTopics.names().get();
boolean contains = names.contains("TEST_6");
if (!contains) {
    List<NewTopic> topicList = new ArrayList<NewTopic>();
    Map<String, String> configs = new HashMap<String, String>();
    int partitions = 5;
    short replication = 1;
    NewTopic newTopic = new NewTopic("TEST_6", partitions, replication).configs(configs);
    topicList.add(newTopic);
    admin.createTopics(topicList);
}
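One thing worth adding to the snippet above: createTopics is asynchronous and returns futures, so if you want failures (for example a TopicExistsException) to surface immediately, wait on the result, e.g.:

admin.createTopics(topicList).all().get();   // blocks until the brokers confirm creation, or throws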

For this purpose, you can use the method AdminUtils.topicExists(zkClient, topic); it returns true if the topic already exists, and false otherwise.
Your code would then be something like this:
if (!AdminUtils.topicExists(zkClient, myTopic)) {
    AdminUtils.createTopic(zkClient, myTopic, 2, 1, properties);
}
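Note that AdminUtils talks to ZooKeeper directly, which matches the 0.8.x setup in the question; on newer client versions the AdminClient approach shown above is generally the preferred route.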

public static void createKafkaTopic(String sourceTopicName, String sinkTopicName, String responseTopicName, String kafkaUrl) {
    try {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaUrl);
        AdminClient kafkaAdminClient = KafkaAdminClient.create(properties);
        ListTopicsResult topics = kafkaAdminClient.listTopics();
        Set<String> names = topics.names().get();
        boolean containsSourceTopic = names.contains(sourceTopicName);
        boolean containsSinkTopic = names.contains(sinkTopicName);
        boolean containsResponseTopic = names.contains(responseTopicName);
        if (!containsResponseTopic && !containsSinkTopic && !containsSourceTopic) {
            CreateTopicsResult result = kafkaAdminClient.createTopics(
                    Stream.of(sourceTopicName, sinkTopicName, responseTopicName).map(
                            name -> new NewTopic(name, 1, (short) 1)
                    ).collect(Collectors.toList())
            );
            result.all().get();
            LOG.info("new sourceTopicName: {}, sinkTopicName: {}, responseTopicName: {} are created",
                    sourceTopicName, sinkTopicName, responseTopicName);
        }
    } catch (ExecutionException | InterruptedException e) {
        LOG.info("Error message {}", e.getMessage());
    }
}
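If you would rather create only the topics that are actually missing (the check above skips creation entirely as soon as any one of the three already exists), a small variation along these lines should work; it is a sketch reusing the same names as the snippet above:

Set<String> existing = kafkaAdminClient.listTopics().names().get();
List<NewTopic> missing = Stream.of(sourceTopicName, sinkTopicName, responseTopicName)
        .filter(name -> !existing.contains(name))      // keep only topics the cluster does not know yet
        .map(name -> new NewTopic(name, 1, (short) 1))
        .collect(Collectors.toList());
if (!missing.isEmpty()) {
    kafkaAdminClient.createTopics(missing).all().get(); // wait so creation errors surface here
}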

Related

Kafka multiple consumer groups different threads not working as expected

So I am fairly familiar with Kafka and how consumer groups work: 2 consumers in different consumer groups that subscribe to the same topic should each get their own copy of the messages published on that topic.
Consumer per process
This holds true when using 2 different processes for the 2 consumers, where I use this code for the producer:
using System;
using System.IO;
using System.Reflection;
using System.Threading.Tasks;
using Confluent.Kafka;
using Confluent.Kafka.Admin;
using NLog;
namespace Producer
{
class Program
{
private static ILogger _logger = LogManager.GetLogger("Global");
private static string topicName = "insane6";
public static async Task Main(string[] args)
{
var config = new ProducerConfig
{
Acks = Acks.Leader,
BootstrapServers = "XXXXXXXX"
};
using (var adminClient = new AdminClientBuilder(new AdminClientConfig { BootstrapServers = config.BootstrapServers }).Build())
{
try
{
adminClient.CreateTopicsAsync(new TopicSpecification[] {
new TopicSpecification { Name = topicName, ReplicationFactor = 1, NumPartitions = 1 } }).ConfigureAwait(false).GetAwaiter().GetResult();
}
catch (CreateTopicsException e)
{
_logger.Error($"An error occured creating topic {e.Results[0].Topic}: {e.Results[0].Error.Reason}");
}
}
// If serializers are not specified, default serializers from
// `Confluent.Kafka.Serializers` will be automatically used where
// available. Note: by default strings are encoded as UTF8.
using (var p = new ProducerBuilder<Null, string>(config)
// Note: All handlers are called on the main .Consume thread.
.SetErrorHandler((_, e) => _logger.Error($"Error: {e.Reason}"))
.SetStatisticsHandler((_, json) => _logger.Debug($"Statistics: {json}"))
.SetLogHandler((consumer, message) => _logger.Debug($"{message.Level} {message.Message}"))
.Build())
{
Console.WriteLine("Type 'Q' to quit");
while (true)
{
try
{
var dr = await p.ProduceAsync(topicName,
new Message<Null, string>
{ Value =DateTime.UtcNow.ToString("O") });
_logger.Debug($"Delivered '{dr.Value}' to '{dr.TopicPartitionOffset}'");
}
catch (ProduceException<Null, string> e)
{
_logger.Error($"Delivery failed: {e.Error.Reason}");
}
var key = Console.ReadKey();
if (key.Key == ConsoleKey.Q)
{
break;
}
}
}
Console.ReadLine();
}
}
}
And I have this consumer code
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Confluent.Kafka;
namespace Consumer
{
class Program
{
private CancellationTokenSource cts = new CancellationTokenSource();
private KafkaSubscriber kafkaSubscriber;
private string topicName = "insane6";
public static void Main(string[] args)
{
var p = new Program(args[0]);
}
public Program(string consumerGroup)
{
kafkaSubscriber = new KafkaSubscriber(new ConsumerSettings()
{
ConsumerConfig = CreateConfig(consumerGroup),
Topic = topicName,
});
kafkaSubscriber.ReceiveError += KafkaSubscriber_ReceiveError;
kafkaSubscriber.CreateConsumer(cts.Token);
Console.ReadLine();
}
private void KafkaSubscriber_ReceiveError(object sender, System.IO.ErrorEventArgs e)
{
cts = new CancellationTokenSource();
kafkaSubscriber.CreateConsumer(cts.Token);
}
public ConsumerConfig CreateConfig(string consumerGroup)
{
var conf = new ConsumerConfig
{
GroupId = consumerGroup,
BootstrapServers = "XXXXXX",
AutoOffsetReset = AutoOffsetReset.Earliest,
EnableAutoCommit = false,
ClientId = Guid.NewGuid().ToString("N")
};
return conf;
}
}
}
Where the actual subscriber code looks like this
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Confluent.Kafka;
using NLog;
namespace Consumer
{
class KafkaSubscriber
{
private readonly ConsumerSettings _consumerSettings;
private static ILogger _logger = LogManager.GetLogger("Global");
public KafkaSubscriber(ConsumerSettings consumerSettings)
{
_consumerSettings = consumerSettings;
}
public event EventHandler<ErrorEventArgs> ReceiveError;
protected virtual void OnReceiveError(ErrorEventArgs e)
{
ReceiveError?.Invoke(this, e);
}
private void ErrorHandler(IConsumer<Ignore, string> consumer, Error error)
{
_logger.Error("Kafka ErrorHandler", error);
if (error.IsFatal || error.Code == ErrorCode.Local_TimedOut)
{
_logger.Error("Throwing fatal error code as exception");
throw new KafkaException(error);
}
}
public void CreateConsumer(CancellationToken ct)
{
using (var c = new ConsumerBuilder<Ignore, string>(_consumerSettings.ConsumerConfig)
.SetErrorHandler(ErrorHandler)
.SetStatisticsHandler((_, json) => _logger.Debug($"Statistics: {json}"))
.SetLogHandler((consumer, message) => _logger.Debug($"{message.Level} {message.Message}"))
.SetPartitionsAssignedHandler((c, partitions) =>
{
_logger.Info($"Assigned partitions: [{string.Join(", ", partitions)}]");
})
.SetPartitionsRevokedHandler((c, partitions) =>
{
_logger.Info($"Revoking assignment: [{string.Join(", ", partitions)}]");
})
.Build())
{
c.Subscribe(_consumerSettings.Topic);
//c.Assign();
try
{
var count = 0;
var offsets = new List<ConsumeResult<Ignore, string>>();
while (true)
{
try
{
var cr = c.Consume(ct);
c.Commit(cr);
_logger.Debug(
$"\r\n{_consumerSettings.ConsumerConfig.GroupId} Consumed message '{cr.Message.Value}' at: '{cr.TopicPartitionOffset}'.\r\n");
}
catch (ConsumeException e)
{
_logger.Error($"Error occured: {e.Error.Reason}");
OnReceiveError(new ErrorEventArgs(e));
}
catch (InvalidProgramException e)
{
_logger.Error($"Error occured: {e.Message}");
OnReceiveError(new ErrorEventArgs(e));
}
catch (KafkaException kex)
{
_logger.Error($"Error occured: {kex.Message}");
OnReceiveError(new ErrorEventArgs(kex));
}
}
}
catch (OperationCanceledException)
{
c.Close();
}
}
}
}
}
So if I run a single producer and 2 consumer processes (the consumers are run from the command line like Consumer.exe "cg1" and Consumer.exe "cg2"),
everything works as expected: both consumers get the messages from the producer on the topic, as shown in the following screenshot.
All good so far, but according to every other StackOverflow answer or Kafka doc I have seen, it should also be possible to have a consumer per thread.
Consumer per thread
So if I adjust my bootstrap consumer code to the following, it should be identical to running the 2 separate processes: each consumer uses its own consumer group name and runs on its own thread, so there really should be no difference compared to the 2 separate processes.
public static void Main(string[] args)
{
var p = new Program(new [] { "cat","dog"});
}
public Program(string[] consumerGroups)
{
foreach (var consumerGroup in consumerGroups)
{
var thread = new Thread((x) =>
{
kafkaSubscriber = new KafkaSubscriber(new ConsumerSettings()
{
ConsumerConfig = CreateConfig(consumerGroup),
Topic = topicName,
});
kafkaSubscriber.ReceiveError += KafkaSubscriber_ReceiveError;
kafkaSubscriber.CreateConsumer(cts.Token);
});
thread.Start();
}
Console.ReadLine();
}
Yet when running this code, only 1 of the consumers actually picks up messages from the topic, which is not the expected behavior at all.
I really can't see anything weird; I don't think I have missed anything, and everything seems correct. Yet only 1 consumer (the "dog" consumer group in this case) sees the produced messages, while the "cat" consumer group should also see them.
What am I doing wrong?
I am using the official Confluent.Kafka C# client: https://docs.confluent.io/clients-confluent-kafka-dotnet/current/overview.html
I was being a complete spanner: the kafkaSubscriber variable was a shared field rather than being private to each thread, so the threads could end up reusing the same consumer. Making it local to the thread fixed it, all ok now. Phew.

Unable to get number of messages in kafka topic

I am fairly new to Kafka. I have created a sample producer and consumer in Java. Using the producer, I was able to send data to a Kafka topic, but I am not able to get the number of records in the topic using the following consumer code.
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.log4j.BasicConfigurator;

public class ConsumerTests {
    public static void main(String[] args) throws Exception {
        BasicConfigurator.configure();
        String topicName = "MobileData";
        String groupId = "TestGroup";
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", groupId);
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
        kafkaConsumer.subscribe(Arrays.asList(topicName));
        try {
            while (true) {
                ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(100);
                System.out.println("Record count is " + consumerRecords.count());
            }
        } catch (WakeupException e) {
            // ignore for shutdown
        } finally {
            kafkaConsumer.close();
        }
    }
}
I don't get any exception in the console, but consumerRecords.count() always returns 0, even if there are messages in the topic. Please let me know if I am missing something to get the record details.
The poll(...) call should normally be in a loop. It's always possible for the initial poll(...) to return no data (depending on the timeout) while the partition assignment is in progress. Here's an example:
try {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        System.out.println("Record count is " + records.count());
    }
} catch (WakeupException e) {
    // ignore for shutdown
} finally {
    consumer.close();
}
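If the goal is really to count how many records are currently readable from the topic, a rough sketch (reusing the kafkaConsumer from the question; the variable names here are illustrative) is to keep polling until a poll comes back empty and sum the counts:

int total = 0;
ConsumerRecords<String, String> batch;
do {
    batch = kafkaConsumer.poll(1000);   // a longer timeout gives the first poll time to finish the group assignment
    total += batch.count();
} while (!batch.isEmpty());
System.out.println("Total records read: " + total);

You may want a few retries if the very first poll still comes back empty before the assignment completes. Also note that with a brand-new group.id and the default auto.offset.reset of latest, the consumer only sees records produced after it subscribes; setting auto.offset.reset to earliest is usually what you want while testing.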
For more info see this relevant article:

Why is Apache Storm KafkaSpout emitting so many items from Kafka topic?

I'm having issues with Kafka and Storm. I'm not sure at this point if it's a problem with the KafkaSpout config I am setting up, or if I am not ACKing properly or what.
I enqueued 50 items onto my Kafka topic, but my spout has emitted over 1300 (and counting) tuples. Also, the spout reports that almost all of them have "failed." The topology is not actually failing, it's writing to the database successfully, but I just don't know why it is apparently replaying everything so much (if that's what it's doing).
The big question is:
Why is it emitting so many tuples when I only passed 50 to Kafka?
Here is how I am setting up the topology and the KafkaSpout
public static void main(String[] args) {
try {
String databaseServerIP = "";
String kafkaZookeepers = "";
String kafkaTopicName = "";
int numWorkers = 1;
int numAckers = 1;
int numSpouts = 1;
int numBolts = 1;
int messageTimeOut = 10;
String topologyName = "";
if (args == null || args[0].isEmpty()) {
System.out.println("Args cannot be null or empty. Exiting");
return;
} else {
if (args.length == 8) {
for (String arg : args) {
if (arg == null) {
System.out.println("Parameters cannot be null. Exiting");
return;
}
}
databaseServerIP = args[0];
kafkaZookeepers = args[1];
kafkaTopicName = args[2];
numWorkers = Integer.valueOf(args[3]);
numAckers = Integer.valueOf(args[4]);
numSpouts = Integer.valueOf(args[5]);
numBolts = Integer.valueOf(args[6]);
topologyName = args[7];
} else {
System.out.println("Bad parameters: found " + args.length + ", required = 8");
return;
}
}
Config conf = new Config();
conf.setNumWorkers(numWorkers);
conf.setNumAckers(numAckers);
conf.setMessageTimeoutSecs(messageTimeOut);
conf.put("databaseServerIP", databaseServerIP);
conf.put("kafkaZookeepers", kafkaZookeepers);
conf.put("kafkaTopicName", kafkaTopicName);
/**
* Now would put kafkaSpout instance below instead of TemplateSpout()
*/
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout(topologyName + "-flatItems-from-kafka-spout", getKafkaSpout(kafkaZookeepers, kafkaTopicName), numSpouts);
builder.setBolt(topologyName + "-flatItem-Writer-Bolt", new ItemWriterBolt(), numBolts).shuffleGrouping(topologyName + "-flatItems-from-kafka-spout");
StormTopology topology = builder.createTopology();
StormSubmitter.submitTopology(topologyName, conf, topology);
} catch (Exception e) {
System.out.println("There was a problem starting the topology. Check parameters.");
e.printStackTrace();
}
}
private static KafkaSpout getKafkaSpout(String zkHosts, String topic) throws Exception {
//String topic = "FLAT-ITEMS";
String zkNode = "/" + topic + "-subscriber-pipeline";
String zkSpoutId = topic + "subscriberpipeline";
KafkaTopicInZkCreator.createTopic(topic, zkHosts);
SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHosts), topic, zkNode, zkSpoutId);
spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
// spoutConfig.useStartOffsetTimeIfOffsetOutOfRange = true;
//spoutConfig.startOffsetTime = System.currentTimeMillis();
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
return new KafkaSpout(spoutConfig);
}
and here is the creation of the topic in case that matters
public static void createTopic(String topicName, String zookeeperHosts) throws Exception {
ZkClient zkClient = null;
ZkUtils zkUtils = null;
try {
int sessionTimeOutInMs = 15 * 1000; // 15 secs
int connectionTimeOutInMs = 10 * 1000; // 10 secs
zkClient = new ZkClient(zookeeperHosts, sessionTimeOutInMs, connectionTimeOutInMs, ZKStringSerializer$.MODULE$);
zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperHosts), false);
int noOfPartitions = 1;
int noOfReplication = 1;
Properties topicConfiguration = new Properties();
boolean topicExists = AdminUtils.topicExists(zkUtils, topicName);
if (!topicExists) {
AdminUtils.createTopic(zkUtils, topicName, noOfPartitions, noOfReplication, topicConfiguration, RackAwareMode.Disabled$.MODULE$);
}
} catch (Exception ex) {
ex.printStackTrace();
} finally {
if (zkClient != null) {
zkClient.close();
}
}
}
You need to check whether the messages also failed in the bolt.
If they all failed there too, you probably didn't ack the tuples in the bolt, or there is an exception in the bolt code.
If the bolt messages were acked, it's more likely a timeout: increasing the topology message timeout or the parallelism should fix the problem.
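For reference on the acking point, a minimal bolt that acks each tuple it handles would look roughly like this. This is a sketch, not the asker's ItemWriterBolt, and the package names assume Storm 1.x (older releases use backtype.storm instead of org.apache.storm):

import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class AckingItemWriterBolt extends BaseRichBolt {
    private OutputCollector collector;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        try {
            // ... write the tuple to the database here ...
            collector.ack(tuple);    // tell the spout the tuple succeeded
        } catch (Exception e) {
            collector.fail(tuple);   // let the spout replay it
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt, nothing to declare
    }
}

If the tuple is never acked, the spout replays it once the topology message timeout expires, which matches the symptom of far more emitted tuples than enqueued messages.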

Understanding Kafka ZooKeeper auto reset

I still have doubts about Kafka's ZOOKEPER_AUTO_RESET. I have seen a lot of questions asked in this regard; kindly excuse me if this is a duplicate query.
I have a high-level Java consumer which keeps on consuming.
I have multiple topics, and all topics have a single partition.
My concern is the following.
I started the consumerkafka.jar with the consumer group name “ncdev1” and ZOOKEPER_AUTO_RESET = smallest. I could observe that the init offset is set to -1. Then I stopped and started the jar after some time; at this point it picks up the latest offset assigned to the consumer group (ncdev1), i.e. 36. I restarted again after some time, and then the init offset is set to 39, which is the latest value.
Then I changed the group name to ZOOKEPER_GROUP_ID = ncdev2 and restarted the jar file; this time the offset is again set to -1. On further restarts, it jumped to the latest value, i.e. 39.
Then I set the
ZOOKEPER_AUTO_RESET=largest and ZOOKEPER_GROUP_ID = ncdev3
Then I tried restarting the jar file with group name ncdev3. There is no difference in the way it picks up the offset when it restarts: it picks 39, the same as with the previous configuration.
Any idea why it is not picking up the offset from the beginning? Is there any other configuration needed to make it read from the beginning? (My understanding of largest and smallest comes from "What determines Kafka consumer offset?".)
Thanks in advance.
Code added:
public class ConsumerForKafka {
private final ConsumerConnector consumer;
private final String topic;
private ExecutorService executor;
ServerSocket soketToWrite;
Socket s_Accept ;
OutputStream s1out ;
DataOutputStream dos;
static boolean logEnabled ;
static File fileName;
private static final Logger logger = Logger.getLogger(ConsumerForKafka.class);
public ConsumerForKafka(String a_zookeeper, String a_groupId, String a_topic,String session_timeout,String auto_reset,String a_commitEnable) {
consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
createConsumerConfig(a_zookeeper, a_groupId,session_timeout,auto_reset,a_commitEnable));
this.topic =a_topic;
}
public void run(int a_numThreads,String a_zookeeper, String a_topic) throws InterruptedException, IOException {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(a_numThreads));
Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
Socket socks = new Socket(socketURL,socketPort);
//****
String keeper = a_zookeeper;
String topic = a_topic;
long millis = new java.util.Date().getTime();
//****
PrintWriter outWriter = new PrintWriter(socks.getOutputStream(), true);
List<KafkaStream<byte[], byte[]>> streams = null;
// now create an object to consume the messages
//
int threadNumber = 0;
// System.out.println("going to forTopic value is "+topic);
boolean keepRunningThread =false;
boolean chcek = false;
logger.info("logged");
BufferedWriter bw = null;
FileWriter fw = null;
if(logEnabled){
fw = new FileWriter(fileName, true);
bw = new BufferedWriter(fw);
}
for (;;) {
streams = consumerMap.get(topic);
keepRunningThread =true;
for (final KafkaStream stream : streams) {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while(keepRunningThread)
{
try{
if (it.hasNext()){
if(logEnabled){
String data = new String(it.next().message())+""+"\n";
bw.write(data);
bw.flush();
outWriter.print(data);
outWriter.flush();
consumer.commitOffsets();
logger.info("Explicit commit ......");
}else{
outWriter.print(new String(it.next().message())+""+"\n");
outWriter.flush();
}
}
// logger.info("running");
} catch(ConsumerTimeoutException ex) {
keepRunningThread =false;
break;
}catch(NullPointerException npe ){
keepRunningThread =true;
npe.printStackTrace();
}catch(IllegalStateException ile){
keepRunningThread =true;
ile.printStackTrace();
}
}
}
}
}
private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId,String session_timeout,String auto_reset,String commitEnable) {
Properties props = new Properties();
props.put("zookeeper.connect", a_zookeeper);
props.put("group.id", a_groupId);
props.put("zookeeper.session.timeout.ms", session_timeout);
props.put("zookeeper.sync.time.ms", "2000");
props.put("auto.offset.reset", auto_reset);
props.put("auto.commit.interval.ms", "60000");
props.put("consumer.timeout.ms", "30");
props.put("auto.commit.enable",commitEnable);
//props.put("rebalance.max.retries", "4");
return new ConsumerConfig(props);
}
public static void main(String[] args) throws InterruptedException {
String zooKeeper = PropertyUtils.getProperty("ZOOKEEPER_URL_PORT");
String groupId = PropertyUtils.getProperty("ZOOKEPER_GROUP_ID");
String session_timeout = PropertyUtils.getProperty("ZOOKEPER_SESSION_TIMOUT_MS"); //6400
String auto_reset = PropertyUtils.getProperty("ZOOKEPER_AUTO_RESET"); //smallest
String enableLogging = PropertyUtils.getProperty("ENABLE_LOG");
String directoryPath = PropertyUtils.getProperty("LOG_DIRECTORY");
String log4jpath = PropertyUtils.getProperty("LOG_DIR");
String commitEnable = PropertyUtils.getProperty("ZOOKEPER_COMMIT"); //false
PropertyConfigurator.configure(log4jpath);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
try {
Socket socks = new Socket(socketURL,socketPort);
boolean connected = socks.isConnected() && !socks.isClosed();
if(connected){
//System.out.println("Able to connect ");
}else{
logger.info("Not able to conenct to socket ..Exiting...");
System.exit(0);
}
} catch (UnknownHostException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch(java.net.ConnectException cne){
logger.info("Not able to conenct to socket ..Exitring...");
System.exit(0);
}
catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// String zooKeeper = args[0];
// String groupId = args[1];
String topic = args[0];
int threads = 1;
logEnabled = Boolean.parseBoolean(enableLogging);
if(logEnabled)
createDirectory(topic,directoryPath);
ConsumerForKafka example = new ConsumerForKafka(zooKeeper, groupId, topic, session_timeout,auto_reset,commitEnable);
try {
example.run(threads,zooKeeper,topic);
} catch(java.net.ConnectException cne){
cne.printStackTrace();
System.exit(0);
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private static void createDirectory(String topic,String d_Path) {
try{
File file = new File(d_Path);
if (!file.exists()) {
if (file.mkdir()) {
logger.info("Directory Created" +file.getPath());
} else {
logger.info("Directory Creation failed");
}
}
fileName = new File(d_Path + topic + ".log");
if (!fileName.exists()) {
fileName.createNewFile();
}
}catch(IOException IOE){
//logger.info("IOException occured during Directory or During File creation ");
}
}
}
After rereading your post carefully, I think what you ran into should be as expected.
I started the consumerkafka.jar with the consumer group name “ncdev1” and ZOOKEPER_AUTO_RESET = smallest. I could observe that the init offset is set to -1. Then I stopped and started the jar after some time; at this point it picks up the latest offset assigned to the consumer group (ncdev1), i.e. 36.
auto.offset.reset only applies when there is no initial offset or when an offset is out of range. Since you only have 36 messages in the log, it's possible for the consumer group to read all those records very quickly; that's why you see the consumer group pick up the latest offset every time it is restarted.
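In other words, once the group has committed offsets to ZooKeeper, ZOOKEPER_AUTO_RESET is ignored on restart. If you want a run to read the topic from the beginning again, one option (a sketch using the same properties your createConsumerConfig builds, with a hypothetical group id) is to start with a group id that has never committed anything, together with smallest:

props.put("group.id", "ncdev_fresh");        // a group id with no committed offsets in ZooKeeper
props.put("auto.offset.reset", "smallest");  // only takes effect because there is no committed offset yet

Alternatively, the committed offsets of an existing group live under /consumers/<groupId>/offsets in ZooKeeper and can be removed, which should also trigger the reset on the next start.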

KAFKA + FLINK 1.1.2 consumer group not working as expected

I tried to connect to one topic with 3 partitions, with 3 FlinkKafkaConsumer09 instances consuming from that topic and using the Kafka consumer group property as below:
props.setProperty("group.id", "myGroup");
props.setProperty("auto.offset.reset", "latest");
but still all 3 consumers receive all the data. According to the consumer group concept, each record should be delivered to only one consumer inside the consumer group.
It works fine with a normal Java consumer, though. Is this an issue with FlinkKafkaConsumer09?
This issue can be solved by writing your own Flink consumer.
Steps: 1. You have to pass the partition index as a property ("partitions") to the Flink consumer.
Issue: with this approach you end up with one consumer instance per partition.
public class YourConsumer<T> extends FlinkKafkaConsumerBase<T> {

    public static final long DEFAULT_POLL_TIMEOUT = 100L;

    private final long pollTimeout;

    public YourConsumer(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
        this(Collections.singletonList(topic), valueDeserializer, props);
    }

    public YourConsumer(String topic, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this(Collections.singletonList(topic), deserializer, props);
    }

    public YourConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
        this(topics, new KeyedDeserializationSchemaWrapper<>(deserializer), props);
    }

    public YourConsumer(List<String> topics, KeyedDeserializationSchema<T> deserializer, Properties props) {
        super(topics, deserializer);
        this.properties = checkNotNull(props, "props");
        setDeserializer(this.properties);
        // configure the polling timeout
        try {
            if (properties.containsKey(KEY_POLL_TIMEOUT)) {
                this.pollTimeout = Long.parseLong(properties.getProperty(KEY_POLL_TIMEOUT));
            } else {
                this.pollTimeout = DEFAULT_POLL_TIMEOUT;
            }
        } catch (Exception e) {
            throw new IllegalArgumentException("Cannot parse poll timeout for '" + KEY_POLL_TIMEOUT + '\'', e);
        }
    }

    @Override
    protected AbstractFetcher<T, ?> createFetcher(
            SourceContext<T> sourceContext,
            List<KafkaTopicPartition> thisSubtaskPartitions,
            SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
            SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
            StreamingRuntimeContext runtimeContext) throws Exception {
        boolean useMetrics = !Boolean.valueOf(properties.getProperty(KEY_DISABLE_METRICS, "false"));
        return new Kafka09Fetcher<>(sourceContext, thisSubtaskPartitions,
                watermarksPeriodic, watermarksPunctuated,
                runtimeContext, deserializer,
                properties, pollTimeout, useMetrics);
    }

    @Override
    protected List<KafkaTopicPartition> getKafkaPartitions(List<String> topics) {
        // read the partitions that belong to the listed topics
        final List<KafkaTopicPartition> partitions = new ArrayList<>();
        // the partition index this consumer instance should read, passed in as a property
        int partition = Integer.parseInt(this.properties.getProperty("partitions"));
        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(this.properties)) {
            for (final String topic : topics) {
                // get partitions for each topic
                List<PartitionInfo> partitionsForTopic = consumer.partitionsFor(topic);
                // for non existing topics, the list might be null.
                if (partitionsForTopic != null) {
                    partitions.addAll(convertToFlinkKafkaTopicPartition(partitionsForTopic, partition));
                }
            }
        }
        if (partitions.isEmpty()) {
            throw new RuntimeException("Unable to retrieve any partitions for the requested topics " + topics);
        }
        // we now have a list of partitions which is the same for all parallel consumer instances.
        LOG.info("Got {} partitions from these topics: {}", partitions.size(), topics);
        if (LOG.isInfoEnabled()) {
            logPartitionInfo(LOG, partitions);
        }
        return partitions;
    }

    private static List<KafkaTopicPartition> convertToFlinkKafkaTopicPartition(List<PartitionInfo> partitions, int partition) {
        checkNotNull(partitions);
        // keep only the single partition this instance was configured for
        List<KafkaTopicPartition> ret = new ArrayList<>(partitions.size());
        ret.add(new KafkaTopicPartition(partitions.get(partition).topic(), partitions.get(partition).partition()));
        return ret;
    }

    private static void setDeserializer(Properties props) {
        final String deSerName = ByteArrayDeserializer.class.getCanonicalName();
        Object keyDeSer = props.get(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        Object valDeSer = props.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
        if (keyDeSer != null && !keyDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured key DeSerializer ({})", ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
        }
        if (valDeSer != null && !valDeSer.equals(deSerName)) {
            LOG.warn("Ignoring configured value DeSerializer ({})", ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
        }
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deSerName);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deSerName);
    }
}
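For completeness, usage of a consumer patched like this would presumably look something like the following; the "partitions" property name matches what getKafkaPartitions reads above, the topic and server names are placeholders, and the rest is the standard Flink streaming API. Each of the 3 instances would be started with a different "partitions" value:

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("group.id", "myGroup");
props.setProperty("auto.offset.reset", "latest");
props.setProperty("partitions", "0");   // this instance will read only partition 0

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> stream = env.addSource(
        new YourConsumer<>("myTopic", new SimpleStringSchema(), props));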