Kafka Streams: State Store partition error - apache-kafka

I defined a custom store for use in a custom Transformer (reference below).
https://github.com/apache/kafka/blob/trunk/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java
public class KafkaStream {
    public static void main(String[] args) {
        StateStoreSupplier houseStore = Stores.create("HOUSE")
                .withKeys(Serdes.String())
                .withValues(houseSerde)
                .persistent()
                .build();
        KStreamBuilder kstreamBuilder = new KStreamBuilder();
        kstreamBuilder.addStateStore(houseStore);
        // ...
        KStream<String, String> testStream = kstreamBuilder.stream(Serdes.String(), Serdes.String(), "test");
        testStream.transform(HouseDetail::new, houseStore.name());
        // ...
    }
}
class HouseDetail implements Transformer<String, String, KeyValue<String, House>> {
    private KeyValueStore<String, House> usageStore;

    @SuppressWarnings("unchecked")
    @Override
    public void init(ProcessorContext context) {
        this.usageStore = (KeyValueStore<String, House>) context.getStateStore("HOUSE");
    }
    // ...
}
I get the following exception. I'm not sure why the internal topic "test_01-HOUSE-changelog" is being created with a single partition and a replication factor of 1, as opposed to the 2 partitions of the source topic "test". What am I missing here?
[2018-05-14 23:38:09,391] ERROR stream-thread [StreamThread-1] Failed to create an active task 0_1: (org.apache.kafka.streams.processor.internals.StreamThread:666)
org.apache.kafka.streams.errors.StreamsException: task [0_1] Store HOUSE's change log (test_01-HOUSE-changelog) does not contain partition 1
at org.apache.kafka.streams.processor.internals.ProcessorStateManager.register(ProcessorStateManager.java:185)
at org.apache.kafka.streams.processor.internals.ProcessorContextImpl.register(ProcessorContextImpl.java:123)
at org.apache.kafka.streams.state.internals.RocksDBStore.init(RocksDBStore.java:169)
at org.apache.kafka.streams.state.internals.MeteredKeyValueStore.init(MeteredKeyValueStore.java:85)
at org.apache.kafka.streams.processor.internals.AbstractTask.initializeStateStores(AbstractTask.java:81)
at org.apache.kafka.streams.processor.internals.StreamTask.<init>(StreamTask.java:119)
at org.apache.kafka.streams.processor.internals.StreamThread.createStreamTask(StreamThread.java:633)
at org.apache.kafka.streams.processor.internals.StreamThread.addStreamTasks(StreamThread.java:660)
at org.apache.kafka.streams.processor.internals.StreamThread.access$100(StreamThread.java:69)
at org.apache.kafka.streams.processor.internals.StreamThread$1.onPartitionsAssigned(StreamThread.java:124)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.onJoinComplete(ConsumerCoordinator.java:228)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.joinGroupIfNeeded(AbstractCoordinator.java:313)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:277)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:259)
at org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1013)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:979)
at org.apache.kafka.streams.processor.internals.StreamThread.runLoop(StreamThread.java:407)
at org.apache.kafka.streams.processor.internals.StreamThread.run(StreamThread.java:242)
$ ./kafka-topics.sh --zookeeper localhost:2181 --topic test --describe
Topic:test PartitionCount:2 ReplicationFactor:3 Configs:
Topic: test Partition: 0 Leader: 1001 Replicas: 1001,1002,1003 Isr: 1002,1001,1003
Topic: test Partition: 1 Leader: 1002 Replicas: 1002,1003,1001 Isr: 1002,1001,1003
$ ./kafka-topics.sh --zookeeper localhost:2181 --topic test_01-HOUSE-changelog --describe
Topic:test_01-HOUSE-changelog PartitionCount:1 ReplicationFactor:1 Configs:
Topic: test_01-HOUSE-changelog Partition: 0 Leader: 1001 Replicas: 1001 Isr: 1001
Exception after disabling the auto-topic creation
[2018-05-17 14:25:41,114] ERROR stream-thread [StreamThread-1] Failed to create an active task 0_0: (org.apache.kafka.streams.processor.internals.StreamThread:666)
org.apache.kafka.streams.errors.StreamsException: task [0_0] Could not find partition info for topic: test_01-HOUSE-changelog
at org.apache.kafka.streams.processor.internals.ProcessorStateManager.register(ProcessorStateManager.java:174)
at org.apache.kafka.streams.processor.internals.ProcessorContextImpl.register(ProcessorContextImpl.java:123)
at org.apache.kafka.streams.state.internals.RocksDBStore.init(RocksDBStore.java:169)
at org.apache.kafka.streams.state.internals.MeteredKeyValueStore.init(MeteredKeyValueStore.java:85)
at org.apache.kafka.streams.processor.internals.AbstractTask.initializeStateStores(AbstractTask.java:81)
at org.apache.kafka.streams.processor.internals.StreamTask.<init>(StreamTask.java:119)
at org.apache.kafka.streams.processor.internals.StreamThread.createStreamTask(StreamThread.java:633)
at org.apache.kafka.streams.processor.internals.StreamThread.addStreamTasks(StreamThread.java:660)
at org.apache.kafka.streams.processor.internals.StreamThread.access$100(StreamThread.java:69)
at org.apache.kafka.streams.processor.internals.StreamThread$1.onPartitionsAssigned(StreamThread.java:124)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.onJoinComplete(ConsumerCoordinator.java:228)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.joinGroupIfNeeded(AbstractCoordinator.java:313)
at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:277)
at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:259)
at org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1013)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:979)
at org.apache.kafka.streams.processor.internals.StreamThread.runLoop(StreamThread.java:407)
at org.apache.kafka.streams.processor.internals.StreamThread.run(StreamThread.java:242)

If the changelog topic already exists with one partition, Kafka Streams will not automatically change its number of partitions. It's unclear from the information you provide why the topic was created with one partition. One possibility is that your input topic had only one partition when you started your app the first time, and you later added a second partition to the input topic.
You need to clean up the application using the application reset tool as described in the docs (note that it's a two-step process): https://docs.confluent.io/current/streams/developer-guide/app-reset-tool.html
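A rough sketch of the two steps, assuming the application.id is test_01 (inferred from the changelog topic name test_01-HOUSE-changelog) and the input topic is test; names, versions, and the broker address may need adjusting for your setup:

import java.util.Properties;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStreamBuilder;

// Step 1 (outside the application): run the reset tool once, e.g.
//   bin/kafka-streams-application-reset.sh --application-id test_01 --input-topics test
// Step 2 (inside the application): wipe the local state before restarting.
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "test_01");           // assumed application.id
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker address

KStreamBuilder kstreamBuilder = new KStreamBuilder();
// ... add the state store and topology exactly as in the question ...

KafkaStreams streams = new KafkaStreams(kstreamBuilder, props);
streams.cleanUp();   // removes this instance's local RocksDB state directories
streams.start();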

Related

FlinkKafkaConsumer010 doesn't work when set with setStartFromTimestamp

I'm using Flink streaming and flink-connector-kafka to process data from Kafka. I configure FlinkKafkaConsumer010 with setStartFromTimestamp(1586852770000L); at that point all record timestamps in Kafka topic A are before 1586852770000L. I then send some messages to partition-0 and partition-4 of topic A (topic A has 6 partitions, and the current system time is already after 1586852770000L), but my Flink program doesn't consume any data from topic A. Is this an issue?
If I stop my Flink program and restart it, it can consume data from partition-0 and partition-4 of topic A, but it still won't consume any data from the other 4 partitions if I send data to them, unless I restart my Flink program yet again.
The Kafka client log is as follows:
2020-04-15 11:48:46,447 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Sending ListOffsetRequest (type=ListOffsetRequest, replicaId=-1, partitionTimestamps={TopicA-4=1586836800000}, minVersion=1) to broker server1:9092 (id: 185 rack: null)
2020-04-15 11:48:46,463 TRACE org.apache.kafka.clients.NetworkClient - Sending {replica_id=-1,topics=[{topic=TopicA,partitions=[{partition=0,timestamp=1586836800000}]}]} to node 184.
2020-04-15 11:48:46,466 TRACE org.apache.kafka.clients.NetworkClient - Completed receive from node 185, for key 2, received {responses=[{topic=TopicA,partition_responses=[{partition=4,error_code=0,timestamp=1586852770000,offset=4}]}]}
2020-04-15 11:48:46,467 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Received ListOffsetResponse {responses=[{topic=TopicA,partition_responses=[{partition=4,error_code=0,timestamp=1586852770000,offset=4}]}]} from broker server1:9092 (id: 185 rack: null)
2020-04-15 11:48:46,467 DEBUG org.apache.kafka.clients.consumer.internals.Fetcher - Handling ListOffsetResponse response for TopicA-4. Fetched offset 4, timestamp 1586852770000
2020-04-15 11:48:46,448 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Sending ListOffsetRequest (type=ListOffsetRequest, replicaId=-1, partitionTimestamps={TopicA-0=1586836800000}, minVersion=1) to broker server2:9092 (id: 184 rack: null)
2020-04-15 11:48:46,463 TRACE org.apache.kafka.clients.NetworkClient - Sending {replica_id=-1,topics=[{topic=TopicA,partitions=[{partition=0,timestamp=1586836800000}]}]} to node 184.
2020-04-15 11:48:46,467 TRACE org.apache.kafka.clients.NetworkClient - Completed receive from node 184, for key 2, received {responses=[{topic=TopicA,partition_responses=[{partition=0,error_code=0,timestamp=1586863210000,offset=47}]}]}
2020-04-15 11:48:46,467 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Received ListOffsetResponse {responses=[{topic=TopicA,partition_responses=[{partition=0,error_code=0,timestamp=1586863210000,offset=47}]}]} from broker server2:9092 (id: 184 rack: null)
2020-04-15 11:48:46,467 DEBUG org.apache.kafka.clients.consumer.internals.Fetcher - Handling ListOffsetResponse response for TopicA-0. Fetched offset 47, timestamp 1586863210000
2020-04-15 11:48:46,448 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Sending ListOffsetRequest (type=ListOffsetRequest, replicaId=-1, partitionTimestamps={TopicA-2=1586836800000}, minVersion=1) to broker server3:9092 (id: 183 rack: null)
2020-04-15 11:48:46,465 TRACE org.apache.kafka.clients.NetworkClient - Sending {replica_id=-1,topics=[{topic=TopicA,partitions=[{partition=2,timestamp=1586836800000}]}]} to node 183.
2020-04-15 11:48:46,468 TRACE org.apache.kafka.clients.NetworkClient - Completed receive from node 183, for key 2, received {responses=[{topic=TopicA,partition_responses=[{partition=2,error_code=0,timestamp=-1,offset=-1}]}]}
2020-04-15 11:48:46,468 TRACE org.apache.kafka.clients.consumer.internals.Fetcher - Received ListOffsetResponse {responses=[{topic=TopicA,partition_responses=[{partition=2,error_code=0,timestamp=-1,offset=-1}]}]} from broker server3:9092 (id: 183 rack: null)
2020-04-15 11:48:46,468 DEBUG org.apache.kafka.clients.consumer.internals.Fetcher - Handling ListOffsetResponse response for TopicA-2. Fetched offset -1, timestamp -1
2020-04-15 11:48:46,481 INFO org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase - Consumer subtask 0 will start reading the following 2 partitions from timestamp 1586836800000: [KafkaTopicPartition{topic='TopicA', partition=4}, KafkaTopicPartition{topic='TopicA', partition=0}]
From the log, every partition except partition-0 and partition-4 comes back with an offset of -1. Why is the returned offset -1 instead of the latest offset?
In the Kafka client's code (Fetcher.java, lines 674-680):
// Handle v1 and later response
log.debug("Handling ListOffsetResponse response for {}. Fetched offset {}, timestamp {}",
        topicPartition, partitionData.offset, partitionData.timestamp);
if (partitionData.offset != ListOffsetResponse.UNKNOWN_OFFSET) {
    OffsetData offsetData = new OffsetData(partitionData.offset, partitionData.timestamp);
    timestampOffsetMap.put(topicPartition, offsetData);
}
The value of ListOffsetResponse.UNKNOWN_OFFSET is -1, so the other 4 partitions are filtered out and the Kafka consumer will not consume data from them.
My Flink version is 1.9.2 and the Flink Kafka connector is:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
    <version>1.9.2</version>
</dependency>
The Flink Kafka connector documentation describes setStartFromTimestamp as follows:
setStartFromTimestamp(long): Start from the specified timestamp. For
each partition, the record whose timestamp is larger than or equal to
the specified timestamp will be used as the start position. If a
partition’s latest record is earlier than the timestamp, the partition
will simply be read from the latest record.
Test program code:
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.junit.Test

class TestFlinkKafka {

  @Test
  def testFlinkKafkaDemo: Unit = {
    // 1. set up the streaming execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)
    // To use fault-tolerant Kafka consumers, checkpointing needs to be enabled on the execution environment
    env.enableCheckpointing(60000)

    // 2. kafka source
    val topic = "message"
    val schema = new SimpleStringSchema()
    // server1:9092,server2:9092,server3:9092
    val props = getKafkaConsumerProperties("localhost:9092", "flink-streaming-client", "latest")
    val consumer = new FlinkKafkaConsumer010(topic, schema, props)

    // consume data starting from a specific timestamp's offset
    // 2020/4/14 20:00:00
    // consumer.setStartFromTimestamp(1586865600000L)
    // 2020/4/15 20:00:00
    consumer.setStartFromTimestamp(1586952000000L)
    consumer.setCommitOffsetsOnCheckpoints(true)

    // 3. transform
    val stream = env.addSource(consumer)
      .map(x => x)

    // 4. sink
    stream.print()

    // 5. execute
    env.execute("testFlinkKafkaConsumer")
  }

  def getKafkaConsumerProperties(brokerList: String, groupId: String, offsetReset: String): Properties = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", brokerList)
    props.setProperty("group.id", groupId)
    props.setProperty("auto.offset.reset", offsetReset)
    props.setProperty("flink.partition-discovery.interval-millis", "30000")
    props
  }
}
set log level for kafka:
log4j.logger.org.apache.kafka=TRACE
create kafka topic:
kafka-topics --zookeeper localhost:2181/kafka --create --topic message --partitions 6 --replication-factor 1
send message to kafka topic
kafka-console-producer --broker-list localhost:9092 --topic message
{"name":"tom"}
{"name":"michael"}
This problem was resolved by upgrading the Flink/Kafka connector to the newer, universal connector -- FlinkKafkaConsumer -- available from flink-connector-kafka_2.11. This version of the connector is recommended for all versions of Kafka from 1.0.0 forward. With Kafka 0.10.x or 0.11.x, it is better to use the version-specific flink-connector-kafka-0.10_2.11 or flink-connector-kafka-0.11_2.11 connectors. (And in all cases, substitute 2.12 for 2.11 if you are using Scala 2.12.)
See the Flink documentation for more information on Flink's Kafka connector.
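For reference, a minimal sketch of the same source wired up through the universal connector (shown in Java; the topic, broker list, and timestamp mirror the question, everything else is an assumption):

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class UniversalConnectorSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(60000);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");   // assumed broker list
        props.setProperty("group.id", "flink-streaming-client");

        // Universal connector from flink-connector-kafka_2.11 (for Kafka 1.0.0+ brokers)
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("message", new SimpleStringSchema(), props);
        consumer.setStartFromTimestamp(1586952000000L);              // 2020/4/15 20:00:00
        consumer.setCommitOffsetsOnCheckpoints(true);

        env.addSource(consumer).print();
        env.execute("testFlinkKafkaConsumer");
    }
}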

Kafka: Increased partitions could not be assigned in next rebalance

I've run into something strange with Kafka rebalancing. If I increase the partitions of a topic that is subscribed to by several Java consumers (in the same group), no consumer rebalance occurs. After that, I try to trigger a rebalance by starting a new consumer (or killing one), but the newly added partitions are not assigned in this rebalance. I found that the new partitions are only assigned after I stop all consumers and start them again. I don't know if this is normal or if there is an explanation for it.
Below is my test on my computer:
1. Start Kafka and ZooKeeper. Create a normal topic (test-topic) with 1 partition:
./bin/kafka-topics.sh --zookeeper 127.0.0.1:2181 --create --topic test-topic --partitions 1 --replication-factor 1 --config retention.ms=604800000
2. Start 2 Java consumers (C1, C2) that subscribe to test-topic.
3. Add 2 more partitions to test-topic:
$ ./bin/kafka-topics.sh --zookeeper 127.0.0.1:2181 --alter --topic test-topic --partitions 3
No rebalance occurs in C1 or C2.
4. Start a new consumer C3 that subscribes to test-topic. A rebalance occurs, but only partition test-topic-0 is involved in the reassignment; neither test-topic-1 nor test-topic-2 is involved.
5. I try to trigger a rebalance by stopping C2 and C3. However, test-topic-1 and test-topic-2 are still not assigned.
6. Stop all running consumers and then start them again. All of test-topic-0,1,2 are assigned normally.
Kafka & Java API version: kafka_2.12-2.0.0 (I also tried kafka_2.11-1.0.0 and kafka_2.10-0.10.2.1, with the same result)
ZooKeeper: 3.4.13
consumer code:
public class KafkaConsumerThread extends Thread {
    // consumer settings
    public static org.apache.kafka.clients.consumer.KafkaConsumer<String, String> createNativeConsumer(String groupName, String kafkaBootstrap) {
        Properties props = new Properties();
        props.put("bootstrap.servers", kafkaBootstrap);
        props.put("group.id", groupName);
        props.put("auto.offset.reset", "earliest");
        props.put("enable.auto.commit", true);
       props.put("key.deserializer","org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer","org.apache.kafka.common.serialization.StringDeserializer");
        return new KafkaConsumer<String, String>(props);
    }
    @Override
    public void run() {
        log.info("Start consumer ..");
        consumer.subscribe(Collections.singleton(topicName), consumerRebalanceListener);
        while (!stop) {
            try {
                ConsumerRecords<String, String> records = consumer.poll(100);
                receivedRecordNumber.addAndGet(records.count());
                Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
                while (iterator.hasNext()) {
                    ConsumerRecord<String, String> record = iterator.next();
                    log.info("Receive [key:{}][value:{}]", record.key(), record.value());
                }
            } catch (TimeoutException e) {
                log.info("no data");
            }
        }
        consumer.close();
    }
}
Thanks for @Aftab Virtual's comment. I tested again and waited longer. About 5 minutes after the first consumer started, a rebalance was automatically triggered and all partitions test-topic-0,1,2 were reassigned. Therefore, Kafka does rebalance automatically some time after partitions are altered.
Furthermore, I followed @Aftab Virtual's advice and changed leader.imbalance.check.interval.seconds to 30. However, the rebalance involving all partitions still occurred about 3 minutes after the partitions were increased. I did add these broker settings:
auto.leader.rebalance.enable = true
leader.imbalance.check.interval.seconds = 30
I don't know what the mechanism behind this rebalance is, and there are no further logs for it:
[2018-10-18 11:32:47,958] INFO [GroupCoordinator 0]: Preparing to rebalance group test-group with old generation 4 (__consumer_offsets-12) (kafka.coordinator.group.GroupCoordinator)
[2018-10-18 11:32:50,963] INFO [GroupCoordinator 0]: Stabilized group test-group generation 5 (__consumer_offsets-12) (kafka.coordinator.group.GroupCoordinator)
[2018-10-18 11:32:50,964] INFO [GroupCoordinator 0]: Assignment received from leader for group test-group for generation 5 (kafka.coordinator.group.GroupCoordinator)
After seeking advice from the Kafka team and some Kafka users, I got an explanation for my test result. It's not a bug.
Increasing the partitions marks metadata.updateNeeded=true. However, this does not actually trigger an update until the next metadata expiry time (the default metadata.max.age.ms is 5*60*1000 ms). Until the group leader has updated its metadata, a rebalance caused by changing the number of consumers will not involve the new partitions.
I decreased metadata.max.age.ms to 30 seconds and Kafka became more responsive to partition increases.
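For illustration, metadata.max.age.ms is just a client-side property; a minimal sketch of a consumer that refreshes metadata every 30 seconds (the broker address and group id are placeholders):

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class FastMetadataRefreshConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "test-group");              // placeholder
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        // Refresh cluster metadata every 30 s instead of the default 5 min,
        // so newly added partitions are noticed sooner.
        props.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, "30000");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singleton("test-topic"));
    }
}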

Spring kafka and Kafka Cluster

I've configured 3 Kafka brokers in a cluster and I'm trying to use them with spring-kafka.
But after I kill the Kafka leader I'm no longer able to send messages to the topic.
I'm setting the spring.kafka.bootstrap-servers property as: "kafka-1:9092;kafka-2:9093,kafka-3:9094", and all of the names are in my hosts file.
Kafka version 0.10
Does anyone know the correct configuration?
Edit
I have tested one thing and observed strange behavior.
When I start the service, I send a message to the topic (to force its creation).
Code:
@Bean
public KafkaSyncListener synchronousListener(MessageSender sender, KafkaProperties prop) {
    sender.send(prop.getSynchronousTopic(), "Message to force create the topic! Run, Forrest, Run!");
    return new KafkaSyncListener();
}
This time I did not start the kafka-1 server (just the others), and I got this exception:
org.springframework.kafka.core.KafkaProducerException: Failed to send;
nested exception is org.apache.kafka.common.errors.TimeoutException:
Failed to update metadata after 60000 ms.
It seems spring-kafka just tries to connect to the first bootstrap server.
I'm using spring-kafka 1.3.5.RELEASE and Kafka 0.10.1.1.
Edit 2
I ran the test that you did, and the same thing happens: when I remove the first Docker container (kafka-1), the leader changes, but my consumer (the Spring service) is not able to consume messages.
When I start kafka-1 again, the service gets all the messages.
My consumer ConcurrentKafkaListenerContainerFactory:
{
    key.deserializer=class org.apache.kafka.common.serialization.IntegerDeserializer,
    value.deserializer=class org.apache.kafka.common.serialization.StringDeserializer,
    max.poll.records=500,
    group.id=mongo-adapter-service,
    ssl.keystore.location=/certs/kafka.keystore.jks,
    bootstrap.servers=[kafka-2:9093, kafka-1:9092, kafka-3:9094],
    auto.commit.interval.ms=100,
    security.protocol=SSL,
    max.request.size=5242880,
    ssl.truststore.location=/certs/kafka.keystore.jks,
    auto.offset.reset=earliest
}
You need a comma between server addresses, not a semicolon.
EDIT
I just ran a test with no problems:
spring.kafka.bootstrap-servers=localhost:9092,localhost:9093,localhost:9094
and
@SpringBootApplication
public class So50804678Application {

    public static void main(String[] args) {
        SpringApplication.run(So50804678Application.class, args);
    }

    @KafkaListener(id = "foo", topics = "so50804678")
    public void in(String in) {
        System.out.println(in);
    }

    @Bean
    public NewTopic topic() {
        return new NewTopic("so50804678", 1, (short) 3);
    }

}
and
$ kafka-topics --zookeeper localhost:2181 --describe --topic so50804678
Topic:so50804678 PartitionCount:1 ReplicationFactor:3 Configs:
Topic: so50804678 Partition: 0 Leader: 0 Replicas: 0,1,2 Isr: 0,1,2
Killed the leader, and
$ kafka-topics --zookeeper localhost:2181 --describe --topic so50804678
Topic:so50804678 PartitionCount:1 ReplicationFactor:3 Configs:
Topic: so50804678 Partition: 0 Leader: 1 Replicas: 0,1,2 Isr: 1,2
and
$ kafka-console-producer --broker-list localhost:9092,localhost:9093,localhost:9093 --topic so50804678
Sent a message and it was received by the app; no errors in the log except a WARN:
[Consumer clientId=consumer-1, groupId=foo] Connection to node 0 could not be established. Broker may not be available.
I then restarted the dead server; stopped my app; then added this code...
@Bean
public ApplicationRunner runner(KafkaTemplate<String, String> template) {
    return args -> {
        while (true) {
            System.out.println(template.send("so50804678", "foo").get().getRecordMetadata());
            Thread.sleep(3_000);
        }
    };
}
Again, killing the current leader had no impact; everything recovered ok.
You may need to tweak the listeners/advertised.listeners properties in your server props. Since my brokers are all on localhost, I left them at the default.
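For example (a sketch, assuming the brokers run in Docker with the hostnames kafka-1/kafka-2/kafka-3 from the question), each broker's server.properties would advertise a name that clients can actually resolve:

# on the kafka-1 broker (repeat per broker with its own hostname and port; values are assumptions)
listeners=PLAINTEXT://0.0.0.0:9092
advertised.listeners=PLAINTEXT://kafka-1:9092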

Code to find the lag in the consumer offset using kafka library?

I want to get the progress of the Kafka consumer, i.e. the lag. I know the following commands give me the lag and other valuable details:
bin/kafka-run-class.sh kafka.admin.ConsumerGroupCommand --zookeeper localhost:2182 --describe --group DemoConsumer
bin/kafka-consumer-groups.sh --zookeeper localhost:2182 --describe --group DemoConsumer
I can also get the current consumer offset using the following code snippet with the help of the kafka-clients library:
ConsumerRecords<Integer, String> records = consumer.poll(100);
for (ConsumerRecord<Integer, String> record : records) {
    System.out.println("Received message: (" + record.topic() + ", "
            + record.partition() + ", " + record.key() + ", "
            + record.value() + ") at offset " + record.offset());
}
But I can't find code that gives the same details as the above two commands. Is there any way to find the lag and other details using the Kafka library?
According to this topic, you can get the consumer lag. However, the Maven dependency given in that topic is wrong; it should be:
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.12</artifactId>
    <version>0.10.2.0</version>
</dependency>
And the code is:
AdminClient client = AdminClient.createSimplePlaintext("localhost:9092");
Map<TopicPartition, Object> offsets = JavaConversions.asJavaMap(
        client.listGroupOffsets("groupID"));
Long offset = (Long) offsets.get(new TopicPartition("topic", 0));
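On newer client versions (roughly 2.0 and later), a similar lag calculation can be sketched with the Java AdminClient and a plain consumer; this is an illustration rather than the answer's original approach, and the group id, broker address, and helper group name are placeholders:

import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;

public class ConsumerLagSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");   // placeholder

        try (AdminClient admin = AdminClient.create(props)) {
            // Committed offsets for the group, per partition
            Map<TopicPartition, OffsetAndMetadata> committed =
                    admin.listConsumerGroupOffsets("DemoConsumer")
                         .partitionsToOffsetAndMetadata().get();

            Properties cprops = new Properties();
            cprops.put("bootstrap.servers", "localhost:9092");
            cprops.put("group.id", "lag-checker");           // separate, read-only group
            cprops.put("key.deserializer", StringDeserializer.class.getName());
            cprops.put("value.deserializer", StringDeserializer.class.getName());

            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(cprops)) {
                // Log-end offsets for the same partitions; lag = end offset - committed offset
                Map<TopicPartition, Long> endOffsets = consumer.endOffsets(committed.keySet());
                committed.forEach((tp, om) ->
                        System.out.println(tp + " lag=" + (endOffsets.get(tp) - om.offset())));
            }
        }
    }
}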

Apache Kafka 0.9.0.0 Show all Topics with Partitions

I'm currently evaluating Apache Kafka and I have a simple consumer that is supposed to read messages from a specific topic partition. Here is my client:
public static void main(String args[]) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("group.id", "test");
    props.put("enable.auto.commit", "false");
    props.put("auto.commit.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);

    TopicPartition partition0 = new TopicPartition("test_topic", Integer.parseInt(args[0]));
    ArrayList<TopicPartition> topicAssignment = new ArrayList<>();
    topicAssignment.add(partition0);
    consumer.assign(topicAssignment);
    //consumer.subscribe(Arrays.asList("test_topic"));

    int commitInterval = 200;
    List<ConsumerRecord<String, String>> buffer = new ArrayList<ConsumerRecord<String, String>>();

    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records) {
            buffer.add(record);
            if (buffer.size() >= commitInterval) {
                process(buffer);
                consumer.commitSync();
                buffer.clear();
            }
        }
    }
}

static void process(List<ConsumerRecord<String, String>> buffers) {
    for (ConsumerRecord<String, String> buffer : buffers) {
        System.out.println(buffer);
    }
}
Here is the command that I use to start Apache Kafka:
bin/kafka-server-start.sh config/server.properties & bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 2 --topic test_topic
As you can see here, I'm creating the topic with 2 partitions (p0 and p1)!
I'm then starting two instances of my consumer with the following commands:
For Consumer 1:
java -cp target/scala-2.11/kafka-consumer-0.1.0-SNAPAHOT.jar com.test.api.consumer.KafkaConsumer09Java 0
For Consumer 2:
java -cp target/scala-2.11/kafka-consumer-0.1.0-SNAPAHOT.jar com.test.api.consumer.KafkaConsumer09Java 1
Here 0 and 1 represent the partition from which I want each consumer to read messages.
But what happens is that only Consumer 1 gets all the messages. I was under the impression that the messages from the producer would be distributed evenly across the partitions.
I used the following command to see how many partitions I have for my topic test_topic:
Joes-MacBook-Pro:kafka_2.11-0.9.0.0 joe$ bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --broker-info --group test --topic test_topic --zookeeper localhost:2181
[2016-01-14 13:36:48,831] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
test test_topic 0 10000 10000 0 none
BROKER INFO
0 -> 172.22.4.34:9092
Why is there only one partition even though I told Kafka to create 2 partitions for test_topic?
Here is my producer:
def main(args: Array[String]) {
  //val conf = new SparkConf().setAppName("VPP metrics producer")
  //val sc = new SparkContext(conf)
  val props: Properties = new Properties()
  props.put("metadata.broker.list", "localhost:9092,localhost:9093")
  props.put("serializer.class", "kafka.serializer.StringEncoder")

  val config = new ProducerConfig(props)
  val producer = new Producer[String, String](config)

  1 to 10000 map { case i =>
    val jsonStr = getRandomTsDataPoint().toJson.toString
    println(s"sending message $i to kafka")
    producer.send(new KeyedMessage[String, String]("test_topic", jsonStr))
    println(s"sent message $i to kafka")
  }
}
I'm not sure why you would have 1 partition if you created the topic with 2. Never happened to me, that's for sure.
Can you try this:
bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic test_topic
That should show you how many partitions are really there.
Then, if there's really 1 partition, maybe you could start over by creating a new topic with:
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 2 --topic test_topic_2
And then try:
bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic test_topic_2
... and report back the findings.
You are just consuming from partition 0, but you also need to consume from partition 1. If you consume from partition 1 and commit, you will also see partition 1 in the Pid column.
But you also need a producer that writes into partition 1.
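For illustration, one way to get records onto both partitions is to send keyed records (or target a partition explicitly). This sketch uses the newer org.apache.kafka.clients.producer API rather than the old Scala producer from the question, and the broker address and key/value strings are placeholders:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KeyedProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");   // placeholder
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10000; i++) {
                // With a key, the default partitioner hashes the key, so different keys
                // spread records across both partitions of test_topic.
                producer.send(new ProducerRecord<>("test_topic", "key-" + i, "message-" + i));

                // Alternatively, target a partition explicitly:
                // producer.send(new ProducerRecord<>("test_topic", 1, "key-" + i, "message-" + i));
            }
        }
    }
}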