Kafka multiple consumer groups different threads not working as expected - apache-kafka

So I am fairly familiar with Kafka and how consumer groups work, in that 2 consumers in different consumer groups that subscribe to the same topic should both get their own copy of the published messages on a Kafka Topic.
Consumer per process
This holds true when using 2 different processes for the 2 consumers. Where I can use this code for the producer
using System;
using System.IO;
using System.Reflection;
using System.Threading.Tasks;
using Confluent.Kafka;
using Confluent.Kafka.Admin;
using NLog;
namespace Producer
{
    /// <summary>
    /// Console producer: ensures the topic exists, then publishes one UTC timestamp
    /// message per key press until the user presses 'Q'.
    /// </summary>
    class Program
    {
        private static readonly ILogger _logger = LogManager.GetLogger("Global");
        private static readonly string topicName = "insane6";

        /// <summary>
        /// Entry point. Creates the topic (single partition, replication factor 1) and
        /// then runs the interactive produce loop.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        public static async Task Main(string[] args)
        {
            var config = new ProducerConfig
            {
                Acks = Acks.Leader,
                BootstrapServers = "XXXXXXXX"
            };

            using (var adminClient = new AdminClientBuilder(new AdminClientConfig { BootstrapServers = config.BootstrapServers }).Build())
            {
                try
                {
                    // Main is already async, so await the call instead of blocking on
                    // GetAwaiter().GetResult().
                    await adminClient.CreateTopicsAsync(new TopicSpecification[]
                    {
                        new TopicSpecification { Name = topicName, ReplicationFactor = 1, NumPartitions = 1 }
                    });
                }
                catch (CreateTopicsException e)
                {
                    // Logged and ignored so that the producer still starts when topic
                    // creation fails.
                    _logger.Error($"An error occured creating topic {e.Results[0].Topic}: {e.Results[0].Error.Reason}");
                }
            }

            // If serializers are not specified, default serializers from
            // `Confluent.Kafka.Serializers` will be automatically used where
            // available. Note: by default strings are encoded as UTF8.
            using (var p = new ProducerBuilder<Null, string>(config)
                // Note: these handlers are invoked by the producer client itself,
                // not by a Consume call (this is a producer).
                .SetErrorHandler((_, e) => _logger.Error($"Error: {e.Reason}"))
                .SetStatisticsHandler((_, json) => _logger.Debug($"Statistics: {json}"))
                .SetLogHandler((_, message) => _logger.Debug($"{message.Level} {message.Message}"))
                .Build())
            {
                Console.WriteLine("Type 'Q' to quit");
                while (true)
                {
                    try
                    {
                        var dr = await p.ProduceAsync(
                            topicName,
                            new Message<Null, string> { Value = DateTime.UtcNow.ToString("O") });
                        _logger.Debug($"Delivered '{dr.Value}' to '{dr.TopicPartitionOffset}'");
                    }
                    catch (ProduceException<Null, string> e)
                    {
                        _logger.Error($"Delivery failed: {e.Error.Reason}");
                    }

                    // One message is produced per key press; 'Q' ends the loop.
                    var key = Console.ReadKey();
                    if (key.Key == ConsoleKey.Q)
                    {
                        break;
                    }
                }
            }

            Console.ReadLine();
        }
    }
}
And I have this consumer code
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Confluent.Kafka;
namespace Consumer
{
    /// <summary>
    /// Console host that runs a single <see cref="KafkaSubscriber"/> for the consumer
    /// group given on the command line.
    /// </summary>
    class Program
    {
        private CancellationTokenSource cts = new CancellationTokenSource();
        private KafkaSubscriber kafkaSubscriber;
        private string topicName = "insane6";

        /// <summary>
        /// Entry point. Expects the consumer group name as the first argument,
        /// e.g. <c>Consumer.exe "cg1"</c>.
        /// </summary>
        /// <param name="args">Command line arguments; <c>args[0]</c> is the consumer group id.</param>
        public static void Main(string[] args)
        {
            // Fail with a usage hint instead of an IndexOutOfRangeException when the
            // group name is missing.
            if (args == null || args.Length == 0 || string.IsNullOrWhiteSpace(args[0]))
            {
                Console.Error.WriteLine("Usage: Consumer.exe <consumerGroup>");
                return;
            }

            new Program(args[0]);
        }

        /// <summary>
        /// Builds the subscriber for <paramref name="consumerGroup"/> and runs its
        /// consume loop on the calling thread.
        /// </summary>
        /// <param name="consumerGroup">Kafka consumer group id to join.</param>
        public Program(string consumerGroup)
        {
            kafkaSubscriber = new KafkaSubscriber(new ConsumerSettings()
            {
                ConsumerConfig = CreateConfig(consumerGroup),
                Topic = topicName,
            });
            kafkaSubscriber.ReceiveError += KafkaSubscriber_ReceiveError;
            kafkaSubscriber.CreateConsumer(cts.Token);
            Console.ReadLine();
        }

        /// <summary>
        /// Handles a subscriber error by starting a new consumer with a fresh
        /// cancellation token source.
        /// </summary>
        private void KafkaSubscriber_ReceiveError(object sender, System.IO.ErrorEventArgs e)
        {
            // The previous source is intentionally not disposed here: its token may
            // still be in use by the consume loop that raised this event.
            cts = new CancellationTokenSource();
            kafkaSubscriber.CreateConsumer(cts.Token);
        }

        /// <summary>
        /// Creates the consumer configuration for the given group: earliest offset
        /// reset, auto commit disabled, and a random client id.
        /// </summary>
        /// <param name="consumerGroup">Kafka consumer group id.</param>
        /// <returns>The populated <see cref="ConsumerConfig"/>.</returns>
        public ConsumerConfig CreateConfig(string consumerGroup)
        {
            var conf = new ConsumerConfig
            {
                GroupId = consumerGroup,
                BootstrapServers = "XXXXXX",
                AutoOffsetReset = AutoOffsetReset.Earliest,
                EnableAutoCommit = false,
                ClientId = Guid.NewGuid().ToString("N")
            };
            return conf;
        }
    }
}
Where the actual subscriber code looks like this
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Confluent.Kafka;
using NLog;
namespace Consumer
{
    /// <summary>
    /// Subscribes to the configured topic and logs every consumed message,
    /// committing each one explicitly after it is received.
    /// </summary>
    class KafkaSubscriber
    {
        private readonly ConsumerSettings _consumerSettings;
        private static readonly ILogger _logger = LogManager.GetLogger("Global");

        /// <summary>Creates a subscriber for the given settings.</summary>
        /// <param name="consumerSettings">Consumer configuration and topic name.</param>
        /// <exception cref="ArgumentNullException"><paramref name="consumerSettings"/> is null.</exception>
        public KafkaSubscriber(ConsumerSettings consumerSettings)
        {
            _consumerSettings = consumerSettings ?? throw new ArgumentNullException(nameof(consumerSettings));
        }

        /// <summary>Raised when the consume loop catches an exception.</summary>
        public event EventHandler<ErrorEventArgs> ReceiveError;

        /// <summary>Raises <see cref="ReceiveError"/>.</summary>
        protected virtual void OnReceiveError(ErrorEventArgs e)
        {
            ReceiveError?.Invoke(this, e);
        }

        /// <summary>
        /// Client error callback: logs the error and turns fatal errors and local
        /// timeouts into a <see cref="KafkaException"/>.
        /// </summary>
        private void ErrorHandler(IConsumer<Ignore, string> consumer, Error error)
        {
            // Interpolate the error into the message; passing it as a format argument
            // to a message without placeholders would leave it out of the log.
            _logger.Error($"Kafka ErrorHandler: {error.Code} - {error.Reason}");
            if (error.IsFatal || error.Code == ErrorCode.Local_TimedOut)
            {
                _logger.Error("Throwing fatal error code as exception");
                throw new KafkaException(error);
            }
        }

        /// <summary>
        /// Builds a consumer, subscribes to the configured topic and consumes until
        /// <paramref name="ct"/> is cancelled. This call blocks the calling thread.
        /// </summary>
        /// <param name="ct">Token used to stop the consume loop.</param>
        public void CreateConsumer(CancellationToken ct)
        {
            using (var c = new ConsumerBuilder<Ignore, string>(_consumerSettings.ConsumerConfig)
                .SetErrorHandler(ErrorHandler)
                .SetStatisticsHandler((_, json) => _logger.Debug($"Statistics: {json}"))
                .SetLogHandler((_, message) => _logger.Debug($"{message.Level} {message.Message}"))
                // The first lambda parameter is discarded so that it does not shadow
                // the consumer variable `c`.
                .SetPartitionsAssignedHandler((_, partitions) =>
                {
                    _logger.Info($"Assigned partitions: [{string.Join(", ", partitions)}]");
                })
                .SetPartitionsRevokedHandler((_, partitions) =>
                {
                    _logger.Info($"Revoking assignment: [{string.Join(", ", partitions)}]");
                })
                .Build())
            {
                c.Subscribe(_consumerSettings.Topic);
                //c.Assign();
                try
                {
                    while (true)
                    {
                        try
                        {
                            var cr = c.Consume(ct);
                            // Offsets are committed per message by this explicit call.
                            c.Commit(cr);
                            _logger.Debug(
                                $"\r\n{_consumerSettings.ConsumerConfig.GroupId} Consumed message '{cr.Message.Value}' at: '{cr.TopicPartitionOffset}'.\r\n");
                        }
                        catch (ConsumeException e)
                        {
                            _logger.Error($"Error occured: {e.Error.Reason}");
                            OnReceiveError(new ErrorEventArgs(e));
                        }
                        catch (InvalidProgramException e)
                        {
                            _logger.Error($"Error occured: {e.Message}");
                            OnReceiveError(new ErrorEventArgs(e));
                        }
                        catch (KafkaException kex)
                        {
                            _logger.Error($"Error occured: {kex.Message}");
                            OnReceiveError(new ErrorEventArgs(kex));
                        }
                    }
                }
                catch (OperationCanceledException)
                {
                    // Cancellation ends the loop; Close() is called before the consumer
                    // is disposed.
                    c.Close();
                }
            }
        }
    }
}
So if I run a single producer and 2 consumer processes (the consumers are run from the command line like Consumer.exe "cg1" and Consumer.exe "cg2"),
Everything works as expected, both consumers get the message from the publisher on the topic, as shown in the following screen shot
All good so far, but according to every other StackOverflow or Kafka doc I have seen it should be possible to have a consumer per thread.
Consumer per thread
So If I adjust my bootstrap consumer code to this, which should be identical to running the 2 separate processes, since each consumer is using a new consumer group name, each consumer is in its own thread, there really should be no difference to the 2 separate processes
/// <summary>
/// Entry point for the threaded variant: starts one consumer per group name.
/// </summary>
public static void Main(string[] args)
{
    string[] consumerGroups = { "cat", "dog" };
    var p = new Program(consumerGroups);
}
/// <summary>
/// Starts one consumer thread per entry in <paramref name="consumerGroups"/> and
/// then waits for the user to press Enter.
/// </summary>
/// <param name="consumerGroups">Consumer group ids; each gets its own thread and subscriber.</param>
public Program(string[] consumerGroups)
{
    foreach (var consumerGroup in consumerGroups)
    {
        var thread = new Thread(() =>
        {
            // Keep the subscriber in a local variable owned by this thread. Storing it
            // in the shared `kafkaSubscriber` field lets one thread overwrite the
            // other's instance, so both threads can end up running the same subscriber
            // and one consumer group never receives anything.
            var subscriber = new KafkaSubscriber(new ConsumerSettings()
            {
                ConsumerConfig = CreateConfig(consumerGroup),
                Topic = topicName,
            });

            // Restart the subscriber that raised the error (the sender) rather than
            // whatever instance a shared field happens to reference.
            subscriber.ReceiveError += (sender, e) => ((KafkaSubscriber)sender).CreateConsumer(cts.Token);
            subscriber.CreateConsumer(cts.Token);
        });
        thread.Start();
    }

    Console.ReadLine();
}
Yet when running this code, this is what is seen, where only 1 of the consumers actually picks up a message from the topic, this is not the expected behavior at all for me.
I really can't see anything weird, I don't think I have missed anything. All seems correct. Yet only 1 consumer "dog consumer group" in this case sees the produced topic messages, for me the "cat consumer group" should also see the produced messages.
What am I doing wrong?
I am using the Confluent.Kafka official C# driver : https://docs.confluent.io/clients-confluent-kafka-dotnet/current/overview.html

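I was being a complete spanner: the consumer variable was a shared field rather than a variable private to each thread, so the threads overwrote each other's subscriber. After making it local to the thread, all is OK now. Phew.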

Related

How to Handle a Kafka Record with a Class-Level @KafkaListener with no @KafkaHandler for the Record Value

Normally, when we define a class-level @KafkaListener and method-level @KafkaHandlers, we can define a default @KafkaHandler to handle unexpected payloads.
https://docs.spring.io/spring-kafka/docs/current/reference/html/#class-level-kafkalistener
But, what should we do if we don't have a default method?
With version 2.6 and later, you can configure a SeekToCurrentErrorHandler to immediately send such messages to a dead letter topic, by examining the exception.
Here is a simple Spring Boot application that demonstrates the technique:
#SpringBootApplication
public class So59256214Application {
public static void main(String[] args) {
SpringApplication.run(So59256214Application.class, args);
}
#Bean
public NewTopic topic1() {
return TopicBuilder.name("so59256214").partitions(1).replicas(1).build();
}
#Bean
public NewTopic topic2() {
return TopicBuilder.name("so59256214.DLT").partitions(1).replicas(1).build();
}
#KafkaListener(id = "so59256214.DLT", topics = "so59256214.DLT")
void listen(ConsumerRecord<?, ?> in) {
System.out.println("dlt: " + in);
}
#Bean
public ApplicationRunner runner(KafkaTemplate<String, Object> template) {
return args -> {
template.send("so59256214", 42);
template.send("so59256214", 42.0);
template.send("so59256214", "No handler for this");
};
}
#Bean
ErrorHandler eh(KafkaOperations<String, Object> template) {
SeekToCurrentErrorHandler eh = new SeekToCurrentErrorHandler(new DeadLetterPublishingRecoverer(template));
BackOff neverRetryOrBackOff = new FixedBackOff(0L, 0);
BackOff normalBackOff = new FixedBackOff(2000L, 3);
eh.setBackOffFunction((rec, ex) -> {
if (ex.getMessage().contains("No method found for class")) {
return neverRetryOrBackOff;
}
else {
return normalBackOff;
}
});
return eh;
}
}
#Component
#KafkaListener(id = "so59256214", topics = "so59256214")
class Listener {
#KafkaHandler
void integerHandler(Integer in) {
System.out.println("int: " + in);
}
#KafkaHandler
void doubleHandler(Double in) {
System.out.println("double: " + in);
}
}
spring.kafka.consumer.auto-offset-reset=earliest
spring.kafka.consumer.value-deserializer=org.springframework.kafka.support.serializer.JsonDeserializer
spring.kafka.producer.value-serializer=org.springframework.kafka.support.serializer.JsonSerializer
Result:
int: 42
double: 42.0
dlt: ConsumerRecord(topic = so59256214.DLT, ...

How to read the Header values in the Batch listener error handling scenario

I am trying to handle the exception at the listener
#KafkaListener(id = PropertiesUtil.ID,
topics = "#{'${kafka.consumer.topic}'}",
groupId = "${kafka.consumer.group.id.config}",
containerFactory = "containerFactory",
errorHandler = "errorHandler")
public void receiveEvents(#Payload List<ConsumerRecord<String, String>> recordList,
Acknowledgment acknowledgment) {
try {
log.info("Consuming the batch of size {} from kafka topic {}", consumerRecordList.size(),
consumerRecordList.get(0).topic());
processEvent(consumerRecordList);
incrementOffset(acknowledgment);
} catch (Exception exception) {
throwOrHandleExceptions(exception, recordList, acknowledgment);
.........
}
}
The Kafka container config:
#Bean
public KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, String>>
containerFactory() {
ConcurrentKafkaListenerContainerFactory<String, String> factory =
new ConcurrentKafkaListenerContainerFactory<>();
factory.setConcurrency(this.numberOfConsumers);
factory.getContainerProperties().setAckOnError(false);
factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
factory.setConsumerFactory(getConsumerFactory());
factory.setBatchListener(true);
return factory;
}
}
the listener error handler impl
#Bean
public ConsumerAwareListenerErrorHandler errorHandler() {
return (m, e, c) -> {
MessageHeaders headers = m.getHeaders();
List<String> topics = headers.get(KafkaHeaders.RECEIVED_TOPIC, List.class);
List<Integer> partitions = headers.get(KafkaHeaders.RECEIVED_PARTITION_ID, List.class);
List<Long> offsets = headers.get(KafkaHeaders.OFFSET, List.class);
Map<TopicPartition, Long> offsetsToReset = new HashMap<>();
for (int i = 0; i < topics.size(); i++) {
int index = i;
offsetsToReset.compute(new TopicPartition(topics.get(i), partitions.get(i)),
(k, v) -> v == null ? offsets.get(index) : Math.min(v, offsets.get(index)));
}
...
};
}
When I run the same code without batch processing, I am able to fetch the partition, topic and offset values. But when I enable batch processing and test it, I get only two values in the headers (id and timestamp); the other values are not set. Am I missing anything here?
What version are you using? I just tested it with Boot 2.2.4 (SK 2.3.5) and it works fine...
#SpringBootApplication
public class So60152179Application {
public static void main(String[] args) {
SpringApplication.run(So60152179Application.class, args);
}
#KafkaListener(id = "so60152179", topics = "so60152179", errorHandler = "eh")
public void listen(List<String> in) {
throw new RuntimeException("foo");
}
#Bean
public ConsumerAwareListenerErrorHandler eh() {
return (m, e, c) -> {
System.out.println(m);
return null;
};
}
#Bean
public ApplicationRunner runner(KafkaTemplate<String, String> template) {
return args -> {
template.send("so60152179", "foo");
};
}
#Bean
public NewTopic topic() {
return TopicBuilder.name("so60152179").partitions(1).replicas(1).build();
}
}
spring.kafka.listener.type=batch
spring.kafka.consumer.auto-offset-reset=earliest
GenericMessage [payload=[foo], headers={kafka_offset=[0], kafka_nativeHeaders=[RecordHeaders(headers = [], isReadOnly = false)], kafka_consumer=org.apache.kafka.clients.consumer.KafkaConsumer@2f2e787f, kafka_timestampType=[CREATE_TIME], kafka_receivedMessageKey=[null], kafka_receivedPartitionId=[0], kafka_receivedTopic=[so60152179], kafka_receivedTimestamp=[1581351585253], kafka_groupId=so60152179}]

Kafka: Consumer api: Regression test fails if runs in a group (sequentially)

I have implemented a kafka application using consumer api. And I have 2 regression tests implemented with stream api:
To test happy path: by producing data from the test ( into the input topic that the application is listening to) that will be consumed by the application and application will produce data (into the output topic ) that the test will consume and validate against expected output data.
To test error path: behavior is the same as above. Although this time application will produce data into output topic and test will consume from application's error topic and will validate against expected error output.
My code and the regression-test codes are residing under the same project under expected directory structure. Both time ( for both tests) data should have been picked up by the same listener at the application side.
The problem is :
When I am executing the tests individually (manually), each test is passing. However, If I execute them together but sequentially ( for example: gradle clean build ) , only first test is passing. 2nd test is failing after the test-side-consumer polling for data and after some time it gives up not finding any data.
Observation:
From debugging, it looks like, the 1st time everything works perfectly ( test-side and application-side producers and consumers). However, during the 2nd test it seems that application-side-consumer is not receiving any data ( It seems that test-side-producer is producing data, but can not say that for sure) and hence no data is being produced into the error topic.
What I have tried so far:
After investigations, my understanding is that we are getting into race conditions and to avoid that found suggestions like :
use @DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
Tear down the broker after each test (please see the ".destroy()" calls on the brokers)
use different topic names for each test
I applied all of them and still could not recover from my issue.
I am providing the code here for perusal. Any insight is appreciated.
Code for 1st test (Testing error path):
#DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
#EmbeddedKafka(
partitions = 1,
controlledShutdown = false,
topics = {
AdapterStreamProperties.Constants.INPUT_TOPIC,
AdapterStreamProperties.Constants.ERROR_TOPIC
},
brokerProperties = {
"listeners=PLAINTEXT://localhost:9092",
"port=9092",
"log.dir=/tmp/data/logs",
"auto.create.topics.enable=true",
"delete.topic.enable=true"
}
)
public class AbstractIntegrationFailurePathTest {
private final int retryLimit = 0;
#Autowired
protected EmbeddedKafkaBroker embeddedFailurePathKafkaBroker;
//To produce data
#Autowired
protected KafkaTemplate<PreferredMediaMsgKey, SendEmailCmd> inputProducerTemplate;
//To read from output error
#Autowired
protected Consumer<PreferredMediaMsgKey, ErrorCmd> outputErrorConsumer;
//Service to execute notification-preference
#Autowired
protected AdapterStreamProperties projectProerties;
protected void subscribe(Consumer consumer, String topic, int attempt) {
try {
embeddedFailurePathKafkaBroker.consumeFromAnEmbeddedTopic(consumer, topic);
} catch (ComparisonFailure ex) {
if (attempt < retryLimit) {
subscribe(consumer, topic, attempt + 1);
}
}
}
}
.
#TestConfiguration
public class AdapterStreamFailurePathTestConfig {
#Autowired
private EmbeddedKafkaBroker embeddedKafkaBroker;
#Value("${spring.kafka.adapter.application-id}")
private String applicationId;
#Value("${spring.kafka.adapter.group-id}")
private String groupId;
//Producer of records that the program consumes
#Bean
public Map<String, Object> sendEmailCmdProducerConfigs() {
Map<String, Object> results = KafkaTestUtils.producerProps(embeddedKafkaBroker);
results.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
AdapterStreamProperties.Constants.KEY_SERDE.serializer().getClass());
results.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
AdapterStreamProperties.Constants.INPUT_VALUE_SERDE.serializer().getClass());
return results;
}
#Bean
public ProducerFactory<PreferredMediaMsgKey, SendEmailCmd> inputProducerFactory() {
return new DefaultKafkaProducerFactory<>(sendEmailCmdProducerConfigs());
}
#Bean
public KafkaTemplate<PreferredMediaMsgKey, SendEmailCmd> inputProducerTemplate() {
return new KafkaTemplate<>(inputProducerFactory());
}
//Consumer of the error output, generated by the program
#Bean
public Map<String, Object> outputErrorConsumerConfig() {
Map<String, Object> props = KafkaTestUtils.consumerProps(
applicationId, Boolean.TRUE.toString(), embeddedKafkaBroker);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
AdapterStreamProperties.Constants.KEY_SERDE.deserializer().getClass()
.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
AdapterStreamProperties.Constants.ERROR_VALUE_SERDE.deserializer().getClass()
.getName());
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
return props;
}
#Bean
public Consumer<PreferredMediaMsgKey, ErrorCmd> outputErrorConsumer() {
DefaultKafkaConsumerFactory<PreferredMediaMsgKey, ErrorCmd> rpf =
new DefaultKafkaConsumerFactory<>(outputErrorConsumerConfig());
return rpf.createConsumer(groupId, "notification-failure");
}
}
.
#RunWith(SpringRunner.class)
#SpringBootTest(classes = AdapterStreamFailurePathTestConfig.class)
#ActiveProfiles(profiles = "errtest")
public class ErrorPath400Test extends AbstractIntegrationFailurePathTest {
#Autowired
private DataGenaratorForErrorPath400Test datagen;
#Mock
private AdapterHttpClient httpClient;
#Autowired
private ErroredEmailCmdDeserializer erroredEmailCmdDeserializer;
#Before
public void setup() throws InterruptedException {
Mockito.when(httpClient.callApi(Mockito.any()))
.thenReturn(
new GenericResponse(
400,
TestConstants.ERROR_MSG_TO_CHK));
Mockito.when(httpClient.createURI(Mockito.any(),Mockito.any(),Mockito.any())).thenCallRealMethod();
inputProducerTemplate.send(
projectProerties.getInputTopic(),
datagen.getKey(),
datagen.getEmailCmdToProduce());
System.out.println("producer: "+ projectProerties.getInputTopic());
subscribe(outputErrorConsumer , projectProerties.getErrorTopic(), 0);
}
#Test
public void testWithError() throws InterruptedException, InvalidProtocolBufferException, TextFormat.ParseException {
ConsumerRecords<PreferredMediaMsgKeyBuf.PreferredMediaMsgKey, ErrorCommandBuf.ErrorCmd> records;
List<ConsumerRecord<PreferredMediaMsgKeyBuf.PreferredMediaMsgKey, ErrorCommandBuf.ErrorCmd>> outputListOfErrors = new ArrayList<>();
int attempt = 0;
int expectedRecords = 1;
do {
records = KafkaTestUtils.getRecords(outputErrorConsumer);
records.forEach(outputListOfErrors::add);
attempt++;
} while (attempt < expectedRecords && outputListOfErrors.size() < expectedRecords);
//Verify the recipient event stream size
Assert.assertEquals(expectedRecords, outputListOfErrors.size());
//Validate output
}
#After
public void tearDown() {
outputErrorConsumer.close();
embeddedFailurePathKafkaBroker.destroy();
}
}
2nd test is almost the same in structure. Although this time the test-side-consumer is consuming from application-side-output-topic( instead of error topic). And I named the consumers,broker,producer,topics differently. Like :
#DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
#EmbeddedKafka(
partitions = 1,
controlledShutdown = false,
topics = {
AdapterStreamProperties.Constants.INPUT_TOPIC,
AdapterStreamProperties.Constants.OUTPUT_TOPIC
},
brokerProperties = {
"listeners=PLAINTEXT://localhost:9092",
"port=9092",
"log.dir=/tmp/data/logs",
"auto.create.topics.enable=true",
"delete.topic.enable=true"
}
)
public class AbstractIntegrationSuccessPathTest {
private final int retryLimit = 0;
#Autowired
protected EmbeddedKafkaBroker embeddedKafkaBroker;
//To produce data
#Autowired
protected KafkaTemplate<PreferredMediaMsgKey,SendEmailCmd> sendEmailCmdProducerTemplate;
//To read from output regular topic
#Autowired
protected Consumer<PreferredMediaMsgKey, NotifiedEmailCmd> ouputConsumer;
//Service to execute notification-preference
#Autowired
protected AdapterStreamProperties projectProerties;
protected void subscribe(Consumer consumer, String topic, int attempt) {
try {
embeddedKafkaBroker.consumeFromAnEmbeddedTopic(consumer, topic);
} catch (ComparisonFailure ex) {
if (attempt < retryLimit) {
subscribe(consumer, topic, attempt + 1);
}
}
}
}
Please let me know if I should provide any more information.
"port=9092"
Don't use a fixed port; leave that out and the embedded broker will use a random port; the consumer configs are set up in KafkaTestUtils to point to the random port.
You shouldn't need to dirty the context after each test method - use a different group.id for each test and a different topic.
In my case the consumer was not closed properly. I had to do :
#After
public void tearDown() {
// shutdown hook to correctly close the streams application
Runtime.getRuntime().addShutdownHook(new Thread(ouputConsumer::close));
}
to resolve.

search for a very simple EsperIO Kafka example

I'm just desperately looking for example code for an Esper CEP Kafka Adapter code. I've already installed Kafka and wrote data to a Kafka topic using a producer and now I want to process it with Esper CEP. Unfortunately the documentation of Esper for the Kafka Adapter is not very meaningful. Does anyone have a very simple example?
Edit:
So far I added an adapter and it seems to work. However, I don't know how to read the adapter nor how to link a CEP pattern with this adapter. This is my code so far:
// NOTE(review): `config` is not declared in this snippet — presumably an Esper
// Configuration created earlier; confirm against the full code.
config.addImport(KafkaOutputDefault.class);
// Kafka consumer settings: broker address, String key/value deserializers and group id.
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
props.put(ConsumerConfig.GROUP_ID_CONFIG, "group.id");
// EsperIO Kafka input adapter settings: subscriber, topic list, processor and
// timestamp extractor implementations.
props.put(EsperIOKafkaConfig.INPUT_SUBSCRIBER_CONFIG, EsperIOKafkaInputSubscriberByTopicList.class.getName());
props.put(EsperIOKafkaConfig.TOPICS_CONFIG, "test123");
props.put(EsperIOKafkaConfig.INPUT_PROCESSOR_CONFIG, EsperIOKafkaInputProcessorDefault.class.getName());
props.put(EsperIOKafkaConfig.INPUT_TIMESTAMPEXTRACTOR_CONFIG, EsperIOKafkaInputTimestampExtractorConsumerRecord.class.getName());
// A second Configuration receives the plugin loader; it is not used again in this snippet.
Configuration config2 = new Configuration();
config2.addPluginLoader("KafkaInput", EsperIOKafkaInputAdapterPlugin.class.getName(), props, null);
// The adapter is also created and started directly, with the same properties.
EsperIOKafkaInputAdapter adapter = new EsperIOKafkaInputAdapter(props, "default");
adapter.start();
I've had the same problem. I created a sample Project you could have a look at, especially the plain-esper branch.
An even more simplified Version would be:
public class KafkaExample implements Runnable {
private String runtimeURI;
public KafkaExample(String runtimeURI) {
this.runtimeURI = runtimeURI;
}
public static void main(String[] args){
new KafkaExample("KafkaExample").run();
}
#Override
public void run() {
Configuration configuration = new Configuration();
configuration.getCommon().addImport(KafkaOutputDefault.class);
configuration.getCommon().addEventType(String.class);
Properties consumerProps = new Properties();
// Kafka Consumer Properties
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.EARLIEST.toString().toLowerCase());
// EsperIO Kafka Input Adapter Properties
consumerProps.put(EsperIOKafkaConfig.INPUT_SUBSCRIBER_CONFIG, Consumer.class.getName());
consumerProps.put(EsperIOKafkaConfig.INPUT_PROCESSOR_CONFIG, InputProcessor.class.getName());
consumerProps.put(EsperIOKafkaConfig.INPUT_TIMESTAMPEXTRACTOR_CONFIG, EsperIOKafkaInputTimestampExtractorConsumerRecord.class.getName());
configuration.getRuntime().addPluginLoader("KafkaInput", EsperIOKafkaInputAdapterPlugin.class.getName(), consumerProps, null);
String stmt = "#name('sampleQuery') select * from String";
EPCompiled compiled;
try {
compiled = EPCompilerProvider.getCompiler().compile(stmt, new CompilerArguments(configuration));
} catch (EPCompileException ex) {
throw new RuntimeException(ex);
}
EPRuntime runtime = EPRuntimeProvider.getRuntime(runtimeURI, configuration);
EPDeployment deployment;
try {
deployment = runtime.getDeploymentService().deploy(compiled, new DeploymentOptions().setDeploymentId(UUID.randomUUID().toString()));
} catch (EPDeployException ex) {
throw new RuntimeException(ex);
}
EPStatement statement = runtime.getDeploymentService().getStatement(deployment.getDeploymentId(), "sampleQuery");
statement.addListener((newData, oldData, sta, run) -> {
for (EventBean nd : newData) {
System.out.println(nd.getUnderlying());
}
});
while (true) {}
}
}
public class Consumer implements EsperIOKafkaInputSubscriber {
#Override
public void subscribe(EsperIOKafkaInputSubscriberContext context) {
Collection<String> collection = new ArrayList<String>();
collection.add("input");
context.getConsumer().subscribe(collection);
}
}
public class InputProcessor implements EsperIOKafkaInputProcessor {
private EPRuntime runtime;
#Override
public void init(EsperIOKafkaInputProcessorContext context) {
this.runtime = context.getRuntime();
}
#Override
public void process(ConsumerRecords<Object, Object> records) {
for (ConsumerRecord record : records) {
if (record.value() != null) {
try {
runtime.getEventService().sendEventBean(record.value().toString(), "String");
} catch (Exception e) {
throw e;
}
}
}
}
public void close() {}
}
Sample code follows. This code assumes there are already some messages in the topic. This does not loop and wait for more messages.
// One-shot read: builds a consumer, polls once and hands every returned row to
// the Esper runtime. `ip` and `runtime` are defined outside this snippet.
Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, ip);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "mygroup");
// NOTE(review): no subscribe()/assign() call is visible in this snippet —
// presumably omitted from the post; confirm before reusing.
KafkaConsumer consumer = new KafkaConsumer<>(consumerProps);
// Single poll with a timeout argument of 1000; there is no loop waiting for more messages.
ConsumerRecords<String, String> rows = consumer.poll(1000);
Iterator<ConsumerRecord<String, String>> it = rows.iterator();
while (it.hasNext()) {
ConsumerRecord<String, String> row = it.next();
MyEvent event = new MyEvent(row.value()); // transform string to event
// process event
runtime.sendEvent(event);
}

KAFKA + FLINK 1.1.2 consumer group not working as expected

I have one topic with 3 partitions and 3 FlinkKafkaConsumer09 instances consuming from that topic, all using the same Kafka consumer group properties, as below.
// All three consumers are configured with the same group id.
props.setProperty("group.id", "myGroup");
// Offset reset policy used by the consumers.
props.setProperty("auto.offset.reset", "latest");
But all 3 consumers still receive all of the data. According to the consumer group concept, each message should be delivered to only one consumer inside the consumer group.
It works correctly with a plain Java consumer. Is this an issue with FlinkKafkaConsumer09?
This issue can be solved by writing your own Flink consumer.
Steps: 1. You have to pass the partition to read as a property ("partitions") to the Flink consumer.
Drawback: with this approach you have one consumer per partition.
public class YourConsumer<T> extends FlinkKafkaConsumerBase<T>
{
public static final long DEFAULT_POLL_TIMEOUT = 100L;
private final long pollTimeout;
public FlinkKafkaConsumer09(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
this(Collections.singletonList(topic), valueDeserializer, props);
}
public FlinkKafkaConsumer09(String topic, KeyedDeserializationSchema<T> deserializer, Properties props) {
this(Collections.singletonList(topic), deserializer, props);
}
public FlinkKafkaConsumer09(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
this(topics, new KeyedDeserializationSchemaWrapper<>(deserializer), props);
}
public FlinkKafkaConsumer09(List<String> topics, KeyedDeserializationSchema<T> deserializer, Properties props) {
super(topics, deserializer);
this.properties = checkNotNull(props, "props");
setDeserializer(this.properties);
// configure the polling timeout
try {
if (properties.containsKey(KEY_POLL_TIMEOUT)) {
this.pollTimeout = Long.parseLong(properties.getProperty(KEY_POLL_TIMEOUT));
} else {
this.pollTimeout = DEFAULT_POLL_TIMEOUT;
}
}
catch (Exception e) {
throw new IllegalArgumentException("Cannot parse poll timeout for '" + KEY_POLL_TIMEOUT + '\'', e);
}
}
#Override
protected AbstractFetcher<T, ?> createFetcher(
SourceContext<T> sourceContext,
List<KafkaTopicPartition> thisSubtaskPartitions,
SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
StreamingRuntimeContext runtimeContext) throws Exception {
boolean useMetrics = !Boolean.valueOf(properties.getProperty(KEY_DISABLE_METRICS, "false"));
return new Kafka09Fetcher<>(sourceContext, thisSubtaskPartitions,
watermarksPeriodic, watermarksPunctuated,
runtimeContext, deserializer,
properties, pollTimeout, useMetrics);
}
#Override
protected List<KafkaTopicPartition> getKafkaPartitions(List<String> topics) {
// read the partitions that belong to the listed topics
final List<KafkaTopicPartition> partitions = new ArrayList<>();
int partition=Integer.valueOf(this.properties.get("partitions"));
try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(this.properties)) {
for (final String topic: topics) {
// get partitions for each topic
List<PartitionInfo> partitionsForTopic = consumer.partitionsFor(topic);
// for non existing topics, the list might be null.
if (partitionsForTopic != null) {
partitions.addAll(convertToFlinkKafkaTopicPartition(partitionsForTopic),partition);
}
}
}
if (partitions.isEmpty()) {
throw new RuntimeException("Unable to retrieve any partitions for the requested topics " + topics);
}
// we now have a list of partitions which is the same for all parallel consumer instances.
LOG.info("Got {} partitions from these topics: {}", partitions.size(), topics);
if (LOG.isInfoEnabled()) {
logPartitionInfo(LOG, partitions);
}
return partitions;
}
private static List<KafkaTopicPartition> convertToFlinkKafkaTopicPartition(List<PartitionInfo> partitions,int partition) {
checkNotNull(partitions);
List<KafkaTopicPartition> ret = new ArrayList<>(partitions.size());
//for (PartitionInfo pi : partitions) {
ret.add(new KafkaTopicPartition(partitions.get(partition).topic(), partitions.get(partition).partition()));
// }
return ret;
}
private static void setDeserializer(Properties props) {
final String deSerName = ByteArrayDeserializer.class.getCanonicalName();
Object keyDeSer = props.get(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
Object valDeSer = props.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
if (keyDeSer != null && !keyDeSer.equals(deSerName)) {
LOG.warn("Ignoring configured key DeSerializer ({})", ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG);
}
if (valDeSer != null && !valDeSer.equals(deSerName)) {
LOG.warn("Ignoring configured value DeSerializer ({})", ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
}
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deSerName);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deSerName);
}
}