Spring Batch remote-partitioning worker-job not reading data from middleware

I wanted to check out the remote partitioning setup in Spring Batch. I am using Spring Boot v2.7.2 and Kafka as the middleware, and I have used the ColumnRangePartitioner from the Spring Batch samples.
I can see that the manager job persists the metadata in the Kafka topic, and that the partition values for the worker job are persisted in the Spring Batch tables.
Below are my questions:
1. My worker job is not able to read the data from the Kafka topic, and it gets null values for the partition range injected into the JdbcPagingItemReader bean created in the worker job.
2. What is the expected behavior if I run the worker job without running the manager job first, i.e. when there is no data for the worker job to process in the Kafka topic or in the Spring Batch tables? Currently my worker job creates a new job instance and executes it (it does not wait for the manager job to send invocation metadata).
Below is the Partitioner configuration:
@Override
public Map<String, ExecutionContext> partition(int gridSize) {
    Integer min = jdbcTemplate.queryForObject("SELECT MIN(" + column + ") from ( " + table + ") as aliastab ", Integer.class);
    Integer max = jdbcTemplate.queryForObject("SELECT MAX(" + column + ") from ( " + table + ") as aliastab ", Integer.class);
    Integer targetSize = (max - min) / gridSize + 1;
    Map<String, ExecutionContext> result = new HashMap<>();
    Integer number = 0;
    Integer start = min;
    Integer end = start + targetSize - 1;
    while (start <= max) {
        ExecutionContext value = new ExecutionContext();
        result.put("partition" + number, value);
        if (end >= max) {
            end = max;
        }
        value.putInt("minValue", start);
        value.putInt("maxValue", end);
        start += targetSize;
        end += targetSize;
        number++;
    }
    return result;
}
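As a quick sanity check of the arithmetic (assuming, for illustration, that addr_id runs from 1 to 7 and gridSize = 3): targetSize = (7 - 1) / 3 + 1 = 3, so the partitioner produces

    partition0 -> {minValue=1, maxValue=3}
    partition1 -> {minValue=4, maxValue=6}
    partition2 -> {minValue=7, maxValue=7}

and it is these minValue/maxValue entries that the worker step is expected to receive in its step execution context.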
Manager Config:
@Configuration
@Profile("manager")
@EnableBatchProcessing
@EnableBatchIntegration
@PropertySource("classpath:application-manager.properties")
public class ManagerBeanConfig {

    @Value("${spring.kafka.bootstrap-servers}")
    private String bootStrapServers;

    @Value("${kafka.request.topic}")
    private String requestTopicName;

    @Value("${kafka.response.topic}")
    private String responseTopicName;

    @Value("${spring.kafka.consumer.group-id}")
    private String consumerGroup;

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private KafkaTemplate kafkaTemplate;

    @Bean
    public RemotePartitioningManagerStepBuilderFactory remotePartitioningManagerStepBuilderFactory(
            JobRepository jobRepository, JobExplorer jobExplorer,
            DataSourceTransactionManager springBatchDataSourceTransactionManager) {
        RemotePartitioningManagerStepBuilderFactory remotePartitioningManagerStepBuilderFactory = new RemotePartitioningManagerStepBuilderFactory(
                jobRepository, jobExplorer, springBatchDataSourceTransactionManager);
        return remotePartitioningManagerStepBuilderFactory;
    }

    @Bean
    public DirectChannel outboundRequests() {
        return new DirectChannel();
    }

    @Bean
    public DirectChannel inboundReplies() {
        return new DirectChannel();
    }

    @Bean
    public ConsumerFactory consumerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.bootStrapServers);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, consumerGroup);
        props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
        return new DefaultKafkaConsumerFactory(props);
    }

    @Bean
    public IntegrationFlow inboundFlow(ConsumerFactory consumerFactory, DirectChannel inboundReplies) {
        ContainerProperties containerProps = new ContainerProperties(new String[] {responseTopicName});
        return IntegrationFlows
                .from(Kafka.inboundChannelAdapter(consumerFactory, containerProps))
                .channel(inboundReplies)
                .get();
    }

    @Bean
    public IntegrationFlow outboundFlow(DirectChannel outboundRequests) {
        KafkaProducerMessageHandler messageHandler = new KafkaProducerMessageHandler(kafkaTemplate);
        messageHandler.setTopicExpression(new LiteralExpression(ManagerConstants.REQUEST_TOPIC_NAME));
        Function<Message<?>, Long> partitionFn = (m) -> {
            StepExecutionRequest executionRequest = (StepExecutionRequest) m.getPayload();
            return executionRequest.getStepExecutionId() % 3;
        };
        messageHandler.setPartitionIdExpression(new FunctionExpression<>(partitionFn));
        return IntegrationFlows.from(outboundRequests).handle(messageHandler).get();
    }

    @Bean
    public Job addressManagerJob(Step immsAddressManagerPartitionerStep) {
        return jobBuilderFactory.get("addressManagerJob").start(immsAddressManagerPartitionerStep)
                .incrementer(new RunIdIncrementer()).build();
    }

    @Bean
    public ColumnRangePartitioner partitioner(@Qualifier("fcrmDataSource") DataSource fcrmDataSource) {
        ColumnRangePartitioner partitioner = new ColumnRangePartitioner();
        partitioner.setColumn("addr_id");
        partitioner.setDataSource(fcrmDataSource);
        partitioner.setTable("SELECT addr_id, lookup_Code, meaning from addr ");
        return partitioner;
    }

    @Bean
    public Step addressManagerPartitionerStep(RemotePartitioningManagerStepBuilderFactory remotePartitioningManagerStepBuilderFactory,
            DirectChannel outboundRequests, DirectChannel inboundReplies, ColumnRangePartitioner partitioner) {
        return remotePartitioningManagerStepBuilderFactory.get("addressManagerPartitionerStep")
                .gridSize(3)
                .partitioner("addressWorkerStep", partitioner)
                .outputChannel(outboundRequests)
                .inputChannel(inboundReplies)
                .build();
    }
}
Worker Job Config:
@Configuration
@Profile("worker")
@EnableBatchProcessing
@EnableBatchIntegration
@PropertySource("classpath:application-worker.properties")
public class WorkerBeanConfig {

    @Value("${spring.kafka.bootstrap-servers}")
    private String bootStrapServers;

    @Value("${kafka.request.topic}")
    private String requestTopicName;

    @Value("${kafka.response.topic}")
    private String responseTopicName;

    @Value("${spring.kafka.consumer.group-id}")
    private String consumerGroup;

    @SuppressWarnings({ "rawtypes", "unused" })
    @Autowired
    private KafkaTemplate kafkaTemplate;

    @Bean
    public RemotePartitioningWorkerStepBuilderFactory remotePartitioningWorkerStepBuilderFactory(JobRepository jobRepository, JobExplorer jobExplorer,
            DataSourceTransactionManager springBatchDataSourceTransactionManager) {
        RemotePartitioningWorkerStepBuilderFactory remotePartitioningWorkerStepBuilderFactory = new RemotePartitioningWorkerStepBuilderFactory(jobRepository, jobExplorer,
                springBatchDataSourceTransactionManager);
        return remotePartitioningWorkerStepBuilderFactory;
    }

    @Bean
    @StepScope
    public JdbcPagingItemReader<AddressType> reader(
            @Qualifier("fcrmDataSource") DataSource fcrmDataSource, @Value("#{stepExecutionContext[minValue]}") final Integer minVal, @Value("#{stepExecutionContext[maxValue]}") final Integer maxVal) {
        Map<String, Order> sortMap = new HashMap<>();
        sortMap.put("addr_id", Order.ASCENDING);
        Map<String, Object> paramMap = new HashMap<>();
        paramMap.put("minValue", minVal);
        paramMap.put("maxValue", maxVal);
        PostgresPagingQueryProvider queryProvider = new PostgresPagingQueryProvider();
        queryProvider.setSelectClause(" SELECT addr_id, lookup_Code, meaning ");
        queryProvider.setFromClause(" from addr ");
        queryProvider.setWhereClause(" addr_id >= :minValue and addr_id <= :maxValue ");
        queryProvider.setSortKeys(sortMap);
        JdbcPagingItemReader<AddressType> reader = new JdbcPagingItemReader<AddressType>();
        reader.setDataSource(fcrmDataSource);
        reader.setFetchSize(100); // hint to db driver
        reader.setPageSize(1); // actual rows read
        reader.setRowMapper(new AddressTypeRowMapper());
        reader.setParameterValues(paramMap);
        reader.setQueryProvider(queryProvider);
        return reader;
    }

    @Bean
    public AddressTypeProcessor addressTypeProcessor() {
        return new AddressTypeProcessor();
    }

    @Bean
    public JdbcBatchItemWriter<AddressRelationType> writer(
            @Qualifier("fcrmDataSource") DataSource fcrmDataSource) {
        return new JdbcBatchItemWriterBuilder<AddressRelationType>()
                .itemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<AddressRelationType>())
                .sql("INSERT INTO public.address_relation_type( "
                        + " address_relation_type_cd, tenant_cd, address_relation_type_desc, "
                        + " batch_id, short_name, definition, address_type_dv, custom_boolean_01, "
                        + " custom_medium_string_01, custom_small_string_01, custom_small_string_02, "
                        + " custom_small_string_03, entity_sk)\r\n"
                        + " VALUES (:addressRelationTypeCode, 'N/A', :addressRelationTypeDesc, "
                        + " :batchId, :shortName, :definition, :addressTypeDv, :customBoolean01, "
                        + " :customMediumString01, :customSmallString01, :customSmallString02, "
                        + " :customSmallString03, :entitySk)")
                .dataSource(fcrmDataSource).build();
    }

    @Bean
    public QueueChannel inboundRequests() {
        return new QueueChannel();
    }

    @Bean
    public ConsumerFactory consumerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.bootStrapServers);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.LongDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, consumerGroup);
        props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
        return new DefaultKafkaConsumerFactory(props);
    }

    @Bean
    public IntegrationFlow inboundFlow(ConsumerFactory consumerFactory, QueueChannel inboundRequests) {
        return IntegrationFlows
                .from(Kafka.inboundChannelAdapter(consumerFactory, new ConsumerProperties(WorkerConstants.REQUEST_TOPIC_NAME)))
                .channel(inboundRequests)
                .get();
    }

    @Bean
    public QueueChannel outboundReplies() {
        return new QueueChannel();
    }

    @Bean
    public IntegrationFlow outboundFlow(KafkaTemplate kafkaTemplate, QueueChannel outboundReplies) {
        KafkaProducerMessageHandler messageHandler = new KafkaProducerMessageHandler(kafkaTemplate);
        messageHandler.setTopicExpression(new LiteralExpression(responseTopicName));
        return IntegrationFlows
                .from(outboundReplies)
                .handle(messageHandler)
                .get();
    }

    @Bean
    public Job addressWorkerJob(Step immsAddressWorkerStep, JobBuilderFactory jobBuilderFactory) {
        return jobBuilderFactory.get("addressWorkerJob")
                .incrementer(new RunIdIncrementer())
                .flow(immsAddressWorkerStep)
                .end()
                .build();
    }

    @Bean
    public Step addressWorkerStep(RemotePartitioningWorkerStepBuilderFactory remotePartitioningWorkerStepBuilderFactory, JdbcPagingItemReader<AddressType> reader,
            AddressTypeProcessor addressTypeProcessor, JdbcBatchItemWriter<AddressRelationType> writer, QueueChannel outboundReplies, QueueChannel inboundRequests) {
        return remotePartitioningWorkerStepBuilderFactory.get("addressWorkerStep")
                .outputChannel(outboundReplies)
                .inputChannel(inboundRequests)
                .<AddressType, AddressRelationType>chunk(1)
                .reader(reader)
                .processor(addressTypeProcessor)
                .writer(writer)
                .build();
    }
}
Update:
Regarding Mahmoud's comment "First thing to check: is partitioning metadata (column ranges) correctly persisted in the database?":
My source table has 7 rows. I see that the partitions are created correctly; please refer to the screenshot below. Step execution ids 694, 695, 696 and 697 were created by the manager job, while step execution id 698 was created by the worker job.
Below is a screenshot from the Kafka request topic.
So both the DB and Kafka have the right metadata. The step execution ids in the images do not match; please ignore that, as the older messages in the topic had been drained by the time I took the screenshots.
Update: Added the exception from the manager job startup
2022-11-10 18:50:19.021 WARN 20120 --- [ main] o.s.i.config.ReleaseStrategyFactoryBean : No ReleaseStrategy annotated method found on MessageChannelPartitionHandler; falling back to SimpleSequenceSizeReleaseStrategy, target: org.springframework.batch.integration.partition.MessageChannelPartitionHandler#15214920, methodName: null
2022-11-10 18:50:19.037 DEBUG 20120 --- [ main] o.s.i.h.s.MessagingMethodInvokerHelper : Method [public java.util.Collection org.springframework.batch.integration.partition.MessageChannelPartitionHandler.handle(org.springframework.batch.core.partition.StepExecutionSplitter,org.springframework.batch.core.StepExecution) throws java.lang.Exception] is not eligible for Message handling.
java.lang.IllegalArgumentException: org.springframework.integration.handler.support.MessagingMethodInvokerHelper$IneligibleMethodException: Found more than one parameter type candidate: [org.springframework.batch.core.partition.StepExecutionSplitter] and [org.springframework.batch.core.StepExecution].
Consider annotating one of the parameters with '@Payload'.
at org.springframework.integration.handler.support.MessagingMethodInvokerHelper.createHandlerMethod(MessagingMethodInvokerHelper.java:397) ~[spring-integration-core-5.5.12.jar:5.5.12]
at org.springframework.integration.handler.support.MessagingMethodInvokerHelper.obtainHandlerMethodIfAny(MessagingMethodInvokerHelper.java:801) ~[spring-integration-core-5.5.12.jar:5.5.12]
at
Update: I recreated the manager and worker batch jobs and created the Jackson mixin types below, but I am getting infinite recursion from Jackson.
public abstract class JobExecutionMixin {
    @JsonManagedReference
    private Collection<StepExecution> stepExecutions;
}

public abstract class StepExecutionMixin {
    @JsonManagedReference
    private Collection<StepExecution> stepExecutions;
}

@Bean
public JobExplorer jobExplorer(DataSource dataSource, JdbcOperations jdbcOperations) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    mapper.addMixIn(org.springframework.batch.core.StepExecution.class, StepExecutionMixin.class);
    mapper.addMixIn(org.springframework.batch.core.JobExecution.class, JobExecutionMixin.class);
    Jackson2ExecutionContextStringSerializer jackson2ExecutionContextStringSerializer = new Jackson2ExecutionContextStringSerializer();
    jackson2ExecutionContextStringSerializer.setObjectMapper(mapper);
    JobExplorerFactoryBean jobExplorerFactoryBean = new JobExplorerFactoryBean();
    jobExplorerFactoryBean.setDataSource(dataSource);
    jobExplorerFactoryBean.setJdbcOperations(jdbcOperations);
    jobExplorerFactoryBean.setSerializer(jackson2ExecutionContextStringSerializer);
    return jobExplorerFactoryBean.getObject();
}
Below is the stack trace:
2022-11-16 17:22:34.812 INFO 1447 --- [ scheduling-1] o.s.i.h.s.MessagingMethodInvokerHelper : Overriding default instance of MessageHandlerMethodFactory with provided one.
minValue exists? true
maxValue exists? true
size of entry set 3
key: minValue; value: 6
key: #class; value: java.util.HashMap
key: maxValue; value: 10
2022-11-16 17:22:35.029 INFO 1447 --- [ scheduling-1] o.s.batch.core.step.AbstractStep : Step: [worker_step:partition1] executed in 138ms
org.springframework.kafka.support.serializer.JsonSerializer
2022-11-16 17:22:41.082 INFO 1447 --- [ scheduling-1] o.a.k.clients.producer.KafkaProducer : [Producer clientId=producer-1] Instantiated an idempotent producer.
2022-11-16 17:22:41.168 INFO 1447 --- [ scheduling-1] o.a.kafka.common.utils.AppInfoParser : Kafka version: 3.1.1
2022-11-16 17:22:41.169 INFO 1447 --- [ scheduling-1] o.a.kafka.common.utils.AppInfoParser : Kafka commitId: 97671528ba54a138
2022-11-16 17:22:41.169 INFO 1447 --- [ scheduling-1] o.a.kafka.common.utils.AppInfoParser : Kafka startTimeMs: 1668599561168
2022-11-16 17:22:41.180 INFO 1447 --- [ad | producer-1] org.apache.kafka.clients.Metadata : [Producer clientId=producer-1] Cluster ID: y8UoN-ELRlSN2xSqU0m-cA
2022-11-16 17:22:41.191 INFO 1447 --- [ad | producer-1] o.a.k.c.p.internals.TransactionManager : [Producer clientId=producer-1] ProducerId set to 3 with epoch 0
2022-11-16 17:22:41.213 INFO 1447 --- [ad | producer-1] org.apache.kafka.clients.Metadata : [Producer clientId=producer-1] Resetting the last seen epoch of partition reply_topic-0 to 0 since the associated topicId changed from null to yG01ZCsETiSbnu3SqUFKRg
2022-11-16 17:22:42.205 ERROR 1447 --- [ scheduling-1] o.s.integration.handler.LoggingHandler : org.springframework.messaging.MessageHandlingException: error occurred in message handler [bean 'outboundFlow.kafka:outbound-channel-adapter#0' for component 'outboundFlow.org.springframework.integration.config.ConsumerEndpointFactoryBean#0'; defined in: 'class path resource [com/spring/etl/worker/config/WorkerJobBeanConfig.class]'; from source: 'bean method outboundFlow']; nested exception is org.apache.kafka.common.errors.SerializationException: Can't serialize data [StepExecution: id=211, version=3, name=worker_step:partition1, status=COMPLETED, exitStatus=COMPLETED, readCount=0, filterCount=0, writeCount=0 readSkipCount=0, writeSkipCount=0, processSkipCount=0, commitCount=1, rollbackCount=0, exitDescription=] for topic [reply_topic], failedMessage=GenericMessage [payload=StepExecution: id=211, version=3, name=worker_step:partition1, status=COMPLETED, exitStatus=COMPLETED, readCount=0, filterCount=0, writeCount=0 readSkipCount=0, writeSkipCount=0, processSkipCount=0, commitCount=1, rollbackCount=0, exitDescription=, headers={sequenceNumber=0, sequenceSize=3, kafka_timestampType=CREATE_TIME, kafka_receivedTopic=request_topic, kafka_offset=0, acknowledgmentCallback=org.springframework.integration.kafka.inbound.KafkaMessageSource$KafkaAckCallback#50421a42, kafka_remainingRecords=0, kafka_consumer=org.apache.kafka.clients.consumer.KafkaConsumer#68612476, correlationId=94:worker_step, id=3c7d2b01-0275-6707-8524-7ecd64d255a4, kafka_receivedPartitionId=1, kafka_receivedTimestamp=1668599551751, kafka_acknowledgment=org.springframework.integration.kafka.inbound.KafkaMessageSource$KafkaAckCallback#50421a42, timestamp=1668599555039}]
at org.springframework.integration.support.utils.IntegrationUtils.wrapInHandlingExceptionIfNecessary(IntegrationUtils.java:191)
at org.springframework.integration.handler.AbstractMessageHandler.handleMessage(AbstractMessageHandler.java:65)
at org.springframework.integration.endpoint.PollingConsumer.handleMessage(PollingConsumer.java:158)
at org.springframework.integration.endpoint.AbstractPollingEndpoint.messageReceived(AbstractPollingEndpoint.java:475)
at org.springframework.integration.endpoint.AbstractPollingEndpoint.doPoll(AbstractPollingEndpoint.java:461)
at org.springframework.integration.endpoint.AbstractPollingEndpoint$$Lambda$624.0000000000000000.call(Unknown Source)
at org.springframework.integration.endpoint.AbstractPollingEndpoint.pollForMessage(AbstractPollingEndpoint.java:413)
at org.springframework.integration.endpoint.AbstractPollingEndpoint.lambda$createPoller$4(AbstractPollingEndpoint.java:348)
at org.springframework.integration.endpoint.AbstractPollingEndpoint$$Lambda$626.0000000000000000.run(Unknown Source)
at org.springframework.integration.util.ErrorHandlingTaskExecutor.lambda$execute$0(ErrorHandlingTaskExecutor.java:57)
at org.springframework.integration.util.ErrorHandlingTaskExecutor$$Lambda$627.0000000000000000.run(Unknown Source)
at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:50)
at org.springframework.integration.util.ErrorHandlingTaskExecutor.execute(ErrorHandlingTaskExecutor.java:55)
at org.springframework.integration.endpoint.AbstractPollingEndpoint.lambda$createPoller$5(AbstractPollingEndpoint.java:341)
at org.springframework.integration.endpoint.AbstractPollingEndpoint$$Lambda$625.0000000000000000.run(Unknown Source)
at org.springframework.scheduling.support.DelegatingErrorHandlingRunnable.run(DelegatingErrorHandlingRunnable.java:54)
at org.springframework.scheduling.concurrent.ReschedulingRunnable.run(ReschedulingRunnable.java:95)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:831)
Caused by: org.apache.kafka.common.errors.SerializationException: Can't serialize data [StepExecution: id=211, version=3, name=worker_step:partition1, status=COMPLETED, exitStatus=COMPLETED, readCount=0, filterCount=0, writeCount=0 readSkipCount=0, writeSkipCount=0, processSkipCount=0, commitCount=1, rollbackCount=0, exitDescription=] for topic [reply_topic]
at org.springframework.kafka.support.serializer.JsonSerializer.serialize(JsonSerializer.java:216)
at org.springframework.kafka.support.serializer.JsonSerializer.serialize(JsonSerializer.java:203)
at org.apache.kafka.clients.producer.KafkaProducer.doSend(KafkaProducer.java:954)
at org.apache.kafka.clients.producer.KafkaProducer.send(KafkaProducer.java:914)
at org.springframework.kafka.core.DefaultKafkaProducerFactory$CloseSafeProducer.send(DefaultKafkaProducerFactory.java:993)
at org.springframework.kafka.core.KafkaTemplate.doSend(KafkaTemplate.java:655)
at org.springframework.kafka.core.KafkaTemplate.send(KafkaTemplate.java:429)
at org.springframework.integration.kafka.outbound.KafkaProducerMessageHandler.handleRequestMessage(KafkaProducerMessageHandler.java:513)
at org.springframework.integration.handler.AbstractReplyProducingMessageHandler.handleMessageInternal(AbstractReplyProducingMessageHandler.java:136)
at org.springframework.integration.handler.AbstractMessageHandler.handleMessage(AbstractMessageHandler.java:56)
... 21 more
Caused by: com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError) (through reference chain: org.springframework.batch.core.StepExecution["jobExecution"]-

With the help of a little googling, I found that I needed to write a custom serializer for the worker job and a custom deserializer for the manager job.
I used the Stack Overflow question: Problem Serializing Spring batch Kafka ChunkRequest
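For readers who hit the same wall, a rough sketch of what such a custom value serializer might look like on the worker side is below. It assumes an ObjectMapper that has been configured (for example with mixins) so that the StepExecution/JobExecution cycle is not followed, and that the serializer is registered programmatically on the producer factory (configuring it by class name would require a no-arg constructor). The manager side would mirror it with a Deserializer that calls ObjectMapper.readValue.

import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Serializer;
import org.springframework.batch.core.StepExecution;
import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical serializer for the worker's reply payload (a StepExecution).
public class StepExecutionJsonSerializer implements Serializer<StepExecution> {

    private final ObjectMapper mapper;

    // The mapper is expected to be pre-configured to break the
    // StepExecution <-> JobExecution cycle (e.g. via mixins).
    public StepExecutionJsonSerializer(ObjectMapper mapper) {
        this.mapper = mapper;
    }

    @Override
    public byte[] serialize(String topic, StepExecution stepExecution) {
        try {
            return mapper.writeValueAsBytes(stepExecution);
        } catch (Exception e) {
            throw new SerializationException("Failed to serialize StepExecution " + stepExecution.getId(), e);
        }
    }
}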

Related

Spring Kafka Key serializer not working for object

I'm not able to reproduce the documentation or sample code in order to have a non-String key serialized.
My goal is to use the key (field) to pass control actions alongside the data.
The ControlChannel and SchedulerEntry classes are regular POJOs.
Environment is:
Java 11
Spring Boot 2.4.1
Kafka 2.6.0
Expected code to Serialize/Deserialize:
Listener and Template
@KafkaListener(topics = "Scheduler", groupId = "scheduler", containerFactory = "schedulerKafkaListenerContainerFactory")
public void listenForScheduler(
@Header(KafkaHeaders.RECEIVED_MESSAGE_KEY) ControlChannel control,
@Header(KafkaHeaders.RECEIVED_TIMESTAMP) long timestamp,
@Payload SchedulerEntry entry) {
log.info("received data KEY ='{}'", control);
log.info("received data PAYLOAD = '{}'", entry);
/* ... */
}
@Bean
public KafkaTemplate<ControlChannel, SchedulerEntry> schedulerKafkaTemplate() {
return new KafkaTemplate<>(schedulerProducerFactory());
}
First Try - Consumer and Producer (Type Mapping and Trusted Packages)
@Bean
public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
Map<String, Object> props = new HashMap<>();
props.put(JsonSerializer.ADD_TYPE_INFO_HEADERS, false);
props.put(JsonSerializer.TYPE_MAPPINGS, "key:io.infolayer.aida.ControlChannel, value:io.infolayer.aida.entity.SchedulerEntry");
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
return new DefaultKafkaProducerFactory<>(props,
new JsonSerializer<ControlChannel>(),
new JsonSerializer<SchedulerEntry>());
}
public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
props.put(JsonDeserializer.REMOVE_TYPE_INFO_HEADERS, false);
props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
props.put(JsonDeserializer.TYPE_MAPPINGS, "key:io.infolayer.aida.ControlChannel, value:io.infolayer.aida.entity.SchedulerEntry");
JsonDeserializer<ControlChannel> k = new JsonDeserializer<ControlChannel>();
k.configure(props, true);
JsonDeserializer<SchedulerEntry> v = new JsonDeserializer<SchedulerEntry>();
k.configure(props, true);
return new DefaultKafkaConsumerFactory<>(props, k, v);
}
@Bean
public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory("scheduler"));
return factory;
}
Exception:
Caused by: org.apache.kafka.common.errors.SerializationException: Error deserializing key/value for partition Scheduler-0 at offset 25. If needed, please seek past the record to continue consumption.
Caused by: java.lang.IllegalStateException: No type information in headers and no default type provided
Second Try - Consumer and Producer (Just setting Key serializer/deserializer as Json)
@Bean
public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
return new DefaultKafkaProducerFactory<>(props);
}
public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
return new DefaultKafkaConsumerFactory<>(props, new JsonDeserializer<>(ControlChannel.class), new JsonDeserializer<>(SchedulerEntry.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory("scheduler"));
return factory;
}
Exception
org.springframework.kafka.KafkaException: Seek to current after exception; nested exception is org.springframework.kafka.listener.ListenerExecutionFailedException:
Listener method 'public void io.infolayer.aida.scheduler.KafkaSchedulerListener.listenForScheduler(io.infolayer.aida.ControlChannel,long,io.infolayer.aida.entity.SchedulerEntry)'
threw exception; nested exception is org.springframework.core.convert.ConverterNotFoundException:
No converter found capable of converting from type [io.infolayer.aida.entity.SchedulerEntry] to type [@org.springframework.messaging.handler.annotation.Header io.infolayer.aida.ControlChannel]; nested exception is org.springframework.core.convert.ConverterNotFoundException:
No converter found capable of converting from type [io.infolayer.aida.entity.SchedulerEntry] to type [@org.springframework.messaging.handler.annotation.Header io.infolayer.aida.ControlChannel]
There are several problems with your first attempt:
- you need to call configure() on the serializers with add type info=true
- you are calling configure() on k twice and not configuring v (the deserializers)
This works as expected...
@SpringBootApplication
public class So65501295Application {
private static final Logger log = LoggerFactory.getLogger(So65501295Application.class);
public static void main(String[] args) {
SpringApplication.run(So65501295Application.class, args);
}
@Bean
public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
Map<String, Object> props = new HashMap<>();
props.put(JsonSerializer.ADD_TYPE_INFO_HEADERS, true);
props.put(JsonSerializer.TYPE_MAPPINGS,
"key:com.example.demo.So65501295Application.ControlChannel, "
+ "value:com.example.demo.So65501295Application.SchedulerEntry");
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
JsonSerializer<ControlChannel> k = new JsonSerializer<ControlChannel>();
k.configure(props, true);
JsonSerializer<SchedulerEntry> v = new JsonSerializer<SchedulerEntry>();
v.configure(props, false);
return new DefaultKafkaProducerFactory<>(props, k, v);
}
public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
props.put(JsonDeserializer.REMOVE_TYPE_INFO_HEADERS, false);
props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
props.put(JsonDeserializer.TYPE_MAPPINGS,
"key:com.example.demo.So65501295Application.ControlChannel, "
+ "value:com.example.demo.So65501295Application.SchedulerEntry");
JsonDeserializer<ControlChannel> k = new JsonDeserializer<ControlChannel>();
k.configure(props, true);
JsonDeserializer<SchedulerEntry> v = new JsonDeserializer<SchedulerEntry>();
v.configure(props, false);
return new DefaultKafkaConsumerFactory<>(props, k, v);
}
@KafkaListener(topics = "Scheduler", groupId = "scheduler", containerFactory = "schedulerKafkaListenerContainerFactory")
public void listenForScheduler(
@Header(KafkaHeaders.RECEIVED_MESSAGE_KEY) ControlChannel control,
@Header(KafkaHeaders.RECEIVED_TIMESTAMP) long timestamp,
@Payload SchedulerEntry entry) {
log.info("received data KEY ='{}'", control);
log.info("received data PAYLOAD = '{}'", entry);
/* ... */
}
@Bean
public KafkaTemplate<ControlChannel, SchedulerEntry> schedulerKafkaTemplate() {
return new KafkaTemplate<>(schedulerProducerFactory());
}
@Bean
public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory("scheduler"));
return factory;
}
@Bean
public ApplicationRunner runner(KafkaTemplate<ControlChannel, SchedulerEntry> template) {
return args -> {
template.send("Scheduler", new ControlChannel(), new SchedulerEntry());
};
}
@Bean
public NewTopic topic() {
return TopicBuilder.name("Scheduler").partitions(1).replicas(1).build();
}
public static class ControlChannel {
String foo;
public String getFoo() {
return this.foo;
}
public void setFoo(String foo) {
this.foo = foo;
}
}
public static class SchedulerEntry {
String foo;
public String getFoo() {
return this.foo;
}
public void setFoo(String foo) {
this.foo = foo;
}
}
}
2021-01-04 11:42:25.026 INFO 23905 --- [ntainer#0-0-C-1] com.example.demo.So65501295Application : received data KEY ='com.example.demo.So65501295Application$ControlChannel@44a72886'
2021-01-04 11:42:25.026 INFO 23905 --- [ntainer#0-0-C-1] com.example.demo.So65501295Application : received data PAYLOAD = 'com.example.demo.So65501295Application$SchedulerEntry@74461c59'

Spring Cloud Sleuth with Reactor Kafka

I'm using Reactor Kafka in a Spring Boot Reactive app, with Spring Cloud Sleuth for distributed tracing.
I've set up Sleuth to use a custom propagation key from a header named "traceId".
I've also customized the log format to print the header in my logs, so a request like
curl -H "traceId: 123456" -X POST http://localhost:8084/parallel
will print 123456 in every log anywhere downstream starting from the Controller.
I would now like this header to be propagated via Kafka too. I understand that Sleuth has built-in instrumentation for Kafka, so the header should be propagated automatically; however, I'm unable to get this to work.
From my Controller, I produce a message onto a Kafka topic, and then have another Kafka consumer pick it up for processing.
Here's my Controller:
@RestController
@RequestMapping("/parallel")
public class BasicController {
private Logger logger = Loggers.getLogger(BasicController.class);
KafkaProducerLoadGenerator generator = new KafkaProducerLoadGenerator();
@PostMapping
public Mono<ResponseEntity> createMessage() {
int data = (int)(Math.random()*100000);
return Flux.just(data)
.doOnNext(num -> logger.info("Generating document for {}", num))
.map(generator::generateDocument)
.flatMap(generator::sendMessage)
.doOnNext(result ->
logger.info("Sent message {}, offset is {} to partition {}",
result.getT2().correlationMetadata(),
result.getT2().recordMetadata().offset(),
result.getT2().recordMetadata().partition()))
.doOnError(error -> logger.error("Error in subscribe while sending message", error))
.single()
.map(tuple -> ResponseEntity.status(HttpStatus.OK).body(tuple.getT1()));
}
}
Here's the code that produces messages onto the Kafka topic:
@Component
public class KafkaProducerLoadGenerator {
private static final Logger logger = Loggers.getLogger(KafkaProducerLoadGenerator.class);
private static final String bootstrapServers = "localhost:9092";
private static final String TOPIC = "load-topic";
private KafkaSender<Integer, String> sender;
private static int documentIndex = 0;
public KafkaProducerLoadGenerator() {
this(bootstrapServers);
}
public KafkaProducerLoadGenerator(String bootstrapServers) {
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
props.put(ProducerConfig.CLIENT_ID_CONFIG, "load-generator");
props.put(ProducerConfig.ACKS_CONFIG, "all");
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
SenderOptions<Integer, String> senderOptions = SenderOptions.create(props);
sender = KafkaSender.create(senderOptions);
}
@NewSpan("generator.sendMessage")
public Flux<Tuple2<DataDocument, SenderResult<Integer>>> sendMessage(DataDocument document) {
return sendMessage(TOPIC, document)
.map(result -> Tuples.of(document, result));
}
public Flux<SenderResult<Integer>> sendMessage(String topic, DataDocument document) {
ProducerRecord<Integer, String> producerRecord = new ProducerRecord<>(topic, document.getData(), document.toString());
return sender.send(Mono.just(SenderRecord.create(producerRecord, document.getData())))
.doOnNext(record -> logger.info("Sent message to partition={}, offset={} ", record.recordMetadata().partition(), record.recordMetadata().offset()))
.doOnError(e -> logger.error("Error sending message " + documentIndex, e));
}
public DataDocument generateDocument(int data) {
return DataDocument.builder()
.header("Load Data")
.data(data)
.traceId("trace"+data)
.timestamp(Instant.now())
.build();
}
}
My consumer looks like this:
@Component
@Scope(scopeName = ConfigurableBeanFactory.SCOPE_PROTOTYPE)
public class IndividualConsumer {
private static final Logger logger = Loggers.getLogger(IndividualConsumer.class);
private static final String bootstrapServers = "localhost:9092";
private static final String TOPIC = "load-topic";
private int consumerIndex = 0;
public ReceiverOptions setupConfig(String bootstrapServers) {
Map<String, Object> properties = new HashMap<>();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "load-topic-consumer-"+consumerIndex);
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "load-topic-multi-consumer-2");
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, DataDocumentDeserializer.class);
return ReceiverOptions.create(properties);
}
public void setIndex(int i) {
consumerIndex = i;
}
@EventListener(ApplicationReadyEvent.class)
public Disposable consumeMessage() {
ReceiverOptions<Integer, DataDocument> receiverOptions = setupConfig(bootstrapServers)
.subscription(Collections.singleton(TOPIC))
.addAssignListener(receiverPartitions -> logger.debug("onPartitionsAssigned {}", receiverPartitions))
.addRevokeListener(receiverPartitions -> logger.debug("onPartitionsRevoked {}", receiverPartitions));
Flux<ReceiverRecord<Integer, DataDocument>> messages = Flux.defer(() -> {
KafkaReceiver<Integer, DataDocument> receiver = KafkaReceiver.create(receiverOptions);
return receiver.receive();
});
Consumer<? super ReceiverRecord<Integer, DataDocument>> acknowledgeOffset = record -> record.receiverOffset().acknowledge();
return messages
.publishOn(Schedulers.newSingle("Parallel-Consumer"))
.doOnError(error -> logger.error("Error in the reactive chain", error))
.delayElements(Duration.ofMillis(100))
.doOnNext(record -> {
logger.info("Consumer {}: Received from partition {}, offset {}, data with index {}",
consumerIndex,
record.receiverOffset().topicPartition(),
record.receiverOffset().offset(),
record.value().getData());
})
.doOnNext(acknowledgeOffset)
.doOnError(error -> logger.error("Error receiving record", error))
.retryBackoff(100, Duration.ofSeconds(5), Duration.ofMinutes(5))
.subscribe();
}
}
I would expect Sleuth to automatically carry over the built-in Brave trace and the custom headers to the consumer, so that the trace covers the entire transaction.
However, I have two problems:
1. The generator bean doesn't get the same trace as the one in the Controller. It uses a different (and new) trace for every message sent.
2. The trace isn't propagated from the Kafka producer to the Kafka consumer.
I can resolve #1 above by replacing the generator bean with a simple Java class and instantiating it in the controller. However, that means I can't autowire other dependencies, and in any case it doesn't solve #2.
I am able to load an instance of the bean brave.kafka.clients.KafkaTracing, so I know it's being loaded by Spring. However, it doesn't look like the instrumentation is working. I inspected the content on Kafka using Kafka Tool, and no headers are populated on any message.
In fact, the consumer doesn't have a trace at all.
2020-05-06 23:57:32.898 INFO parallel-consumer:local [123-21922,578c510e23567aec,578c510e23567aec] 8180 --- [reactor-http-nio-3] rja.parallelconsumers.BasicController : Generating document for 23965
2020-05-06 23:57:32.907 INFO parallel-consumer:local [52e02d36b59c5acd,52e02d36b59c5acd,52e02d36b59c5acd] 8180 --- [single-11] r.p.kafka.KafkaProducerLoadGenerator : Sent message to partition=17, offset=0
2020-05-06 23:57:32.908 INFO parallel-consumer:local [123-21922,578c510e23567aec,578c510e23567aec] 8180 --- [single-11] rja.parallelconsumers.BasicController : Sent message 23965, offset is 0 to partition 17
2020-05-06 23:57:33.012 INFO parallel-consumer:local [-,-,-] 8180 --- [parallel-5] r.parallelconsumers.IndividualConsumer : Consumer 8: Received from partition load-topic-17, offset 0, data with index 23965
In the log above, [123-21922,578c510e23567aec,578c510e23567aec] is [custom-trace-header, brave traceId, brave spanId]
What am I missing?

Transactional Kafka Producer

I am trying to make my Kafka producer transactional.
I am sending 10 messages. If any error occurs, no message should be sent to Kafka, i.e. none or all.
I am using Spring Boot's KafkaTemplate.
@Configuration
@EnableKafka
public class KakfaConfiguration {
@Bean
public ProducerFactory<String, String> producerFactory() {
Map<String, Object> config = new HashMap<>();
// props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
// props.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG,
// appProps.getJksLocation());
// props.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG,
// appProps.getJksPassword());
config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
config.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
config.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
config.put(ProducerConfig.ACKS_CONFIG, acks);
config.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, retryBackOffMsConfig);
config.put(ProducerConfig.RETRIES_CONFIG, retries);
config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "prod-99");
return new DefaultKafkaProducerFactory<>(config);
}
@Bean
public KafkaTemplate<String, String> kafkaTemplate() {
return new KafkaTemplate<>(producerFactory());
}
@Bean(name = "ktm")
public KafkaTransactionManager kafkaTransactionManager() {
KafkaTransactionManager ktm = new KafkaTransactionManager(producerFactory());
ktm.setTransactionSynchronization(AbstractPlatformTransactionManager.SYNCHRONIZATION_ON_ACTUAL_TRANSACTION);
return ktm;
}
}
I am sending 10 messages as shown below, as mentioned in the documentation. 9 messages should be sent, and 1 message has a size over 1 MB, which gets rejected by the Kafka broker with a RecordTooLargeException.
https://docs.spring.io/spring-kafka/reference/html/#using-kafkatransactionmanager
@Component
@EnableTransactionManagement
class Sender {
@Autowired
private KafkaTemplate<String, String> template;
private static final Logger LOG = LoggerFactory.getLogger(Sender.class);
@Transactional("ktm")
public void sendThem(List<String> toSend) throws InterruptedException {
List<ListenableFuture<SendResult<String, String>>> futures = new ArrayList<>();
CountDownLatch latch = new CountDownLatch(toSend.size());
ListenableFutureCallback<SendResult<String, String>> callback = new ListenableFutureCallback<SendResult<String, String>>() {
@Override
public void onSuccess(SendResult<String, String> result) {
LOG.info(" message sucess : " + result.getProducerRecord().value());
latch.countDown();
}
@Override
public void onFailure(Throwable ex) {
LOG.error("Message Failed ");
latch.countDown();
}
};
toSend.forEach(str -> {
ListenableFuture<SendResult<String, String>> future = template.send("t_101", str);
futures.add(future); // collect the future so the loop below can inspect it
future.addCallback(callback);
});
if (latch.await(12, TimeUnit.MINUTES)) {
LOG.info("All sent ok");
} else {
for (int i = 0; i < toSend.size(); i++) {
if (!futures.get(i).isDone()) {
LOG.error("No send result for " + toSend.get(i));
}
}
}
But when I look at the topic t_hello_world, 9 messages are there. My expectation was to see 0 messages, since my producer is transactional.
How can I achieve this?
I am getting the following logs
2020-04-30 18:04:36.036 ERROR 18688 --- [ scheduling-1] o.s.k.core.DefaultKafkaProducerFactory : commitTransaction failed: CloseSafeProducer [delegate=org.apache.kafka.clients.producer.KafkaProducer#1eb5a312, txId=prod-990]
org.apache.kafka.common.KafkaException: Cannot execute transactional method because we are in an error state
at org.apache.kafka.clients.producer.internals.TransactionManager.maybeFailWithError(TransactionManager.java:923) ~[kafka-clients-2.4.1.jar:na]
at org.apache.kafka.clients.producer.internals.TransactionManager.lambda$beginCommit$2(TransactionManager.java:297) ~[kafka-clients-2.4.1.jar:na]
at org.apache.kafka.clients.producer.internals.TransactionManager.handleCachedTransactionRequestResult(TransactionManager.java:1013) ~[kafka-clients-2.4.1.jar:na]
at org.apache.kafka.clients.producer.internals.TransactionManager.beginCommit(TransactionManager.java:296) ~[kafka-clients-2.4.1.jar:na]
at org.apache.kafka.clients.producer.KafkaProducer.commitTransaction(KafkaProducer.java:713) ~[kafka-clients-2.4.1.jar:na]
at org.springframework.kafka.core.DefaultKafkaProducerFactory$CloseSafeProducer.commitTransaction(DefaultKafkaProducerFactory.java
Caused by: org.apache.kafka.common.errors.RecordTooLargeException: The request included a message larger than the max message size the server will accept.
2020-04-30 18:04:36.037 WARN 18688 --- [ scheduling-1] o.s.k.core.DefaultKafkaProducerFactory : Error during transactional operation; producer removed from cache; possible cause: broker restarted during transaction: CloseSafeProducer [delegate=org.apache.kafka.clients.producer.KafkaProducer#1eb5a312, txId=prod-990]
2020-04-30 18:04:36.038 INFO 18688 --- [ scheduling-1] o.a.k.clients.producer.KafkaProducer : [Producer clientId=producer-prod-990, transactionalId=prod-990] Closing the Kafka producer with timeoutMillis = 5000 ms.
2020-04-30 18:04:36.038 INFO 18688 --- [oducer-prod-990] o.a.k.clients.producer.internals.Sender : [Producer clientId=producer-prod-990, transactionalId=prod-990] Aborting incomplete transaction due to shutdown
Uncommitted records are written to the log; when a transaction commits or rolls back, an extra record is written to the log with the state of the transaction.
Consumers, by default, see all records, including the uncommitted records (but not the special commit/abort record).
For the console consumer, you need to set the isolation level to read_committed. See the help:
--isolation-level <String>    Set to read_committed in order to filter out
                              transactional messages which are not committed.
                              Set to read_uncommitted to read all messages.
                              (default: read_uncommitted)
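The equivalent for a programmatic Spring Kafka consumer is to set the isolation level in the consumer properties. A minimal sketch (the broker address, group id and String deserializers here are placeholders, not taken from the question):

@Bean
public ConsumerFactory<String, String> readCommittedConsumerFactory() {
    Map<String, Object> props = new HashMap<>();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "tx-verify-group");         // placeholder
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    // Only records from committed transactions are returned; the default is read_uncommitted.
    props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
    return new DefaultKafkaConsumerFactory<>(props);
}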
If I provide the configuration below in the yml file, will I still need to create the factory, template and transaction-manager beans as given in the example code?
For the given tx example, if I use a simple consumer (Java code) or Kafka Tool, will I be able to view any records for the above tx example? Hopefully not - am I correct, as per the tx example?
spring:
  profiles: local
  kafka:
    producer:
      client-id: book-event-producer-client
      bootstrap-servers: localhost:9092,localhost:9093,localhost:9094
      key-serializer: org.apache.kafka.common.serialization.IntegerSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
      transaction-id-prefix: tx-${random.uuid}
      properties:
        enable.idempotence: true
        acks: all
        retries: 2
        metadata.max.idle.ms: 10000

Kafka consumer poll with offset 0 not returning message

I am using spring-kafka to poll messages. When I use the annotation for the consumer and set the offset to 0, it sees all messages from the earliest. But when I try to use an injected ConsumerFactory to create a consumer on my own, poll only returns a few messages, or no messages at all. Is there some other config I need in order to be able to pull messages? The poll timeout is already set to 10 seconds.
@Component
public class GenericConsumer {
private static final Logger logger = LoggerFactory.getLogger(GenericConsumer.class);
@Autowired
ConsumerFactory<String, Record> consumerFactory;
public ConsumerRecords<String, Record> poll(String topic, String group){
logger.info("---------- Polling kafka recrods from topic " + topic + " group" + group);
Consumer<String, Record> consumer = consumerFactory.createConsumer(group, "");
consumer.subscribe(Arrays.asList(topic));
// need to make a dummy poll before we can seek
consumer.poll(1000);
consumer.seekToBeginning(consumer.assignment());
ConsumerRecords<String, Record> records;
records = consumer.poll(10000);
logger.info("------------ Total " + records.count() + " records polled");
consumer.close();
return records;
}
}
It works fine for me; this was with Boot 2.0.5 and Spring Kafka 2.1.10 ...
@SpringBootApplication
public class So52284259Application implements ConsumerAwareRebalanceListener {
private static final Logger logger = LoggerFactory.getLogger(So52284259Application.class);
public static void main(String[] args) {
SpringApplication.run(So52284259Application.class, args);
}
@Bean
public ApplicationRunner runner(KafkaTemplate<String, String> template, GenericConsumer consumer) {
return args -> {
// for (int i = 0; i < 1000; i++) { // load up the topic on first run
// template.send("so52284259", "foo" + i);
// }
consumer.poll("so52284259", "generic");
};
}
@KafkaListener(id = "listener", topics = "so52284259")
public void listen(String in) {
if ("foo999".equals(in)) {
logger.info("#KafkaListener: " + in);
}
}
@Override
public void onPartitionsAssigned(Consumer<?, ?> consumer, Collection<TopicPartition> partitions) {
consumer.seekToBeginning(partitions);
}
@Bean
public NewTopic topic() {
return new NewTopic("so52284259", 1, (short) 1);
}
}
@Component
class GenericConsumer {
private static final Logger logger = LoggerFactory.getLogger(GenericConsumer.class);
@Autowired
ConsumerFactory<String, String> consumerFactory;
public void poll(String topic, String group) {
logger.info("---------- Polling kafka recrods from topic " + topic + " group" + group);
Consumer<String, String> consumer = consumerFactory.createConsumer(group, "");
consumer.subscribe(Arrays.asList(topic));
// need to make a dummy poll before we can seek
consumer.poll(1000);
consumer.seekToBeginning(consumer.assignment());
ConsumerRecords<String, String> records;
boolean done = false;
while (!done) {
records = consumer.poll(10000);
logger.info("------------ Total " + records.count() + " records polled");
Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
while (iterator.hasNext()) {
String value = iterator.next().value();
if ("foo999".equals(value)) {
logger.info("Consumer: " + value);
done = true;
}
}
}
consumer.close();
}
}
and
2018-09-12 09:35:25.929 INFO 61390 --- [ main] com.example.GenericConsumer : ------------ Total 500 records polled
2018-09-12 09:35:25.931 INFO 61390 --- [ main] com.example.GenericConsumer : ------------ Total 500 records polled
2018-09-12 09:35:25.932 INFO 61390 --- [ main] com.example.GenericConsumer : Consumer: foo999
2018-09-12 09:35:25.942 INFO 61390 --- [ listener-0-C-1] com.example.So52284259Application : #KafkaListener: foo999

Spring batch partitioning is not working

I am using Spring Batch partitioning to merge data from a group of related flat files into a single file. The batch is failing with the two issues below:
1. The first slave step thread is failing because data is written to the file writer before it is opened. The value of the variable inputFileNames (step context data provided by the partitioner) for this thread is [20002", 20003].
2. The second slave step thread is failing because the partitioning data is missing from the step context. The value of inputFileNames for this thread is null.
Please let me know if I am missing something in the configuration.
// log with Error info
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.ReaderConfiguration [ReaderBatchConfiguration.java:473] inputFileNames ----[20002", 20003]
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.BatchConfiguration [BatchConfiguration.java:389] consumer ----p2
2015-12-26 17:59:14,275 ERROR [SimpleAsyncTaskExecutor-1] o.s.b.c.s.AbstractStep [AbstractStep.java:225] Encountered an error executing step testConsumersInputFileMergeStep in job testFileForInputJob
org.springframework.batch.item.WriterNotOpenException: Writer must be open before it can be written to
at org.springframework.batch.item.file.FlatFileItemWriter.write(FlatFileItemWriter.java:255) ~[spring-batch-infrastructure-3.0.3.RELEASE.jar:3.0.3.RELEASE]
2015-12-26 18:00:14,421 DEBUG [SimpleAsyncTaskExecutor-2] c.d.d.b.r.ReaderBatchConfiguration [ReaderConfiguration.java:474] inputFileNames ----null
// Partitioner
public class ProvisioningInputFilePartitioner implements Partitioner {
@Override
public Map<String, ExecutionContext> partition(int gridSize) {
Map<String, ExecutionContext> filesToProcess = getFilesToProcess(outboundSourceFolder);
Map<String, ExecutionContext> execCtxs = new HashMap<>();
for(Entry<String, ExecutionContext> entry : filesToProcess.entrySet()) {
execCtxs.put(entry.getKey(), entry.getValue());
}
return execCtxs;
}
private Map<String, ExecutionContext> getFilesToProcess(String outboundSourceFolder2) {
Map<String, ExecutionContext> contexts = new HashMap<>();
ExecutionContext execCtx1 = new ExecutionContext();
List<String> inputFileNames1 = Arrays.asList("20001", "22222");
execCtx1.put("consumer", "p1");
execCtx1.put("inputFileNames", inputFileNames1);
contexts.put("p1", execCtx1);
ExecutionContext execCtx2 = new ExecutionContext();
List<String> inputFileNames2 = Arrays.asList("20002", "20003");
execCtx1.put("consumer", "p2");
execCtx1.put("inputFileNames", inputFileNames2);
contexts.put("p2", execCtx2);
return contexts;
}
}
// Writer
@Bean
@StepScope
public ItemWriter<String> testConsumerFileItemWriter (@Value("#{stepExecutionContext[consumer]}") String consumer){
logger.debug("consumer ----"+ consumer);
FileSystemResource fileSystemResource = new FileSystemResource(new File(outboundSourceFolder, consumer + ".txt"));
FlatFileItemWriter<String> fileItemWriter = new FlatFileItemWriter<>();
fileItemWriter.setResource(fileSystemResource);
fileItemWriter.setLineAggregator(new PassThroughLineAggregator<String>());
return fileItemWriter;
}
@Bean
public Partitioner provisioningInputFilePartitioner() {
return new ProvisioningInputFilePartitioner();
}
@Bean
public TaskExecutor taskExecutor() {
return new SimpleAsyncTaskExecutor();
}
// Reader
@Bean
@StepScope
public ItemReader<String> testInputFilesReader (@Value("#{stepExecutionContext[inputFileNames]}") List<String> inputFileNames) {
logger.debug("inputFileNames ----" + inputFileNames);
MultiResourceItemReader<String> multiResourceItemReader = new MultiResourceItemReader<String>();
...
return multiResourceItemReader;
}
// slave step
@Bean
public Step testConsumersInputFileMergeStep(StepBuilderFactory stepBuilder, ItemReader<String> testInputFilesReader,
ItemWriter<String> testConsumerFileItemWriter){
return stepBuilder.get("testConsumersInputFileMergeStep").<String, String>chunk(1).reader(testInputFilesReader)
.writer(testConsumerFileItemWriter).build();
}
// master step
@Bean
public Step testConsumersFilePartitionerStep(StepBuilderFactory stepBuilder, Step testConsumersInputFileMergeStep, Partitioner provisioningInputFilePartitioner,
TaskExecutor taskExecutor ){
return stepBuilder.get("testConsumersFilePartitionerStep").partitioner(testConsumersInputFileMergeStep)
.partitioner("testConsumersInputFileMergeStep", provisioningInputFilePartitioner)
.taskExecutor(taskExecutor)
.build();
}
//Job
@Bean
public Job testFileForInputJob(JobBuilderFactory factory, Step testFileForInputStep, Step testConsumersFilePartitionerStep) {
return factory.get("testFileForInputJob").incrementer(new RunIdIncrementer()).start(testConsumersFilePartitionerStep).build();
}