Spring Batch - Job executions launching with same Task execution id

I'm using Spring Batch with Spring Cloud Task for remote partitioning, but every new job execution is created with the same task execution id. Is there any way to create a new task execution id for each new job execution?
In the Task Execution table, each job runs with the same parent execution id: every new job execution starts within the same task execution. The batch configuration code is as follows:
@Bean
public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer, Environment environment, DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
    Resource resource = delegatingResourceLoader.getResource(jarLocation);
    DeployerPartitionHandler partitionHandler = new DeployerPartitionHandler(taskLauncher, jobExplorer, resource, "workerStep", taskRepository);
    List<String> commandLineArguments = new ArrayList<>(5);
    commandLineArguments.add("--spring.profiles.active=worker");
    commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
    commandLineArguments.add("--spring.batch.initializer.enabled=false");
    commandLineArguments.add("--spring.cloud.task.closecontext_enabled=true");
    commandLineArguments.add("--logging.level=DEBUG");
    partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
    partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
    partitionHandler.setMaxWorkers(2);
    partitionHandler.setApplicationName("BatchApplicationWorker");
    return partitionHandler;
}
@Bean
@StepScope
public Partitioner partitioner(@Value("#{jobParameters['inputFiles']}") String file, @Value("#{jobParameters['partitionSize']}") String partitionSize1) {
    int partitionSize = Integer.parseInt(partitionSize1);
    return new Partitioner() {
        public Map<String, ExecutionContext> partition(int gridSize) {
            Map<String, ExecutionContext> partitions = new HashMap<>();
            String[] ids = fetchAllPrimaryKeys(file);
            List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
            int size = partitionPayloads.size();
            for (int i = 0; i < size; i++) {
                ExecutionContext executionContext = new ExecutionContext();
                executionContext.put("partitionNumber", i);
                executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
                partitions.put("partition" + i, executionContext);
            }
            return partitions;
        }
    };
}
@Bean
public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
    return this.stepBuilderFactory.get("masterStep")
            .partitioner(workerStep.getName(), partitioner(null, null))
            .step(workerStep)
            .partitionHandler(partitionHandler)
            .build();
}
@Bean
public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
    return this.stepBuilderFactory.get("workerStep")
            .<User, User>chunk(10000)
            .reader(reader(null))
            .processor(customProcessor)
            .writer(customWriter)
            .build();
}
@Bean
public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass, JobBuilderFactory jobBuilderFactory) {
    return jobBuilderFactory.get("batchJob")
            .incrementer(new RunIdIncrementer())
            .start(masterStep)
            .listener(jobExecutionListnerClass)
            .build();
}
public Long jobRunner(JobParams jobParams) throws BatchException {
    Map<String, JobParameter> maps = new HashMap<>();
    maps.put(Constants.TIME, new JobParameter(System.currentTimeMillis()));
    maps.put(Constants.INPUT_FILES, new JobParameter(jobParams.getInputSource()));
    maps.put(Constants.PARTITION_SIZE, new JobParameter(Integer.toString(jobParams.getPartitionSize())));
    maps.put(Constants.MAIL_RECIPIENTS, new JobParameter(jobParams.getMailRecipients()));
    maps.put(Constants.JOB_NAME, new JobParameter(jobParams.getJobName()));
    maps.put(Constants.JOB_DESCRIPTION, new JobParameter(jobParams.getJobDescription()));
    maps.put(Constants.JOB_RESTART, new JobParameter(Boolean.toString(jobParams.getRestart())));
    JobParameters jobParameters = new JobParameters(maps);
    JobExecution jobExecution;
    try {
        jobExecution = jobLauncher.run(job, jobParameters);
    } catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException
            | JobParametersInvalidException e) {
        throw new BatchException(e.getMessage());
    }
    return jobExecution.getId();
}

Related

Spring Kafka Key serializer not working for object

I'm not able to reproduce the documentation or sample code in order to have a non-String key serialized.
My goal is to use the key (field) to pass control actions alongside the data.
The ControlChannel and SchedulerEntry classes are regular POJOs.
Environment is:
Java 11
Spring Boot 2.4.1
Kafka 2.6.0
Expected code to Serialize/Deserialize:
Listener and Template
@KafkaListener(topics = "Scheduler", groupId = "scheduler", containerFactory = "schedulerKafkaListenerContainerFactory")
public void listenForScheduler(
        @Header(KafkaHeaders.RECEIVED_MESSAGE_KEY) ControlChannel control,
        @Header(KafkaHeaders.RECEIVED_TIMESTAMP) long timestamp,
        @Payload SchedulerEntry entry) {
    log.info("received data KEY ='{}'", control);
    log.info("received data PAYLOAD = '{}'", entry);
    /* ... */
}
@Bean
public KafkaTemplate<ControlChannel, SchedulerEntry> schedulerKafkaTemplate() {
    return new KafkaTemplate<>(schedulerProducerFactory());
}
**First Try - Consumer and Producer (Type Mapping and Trusted Packages)**
@Bean
public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
    Map<String, Object> props = new HashMap<>();
    props.put(JsonSerializer.ADD_TYPE_INFO_HEADERS, false);
    props.put(JsonSerializer.TYPE_MAPPINGS, "key:io.infolayer.aida.ControlChannel, value:io.infolayer.aida.entity.SchedulerEntry");
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
    return new DefaultKafkaProducerFactory<>(props,
            new JsonSerializer<ControlChannel>(),
            new JsonSerializer<SchedulerEntry>());
}
public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
    Map<String, Object> props = new HashMap<>();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
    props.put(JsonDeserializer.REMOVE_TYPE_INFO_HEADERS, false);
    props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
    props.put(JsonDeserializer.TYPE_MAPPINGS, "key:io.infolayer.aida.ControlChannel, value:io.infolayer.aida.entity.SchedulerEntry");
    JsonDeserializer<ControlChannel> k = new JsonDeserializer<ControlChannel>();
    k.configure(props, true);
    JsonDeserializer<SchedulerEntry> v = new JsonDeserializer<SchedulerEntry>();
    k.configure(props, true);
    return new DefaultKafkaConsumerFactory<>(props, k, v);
}
@Bean
public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
    ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
    factory.setConsumerFactory(consumerFactory("scheduler"));
    return factory;
}
Exception:
Caused by: org.apache.kafka.common.errors.SerializationException: Error deserializing key/value for partition Scheduler-0 at offset 25. If needed, please seek past the record to continue consumption.
Caused by: java.lang.IllegalStateException: No type information in headers and no default type provided
**Second Try - Consumer and Producer (just setting the key serializer/deserializer to JSON)**
@Bean
public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
    Map<String, Object> props = new HashMap<>();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
    return new DefaultKafkaProducerFactory<>(props);
}
public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
    Map<String, Object> props = new HashMap<>();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
    return new DefaultKafkaConsumerFactory<>(props, new JsonDeserializer<>(ControlChannel.class), new JsonDeserializer<>(SchedulerEntry.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
    ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
    factory.setConsumerFactory(consumerFactory("scheduler"));
    return factory;
}
Exception
org.springframework.kafka.KafkaException: Seek to current after exception; nested exception is org.springframework.kafka.listener.ListenerExecutionFailedException:
Listener method 'public void io.infolayer.aida.scheduler.KafkaSchedulerListener.listenForScheduler(io.infolayer.aida.ControlChannel,long,io.infolayer.aida.entity.SchedulerEntry)'
threw exception; nested exception is org.springframework.core.convert.ConverterNotFoundException:
No converter found capable of converting from type [io.infolayer.aida.entity.SchedulerEntry] to type [@org.springframework.messaging.handler.annotation.Header io.infolayer.aida.ControlChannel]; nested exception is org.springframework.core.convert.ConverterNotFoundException:
No converter found capable of converting from type [io.infolayer.aida.entity.SchedulerEntry] to type [@org.springframework.messaging.handler.annotation.Header io.infolayer.aida.ControlChannel]
There are several problems with your first attempt:
- you need to call configure() on the serializers with add type info = true
- you are calling configure() on k twice and never configuring v (the deserializers)
This works as expected...
@SpringBootApplication
public class So65501295Application {

    private static final Logger log = LoggerFactory.getLogger(So65501295Application.class);

    public static void main(String[] args) {
        SpringApplication.run(So65501295Application.class, args);
    }

    @Bean
    public ProducerFactory<ControlChannel, SchedulerEntry> schedulerProducerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(JsonSerializer.ADD_TYPE_INFO_HEADERS, true);
        props.put(JsonSerializer.TYPE_MAPPINGS,
                "key:com.example.demo.So65501295Application.ControlChannel, "
                + "value:com.example.demo.So65501295Application.SchedulerEntry");
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
        JsonSerializer<ControlChannel> k = new JsonSerializer<ControlChannel>();
        k.configure(props, true);
        JsonSerializer<SchedulerEntry> v = new JsonSerializer<SchedulerEntry>();
        v.configure(props, false);
        return new DefaultKafkaProducerFactory<>(props, k, v);
    }

    public ConsumerFactory<ControlChannel, SchedulerEntry> consumerFactory(String groupId) {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(JsonDeserializer.REMOVE_TYPE_INFO_HEADERS, false);
        props.put(JsonDeserializer.TRUSTED_PACKAGES, "*");
        props.put(JsonDeserializer.TYPE_MAPPINGS,
                "key:com.example.demo.So65501295Application.ControlChannel, "
                + "value:com.example.demo.So65501295Application.SchedulerEntry");
        JsonDeserializer<ControlChannel> k = new JsonDeserializer<ControlChannel>();
        k.configure(props, true);
        JsonDeserializer<SchedulerEntry> v = new JsonDeserializer<SchedulerEntry>();
        v.configure(props, false);
        return new DefaultKafkaConsumerFactory<>(props, k, v);
    }

    @KafkaListener(topics = "Scheduler", groupId = "scheduler", containerFactory = "schedulerKafkaListenerContainerFactory")
    public void listenForScheduler(
            @Header(KafkaHeaders.RECEIVED_MESSAGE_KEY) ControlChannel control,
            @Header(KafkaHeaders.RECEIVED_TIMESTAMP) long timestamp,
            @Payload SchedulerEntry entry) {
        log.info("received data KEY ='{}'", control);
        log.info("received data PAYLOAD = '{}'", entry);
        /* ... */
    }

    @Bean
    public KafkaTemplate<ControlChannel, SchedulerEntry> schedulerKafkaTemplate() {
        return new KafkaTemplate<>(schedulerProducerFactory());
    }

    @Bean
    public ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> schedulerKafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<ControlChannel, SchedulerEntry> factory = new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory("scheduler"));
        return factory;
    }

    @Bean
    public ApplicationRunner runner(KafkaTemplate<ControlChannel, SchedulerEntry> template) {
        return args -> {
            template.send("Scheduler", new ControlChannel(), new SchedulerEntry());
        };
    }

    @Bean
    public NewTopic topic() {
        return TopicBuilder.name("Scheduler").partitions(1).replicas(1).build();
    }

    public static class ControlChannel {

        String foo;

        public String getFoo() {
            return this.foo;
        }

        public void setFoo(String foo) {
            this.foo = foo;
        }
    }

    public static class SchedulerEntry {

        String foo;

        public String getFoo() {
            return this.foo;
        }

        public void setFoo(String foo) {
            this.foo = foo;
        }
    }
}
2021-01-04 11:42:25.026  INFO 23905 --- [ntainer#0-0-C-1] com.example.demo.So65501295Application   : received data KEY ='com.example.demo.So65501295Application$ControlChannel@44a72886'
2021-01-04 11:42:25.026  INFO 23905 --- [ntainer#0-0-C-1] com.example.demo.So65501295Application   : received data PAYLOAD = 'com.example.demo.So65501295Application$SchedulerEntry@74461c59'

How to commit the offsets when using KafkaItemReader in a Spring Batch job, once all the messages are processed and written to the .dat file?

I have developed a Spring Batch job which reads from a Kafka topic using the KafkaItemReader class. I want to commit the offsets only when the messages read in the defined chunk are processed and written successfully to an output .dat file.
@Bean
public Job kafkaEventReformatjob(
        @Qualifier("MaintStep") Step MainStep,
        @Qualifier("moveFileToFolder") Step moveFileToFolder,
        @Qualifier("compressFile") Step compressFile,
        JobExecutionListener listener)
{
    return jobBuilderFactory.get("kafkaEventReformatJob")
            .listener(listener)
            .incrementer(new RunIdIncrementer())
            .flow(MainStep)
            .next(moveFileToFolder)
            .next(compressFile)
            .end()
            .build();
}
@Bean
Step MainStep(
        ItemProcessor<IncomingRecord, List<Record>> flatFileItemProcessor,
        ItemWriter<List<Record>> flatFileWriter)
{
    return stepBuilderFactory.get("mainStep")
            .<IncomingRecord, List<Record>> chunk(5000)
            .reader(kafkaItemReader())
            .processor(flatFileItemProcessor)
            .writer(writer())
            .listener(basicStepListener)
            .build();
}
// Reader reads all the messages from the Kafka topic and returns them as IncomingRecord instances.
@Bean
KafkaItemReader<String, IncomingRecord> kafkaItemReader() {
    Properties props = new Properties();
    props.putAll(this.properties.buildConsumerProperties());
    List<Integer> partitions = new ArrayList<>();
    partitions.add(0);
    partitions.add(1);
    return new KafkaItemReaderBuilder<String, IncomingRecord>()
            .partitions(partitions)
            .consumerProperties(props)
            .name("records")
            .saveState(true)
            .topic(topic)
            .pollTimeout(Duration.ofSeconds(40L))
            .build();
}
@Bean
public ItemWriter<List<Record>> writer() {
    ListUnpackingItemWriter<Record> listUnpackingItemWriter = new ListUnpackingItemWriter<>();
    listUnpackingItemWriter.setDelegate(flatWriter());
    return listUnpackingItemWriter;
}

public ItemWriter<Record> flatWriter() {
    FlatFileItemWriter<Record> fileWriter = new FlatFileItemWriter<>();
    String tempFileName = "abc";
    LOGGER.info("Output File name " + tempFileName + " is in working directory ");
    String workingDir = service.getWorkingDir().toAbsolutePath().toString();
    Path outputFile = Paths.get(workingDir, tempFileName);
    fileWriter.setName("fileWriter");
    fileWriter.setResource(new FileSystemResource(outputFile.toString()));
    fileWriter.setLineAggregator(lineAggregator());
    fileWriter.setForceSync(true);
    fileWriter.setFooterCallback(customFooterCallback());
    fileWriter.close();
    LOGGER.info("Successfully created the file writer");
    return fileWriter;
}

@StepScope
@Bean
public TransformProcessor processor() {
    return new TransformProcessor();
}
==============================================================================
Writer Class
@BeforeStep
public void beforeStep(StepExecution stepExecution) {
    this.stepExecution = stepExecution;
}

@AfterStep
public void afterStep(StepExecution stepExecution) {
    this.stepExecution.setWriteCount(count);
}

@Override
public void write(final List<? extends List<Record>> lists) throws Exception {
    List<Record> consolidatedList = new ArrayList<>();
    for (List<Record> list : lists) {
        if (list != null && !list.isEmpty()) {
            consolidatedList.addAll(list);
        }
    }
    delegate.write(consolidatedList);
    count += consolidatedList.size(); // to count the trailer record count
}
===============================================================
Item Processor
@Override
public List<Record> process(IncomingRecord record) {
    List<Record> recordList = new ArrayList<>();
    if (null != record.getEventName() /* and a few other conditions inside this section */) {
        // set values of the Record class by extracting them from the IncomingRecord,
        // then add the valid records matching the conditions to recordList
    } else {
        return null;
    }
    return recordList;
}
Synchronizing a read operation and a write operation between two transactional resources (a queue and a database, for instance)
is possible by using a JTA transaction manager that coordinates both transaction managers (2PC protocol).
However, this approach is not possible if one of the resources is not transactional (like the majority of file systems). So unless you use
a transactional file system and a JTA transaction manager that coordinates a Kafka transaction manager and a file system transaction manager,
you need another approach, like the Compensating Transaction pattern. In your case, the "undo" operation (compensating action) would be rewinding the offset to where it was before the failed chunk.
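As a rough, hedged sketch of that compensating action (it assumes you drive your own KafkaConsumer; the built-in KafkaItemReader does not expose its internal consumer, so the class and field names below are purely illustrative), a ChunkListener could record the partition positions before each chunk and seek back to them when the chunk fails:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.common.TopicPartition;
import org.springframework.batch.core.ChunkListener;
import org.springframework.batch.core.scope.context.ChunkContext;

public class OffsetRewindChunkListener implements ChunkListener {

    private final Consumer<?, ?> consumer; // hypothetical: a consumer you manage yourself
    private final Map<TopicPartition, Long> positionsBeforeChunk = new HashMap<>();

    public OffsetRewindChunkListener(Consumer<?, ?> consumer) {
        this.consumer = consumer;
    }

    @Override
    public void beforeChunk(ChunkContext context) {
        // remember where each assigned partition was before the chunk started
        positionsBeforeChunk.clear();
        for (TopicPartition partition : consumer.assignment()) {
            positionsBeforeChunk.put(partition, consumer.position(partition));
        }
    }

    @Override
    public void afterChunk(ChunkContext context) {
        // the chunk (processing + file write) completed: only now is it safe to commit offsets
    }

    @Override
    public void afterChunkError(ChunkContext context) {
        // compensating action: rewind to the pre-chunk positions so the records are re-read
        positionsBeforeChunk.forEach(consumer::seek);
    }
}

Such a listener would be registered on the step with .listener(...) next to the reader and writer; it only illustrates the pattern and is not a drop-in replacement for the state that KafkaItemReader already saves in the execution context.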

Spring Kafka - do not retry uncommitted offsets

How can I stop Spring Kafka from retrying messages that were not read from the topic? For example, if I kill the application and then restart it, my consumer starts consuming the messages that were not consumed before. How can I prevent this?
@Bean
public ConsumerFactory<String, String> manualConsumerFactory() {
    Map<String, Object> configs = consumerConfigs();
    configs.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
    configs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    return new DefaultKafkaConsumerFactory<>(configs);
}

/**
 * Kafka manual ack listener container factory kafka listener container factory.
 *
 * @return the kafka listener container factory
 */
@Bean
public KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, String>> kafkaManualAckListenerContainerFactory() {
    ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
    factory.setConsumerFactory(manualConsumerFactory());
    ContainerProperties props = factory.getContainerProperties();
    props.setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
    return factory;
}
@Override
@EventListener
public void processSettlementFile(final Notification notification) {
    LOG.info("Handling message [{}]", notification);
    try {
        final Map<String, JobParameter> parameters = new HashMap<>();
        parameters.put("fileName", new JobParameter("1-101-D-2017-212-volume-per-transaction.csv"));
        parameters.put("bucket", new JobParameter("bucket-name-can-be-passed-also-from-kafka-todo"));
        final JobParameters jobParameters = new JobParameters(parameters);
        final JobExecution execution = jobLauncher.run(succeededTransactionCsvFileToDatabaseJob, jobParameters);
        LOG.info("Job Execution Status: " + execution.getStatus());
    } catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException | JobParametersInvalidException e) {
        LOG.error("Failed to process job..", e);
    }
}

@KafkaListener(topics = "topic", groupId = "processor-service", clientIdPrefix = "string", containerFactory = "kafkaManualAckListenerContainerFactory")
public void listenAsString(@Payload final String payload, Acknowledgment acknowledgment, final ConsumerRecord<String, String> consumerRecord) throws TopicEventException {
    applicationEventPublisher.publishEvent(object);
    acknowledgment.acknowledge();
}
You can add a ConsumerAwareRebalanceListener to the container configuration and call consumer.seekToEnd(partitions) in onPartitionsAssigned().
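For example, here is a minimal sketch of that change (assumed, not taken from the original answer) layered onto the kafkaManualAckListenerContainerFactory bean from the question; it additionally needs imports for java.util.Collection, org.apache.kafka.clients.consumer.Consumer, org.apache.kafka.common.TopicPartition and org.springframework.kafka.listener.ConsumerAwareRebalanceListener:

@Bean
public KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, String>> kafkaManualAckListenerContainerFactory() {
    ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
    factory.setConsumerFactory(manualConsumerFactory());
    ContainerProperties props = factory.getContainerProperties();
    props.setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
    // whenever partitions are assigned (e.g. after a restart), jump to the end so that
    // records published while the application was down are skipped instead of replayed
    props.setConsumerRebalanceListener(new ConsumerAwareRebalanceListener() {
        @Override
        public void onPartitionsAssigned(Consumer<?, ?> consumer, Collection<TopicPartition> partitions) {
            consumer.seekToEnd(partitions);
        }
    });
    return factory;
}

Note that this skips the unconsumed records entirely; without the seek, a restarted consumer in the same group resumes from the last committed offset, which is why the unacknowledged messages are redelivered.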

Save on JDBC connections by using JdbcCursorItemReader or JdbcPagingItemReader

In my Spring Batch project, I used JdbcCursorItemReader to read data and process it in parallel. I can run the batch locally without any problem.
I also heard that JdbcPagingItemReader is recommended over JdbcCursorItemReader for parallel processing, since the cursor reader holds the connection for too long, while the paging reader can release the connection once the page size is reached.
I then switched to JdbcPagingItemReader in step2, but to my surprise I got the exception below when running locally.
Caused by: java.sql.SQLTransientConnectionException: HikariPool-1 -
Connection is not available, request timed out after 300001ms.
However, it seems the above exception occurs in step1, before the paging reader in step2 is even executed, and that is the only change I made. Please shed some light on why the exception is thrown and whether it is good practice to use a paging reader instead of a cursor reader in parallel processing. Your help is much appreciated!
The code snippet is pasted below:
@Bean
@StepScope
public Flow createParallelSubFlow() {
    List<Flow> subFlowList = new ArrayList<>();
    List<Stream> streamList;
    try {
        streamList = dataSourceConfig.streamMapper().
                getStreamListByStatus(Constants.PENDING_STATUS_CD);
    } catch (Exception e) {
    }
    streamList.forEach(stream -> {
        long id = stream.getStreamId();
        String flowName = "stream" + id + "_flow";
        Flow subFlow = new FlowBuilder<Flow>(flowName)
                .start(step1(id))
                .next(step2(id))
                .end();
        subFlowList.add(subFlow);
    });
    return new FlowBuilder<Flow>("splitFlow").split(new SimpleAsyncTaskExecutor())
            .add(subFlowList.toArray(new Flow[0])).build();
}
public Step step1(long id) {
    return stepBuilderFactory.get("step1")
            .<Domain, Domain>chunk(100)
            .reader(reader1(id))
            .writer(writer1())
            .build();
}

//@StepScope
//@Bean
public Step step2(long id) {
    return stepBuilderFactory.get("step2")
            .<Domain, Domain>chunk(100)
            .reader(cursorReader2(id))
            .processor(processor2)
            .writer(writer2())
            .build();
}
public JdbcCursorItemReader<Domain> cursorReader2(Long id) {
    return new JdbcCursorItemReaderBuilder<Domain>()
            .dataSource(dataSourceConfig.dataSource())
            .name("cursorReader")
            .sql(Constants.QUERY_SQL)
            .preparedStatementSetter(new PreparedStatementSetter() {
                @Override
                public void setValues(PreparedStatement ps) throws SQLException {
                    ps.setLong(1, id);
                }})
            .rowMapper(new RowMapper())
            .build();
}

//Switch from cursorReader2 to pagingReader2 in step2
public JdbcPagingItemReader<Domain> pagingReader2(Long id) {
    return new JdbcPagingItemReaderBuilder<Domain>()
            .dataSource(dataSourceConfig.dataSource())
            .name("pagingReader")
            .queryProvider(queryProvider())
            .parameterValues(parameterValues(id))
            .rowMapper(new RowMapper())
            .pageSize(100)
            .build();
}
@Bean
public PagingQueryProvider queryProvider() {
    SqlPagingQueryProviderFactoryBean providerFactory = new SqlPagingQueryProviderFactoryBean();
    Map<String, Order> sortKeys = new HashMap<>(2);
    sortKeys.put("ID", Order.ASCENDING);
    providerFactory.setDataSource(dataSourceConfig.dataSource());
    providerFactory.setSelectClause("SELECT Clause");
    providerFactory.setFromClause("FROM Clause");
    providerFactory.setWhereClause("WHERE Clause");
    providerFactory.setSortKeys(sortKeys);
    PagingQueryProvider pagingQueryProvider = null;
    try {
        pagingQueryProvider = providerFactory.getObject();
    } catch (Exception e) {
        logger.error("Failed to get PagingQueryProvider", e);
        throw new RuntimeException("Failed to get PagingQueryProvider", e);
    }
    return pagingQueryProvider;
}

private Map<String, Object> parameterValues(Long id) {
    Map<String, Object> parameterValues = new HashMap<>();
    parameterValues.put("1", id);
    return parameterValues;
}

Spring batch partitioning is not working

I am using Spring Batch partitioning to merge data from a group of related flat files into a single file. The batch is failing with the two issues below:
The first slave step thread fails because the data is written to the file writer before it is opened. The value of the variable inputFileNames (step context data provided by the partitioner) for this thread is [20002", 20003].
The second slave step thread fails because the partitioning data is missing from the step context. The value of the variable inputFileNames for this thread is null.
Please let me know if I am missing something in the configuration.
// log with Error info
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.ReaderConfiguration [ReaderBatchConfiguration.java:473] inputFileNames ----[20002", 20003]
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.BatchConfiguration [BatchConfiguration.java:389] consumer ----p2
2015-12-26 17:59:14,275 ERROR [SimpleAsyncTaskExecutor-1] o.s.b.c.s.AbstractStep [AbstractStep.java:225] Encountered an error executing step testConsumersInputFileMergeStep in job testFileForInputJob
org.springframework.batch.item.WriterNotOpenException: Writer must be open before it can be written to
at org.springframework.batch.item.file.FlatFileItemWriter.write(FlatFileItemWriter.java:255) ~[spring-batch-infrastructure-3.0.3.RELEASE.jar:3.0.3.RELEASE]
2015-12-26 18:00:14,421 DEBUG [SimpleAsyncTaskExecutor-2] c.d.d.b.r.ReaderBatchConfiguration [ReaderConfiguration.java:474] inputFileNames ----null
// Partitioner
public class ProvisioningInputFilePartitioner implements Partitioner {

    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> filesToProcess = getFilesToProcess(outboundSourceFolder);
        Map<String, ExecutionContext> execCtxs = new HashMap<>();
        for (Entry<String, ExecutionContext> entry : filesToProcess.entrySet()) {
            execCtxs.put(entry.getKey(), entry.getValue());
        }
        return execCtxs;
    }

    private Map<String, ExecutionContext> getFilesToProcess(String outboundSourceFolder2) {
        Map<String, ExecutionContext> contexts = new HashMap<>();
        ExecutionContext execCtx1 = new ExecutionContext();
        List<String> inputFileNames1 = Arrays.asList("20001", "22222");
        execCtx1.put("consumer", "p1");
        execCtx1.put("inputFileNames", inputFileNames1);
        contexts.put("p1", execCtx1);
        ExecutionContext execCtx2 = new ExecutionContext();
        List<String> inputFileNames2 = Arrays.asList("20002", "20003");
        execCtx1.put("consumer", "p2");
        execCtx1.put("inputFileNames", inputFileNames2);
        contexts.put("p2", execCtx2);
        return contexts;
    }
}
// Writer
@Bean
@StepScope
public ItemWriter<String> testConsumerFileItemWriter (@Value("#{stepExecutionContext[consumer]}") String consumer){
    logger.debug("consumer ----" + consumer);
    FileSystemResource fileSystemResource = new FileSystemResource(new File(outboundSourceFolder, consumer + ".txt"));
    FlatFileItemWriter<String> fileItemWriter = new FlatFileItemWriter<>();
    fileItemWriter.setResource(fileSystemResource);
    fileItemWriter.setLineAggregator(new PassThroughLineAggregator<String>());
    return fileItemWriter;
}

@Bean
public Partitioner provisioningInputFilePartitioner() {
    return new ProvisioningInputFilePartitioner();
}

@Bean
public TaskExecutor taskExecutor() {
    return new SimpleAsyncTaskExecutor();
}
// Reader
@Bean
@StepScope
public ItemReader<String> testInputFilesReader (@Value("#{stepExecutionContext[inputFileNames]}") List<String> inputFileNames) {
    logger.debug("inputFileNames ----" + inputFileNames);
    MultiResourceItemReader<String> multiResourceItemReader = new MultiResourceItemReader<String>();
    ...
    return multiResourceItemReader;
}
// slave step
@Bean
public Step testConsumersInputFileMergeStep(StepBuilderFactory stepBuilder, ItemReader<String> testInputFilesReader,
        ItemWriter<String> testConsumerFileItemWriter){
    return stepBuilder.get("testConsumersInputFileMergeStep").<String, String>chunk(1).reader(testInputFilesReader)
            .writer(testConsumerFileItemWriter).build();
}
// master step
@Bean
public Step testConsumersFilePartitionerStep(StepBuilderFactory stepBuilder, Step testConsumersInputFileMergeStep, Partitioner provisioningInputFilePartitioner,
        TaskExecutor taskExecutor ){
    return stepBuilder.get("testConsumersFilePartitionerStep").partitioner(testConsumersInputFileMergeStep)
            .partitioner("testConsumersInputFileMergeStep", provisioningInputFilePartitioner)
            .taskExecutor(taskExecutor)
            .build();
}
//Job
@Bean
public Job testFileForInputJob(JobBuilderFactory factory, Step testFileForInputStep, Step testConsumersFilePartitionerStep) {
    return factory.get("testFileForInputJob").incrementer(new RunIdIncrementer()).start(testConsumersFilePartitionerStep).build();
}