I want to implement the flow structure below in Spring Batch.
                 Job
                /   \
           Flow1     Flow2
             |         |
           Step1     Step2
             |        /  \
             |   Step3    Flow3
             |      |       |
             |      |     Step4
              \     |      /
               \    |     /
                \   |    /
                  Step5
The job configuration pseudo-code is as follows:
@Configuration
public class JobConfiguration {

    ......

    @Bean
    public Job job() {
        Flow flow1 = new FlowBuilder<SimpleFlow>("flow1")
                .start(step1())
                .build();

        Flow flow3 = new FlowBuilder<SimpleFlow>("flow3")
                .start(step4())
                .build();

        Flow flow2 = new FlowBuilder<SimpleFlow>("flow2")
                .start(step2())
                .next(step3())
                .split(new SimpleAsyncTaskExecutor()).add(flow3)
                .build();

        return jobBuilderFactory.get("job")
                .incrementer(new RunIdIncrementer())
                .start(flow1)
                .split(new SimpleAsyncTaskExecutor()).add(flow2)
                .next(step5())
                .end()
                .build();
    }

    ......
}
When I run the batch, the log shows that step1, step2, step3 and step5 are executed, but step4 is not run.
I am wondering how to define a sub-flow inside another flow. Is the above code the right way to implement it?
Thanks in advance!
Running each flow in isolation shows that Flow1 and Flow3 are correct, but Flow2 isn't. Running only Flow2 with:
return jobBuilderFactory.get("job")
        .incrementer(new RunIdIncrementer())
        .start(flow2)
        .build()
        .build();
shows that step2 and step3 are executed, but not step4. So the problem is with the definition of this flow.
You need to define a parallel flow between Step3 and Flow3 as you did for Flow1 and Flow2. Here is an example:
@Bean
public Job job() {
    Flow flow1 = new FlowBuilder<SimpleFlow>("flow1")
            .start(step1())
            .build();

    Flow flow3 = new FlowBuilder<SimpleFlow>("flow3")
            .start(step4())
            .build();

    Flow parallelFlow = new FlowBuilder<SimpleFlow>("parallelFlow")
            .start(step3())
            .split(new SimpleAsyncTaskExecutor()).add(flow3)
            .build();

    Flow flow2 = new FlowBuilder<SimpleFlow>("flow2")
            .start(step2())
            .next(parallelFlow)
            .build();

    return jobBuilderFactory.get("job")
            .incrementer(new RunIdIncrementer())
            .start(flow1)
            .split(new SimpleAsyncTaskExecutor()).add(flow2)
            .next(step5())
            .end()
            .build();
}
Hope this helps.
I'm using Spring Batch with Spring Cloud Task for remote partitioning, but each new job execution is created with the same task execution id. Is there any way to create a new task execution id for each new job execution?
In the task execution table, each job runs with the same parent execution id.
Each new job execution starts within the same task execution. The code for the batch configuration is as follows:
@Bean
public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer, Environment environment,
        DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
    Resource resource = delegatingResourceLoader.getResource(jarLocation);
    DeployerPartitionHandler partitionHandler =
            new DeployerPartitionHandler(taskLauncher, jobExplorer, resource, "workerStep", taskRepository);

    List<String> commandLineArguments = new ArrayList<>(5);
    commandLineArguments.add("--spring.profiles.active=worker");
    commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
    commandLineArguments.add("--spring.batch.initializer.enabled=false");
    commandLineArguments.add("--spring.cloud.task.closecontext_enabled=true");
    commandLineArguments.add("--logging.level=DEBUG");

    partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
    partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
    partitionHandler.setMaxWorkers(2);
    partitionHandler.setApplicationName("BatchApplicationWorker");
    return partitionHandler;
}
@Bean
@StepScope
public Partitioner partitioner(@Value("#{jobParameters['inputFiles']}") String file,
        @Value("#{jobParameters['partitionSize']}") String partitionSize1) {
    int partitionSize = Integer.parseInt(partitionSize1);
    return new Partitioner() {
        @Override
        public Map<String, ExecutionContext> partition(int gridSize) {
            Map<String, ExecutionContext> partitions = new HashMap<>();
            String[] ids = fetchAllPrimaryKeys(file);
            List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
            int size = partitionPayloads.size();
            for (int i = 0; i < size; i++) {
                ExecutionContext executionContext = new ExecutionContext();
                executionContext.put("partitionNumber", i);
                executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
                partitions.put("partition" + i, executionContext);
            }
            return partitions;
        }
    };
}
@Bean
public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
    return this.stepBuilderFactory.get("masterStep")
            .partitioner(workerStep.getName(), partitioner(null, null))
            .step(workerStep)
            .partitionHandler(partitionHandler)
            .build();
}

@Bean
public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
    return this.stepBuilderFactory.get("workerStep")
            .<User, User>chunk(10000)
            .reader(reader(null))
            .processor(customProcessor)
            .writer(customWriter)
            .build();
}
@Bean
public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass, JobBuilderFactory jobBuilderFactory) {
    return jobBuilderFactory.get("batchJob")
            .incrementer(new RunIdIncrementer())
            .start(masterStep)
            .listener(jobExecutionListnerClass)
            .build();
}

public Long jobRunner(JobParams jobParams) throws BatchException {
    Map<String, JobParameter> maps = new HashMap<>();
    maps.put(Constants.TIME, new JobParameter(System.currentTimeMillis()));
    maps.put(Constants.INPUT_FILES, new JobParameter(jobParams.getInputSource()));
    maps.put(Constants.PARTITION_SIZE, new JobParameter(Integer.toString(jobParams.getPartitionSize())));
    maps.put(Constants.MAIL_RECIPIENTS, new JobParameter(jobParams.getMailRecipients()));
    maps.put(Constants.JOB_NAME, new JobParameter(jobParams.getJobName()));
    maps.put(Constants.JOB_DESCRIPTION, new JobParameter(jobParams.getJobDescription()));
    maps.put(Constants.JOB_RESTART, new JobParameter(Boolean.toString(jobParams.getRestart())));
    JobParameters jobParameters = new JobParameters(maps);

    JobExecution jobExecution;
    try {
        jobExecution = jobLauncher.run(job, jobParameters);
    } catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException
            | JobParametersInvalidException e) {
        throw new BatchException(e.getMessage());
    }
    return jobExecution.getId();
}
I'm processing multiple input files with multi-format lines using ClassifierCompositeItemProcessor. But when writing the files with the writers registered as streams on the step, I'm unable to set the Resource filename dynamically. The output filename should be the respective input file name. Any help would be much appreciated.
Input File 1 (data-111111-12323.txt)
1#9999999#00001#2#RecordType1
2#00002#June#Statement#2020#9#RecordType2
3#7777777#RecordType3
Input File 2 (data-22222-23244.txt)
1#435435#00002#2#RecordType1
2#345435#July#Statement#2021#9#RecordType2
3#645456#RecordType3
Expected output file 1 (data-111111-12323.txt)
1#9999999#00001#2#RecordType1#mobilenumber1
2#00002#June#Statement#2020#9#RecordType2#mobilenumber2
3#7777777#RecordType3#mobilenumber3
Expected output file 2 (data-22222-23244.txt)
1#9999999#00001#2#RecordType1#mobilenumber1
2#00002#June#Statement#2020#9#RecordType2#mobilenumber2
3#7777777#RecordType3#mobilenumber3
Step
public Step partitionStep() throws Exception {
    ItemReader reader = context.getBean(FlatFileItemReader.class);
    ClassifierCompositeItemWriter writer = context.getBean(ClassifierCompositeItemWriter.class);
    return stepBuilderFactory.get("statementProcessingStep.slave")
            .<String, String>chunk(12)
            .reader(reader)
            .processor(processor())
            .writer(writer)
            .stream(recordType0FlatFileItemWriter())
            .stream(recordType1FlatFileItemWriter())
            .build();
}
Processor
@Bean
@StepScope
public ItemProcessor processor() {
    ClassifierCompositeItemProcessor<? extends RecordType, ? extends RecordType> processor = new ClassifierCompositeItemProcessor<>();
    SubclassClassifier classifier = new SubclassClassifier();
    Map typeMap = new HashMap();
    typeMap.put(RecordType0.class, recordType0Processor);
    typeMap.put(RecordType1.class, recordType1Processor);
    classifier.setTypeMap(typeMap);
    processor.setClassifier(classifier);
    return processor;
}
Writer
@Bean
public FlatFileItemWriter<RecordType1> recordType1FlatFileItemWriter() throws Exception {
    FlatFileItemWriter<RecordType1> writer = new FlatFileItemWriter<>();
    writer.setResource(new FileSystemResource("record1.txt")); // This filename should be dynamic
    writer.setAppendAllowed(true);
    writer.setLineAggregator(new DelimitedLineAggregator<RecordType1>() {{
        setDelimiter("#");
        setFieldExtractor(new BeanWrapperFieldExtractor<RecordType1>() {{
            setNames(new String[] { "RecordType", "ID1", "ID2", "ID3" });
        }});
    }});
    return writer;
}
You can make your item reader/writer step-scoped and inject values from job parameters or step/job execution context using late-binding. For example:
@StepScope
@Bean
public FlatFileItemReader<Foo> flatFileItemReader(@Value("#{jobParameters['input.file.name']}") String name) {
    return new FlatFileItemReaderBuilder<Foo>()
            .name("flatFileItemReader")
            .resource(new FileSystemResource(name))
            .build();
}
You can find more details in the Late Binding of Job and Step Attributes section of the reference documentation.
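Since the problem here is the writer's output file, the same late binding applies to the FlatFileItemWriter from the question. Here is a minimal sketch of a step-scoped variant, assuming the target file name is passed as a hypothetical output.file.name job parameter (it could equally be read from the step execution context populated by a partitioner or an earlier step):
@Bean
@StepScope
public FlatFileItemWriter<RecordType1> recordType1FlatFileItemWriter(
        @Value("#{jobParameters['output.file.name']}") String outputFileName) {
    FlatFileItemWriter<RecordType1> writer = new FlatFileItemWriter<>();
    // The resource is resolved at step execution time, so the output file
    // can follow the name of the input file currently being processed.
    writer.setResource(new FileSystemResource(outputFileName));
    writer.setAppendAllowed(true);
    DelimitedLineAggregator<RecordType1> lineAggregator = new DelimitedLineAggregator<>();
    lineAggregator.setDelimiter("#");
    BeanWrapperFieldExtractor<RecordType1> fieldExtractor = new BeanWrapperFieldExtractor<>();
    fieldExtractor.setNames(new String[] { "RecordType", "ID1", "ID2", "ID3" });
    lineAggregator.setFieldExtractor(fieldExtractor);
    writer.setLineAggregator(lineAggregator);
    return writer;
}
Because the bean is step-scoped, a fresh writer is created for each step execution, so each input file can get its own output resource.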
I recently started working with Spring Batch chunk-based processing. I need a batch job that creates 3 million random strings, grouped by type and count, e.g. one million strings starting with A and the next 1.5 million ending with GH. I cannot do it with a for loop, as it would block a thread, and I also have to write the strings to a database.
I have no idea how to make my ItemReader produce an item per iteration. I have read about writing a custom ItemReader through this, but I don't get what the "item" should be here.
If I create a chunk of 1000, how will the counters be handled, i.e. the count of entries in the chunk and the count of strings generated so far?
The ItemReader interface is quite similar to an Iterator and, like an Iterator, has to maintain the iteration state internally. The Spring Batch framework even provides an IteratorItemReader.
Since Java 8 there are Streams, which I find quite versatile for generating data as well, and which can be converted into an Iterator.
Here is a possible solution along the lines of what you described:
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Java8StreamReaderTest.TestConfig.class, properties = {"spring.batch.job.enabled=false"})
@EnableBatchProcessing
public class Java8StreamReaderTest {

    @Configuration
    static class TestConfig {

        @Bean
        JobBuilderFactory jobBuilderFactory(final JobRepository jobRepository) {
            return new JobBuilderFactory(jobRepository);
        }

        @Bean
        StepBuilderFactory stepBuilderFactory(final JobRepository jobRepository, final PlatformTransactionManager transactionManager) {
            return new StepBuilderFactory(jobRepository, transactionManager);
        }

        @Bean
        Job streamJob() {
            return jobBuilderFactory(null).get("streamJob")
                    .start(stepBuilderFactory(null, null).get("streamStep")
                            .<String, String>chunk(1000)
                            .reader(streamReader())
                            .writer(listWriter())
                            .build()
                    )
                    .build();
        }

        @Bean
        ListItemWriter<String> listWriter() {
            return new ListItemWriter<>();
        }

        @Bean
        ItemReader<String> streamReader() {
            return new IteratorItemReader<String>(stream().iterator());
        }

        @Bean
        Stream<String> stream() {
            return Stream.of(
                    IntStream.range(0, 100000).boxed().map(i -> "A" + RandomStringUtils.random(10, true, false)),
                    IntStream.range(0, 100000).boxed().map(i -> "B" + RandomStringUtils.random(10, true, false)),
                    IntStream.range(0, 100000).boxed().map(i -> RandomStringUtils.random(10, true, false) + "GH")
            )
            .flatMap(s -> s);
        }
    }

    @Autowired
    Job streamJob;

    @Autowired
    JobLauncher jobLauncher;

    @Autowired
    ListItemWriter<String> listWriter;

    @Test
    public void shouldExecuteTestJob() throws Exception {
        JobExecution execution = jobLauncher.run(streamJob, new JobParametersBuilder().toJobParameters());
        assertEquals(ExitStatus.COMPLETED, execution.getExitStatus());
        assertThat(listWriter.getWrittenItems(), hasSize(300000));
    }
}
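The test above only collects the generated strings in a ListItemWriter. For the requirement of writing them to a database, a JdbcBatchItemWriter could take its place. A minimal sketch, assuming a hypothetical GENERATED_STRING table with a single STRING_VALUE column and a configured DataSource:
@Bean
JdbcBatchItemWriter<String> jdbcWriter(DataSource dataSource) {
    // Each generated string becomes one row; the item itself is bound to the single placeholder.
    return new JdbcBatchItemWriterBuilder<String>()
            .dataSource(dataSource)
            .sql("INSERT INTO GENERATED_STRING (STRING_VALUE) VALUES (?)")
            .itemPreparedStatementSetter((item, ps) -> ps.setString(1, item))
            .build();
}
Wiring this writer into the step instead of listWriter() keeps the chunk size of 1000, so the inserts are batched per chunk within one transaction.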
I'm trying to retry a Spring Batch FlatFileItemReader, but with no success.
FlatFileItemReader<Transaction> reader = new FlatFileItemReader<>();
Resource resource = new FileSystemResource("input/1-101-D-2017-212-volume-per-transaction.csv");
try {
    resource.contentLength();
} catch (IOException e) {
    e.printStackTrace();
}
reader.setResource(resource);
reader.setRecordSeparatorPolicy(new BlankLineRecordSeparatorPolicy());
reader.setStrict(false);
reader.setLinesToSkip(NUMBER_OF_HEADER_LINES);

DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
reader.setSkippedLinesCallback(line -> tokenizer.setNames(line.split(",")));

DefaultLineMapper<Transaction> lineMapper = new DefaultLineMapper<>();
lineMapper.setLineTokenizer(tokenizer);
lineMapper.setFieldSetMapper(new TransactionFieldSetMapper());
reader.setLineMapper(lineMapper);

return reader;
Then in my step I have:
.faultTolerant()
.retryLimit(3)
.retry(FileNotFoundException.class)
Can someone give me a hint on how to retry it?
Hopefully this is still useful for you. Please refer to this example, where I show how to retry a FlatFileItemReader:
https://github.com/jeronimogalicia/batch-retry-flatfile-item-reader
Basically you have to annotate your application class with @EnableRetry and annotate your reader like this:
@Bean
@StepScope
@Retryable(include = { ItemStreamException.class }, maxAttempts = 5)
ItemReader<Player> loadRecordsReader() throws Exception {
    String filePath = "src/main/resources/players.csv";
    // "counter" is an attempt-counter field defined elsewhere in the configuration class
    System.out.println("Loading records from " + filePath + " try " + counter);
    FlatFileItemReader<Player> itemReader = new FlatFileItemReader<>();
    itemReader.setResource(new FileSystemResource(filePath));
    itemReader.setLinesToSkip(1);
    // DelimitedLineTokenizer defaults to comma as its delimiter
    DefaultLineMapper<Player> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(new DelimitedLineTokenizer());
    lineMapper.setFieldSetMapper(new PlayerFieldSetMapper());
    itemReader.setLineMapper(lineMapper);
    itemReader.open(new ExecutionContext());
    return itemReader;
}
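For reference, enabling retry support on the application class mentioned above looks roughly like this (the class name is illustrative):
@SpringBootApplication
@EnableRetry
public class BatchRetryApplication {

    public static void main(String[] args) {
        SpringApplication.run(BatchRetryApplication.class, args);
    }
}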
I am using Spring Batch partitioning to merge data from a group of related flat files into a single file. The batch is failing with the two issues below:
The first slave step thread fails because data is written to the file writer before the writer is opened. The value of the variable inputFileNames (step context data provided by the partitioner) for this thread is [20002", 20003].
The second slave step thread fails because the partitioning data is missing from the step context. The value of inputFileNames for this thread is null.
Please let me know if I am missing something in the configuration.
// log with Error info
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.ReaderConfiguration [ReaderBatchConfiguration.java:473] inputFileNames ----[20002", 20003]
2015-12-26 17:59:14,165 DEBUG [SimpleAsyncTaskExecutor-1] c.d.d.b.r.BatchConfiguration [BatchConfiguration.java:389] consumer ----p2
2015-12-26 17:59:14,275 ERROR [SimpleAsyncTaskExecutor-1] o.s.b.c.s.AbstractStep [AbstractStep.java:225] Encountered an error executing step testConsumersInputFileMergeStep in job testFileForInputJob
org.springframework.batch.item.WriterNotOpenException: Writer must be open before it can be written to
at org.springframework.batch.item.file.FlatFileItemWriter.write(FlatFileItemWriter.java:255) ~[spring-batch-infrastructure-3.0.3.RELEASE.jar:3.0.3.RELEASE]
2015-12-26 18:00:14,421 DEBUG [SimpleAsyncTaskExecutor-2] c.d.d.b.r.ReaderBatchConfiguration [ReaderConfiguration.java:474] inputFileNames ----null
// Partitioner
public class ProvisioningInputFilePartitioner implements Partitioner {

    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> filesToProcess = getFilesToProcess(outboundSourceFolder);
        Map<String, ExecutionContext> execCtxs = new HashMap<>();
        for (Entry<String, ExecutionContext> entry : filesToProcess.entrySet()) {
            execCtxs.put(entry.getKey(), entry.getValue());
        }
        return execCtxs;
    }

    private Map<String, ExecutionContext> getFilesToProcess(String outboundSourceFolder2) {
        Map<String, ExecutionContext> contexts = new HashMap<>();

        ExecutionContext execCtx1 = new ExecutionContext();
        List<String> inputFileNames1 = Arrays.asList("20001", "22222");
        execCtx1.put("consumer", "p1");
        execCtx1.put("inputFileNames", inputFileNames1);
        contexts.put("p1", execCtx1);

        ExecutionContext execCtx2 = new ExecutionContext();
        List<String> inputFileNames2 = Arrays.asList("20002", "20003");
        execCtx1.put("consumer", "p2");
        execCtx1.put("inputFileNames", inputFileNames2);
        contexts.put("p2", execCtx2);

        return contexts;
    }
}
// Writer
@Bean
@StepScope
public ItemWriter<String> testConsumerFileItemWriter(@Value("#{stepExecutionContext[consumer]}") String consumer) {
    logger.debug("consumer ----" + consumer);
    FileSystemResource fileSystemResource = new FileSystemResource(new File(outboundSourceFolder, consumer + ".txt"));
    FlatFileItemWriter<String> fileItemWriter = new FlatFileItemWriter<>();
    fileItemWriter.setResource(fileSystemResource);
    fileItemWriter.setLineAggregator(new PassThroughLineAggregator<String>());
    return fileItemWriter;
}

@Bean
public Partitioner provisioningInputFilePartitioner() {
    return new ProvisioningInputFilePartitioner();
}

@Bean
public TaskExecutor taskExecutor() {
    return new SimpleAsyncTaskExecutor();
}
// Reader
@Bean
@StepScope
public ItemReader<String> testInputFilesReader(@Value("#{stepExecutionContext[inputFileNames]}") List<String> inputFileNames) {
    logger.debug("inputFileNames ----" + inputFileNames);
    MultiResourceItemReader<String> multiResourceItemReader = new MultiResourceItemReader<String>();
    ...
    return multiResourceItemReader;
}
// Slave step
@Bean
public Step testConsumersInputFileMergeStep(StepBuilderFactory stepBuilder, ItemReader<String> testInputFilesReader,
        ItemWriter<String> testConsumerFileItemWriter) {
    return stepBuilder.get("testConsumersInputFileMergeStep")
            .<String, String>chunk(1)
            .reader(testInputFilesReader)
            .writer(testConsumerFileItemWriter)
            .build();
}

// Master step
@Bean
public Step testConsumersFilePartitionerStep(StepBuilderFactory stepBuilder, Step testConsumersInputFileMergeStep,
        Partitioner provisioningInputFilePartitioner, TaskExecutor taskExecutor) {
    return stepBuilder.get("testConsumersFilePartitionerStep")
            .partitioner(testConsumersInputFileMergeStep)
            .partitioner("testConsumersInputFileMergeStep", provisioningInputFilePartitioner)
            .taskExecutor(taskExecutor)
            .build();
}

// Job
@Bean
public Job testFileForInputJob(JobBuilderFactory factory, Step testFileForInputStep, Step testConsumersFilePartitionerStep) {
    return factory.get("testFileForInputJob")
            .incrementer(new RunIdIncrementer())
            .start(testConsumersFilePartitionerStep)
            .build();
}