Spring Batch: process different types in one step

I have a batch job that reads records from a file. I want to convert those records to PojoA (all strings), run each record through a validator to ensure all fields are present, and then transform PojoA to PojoB. The issue I have is that I am unable to change the type of object mid-step.
return getStepBuilder("downloadData")
        .<PojoA, PojoA>chunk(1000)
        .reader(pojoAReader())
        .processor(pojoAValidator)
        .writer(pojoAWriter)
        .processor(pojoAToPojoBTransformer) // <- issue here, <PojoA, PojoB>
        .writer(pojoBWriter)
        .build();
The reason PojoB exists is that PojoA is all strings; I want to persist all records regardless of whether they're invalid. PojoB has the accurate data types, e.g. dates and numbers.
I think I need another step that deals with this, but how do I pass the PojoA's to step 2?

You cannot declare two processors/writers like this:
.processor(pojoAValidator)
.writer(pojoAWriter)
.processor(pojoAToPojoBTransformer) // <- issue here, <PojoA, PojoB>
.writer(pojoBWriter)
You need to use a composite processor/writer for that.
Here is a quick example of a composite processor where processor1 (Integer -> Integer) is followed by processor2 (Integer -> String):
import java.util.Arrays;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.CompositeItemProcessor;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
@EnableBatchProcessing
public class MyJob {

    @Autowired
    private JobBuilderFactory jobs;

    @Autowired
    private StepBuilderFactory steps;

    @Bean
    public ItemReader<Integer> itemReader() {
        return new ListItemReader<>(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
    }

    @Bean
    public ItemWriter<String> itemWriter() {
        return items -> {
            for (String item : items) {
                System.out.println("item = " + item);
            }
        };
    }

    @Bean
    public ItemProcessor<Integer, Integer> itemProcessor1() {
        return item -> item + 1;
    }

    @Bean
    public ItemProcessor<Integer, String> itemProcessor2() {
        return String::valueOf;
    }

    @Bean
    public ItemProcessor<Integer, String> compositeItemProcessor() {
        CompositeItemProcessor<Integer, String> compositeItemProcessor = new CompositeItemProcessor<>();
        compositeItemProcessor.setDelegates(Arrays.asList(itemProcessor1(), itemProcessor2()));
        return compositeItemProcessor;
    }

    @Bean
    public Step step() {
        return steps.get("step")
                .<Integer, String>chunk(5)
                .reader(itemReader())
                .processor(compositeItemProcessor())
                .writer(itemWriter())
                .build();
    }

    @Bean
    public Job job() {
        return jobs.get("job")
                .start(step())
                .build();
    }

    public static void main(String[] args) throws Exception {
        ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
        JobLauncher jobLauncher = context.getBean(JobLauncher.class);
        Job job = context.getBean(Job.class);
        jobLauncher.run(job, new JobParameters());
    }
}
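For the writer side, the same idea applies with a CompositeItemWriter, which passes the same chunk of items to each delegate in order. A minimal sketch (the auditItemWriter delegate is hypothetical, just for illustration):
import java.util.Arrays;
import org.springframework.batch.item.support.CompositeItemWriter;

@Bean
public CompositeItemWriter<String> compositeItemWriter() {
    // Each delegate receives the full chunk, in the order declared here.
    CompositeItemWriter<String> compositeItemWriter = new CompositeItemWriter<>();
    compositeItemWriter.setDelegates(Arrays.asList(itemWriter(), auditItemWriter())); // auditItemWriter is hypothetical
    return compositeItemWriter;
}
Note that all delegates of a CompositeItemWriter receive items of the same type, so in the question's case the PojoA -> PojoB conversion still has to happen in the processor chain, or the writers have to work on a wrapper object holding both.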

Related

Why does JobBuilder say "could not autowire"?

I am following this Udemy course (Batch Processing with Spring Batch & Spring Boot) for Spring Batch. The course uses JBF (JobBuilderFactory), which is deprecated, so I googled what to use instead and it says to use JobBuilder.
Right now jobBuilder and stepBuilder are underlined in red and the IDE says they could not be autowired.
package com.example.springbatch;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
@SpringBootApplication
// 1st step
@EnableBatchProcessing
// 2nd // use of this?
@ComponentScan("com.example.config") // job and steps will go in this package
public class SpringBatchApplication {

    public static void main(String[] args) {
        SpringApplication.run(SpringBatchApplication.class, args);
    }
}
package com.example.config;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.job.builder.JobBuilder;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.builder.StepBuilder;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration // add annot
public class SampleJob { // 3rd creating first sample job

    // 5th Create the job. Spring Batch provides one class - JobBuilder. Create the obj named jobBuilder
    @Autowired
    private JobBuilder jobBuilder;

    @Autowired
    private StepBuilder stepBuilder;

    @Bean // 4th define @Bean for the first job
    public Job firstJob() { // Job (interface) imports core.Job
        // 6th use
        return jobBuilder.get("First Job") // use get; "First Job" is the 1st job's name
                .start(firstStep()) // Inside start, pass in your step. A job can have a single step or multiple steps
                .build();
    }

    @Bean // 7th Adding the Step interface. Autowire it as well.
    Step firstStep() {
        return stepBuilder.get("First Step")
                .tasklet(firstTask()) // Will need to call Tasklet.
                .build(); // call build to create the step.
    }

    // 8th
    private Tasklet firstTask() {
        return new Tasklet() {
            @Override
            public RepeatStatus execute(StepContribution stepContribution, ChunkContext chunkContext) throws Exception {
                System.out.println("This is first tasklet step");
                return RepeatStatus.FINISHED; // need this
            }
        };
    }
}
I tried to search on Google; this is supposed to print "This is first tasklet step".
The course is probably using Spring Batch 4. In Spring Batch 5, those builder factories were deprecated for removal and are not exposed as beans in the application context by the @EnableBatchProcessing annotation. Here is the relevant section in the migration guide about that: JobBuilderFactory and StepBuilderFactory bean exposure/configuration.
The typical migration path from v4 to v5 in that regard is as follows:
// Sample with v4
@Configuration
@EnableBatchProcessing
public class MyJobConfig {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Bean
    public Job myJob(Step step) {
        return this.jobBuilderFactory.get("myJob")
                .start(step)
                .build();
    }
}

// Sample with v5
@Configuration
@EnableBatchProcessing
public class MyJobConfig {

    @Bean
    public Job myJob(JobRepository jobRepository, Step step) {
        return new JobBuilder("myJob", jobRepository)
                .start(step)
                .build();
    }
}
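The same pattern applies to steps. A minimal sketch of the v5 equivalent of the step in the question, assuming the usual JobRepository and PlatformTransactionManager beans are available (in v5, tasklet(...) takes the transaction manager explicitly):

@Bean
public Step firstStep(JobRepository jobRepository, PlatformTransactionManager transactionManager) {
    // In v5, the step name and job repository go to the StepBuilder constructor,
    // and the transaction manager is passed alongside the tasklet.
    return new StepBuilder("First Step", jobRepository)
            .tasklet((contribution, chunkContext) -> {
                System.out.println("This is first tasklet step");
                return RepeatStatus.FINISHED;
            }, transactionManager)
            .build();
}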

Spring Batch removeJobExecutions fails

I'm trying to explore Spring Batch with Spring Boot 2.3.3, and obviously the tests are very important.
The batch doesn't read / process / write anything; I've just created the skeleton.
On the test side I have the following:
@Autowired
private IntegrationTestsNeeds integrationTestsNeeds;

@Autowired
private JobLauncherTestUtils jobLauncherTestUtils;

@Autowired
private JobRepositoryTestUtils jobRepositoryTestUtils;

@AfterEach
void tearDown() throws InterruptedException {
    jobRepositoryTestUtils.removeJobExecutions();
}

@Test
void testUpdateStatisticsBatch() throws Exception {
    JobExecution jobExecution = jobLauncherTestUtils.launchJob();
    ExitStatus exitStatus = jobExecution.getExitStatus();
    Assertions.assertThat(exitStatus).isEqualTo(ExitStatus.COMPLETED);
}
The test passes, but in the @AfterEach method I get the following error:
org.springframework.dao.DataIntegrityViolationException: StatementCallback; SQL [delete from BATCH_STEP_EXECUTION];
Cannot delete or update a parent row: a foreign key constraint fails (`cvl`.`BATCH_STEP_EXECUTION_CONTEXT`, CONSTRAINT `STEP_EXEC_CTX_FK` FOREIGN KEY (`STEP_EXECUTION_ID`) REFERENCES `BATCH_STEP_EXECUTION` (`STEP_EXECUTION_ID`));
nested exception is java.sql.SQLIntegrityConstraintViolationException: Cannot delete or update a parent row: a foreign key constraint fails (`cvl`.`BATCH_STEP_EXECUTION_CONTEXT`, CONSTRAINT `STEP_EXEC_CTX_FK` FOREIGN KEY (`STEP_EXECUTION_ID`) REFERENCES `BATCH_STEP_EXECUTION` (`STEP_EXECUTION_ID`))
What am I doing wrong?
I don't know why, but the problem is solved by using the transactionTemplate:
import org.springframework.transaction.support.TransactionTemplate;

@Autowired
private TransactionTemplate transactionTemplate;

@AfterEach
void tearDown() {
    transactionTemplate.execute(ts -> {
        jobRepositoryTestUtils.removeJobExecutions();
        return null;
    });
}
Even though the jdbcTemplate is able to perform the delete statements, for some reason it is not able to really delete the rows from the database.
I'm not able to reproduce the issue. Here is an example that passes without the exception:
import javax.sql.DataSource;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.batch.test.JobLauncherTestUtils;
import org.springframework.batch.test.JobRepositoryTestUtils;
import org.springframework.batch.test.context.SpringBatchTest;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseBuilder;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseType;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;
@RunWith(SpringRunner.class)
@SpringBatchTest
@ContextConfiguration(classes = {MyJobTests.MyJobConfig.class})
public class MyJobTests {

    @Autowired
    private JobLauncherTestUtils jobLauncherTestUtils;

    @Autowired
    private JobRepositoryTestUtils jobRepositoryTestUtils;

    @Test
    public void testMyJob() throws Exception {
        JobExecution jobExecution = jobLauncherTestUtils.launchJob();
        ExitStatus exitStatus = jobExecution.getExitStatus();
        Assert.assertEquals(ExitStatus.COMPLETED, exitStatus);
    }

    @After
    public void tearDown() {
        jobRepositoryTestUtils.removeJobExecutions();
    }

    @Configuration
    @EnableBatchProcessing
    public static class MyJobConfig {

        @Bean
        public Job job(JobBuilderFactory jobs, StepBuilderFactory steps) {
            return jobs.get("job")
                    .start(steps.get("step")
                            .tasklet((contribution, chunkContext) -> {
                                System.out.println("hello world");
                                return RepeatStatus.FINISHED;
                            })
                            .build())
                    .build();
        }

        @Bean
        public DataSource dataSource() {
            return new EmbeddedDatabaseBuilder()
                    .setType(EmbeddedDatabaseType.H2)
                    .addScript("/org/springframework/batch/core/schema-drop-h2.sql")
                    .addScript("/org/springframework/batch/core/schema-h2.sql")
                    .build();
        }
    }
}
Spring Batch version 4.2.4
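Since the question itself uses JUnit 5 (@AfterEach), the same test can also be written without the JUnit 4 runner. A sketch of the equivalent class header, assuming spring-test's @SpringJUnitConfig:

@SpringBatchTest
@SpringJUnitConfig(MyJobTests.MyJobConfig.class)
public class MyJobTests {

    @Autowired
    private JobRepositoryTestUtils jobRepositoryTestUtils;

    @AfterEach
    void tearDown() {
        // same cleanup as above
        jobRepositoryTestUtils.removeJobExecutions();
    }

    // ... same test method and MyJobConfig as above
}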

Error: Field job in com.example.partioner.DemoApplication required a bean of type 'org.springframework.batch.core.Job' that could not be found

I'm trying a Spring Batch partitioning database program, but I get this message when I try to run the batch:
APPLICATION FAILED TO START

Description:
Field job in com.example.partioner.DemoApplication required a bean of type 'org.springframework.batch.core.Job' that could not be found.
The injection point has the following annotations:
- @org.springframework.beans.factory.annotation.Autowired(required=true)

Action:
Consider defining a bean of type 'org.springframework.batch.core.Job' in your configuration.
This is my main class:
package com.example.partioner;
import java.util.Date;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@SpringBootApplication
@EnableBatchProcessing
public class DemoApplication implements CommandLineRunner {

    @Autowired
    private JobLauncher jobLauncher;

    @Autowired
    private Job job;

    public static void main(String[] args) {
        SpringApplication.run(DemoApplication.class, args);
    }

    @Override
    public void run(String... args) throws Exception {
        System.out.println("STATUS STARTED===================");
        JobParameters jobParameters = new JobParametersBuilder()
                .addString("JobId", String.valueOf(System.currentTimeMillis()))
                .addDate("date", new Date())
                .addLong("time", System.currentTimeMillis())
                .toJobParameters();
        JobExecution execution = jobLauncher.run(job, jobParameters);
        System.out.println("STATUS :: " + execution.getStatus());
    }
}
This is my JobConfiguration class:
package com.example.config;
import java.util.HashMap;
import java.util.Map;
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider;
import org.springframework.batch.item.database.JdbcBatchItemWriter;
import org.springframework.batch.item.database.JdbcPagingItemReader;
import org.springframework.batch.item.database.Order;
import org.springframework.batch.item.database.support.MySqlPagingQueryProvider;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import com.example.mapper.CustomerRowMapper;
import com.example.model.Customer;
import com.example.partitioner.ColumnRangePartitioner;
@Configuration
public class JobConfiguration {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    private DataSource dataSource;

    @Bean
    public ColumnRangePartitioner partitioner() {
        ColumnRangePartitioner columnRangePartitioner = new ColumnRangePartitioner();
        columnRangePartitioner.setColumn("id");
        columnRangePartitioner.setDataSource(dataSource);
        columnRangePartitioner.setTable("customer");
        return columnRangePartitioner;
    }

    @Bean
    @StepScope
    public JdbcPagingItemReader<Customer> pagingItemReader(
            @Value("#{stepExecutionContext['minValue']}") Long minValue,
            @Value("#{stepExecutionContext['maxValue']}") Long maxValue) {
        System.out.println("reading " + minValue + " to " + maxValue);
        Map<String, Order> sortKeys = new HashMap<>();
        sortKeys.put("id", Order.ASCENDING);
        MySqlPagingQueryProvider queryProvider = new MySqlPagingQueryProvider();
        queryProvider.setSelectClause("id, firstName, lastName, birthdate");
        queryProvider.setFromClause("from customer");
        queryProvider.setWhereClause("where id >= " + minValue + " and id < " + maxValue);
        queryProvider.setSortKeys(sortKeys);
        JdbcPagingItemReader<Customer> reader = new JdbcPagingItemReader<>();
        reader.setDataSource(this.dataSource);
        reader.setFetchSize(10);
        reader.setRowMapper(new CustomerRowMapper());
        reader.setQueryProvider(queryProvider);
        return reader;
    }

    @Bean
    @StepScope
    public JdbcBatchItemWriter<Customer> customerItemWriter() {
        JdbcBatchItemWriter<Customer> itemWriter = new JdbcBatchItemWriter<>();
        itemWriter.setDataSource(dataSource);
        itemWriter.setSql("INSERT INTO NEW_CUSTOMER VALUES (:id, :firstName, :lastName, :birthdate)");
        itemWriter.setItemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<>());
        itemWriter.afterPropertiesSet();
        return itemWriter;
    }

    @Bean
    public Step slaveStep() {
        return stepBuilderFactory.get("slaveStep")
                .<Customer, Customer>chunk(10)
                .reader(pagingItemReader(null, null))
                .writer(customerItemWriter())
                .build();
    }

    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1")
                .partitioner(slaveStep().getName(), partitioner())
                .step(slaveStep())
                .gridSize(4)
                .taskExecutor(new SimpleAsyncTaskExecutor())
                .build();
    }

    @Bean
    public Job job() {
        return jobBuilderFactory.get("job")
                .start(step1())
                .build();
    }
}
This is my partitioner class:
package com.example.partitioner;
import java.util.HashMap;
import java.util.Map;
import javax.sql.DataSource;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.jdbc.core.JdbcOperations;
import org.springframework.jdbc.core.JdbcTemplate;
public class ColumnRangePartitioner implements Partitioner {

    private JdbcOperations jdbcTemplate;
    private String table;
    private String column;

    public void setTable(String table) {
        this.table = table;
    }

    public void setColumn(String column) {
        this.column = column;
    }

    public void setDataSource(DataSource dataSource) {
        jdbcTemplate = new JdbcTemplate(dataSource);
    }

    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        int min = jdbcTemplate.queryForObject("SELECT MIN(" + column + ") FROM " + table, Integer.class);
        int max = jdbcTemplate.queryForObject("SELECT MAX(" + column + ") FROM " + table, Integer.class);
        int targetSize = (max - min) / gridSize + 1;
        Map<String, ExecutionContext> result = new HashMap<>();
        int number = 0;
        int start = min;
        int end = start + targetSize - 1;
        while (start <= max) {
            ExecutionContext value = new ExecutionContext();
            result.put("partition" + number, value);
            if (end >= max) {
                end = max;
            }
            value.putInt("minValue", start);
            value.putInt("maxValue", end);
            start += targetSize;
            end += targetSize;
            number++;
        }
        return result;
    }
}
I don't understand the reason for this message and can't find a solution. I think I have put all the necessary annotations. I am a beginner and I hope you can help me.
Your DemoApplication is in the package com.example.partioner, while your job configuration class JobConfiguration is in the package com.example.config.
In order for Spring Boot to find your job, you need to move your JobConfiguration class to the same package as your main class DemoApplication, or to a package underneath it.
Please refer to the Structuring Your Code section of the reference documentation.
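If moving the class is not an option, an alternative (an assumption on my part, not part of the answer above) is to widen the component scan from the main class so it covers both packages:

@SpringBootApplication(scanBasePackages = {"com.example.partioner", "com.example.config"})
public class DemoApplication implements CommandLineRunner {
    // ... rest of the class unchanged
}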

How to archive processed files in a multi-threaded step in Spring Batch?

I'm using a multi-threaded step while reading files from the resources folder. Let's say I have several files to be processed and multiple threads are processing the same file, so I'm not sure at which point in time all of my files are fully processed.
Once a file is successfully processed, I need to archive/delete it. Can someone guide me on what I should use?
Here is my sample code.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.stream.Stream;
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecutionListener;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.core.configuration.annotation.BatchConfigurer;
import org.springframework.batch.core.configuration.annotation.DefaultBatchConfigurer;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.batch.support.transaction.ResourcelessTransactionManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import com.iana.spring.batch.dao.GenericDAO;
import com.iana.spring.batch.listener.BatchJobCompletionListener;
@Configuration
public class BatchConfig {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    private JobLauncher jobLauncher;

    @Autowired
    private Job processJob;

    @Value("classpath*:/final/HYUMER_SI_*.txt")
    private Resource[] inputFiles;

    @Autowired
    @Qualifier("test2DataSource")
    private DataSource test2DataSource;

    public void saveFileLog(String fileLog) throws Exception {
        String query = "INSERT INTO FILE_LOG(LOG_INFO) VALUES (?)";
        new GenericDAO().saveOrUpdate(test2DataSource, query, false, fileLog);
    }

    // This job runs every 5 seconds
    //@Scheduled(fixedRate = 150000000)
    public void fixedRatedCallingMethod() {
        try {
            JobParameters jobParameters = new JobParametersBuilder()
                    .addLong("time", System.currentTimeMillis())
                    .toJobParameters();
            jobLauncher.run(processJob, jobParameters);
            System.out.println("I have been scheduled with Spring scheduler");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /* In case of a multiple DataSources configuration, we need to add the following code.
     * - It is a good practice to mark the Spring Batch database as @Primary to get the benefits of all default functionalities
     *   implemented by Spring Batch statistics.
     * - All insert and update batch job running statistics will be maintained by Spring Batch itself.
     * - No need to write any extra line of code.
     * Error: To use the default BatchConfigurer the context must contain no more than one DataSource, found 2
     */
    @Bean
    BatchConfigurer configurer(@Qualifier("testDataSource") DataSource dataSource) {
        return new DefaultBatchConfigurer(dataSource);
    }

    @Bean
    public Job processJob() throws Exception {
        return jobBuilderFactory.get("processJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener())
                .flow(orderStep1())
                .end()
                .build();
    }

    @Bean
    public TaskExecutor taskExecutor() {
        SimpleAsyncTaskExecutor asyncTaskExecutor = new SimpleAsyncTaskExecutor("spring_batch");
        asyncTaskExecutor.setConcurrencyLimit(20);
        return asyncTaskExecutor;
    }

    @Bean
    public ItemReader<String> batchItemReader() {
        Queue<String> dataList = new LinkedList<String>();
        return new ItemReader<String>() {

            @BeforeStep
            public void beforeStep(StepExecution stepExecution) {
                System.err.println("in before step...");
                try {
                    if (inputFiles != null) {
                        for (int i = 0; i < inputFiles.length; i++) {
                            String fileName = inputFiles[i].getFile().getAbsolutePath();
                            try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
                                stream.forEach(s -> dataList.add(s));
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
                System.out.println("fileList Size::" + dataList.size());
            }

            @Override
            public synchronized String read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
                System.out.println("--> in item reader.........");
                String fileName = null;
                if (dataList.size() > 0) {
                    fileName = dataList.remove();
                    file_reading_cnt++;
                }
                return fileName;
            }

            @AfterStep
            public void afterStep(StepExecution stepExecution) {
                System.err.println("in after step..." + file_reading_cnt);
            }
        };
    }

    volatile int file_reading_cnt = 0;

    @Bean
    public ItemWriter<String> batchItemWriter() {
        return new ItemWriter<String>() {
            @Override
            public void write(List<? extends String> fileList) throws Exception {
                System.out.println("----- in item writer.........");
                fileList.forEach(data -> {
                    try {
                        saveFileLog(data);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                });
            }
        };
    }

    /**
     * To create a step, the reader, processor and writer are passed serially
     *
     * @return
     */
    @Bean
    public Step orderStep1() throws Exception {
        return stepBuilderFactory.get("orderStep1").<String, String>chunk(20)
                .reader(batchItemReader())
                .writer(batchItemWriter())
                .taskExecutor(taskExecutor())
                .throttleLimit(20)
                .build();
    }

    @Bean
    public JobExecutionListener listener() {
        return new BatchJobCompletionListener();
    }

    @Bean
    public ResourcelessTransactionManager transactionManager() {
        return new ResourcelessTransactionManager();
    }
}
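No answer is shown here, but one common pattern is to archive the files once the multi-threaded step has finished: afterStep runs exactly once, after all worker threads are done, so the files are safe to move at that point. A minimal sketch under my own assumptions (the archive directory exists, the resources resolve to real files on disk rather than entries inside a jar, and the listener is wired onto the step):

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import org.springframework.batch.core.BatchStatus;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;

@Bean
public StepExecutionListener archivingListener() {
    return new StepExecutionListener() {
        @Override
        public void beforeStep(StepExecution stepExecution) {
            // nothing to do before the step
        }

        @Override
        public ExitStatus afterStep(StepExecution stepExecution) {
            // Runs once, after every worker thread has finished.
            if (stepExecution.getStatus() == BatchStatus.COMPLETED) {
                for (Resource inputFile : inputFiles) {
                    try {
                        Path source = inputFile.getFile().toPath();
                        Files.move(source, Paths.get("archive").resolve(source.getFileName()),
                                StandardCopyOption.REPLACE_EXISTING);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
            return stepExecution.getExitStatus();
        }
    };
}

The listener would then be registered on the step with .listener(archivingListener()) before .build() in orderStep1().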

Reading multiple Excel files using the Spring Batch extension

I am trying to read multiple Excel files using Spring-Batch-Excel. In my scenario, I don't know in advance how many files the client will process, i.e. if the data is very large, the Excel file will be split into multiple files like records1.xls, records2.xls, records3.xls.
Is there any kind of MultiResourceItemReader available in Spring-Batch-Excel? I tried to set multiple resources at run time and also tried to use the pattern records*.xls, but PoiItemReader didn't allow me to do that. I am using PoiItemReader for this.
To read multiple Excel files:
package com.abc.ingestion.job.dci;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.extensions.excel.RowMapper;
import org.springframework.batch.extensions.excel.streaming.StreamingXlsxItemReader;
import org.springframework.batch.extensions.excel.support.rowset.DefaultRowSetFactory;
import org.springframework.batch.extensions.excel.support.rowset.StaticColumnNameExtractor;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.MultiResourceItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
@Configuration
@EnableBatchProcessing
public class BatchConfig {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    // Create input folder in resources
    @Value("input/DCI*.xlsx")
    private Resource[] inputResources;

    @Bean
    public MultiResourceItemReader<CouncilMapper> multiResourceItemReader() {
        // MultiResourceItemReader works through the resources one by one,
        // setting each resource on the delegate reader in turn.
        MultiResourceItemReader<CouncilMapper> resourceItemReader = new MultiResourceItemReader<>();
        resourceItemReader.setResources(inputResources);
        resourceItemReader.setDelegate(reader());
        return resourceItemReader;
    }

    private RowMapper<CouncilMapper> excelRowMapper() {
        return new Mapper();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Bean
    public StreamingXlsxItemReader<CouncilMapper> reader() {
        final String[] COLUMNS = {"Reg_Type", "RegUnder", "registration_no", "registration_date", "course", "Other_Course", "LRegDate", "council_name", "full_name", "CatName", "Other_Category", "father_name", "mother_name", "gender", "nationality", "date_of_birth", "place_of_birth", "permanent_address", "business_address", "current_city", "current_state", "permanent_city", "mobile_number", "OfficialTelephone", "email", "aadhar_number", "PanNo", "IsDeleted", "CreatedDate", "UpdatedDate", "speciality_name"};
        var factory = new DefaultRowSetFactory();
        factory.setColumnNameExtractor(new StaticColumnNameExtractor(COLUMNS));
        StreamingXlsxItemReader<CouncilMapper> reader = new StreamingXlsxItemReader<>();
        reader.setLinesToSkip(1);
        reader.setRowSetFactory(factory);
        reader.setRowMapper(excelRowMapper());
        return reader;
    }

    @Bean
    ItemWriter<CouncilMapper> writer() {
        return new Writer();
    }

    @Bean
    public Job readFilesJob() {
        return jobBuilderFactory
                .get("readFilesJob")
                .incrementer(new RunIdIncrementer())
                .start(excelFileStep())
                .build();
    }

    @Bean
    public Step excelFileStep() {
        return stepBuilderFactory.get("excelFileStep")
                .<CouncilMapper, CouncilMapper>chunk(5)
                .reader(multiResourceItemReader())
                .writer(writer())
                .build();
    }
}
Mapper Class
package com.abc.ingestion.job.dci;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.IntStream;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.springframework.batch.extensions.excel.RowMapper;
import org.springframework.batch.extensions.excel.support.rowset.RowSet;
public class Mapper implements RowMapper<CouncilMapper> {

    @Override
    public CouncilMapper mapRow(RowSet rowSet) throws Exception {
        var rowSetMetaData = rowSet.getMetaData();
        String[] columnNames = rowSetMetaData.getColumnNames();
        String[] rowData = rowSet.getCurrentRow();
        var mapper = new ObjectMapper();
        Map<String, String> excelData = new HashMap<>();
        IntStream.range(0, columnNames.length).forEach(index -> excelData.put(columnNames[index], rowData[index]));
        return mapper.convertValue(excelData, CouncilMapper.class);
    }
}