Submit Spring job to same Spring context - spring-batch

Is it possible to load the Spring context once in a standalone JVM and submit all Spring Batch jobs to that already-loaded context?
I am trying to avoid loading the Spring context for each job submission, because I have a resource that takes a long time to load, which ultimately slows down context startup.
I have the code below in the main method of a class. I invoke the class through the CLI, passing the name of the job I want to run:
import java.util.Date;
import java.util.Properties;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class SpringBatchJobRunner {

    // Populated by parseArguments
    private static String jobName;
    private static Properties jobParameters = new Properties();

    public static void main(String[] args) {
        try {
            parseArguments(args);
            ApplicationContext context =
                    new ClassPathXmlApplicationContext("classpath:com/demo/batch/context/spring-batch-context.xml");
            JobLauncher jobLauncher = (JobLauncher) context.getBean("jobLauncher");
            Job job = (Job) context.getBean(jobName);
            JobParametersBuilder builder = new JobParametersBuilder();
            builder.addDate("date", new Date());
            JobExecution execution = jobLauncher.run(job, builder.toJobParameters());
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    static void parseArguments(String[] args) {
        if (args.length > 0) {
            jobName = args[0];
        }
        else {
            throw new IllegalArgumentException("Please specify job name.");
        }
        if (args.length > 1) {
            for (int i = 1; i < args.length; i++) {
                String[] keyValue = args[i].split("=");
                jobParameters.setProperty(keyValue[0], keyValue[1]);
            }
        }
    }
}

Yes, a Spring application context can contain multiple job definitions. Once your application context is loaded, you can get job beans and launch them as needed.
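For illustration, here is a minimal sketch of that approach. The context path and bean names come from your snippet; the stdin loop is just one illustrative way to keep the JVM alive and accept further submissions against the same context:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class LongRunningJobRunner {
    public static void main(String[] args) throws Exception {
        // Load the (expensive) context exactly once
        ApplicationContext context =
                new ClassPathXmlApplicationContext("classpath:com/demo/batch/context/spring-batch-context.xml");
        JobLauncher jobLauncher = context.getBean("jobLauncher", JobLauncher.class);

        // Reuse the same context for every submission: each input line is a job bean name
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        String jobName;
        while ((jobName = in.readLine()) != null && !jobName.isEmpty()) {
            Job job = context.getBean(jobName, Job.class);
            jobLauncher.run(job, new JobParametersBuilder()
                    .addDate("date", new Date())
                    .toJobParameters());
        }
    }
}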

Related

Spring Batch - Job executions launching with same Task execution id

I'm using Spring Batch with Spring Cloud Task for remote partitioning, but each new job execution is created with the same task execution id. Is there any way to create a new task execution id for each new job execution?
In the task execution table, every job runs with the same parent execution id.
Each new job execution starts within the same task execution. The batch configuration code is as follows:
@Bean
public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer, Environment environment,
        DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
    Resource resource = delegatingResourceLoader.getResource(jarLocation);
    DeployerPartitionHandler partitionHandler =
            new DeployerPartitionHandler(taskLauncher, jobExplorer, resource, "workerStep", taskRepository);
    List<String> commandLineArguments = new ArrayList<>(5);
    commandLineArguments.add("--spring.profiles.active=worker");
    commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
    commandLineArguments.add("--spring.batch.initializer.enabled=false");
    commandLineArguments.add("--spring.cloud.task.closecontext_enabled=true");
    commandLineArguments.add("--logging.level=DEBUG");
    partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
    partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
    partitionHandler.setMaxWorkers(2);
    partitionHandler.setApplicationName("BatchApplicationWorker");
    return partitionHandler;
}
@Bean
@StepScope
public Partitioner partitioner(@Value("#{jobParameters['inputFiles']}") String file,
        @Value("#{jobParameters['partitionSize']}") String partitionSize1) {
    int partitionSize = Integer.parseInt(partitionSize1);
    return new Partitioner() {
        @Override
        public Map<String, ExecutionContext> partition(int gridSize) {
            Map<String, ExecutionContext> partitions = new HashMap<>();
            String[] ids = fetchAllPrimaryKeys(file);
            List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
            int size = partitionPayloads.size();
            for (int i = 0; i < size; i++) {
                ExecutionContext executionContext = new ExecutionContext();
                executionContext.put("partitionNumber", i);
                executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
                partitions.put("partition" + i, executionContext);
            }
            return partitions;
        }
    };
}
@Bean
public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
    return this.stepBuilderFactory.get("masterStep")
            .partitioner(workerStep.getName(), partitioner(null, null))
            .step(workerStep)
            .partitionHandler(partitionHandler)
            .build();
}

@Bean
public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
    return this.stepBuilderFactory.get("workerStep")
            .<User, User>chunk(10000)
            .reader(reader(null))
            .processor(customProcessor)
            .writer(customWriter)
            .build();
}
@Bean
public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass, JobBuilderFactory jobBuilderFactory) {
    return jobBuilderFactory.get("batchJob")
            .incrementer(new RunIdIncrementer())
            .start(masterStep)
            .listener(jobExecutionListnerClass)
            .build();
}

public Long jobRunner(JobParams jobParams) throws BatchException {
    Map<String, JobParameter> maps = new HashMap<>();
    maps.put(Constants.TIME, new JobParameter(System.currentTimeMillis()));
    maps.put(Constants.INPUT_FILES, new JobParameter(jobParams.getInputSource()));
    maps.put(Constants.PARTITION_SIZE, new JobParameter(Integer.toString(jobParams.getPartitionSize())));
    maps.put(Constants.MAIL_RECIPIENTS, new JobParameter(jobParams.getMailRecipients()));
    maps.put(Constants.JOB_NAME, new JobParameter(jobParams.getJobName()));
    maps.put(Constants.JOB_DESCRIPTION, new JobParameter(jobParams.getJobDescription()));
    maps.put(Constants.JOB_RESTART, new JobParameter(Boolean.toString(jobParams.getRestart())));
    JobParameters jobParameters = new JobParameters(maps);
    JobExecution jobExecution;
    try {
        jobExecution = jobLauncher.run(job, jobParameters);
    } catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException
            | JobParametersInvalidException e) {
        throw new BatchException(e.getMessage());
    }
    return jobExecution.getId();
}

Reset scheduled job after completion

I have a scheduled job implemented with Spring Batch. Right now, when it finishes, it doesn't start again because it is detected as completed. Is it possible to reset its state after completion?
@Component
class JobScheduler {

    @Autowired
    private Job job1;

    @Autowired
    private JobLauncher jobLauncher;

    @Scheduled(cron = "0 0/15 * * * ?")
    public void launchJob1() throws Exception {
        this.jobLauncher.run(this.job1, new JobParameters());
    }
}

@Configuration
public class Job1Configuration {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Bean
    public Job job1() {
        return this.jobBuilderFactory.get("job1")
                .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).end()
                .from(this.step1()).on(STEP1_STATUS.READY.get()).to(this.step2())
                .next(this.step3())
                .end()
                .build();
    }
}
I know I can set a job parameter with the time or the id, but this will launch a new execution every 15 minutes. I want to repeat the same execution until it is completed without errors, and then execute a new one.
You can't restart your job because you're setting the job status to COMPLETED by calling end() in .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).end().
You should instead either fail the job by calling .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).fail(),
or stop the job by calling .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).stopAndRestart(step1()).
Either option leaves the job status FAILED or STOPPED instead of COMPLETED, which means that if you launch the job with the same JobParameters, it will restart the previous job execution.
See https://docs.spring.io/spring-batch/docs/current/reference/html/step.html#configuringForStop
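For instance, a minimal sketch of the .fail() variant applied to the job definition from the question:

@Bean
public Job job1() {
    return this.jobBuilderFactory.get("job1")
            // NOT_READY now ends the execution as FAILED rather than COMPLETED,
            // so a launch with the same JobParameters restarts this execution
            .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).fail()
            .from(this.step1()).on(STEP1_STATUS.READY.get()).to(this.step2())
            .next(this.step3())
            .end()
            .build();
}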
To launch the job in a way that handles restarting previous instances or starting a new instance, you could look at how the SimpleJobService in spring-batch-admin does it and modify the launch method slightly for your purposes. This requires you to specify an incremental job parameter that is used to launch new instances of your job.
https://github.com/spring-attic/spring-batch-admin/blob/master/spring-batch-admin-manager/src/main/java/org/springframework/batch/admin/service/SimpleJobService.java#L250
@Override
public JobExecution launch(String jobName, JobParameters jobParameters) throws NoSuchJobException,
        JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException,
        JobParametersInvalidException {
    JobExecution jobExecution = null;
    if (jobLocator.getJobNames().contains(jobName)) {
        Job job = jobLocator.getJob(jobName);
        JobExecution lastJobExecution = jobRepository.getLastJobExecution(jobName, jobParameters);
        boolean restart = false;
        if (lastJobExecution != null) {
            BatchStatus status = lastJobExecution.getStatus();
            if (status.isUnsuccessful() && status != BatchStatus.ABANDONED) {
                restart = true;
            }
        }
        if (job.getJobParametersIncrementer() != null && !restart) {
            jobParameters = job.getJobParametersIncrementer().getNext(jobParameters);
        }
        jobExecution = jobLauncher.run(job, jobParameters);
        if (jobExecution.isRunning()) {
            activeExecutions.add(jobExecution);
        }
    } else {
        if (jsrJobOperator != null) {
            // jobExecution = this.jobExecutionDao
            //     .getJobExecution(jsrJobOperator.start(jobName, jobParameters.toProperties()));
            jobExecution = new JobExecution(jsrJobOperator.start(jobName, jobParameters.toProperties()));
        } else {
            throw new NoSuchJobException(String.format("Unable to find job %s to launch",
                    String.valueOf(jobName)));
        }
    }
    return jobExecution;
}
I think the difficulty here comes from mixing scheduling with restartability. I would make each schedule execute a distinct job instance (for example by adding the run time as an identifying job parameter).
Now if a given schedule fails, it could be restarted separately until completion without affecting subsequent schedules. This can be done manually or programmatically in another scheduled method, along the lines of the sketch below.
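For example, a minimal sketch of that approach, reusing the scheduler from the question (the parameter name is illustrative):

@Scheduled(cron = "0 0/15 * * * ?")
public void launchJob1() throws Exception {
    // The run time is an identifying parameter, so every schedule creates
    // a distinct job instance; a completed run never blocks the next one
    JobParameters params = new JobParametersBuilder()
            .addDate("runTime", new Date())
            .toJobParameters();
    this.jobLauncher.run(this.job1, params);
}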
This is the solution I came up with after all the comments:
@Component
class JobScheduler extends JobSchedulerLauncher {

    @Autowired
    private Job job1;

    @Scheduled(cron = "0 0/15 * * * ?")
    public void launchJob1() throws Exception {
        this.launch(this.job1);
    }
}

public abstract class JobSchedulerLauncher {

    @Autowired
    private JobOperator jobOperator;

    @Autowired
    private JobExplorer jobExplorer;

    public void launch(Job job) throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException, NoSuchJobException,
            NoSuchJobExecutionException, JobExecutionNotRunningException, JobParametersNotFoundException,
            UnexpectedJobExecutionException {
        // Get the last instance
        final List<JobInstance> jobInstances = this.jobExplorer.findJobInstancesByJobName(job.getName(), 0, 1);
        if (CollectionUtils.isNotEmpty(jobInstances)) {
            // Get the last executions
            final List<JobExecution> jobExecutions = this.jobExplorer.getJobExecutions(jobInstances.get(0));
            if (CollectionUtils.isNotEmpty(jobExecutions)) {
                final JobExecution lastJobExecution = jobExecutions.get(0);
                if (lastJobExecution.isRunning()) {
                    this.jobOperator.stop(lastJobExecution.getId().longValue());
                    this.jobOperator.abandon(lastJobExecution.getId().longValue());
                } else if (lastJobExecution.getExitStatus().equals(ExitStatus.FAILED)
                        || lastJobExecution.getExitStatus().equals(ExitStatus.STOPPED)) {
                    this.jobOperator.restart(lastJobExecution.getId().longValue());
                    return;
                }
            }
        }
        this.jobOperator.startNextInstance(job.getName());
    }
}
My job now uses an incrementer, based on this one https://docs.spring.io/spring-batch/docs/current/reference/html/job.html#JobParametersIncrementer:
@Bean
public Job job1() {
    return this.jobBuilderFactory.get("job1")
            .incrementer(new CustomJobParameterIncrementor())
            .start(this.step1()).on(STEP1_STATUS.NOT_READY.get()).end()
            .from(this.step1()).on(STEP1_STATUS.READY.get()).to(this.step2())
            .next(this.step3())
            .end()
            .build();
}
In my case the scheduler won't start two instances of the same job at the same time, so if this code detects a running job it means the server restarted and left the job with status STARTED; that's why I stop it and abandon it.
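For reference, a hypothetical sketch of what such a CustomJobParameterIncrementor might look like, modeled on the run-id pattern from the linked docs (the parameter key is illustrative):

public class CustomJobParameterIncrementor implements JobParametersIncrementer {

    @Override
    public JobParameters getNext(JobParameters parameters) {
        // Bump a numeric run id so startNextInstance() always creates a new instance
        long runId = (parameters == null) ? 1L : parameters.getLong("run.id", 0L) + 1;
        return new JobParametersBuilder()
                .addLong("run.id", runId)
                .toJobParameters();
    }
}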

Spring Batch 4.0 ~ Code under JdbcCursorItemReader method doesn't run with #StepScope defined

I have a sql query defined in my batch job that needs to get input at runtime from the user.
I have the following item reader in my batch job defined as follows
@StepScope
@Bean
public JdbcCursorItemReader<QueryCount> queryCountItemReader() throws Exception {
    ListPreparedStatementSetter preparedStatementSetter = new ListPreparedStatementSetter() {
        @Override
        public void setValues(PreparedStatement pstmt) throws SQLException {
            pstmt.setString(1, "#{jobparameters[fromDate]}");
            pstmt.setString(2, "#{jobparameters[toDate]}");
            pstmt.setString(3, "#{jobparameters[fromDate]}");
            pstmt.setString(4, "#{jobparameters[toDate]}");
            pstmt.setString(5, "#{jobparameters[fromDate]}");
            pstmt.setString(6, "#{jobparameters[toDate]}");
            pstmt.setString(7, "#{jobparameters[eventType]}");
            pstmt.setString(8, "#{jobparameters[businessUnit]}");
            pstmt.setString(9, "#{jobparameters[deviceCategory]}");
            pstmt.setString(10, "#{jobparameters[numberOfSearchIds]}");
        }
    };
    JdbcCursorItemReader<QueryCount> queryCountJdbcCursorItemReader = new JdbcCursorItemReader<>();
    queryCountJdbcCursorItemReader.setDataSource(dataSource);
    queryCountJdbcCursorItemReader.setSql(sqlQuery);
    queryCountJdbcCursorItemReader.setRowMapper(new QueryCountMapper());
    queryCountJdbcCursorItemReader.setPreparedStatementSetter(preparedStatementSetter);
    int counter = 0;
    ExecutionContext executionContext = new ExecutionContext();
    queryCountJdbcCursorItemReader.open(executionContext);
    try {
        QueryCount queryCount;
        while ((queryCount = queryCountJdbcCursorItemReader.read()) != null) {
            System.out.println(queryCount.toString());
            counter++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        queryCountJdbcCursorItemReader.close();
    }
    return queryCountJdbcCursorItemReader;
}
I am sending in the job parameters from my application class as follows
JobParameters jobParameters = new JobParametersBuilder()
        .addString("fromDate", "20180410")
        .addString("toDate", "20180410")
        .addString("eventType", "WEB")
        .addString("businessUnit", "UPT")
        .addString("numberOfSearchIds", "10")
        .toJobParameters();
JobExecution execution = jobLauncher.run(job, jobParameters);
The issue is, when I run my batch job, the code inside the queryCountItemReader() method is never executed and the job completes with no errors. Essentially the sql query I am trying to run never executes. If I remove the @StepScope annotation the code will then run, but fail with an error since it is unable to bind the parameters sent in from the application class to the sql query. I realize that @StepScope is necessary to use job parameters, but why doesn't the code in my method execute?
Solved this by adding the @EnableBatchProcessing and @EnableAutoConfiguration annotations and changing the item reader method definition as follows:
@StepScope
@Bean
public JdbcCursorItemReader<QueryCount> queryCountItemReader(
        @Value("#{jobParameters['fromDate']}") String fromDate,
        @Value("#{jobParameters['toDate']}") String toDate,
        @Value("#{jobParameters['eventType']}") String eventType,
        @Value("#{jobParameters['businessUnit']}") String businessUnit,
        @Value("#{jobParameters['deviceCategory']}") String deviceCategory,
        @Value("#{jobParameters['numberOfSearchIds']}") String numberOfSearchIds) throws Exception {

How do I populate mongo repositories automatically?

I'm using Spring Data MongoDB with MongoRepository. I have this bean:
@Bean
public Jackson2RepositoryPopulatorFactoryBean repositoryPopulator() {
    Jackson2RepositoryPopulatorFactoryBean factory = new Jackson2RepositoryPopulatorFactoryBean();
    try {
        factory.setResources(resourceResolver.getResources("classpath:static/collections/*.json"));
    } catch (IOException e) {
        log.error("Could not load data", e);
    }
    return factory;
}
This works fine with Fongo (the db is dropped at the end of a test run) but not with real Mongo. If I leave the bean as it is and switch to a real Mongo instance, the database gets populated, but only on the first run; if I re-run the project (plus tests) it fails because the data is already there (I get a DuplicateKeyException).
How do I populate only when the repositories are empty?
Consider using a data migration tool like Mongobee, which is basically Liquibase/Flyway for MongoDB; a sketch follows.
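A minimal sketch of the Mongobee approach, relying on Mongobee recording executed changesets so seeding runs exactly once (the URI, db name, packages and document are illustrative):

@Bean
public Mongobee mongobee() {
    Mongobee runner = new Mongobee("mongodb://localhost:27017/mydb");
    runner.setDbName("mydb");
    // Scans this package for @ChangeLog classes and runs pending changesets
    runner.setChangeLogsScanPackage("com.example.changelogs");
    return runner;
}

@ChangeLog
public class DatabaseChangelog {

    @ChangeSet(order = "001", id = "seedCollections", author = "dev")
    public void seedCollections(MongoTemplate mongoTemplate) {
        // Insert initial documents; subsequent runs skip this changeset
        mongoTemplate.save(new Document("name", "initial"), "myCollection");
    }
}

Alternatively, the populator bean itself can skip collections that already exist: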
@Bean
public Jackson2RepositoryPopulatorFactoryBean repositoryPopulator() throws Exception {
    Jackson2RepositoryPopulatorFactoryBean factory = new Jackson2RepositoryPopulatorFactoryBean();
    try {
        Resource[] resources = resourceResolver.getResources("classpath:static/collections/*.json");
        // Collect only the resources whose collections do not exist yet
        List<Resource> resourcesToFill = new ArrayList<>();
        for (Resource r : resources) {
            String collection = r.getFilename().substring(0, r.getFilename().length() - 5);
            if (!mongoTemplate().collectionExists(collection))
                resourcesToFill.add(r);
        }
        // back to an array, since setResources expects one
        resources = new Resource[resourcesToFill.size()];
        for (int i = 0; i < resources.length; i++)
            resources[i] = resourcesToFill.get(i);
        factory.setResources(resources);
    } catch (IOException e) {
        log.error("Could not load data", e);
    }
    return factory;
}

How to exclude job parameter from uniqueness in Spring Batch?

I am trying to launch a job in Spring Batch 2, and I need to pass some information in the job parameters, but I do not want it to count for the uniqueness of the job instance. For example, I'd want these two sets of parameters to be considered unique:
file=/my/file/path,session=1234
file=/my/file/path,session=5678
The idea is that there will be two different servers trying to start the same job, but with different sessions attached to them. I need that session number in both cases. Any ideas?
Thanks!
So, if 'file' is the only attribute that's supposed to be unique and 'session' is used by downstream code, then your problem matches almost exactly what I had. I had a JMSCorrelationId that I needed to store in the execution context for later use, and I didn't want it to play into the job parameters' uniqueness. Per Dave Syer, this really wasn't possible, so I took the route of creating the job with the parameters (without the 'session' in your case), and then adding the 'session' attribute to the execution context before anything actually runs.
This gave me access to 'session' downstream, but it was not in the job parameters, so it didn't affect uniqueness.
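A minimal sketch of that approach using a JobExecutionListener (the listener class and key name are illustrative, not from the original answer):

public class SessionContributingListener implements JobExecutionListener {

    private final String session;

    public SessionContributingListener(String session) {
        this.session = session;
    }

    @Override
    public void beforeJob(JobExecution jobExecution) {
        // Stored in the execution context, not the JobParameters, so it is
        // available downstream without affecting instance uniqueness
        jobExecution.getExecutionContext().putString("session", session);
    }

    @Override
    public void afterJob(JobExecution jobExecution) {
        // no-op
    }
}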
References
https://jira.springsource.org/browse/BATCH-1412
http://forum.springsource.org/showthread.php?104440-Non-Identity-Job-Parameters&highlight=
You'll see from this forum thread that there's no good way to do it (per Dave Syer). So I wrote my own launcher based on the SimpleJobLauncher (in fact I delegate to the SimpleJobLauncher if a non-overloaded method is called) with an overloaded run method that takes a callback interface, which allows contributing values to the execution context without their being 'true' job parameters. You could do something very similar.
I think the applicable LOC for you is right here:
jobExecution = jobRepository.createJobExecution(job.getName(), jobParameters);
if (contributor != null) {
    if (contributor.contributeTo(jobExecution.getExecutionContext())) {
        jobRepository.updateExecutionContext(jobExecution);
    }
}
which is where, after the job execution is created, the execution context gets contributed to. Hopefully this helps you in your implementation.
public class ControlMJobLauncher implements JobLauncher, InitializingBean {

    // Declaration added for completeness; the original snippet references 'logger' without declaring it
    private static final Log logger = LogFactory.getLog(ControlMJobLauncher.class);

    private JobRepository jobRepository;
    private TaskExecutor taskExecutor;
    private SimpleJobLauncher simpleLauncher;
    private JobFilter jobFilter;

    public void setJobRepository(JobRepository jobRepository) {
        this.jobRepository = jobRepository;
    }

    public void setTaskExecutor(TaskExecutor taskExecutor) {
        this.taskExecutor = taskExecutor;
    }

    /**
     * Optional filter to prevent job launching based on some specific criteria.
     * Jobs that are filtered out will return success to ControlM, but will not run.
     */
    public void setJobFilter(JobFilter jobFilter) {
        this.jobFilter = jobFilter;
    }

    public JobExecution run(final Job job, final JobParameters jobParameters, ExecutionContextContributor contributor)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException, JobFilteredException {

        Assert.notNull(job, "The Job must not be null.");
        Assert.notNull(jobParameters, "The JobParameters must not be null.");

        // See if job is filtered
        if (this.jobFilter != null && !jobFilter.launchJob(job, jobParameters)) {
            throw new JobFilteredException(String.format("Job has been filtered by the filter: %s", jobFilter.getFilterName()));
        }

        final JobExecution jobExecution;
        JobExecution lastExecution = jobRepository.getLastJobExecution(job.getName(), jobParameters);
        if (lastExecution != null) {
            if (!job.isRestartable()) {
                throw new JobRestartException("JobInstance already exists and is not restartable");
            }
            logger.info(String.format("Restarting job %s instance %d", job.getName(), lastExecution.getId()));
        }

        // Check the validity of the parameters before creating anything
        // in the repository...
        job.getJobParametersValidator().validate(jobParameters);

        /*
         * There is a very small probability that a non-restartable job can be
         * restarted, but only if another process or thread manages to launch
         * <i>and</i> fail a job execution for this instance between the last
         * assertion and the next method returning successfully.
         */
        jobExecution = jobRepository.createJobExecution(job.getName(), jobParameters);

        if (contributor != null) {
            if (contributor.contributeTo(jobExecution.getExecutionContext())) {
                jobRepository.updateExecutionContext(jobExecution);
            }
        }

        try {
            taskExecutor.execute(new Runnable() {
                public void run() {
                    try {
                        logger.info("Job: [" + job + "] launched with the following parameters: ["
                                + jobParameters + "]");
                        job.execute(jobExecution);
                        logger.info("Job: [" + job + "] completed with the following parameters: ["
                                + jobParameters + "] and the following status: ["
                                + jobExecution.getStatus() + "]");
                    } catch (Throwable t) {
                        logger.warn("Job: [" + job
                                + "] failed unexpectedly and fatally with the following parameters: ["
                                + jobParameters + "]", t);
                        rethrow(t);
                    }
                }

                private void rethrow(Throwable t) {
                    if (t instanceof RuntimeException) {
                        throw (RuntimeException) t;
                    } else if (t instanceof Error) {
                        throw (Error) t;
                    }
                    throw new IllegalStateException(t);
                }
            });
        } catch (TaskRejectedException e) {
            jobExecution.upgradeStatus(BatchStatus.FAILED);
            if (jobExecution.getExitStatus().equals(ExitStatus.UNKNOWN)) {
                jobExecution.setExitStatus(ExitStatus.FAILED.addExitDescription(e));
            }
            jobRepository.update(jobExecution);
        }

        return jobExecution;
    }

    static interface ExecutionContextContributor {
        boolean CONTRIBUTED_SOMETHING = true;
        boolean CONTRIBUTED_NOTHING = false;

        /**
         * @param executionContext
         * @return true if the execution context was contributed to
         */
        public boolean contributeTo(ExecutionContext executionContext);
    }

    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.state(jobRepository != null, "A JobRepository has not been set.");
        if (taskExecutor == null) {
            logger.info("No TaskExecutor has been set, defaulting to synchronous executor.");
            taskExecutor = new SyncTaskExecutor();
        }
        this.simpleLauncher = new SimpleJobLauncher();
        this.simpleLauncher.setJobRepository(jobRepository);
        this.simpleLauncher.setTaskExecutor(taskExecutor);
        this.simpleLauncher.afterPropertiesSet();
    }

    @Override
    public JobExecution run(Job job, JobParameters jobParameters)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        return simpleLauncher.run(job, jobParameters);
    }
}
Starting from Spring Batch 2.2.x, there is support for non-identifying parameters. If you are using CommandLineJobRunner, you can specify non-identifying parameters with a '-' prefix.
For example:
java org.springframework.batch.core.launch.support.CommandLineJobRunner file=/my/file/path -session=5678
If you are using an older version of Spring Batch, you need to migrate your database schema. See the 'Migrating to 2.x.x' section at http://docs.spring.io/spring-batch/getting-started.html.
This is the Jira page for the feature: https://jira.springsource.org/browse/BATCH-1412, and here is the change that implements it: https://fisheye.springsource.org/changelog/spring-batch?cs=557515df45c0f596588418d53c3f2bae3781c1c3
In more recent versions of Spring Batch (I am using spring-batch-core:4.3.3), you can use the JobParametersBuilder to specify whether a parameter is identifying or not. For example:
new JobParametersBuilder()
        .addString("identifying-param-name", paramValue1)
        .addString("non-identifying-param-name", paramValue2, false)
        .toJobParameters();
The 'false' in the third argument makes the parameter non-identifying.