Database to File program query - spring-batch

I have a Spring Batch program which reads from a database and writes to a file.
The job is:
<job id="MyTransactionJob" job-repository="jobRepository" incrementer="dynamicJobParameters">
<step id="TransactionfileGenerator">
<tasklet transaction-manager="jobRepository-transactionManager">
<chunk reader="MyItemReader" writer="MyItemWriter" commit-interval="1000" skip-policy="skipPolicy"/>
</tasklet>
<listeners>
<listener ref="MySkipListener"/>
</listeners>
</step>
</job>
Item Reader is:
<beans:bean id="MyItemReader" class="org.springframework.batch.item.database.JdbcCursorItemReader">
<beans:property name="dataSource" ref="jobRepository-dataSource" />
<beans:property name="sql" value="${dbTofileDataReadSQL}"/>
<beans:property name="rowMapper">
<beans:bean class="com.mypackage.MyRowMapper" />
</beans:property>
</beans:bean>
dbTofileDataReadSQL is a simple SELECT statement based on a condition, so if the condition is not satisfied, zero rows are returned.
Item writer is:
<beans:bean id="MyItemWriter" class="com.mypackage.MyDbToFileItemWriter">
<beans:property name="delegate">
<beans:bean class="org.springframework.batch.item.file.FlatFileItemWriter">
<beans:property name="resource" value="file:c:\output.dat" />
<beans:property name="shouldDeleteIfExists" value="true"/>
<beans:property name="lineAggregator">
<beans:bean class="org.springframework.batch.item.file.transform.PassThroughLineAggregator" />
</beans:property>
</beans:bean>
</beans:property>
</beans:bean>
The issue is that even if zero rows are returned, an empty file is still created because the writer always runs.
Is it possible to add a condition so that the file is created if and only if there is at least one row to write? Otherwise, I'd like to skip the ItemWriter part completely.
Thanks for reading!!

The file will be opened when the step starts. You can:
- create a custom FlatFileItemWriter which opens the file lazily
- add an after-step listener which deletes the file if no lines were written (see the sketch below); this is a more general solution, especially if you use a flow
- use a simple shell script which checks the file after the job and deletes it if it is empty
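For the second option, a minimal sketch of such an after-step listener could look like the following (the class name is made up, and the path is assumed to match the resource configured on the FlatFileItemWriter); register it as an additional <listener> on the step, next to MySkipListener:
import java.io.File;

import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;

// Deletes the output file after the step if no items were written.
public class DeleteEmptyFileStepListener implements StepExecutionListener {

    // must match the resource configured on the FlatFileItemWriter
    private File outputFile = new File("c:/output.dat");

    @Override
    public void beforeStep(StepExecution stepExecution) {
        // nothing to do before the step
    }

    @Override
    public ExitStatus afterStep(StepExecution stepExecution) {
        // getWriteCount() is the number of items the step actually wrote
        if (stepExecution.getWriteCount() == 0 && outputFile.exists()) {
            outputFile.delete();
        }
        return stepExecution.getExitStatus();
    }

    public void setOutputFile(File outputFile) {
        this.outputFile = outputFile;
    }
}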

Spring Batch persist data from step1 to step2

I have thoroughly searched the Spring docs and supporting sites, but have not found an answer to this question: if I want to access and store some values in the ExecutionContext, do I have to write a custom database ItemReader and ItemWriter that implement ItemStream, or can I use the out-of-the-box readers and writers and configure the beans in the spring-batch-context.xml file to do this? Any code examples would be greatly appreciated. Thanks!
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:batch="http://www.springframework.org/schema/batch"
xsi:schemaLocation="http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd">
<import resource="classpath:context-datasource.xml" />
<!-- JobRepository and JobLauncher are configuration/setup classes -->
<bean id="jobRepository"
class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean" />
<bean id="jobLauncher"
class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository" />
</bean>
<!-- ItemReader which reads from database and returns the row mapped by
rowMapper -->
<bean id="databaseItemReader"
class="org.springframework.batch.item.database.JdbcCursorItemReader">
<property name="dataSource" ref="dataSource" />
<property name="sql"
value="SELECT PartnerID, ftpUserName, ftpPassword, ftpPath, jobRunTime, jobFrequency FROM tblRosterJobParams" />
<property name="rowMapper">
<bean class="com.explorelearning.batch.ParamResultRowMapper" />
</property>
</bean>
<!-- This was supposed to change to a SavingItemWriter that persists these values to the Step ExecutionContext -->
<bean id="flatFileItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter"
scope="step">
<property name="resource" value="file:csv/ParamResult.txt" />
<property name="lineAggregator">
<!--An Aggregator which converts an object into delimited list of strings -->
<bean
class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
<property name="delimiter" value="," />
<property name="fieldExtractor">
<!-- Extractor which returns the value of beans property through reflection -->
<bean
class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
<property name="names" value="PartnerID" />
</bean>
</property>
</bean>
</property>
</bean>
<!-- Optional JobExecutionListener to perform business logic before and after the job -->
<bean id="jobListener" class="com.explorelearning.batch.RosterBatchJobListener" />
<!-- Optional StepExecutionListener to perform business logic before and after the job -->
<bean id="stepExecutionListener" class="com.explorelearning.batch.ParamResultStepExecutionListener" />
<!-- Optional ItemProcessor to perform business logic/filtering on the input records -->
<bean id="itemProcessor" class="com.explorelearning.batch.ParamResultItemProcessor" />
<!-- Step will need a transaction manager -->
<bean id="transactionManager"
class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />
<!-- Actual Job -->
<batch:job-repository id="jobRepository" data-source="dataSource" table-prefix="BATCH_"
transaction-manager="transactionManager" isolation-level-for-create="SERIALIZABLE" />
<batch:job id="RosterBatchJob" job-repository="jobRepository">
<batch:step id="readParams" >
<batch:tasklet transaction-manager="transactionManager" allow-start-if-complete="true">
<batch:chunk reader="databaseItemReader" writer="flatFileItemWriter"
processor="itemProcessor" commit-interval="10" />
</batch:tasklet>
</batch:step>
<!--<batch:step id="grabCSVs" next="validateCSVs">
</batch:step>
<batch:step id="validateCSVs" next="filterRecords">
</step>
<batch:step id="filterRecords" next="determineActions">
</batch:step>
<batch:step id="determineActions" next="executeActions">
</batch:step>
<batch:step id="executeActions" next="">
</batch:step> -->
</batch:job>
Updated now that the question has more detail...
Okay, the way I'm reading your context file, you need to:
Get some FTP login info from the Database
Download some CSV files
Validate the files (file-level? or just record-level validation?)
Filter out some garbage records
Determine some "actions"
Execute some "actions"
The best way to accomplish this (in my opinion) is to create the following steps:
Step 1: A simple Tasklet that queries the database for FTP login info and then downloads the CSV files to a local folder
Step 2: Partitioned step that creates one partition per CSV in the folder
Each partition will have a reader (FlatFileItemReader) that reads out records.
Records will then go to an ItemProcessor that returns null if the record is garbage.
Valid items will be written to some DB staging table for further action
Optionally you can use a ClassifierCompositeItemWriter to do something with the junk records
Step 3: The next step reads the valid data in the staging table and does "Actions"
Step 4: Maybe another step to do something with the junk records
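As for the original ExecutionContext question: you don't need to replace the out-of-the-box readers and writers. One common pattern (sketched below with an assumed ParamResult domain class and an example key name) is to put values into the step's ExecutionContext from a chunk component and promote them with the standard ExecutionContextPromotionListener:
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemProcessor;

// Assumed domain class mapped by ParamResultRowMapper.
class ParamResult {
    private String ftpPath;
    public String getFtpPath() { return ftpPath; }
}

// Sketch of an ItemProcessor that stashes a value in the step's ExecutionContext.
// Spring Batch auto-registers chunk components carrying @BeforeStep as listeners.
public class ParamResultItemProcessor implements ItemProcessor<ParamResult, ParamResult> {

    private StepExecution stepExecution;

    @BeforeStep
    public void saveStepExecution(StepExecution stepExecution) {
        this.stepExecution = stepExecution;
    }

    @Override
    public ParamResult process(ParamResult item) {
        // store whatever a later step needs; "ftpPath" is just an example key
        stepExecution.getExecutionContext().put("ftpPath", item.getFtpPath());
        return item;
    }
}
Then register an org.springframework.batch.core.listener.ExecutionContextPromotionListener on the step with its keys property set to "ftpPath"; it copies those entries into the job ExecutionContext at the end of the step, so later steps can read them without any custom ItemStream implementation.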

spring batch remote chunking doesn't queue messages but runs it locally?

Below is my configuration for Spring Batch remote chunking. My steps are running locally instead of remotely; I can't see any messages in RabbitMQ.
<beans:bean id="importExchangesChunkItemWriter"
class="org.springframework.batch.integration.chunk.ChunkMessageChannelItemWriter"
scope="step" p:messagingOperations-ref="importExchangesMessagingTemplate"
p:replyChannel-ref="importExchangesReplyChannel">
</beans:bean>
<beans:bean id="importExchangesChunkHandler"
class="org.springframework.batch.integration.chunk.RemoteChunkHandlerFactoryBean"
p:chunkWriter-ref="importExchangesChunkItemWriter" p:step-ref="importExchangesStep">
</beans:bean>
<job id="importExchangesJob" restartable="true">
<step id="importExchangesStep" next="importEclsStep">
<tasklet transaction-manager="transactionManager">
<chunk reader="importExchangesFileItemReader" writer="importExchangesItemWriter"
commit-interval="${import.exchanges.commit.interval}" />
</tasklet>
</step>
</job>
<beans:bean id="passThroughItemProcessor" class="org.springframework.batch.item.support.PassThroughItemProcessor" />
<rabbit:connection-factory id="connectionFactory"
port="${rabbitmq.port}" host="${rabbitmq.host}" username="${rabbitmq.username}"
password="${rabbitmq.password}" />
<rabbit:template id="amqpTemplate" connection-factory="connectionFactory" />
<rabbit:admin id="rmqAdmin" connection-factory="connectionFactory" />
<rabbit:queue name="${import.exchanges.queue}" />
<rabbit:queue name="${import.exchanges.reply.queue}" />
<int:channel id="importExchangesChannel" />
<int:channel id="importExchangesReplyChannel" />
<beans:bean id="importExchangesMessagingTemplate"
class="org.springframework.integration.core.MessagingTemplate"
p:defaultChannel-ref="importExchangesChannel" p:receiveTimeout="${import.exchanges.reply.timeout}" />
<amqp:outbound-channel-adapter id="importExchangesOutboundAdapter"
channel="importExchangesChannel" />
<amqp:inbound-channel-adapter id="importExchangesInboundAdapter"
connection-factory="connectionFactory" channel="importExchangesReplyChannel"
queue-names="${import.exchanges.reply.queue}" />
<amqp:inbound-channel-adapter id="importExchangesSlaveInboundAdapter"
connection-factory="connectionFactory" channel="importExchangesChannel"
queue-names="${import.exchanges.queue}" />
<amqp:outbound-channel-adapter id="importExchangesSlaveOutboundAdapter"
channel="importExchangesReplyChannel" />
<int:service-activator id="serviceActivatorExchanges"
input-channel="importExchangesChannel" output-channel="importExchangesReplyChannel"
ref="chunkProcessorChunkHandlerExchanges" method="handleChunk" />
<beans:bean id="importExchangesItemWriter"
class="com.st.batch.foundation.ImportExchangesItemWriter" p:symfony-ref="symfony" p:replyTimeout="${import.exchanges.reply.timeout}"/>
<beans:bean id="chunkProcessorExchanges"
class="org.springframework.batch.core.step.item.SimpleChunkProcessor"
p:itemWriter-ref="importExchangesItemWriter" p:itemProcessor-ref="passThroughItemProcessor"/>
<beans:bean id="chunkProcessorChunkHandlerExchanges"
class="org.springframework.batch.integration.chunk.ChunkProcessorChunkHandler"
p:chunkProcessor-ref="chunkProcessorExchanges" />
I changed the configuration to the following. Now it queues a single message at a time and doesn't process multiple messages (it should process a number of messages equal to the listener concurrency).
<beans:bean id="simpleThreadScope"
class="org.springframework.context.support.SimpleThreadScope" />
<util:map id="scopesMap">
<beans:entry key="thread" value-ref="simpleThreadScope" />
</util:map>
<beans:bean
class="org.springframework.beans.factory.config.CustomScopeConfigurer"
p:scopes-ref="scopesMap" />
<int:channel id="importExchangesChannel" />
<int:channel id="importExchangesReplyChannel" scope="thread">
<int:queue />
</int:channel>
<beans:bean id="importExchangesMessagingTemplate"
class="org.springframework.integration.core.MessagingTemplate"
p:defaultChannel-ref="importExchangesChannel" p:receiveTimeout="${import.exchanges.reply.timeout}" />
<amqp:outbound-channel-adapter
amqp-template="amqpTemplate" channel="importExchangesChannel"
exchange-name="${import.exchanges.exchange}" routing-key="${import.exchanges.routing.key}" />
<rabbit:listener-container
connection-factory="rabbitConnectionFactory" concurrency="${import.exchanges.listener.concurrency}"
requeue-rejected="false" prefetch="1">
<rabbit:listener queues="${import.exchanges.queue}"
ref="importExchangesChunkHandler" method="handleChunk" />
</rabbit:listener-container>
<int:channel id="importEclsChannel" />
<int:channel id="importEclsReplyChannel" scope="thread">
<int:queue />
</int:channel>
<beans:bean id="importEclsMessagingTemplate"
class="org.springframework.integration.core.MessagingTemplate"
p:defaultChannel-ref="importEclsChannel" p:receiveTimeout="${import.ecls.reply.timeout}" />
<amqp:outbound-channel-adapter
amqp-template="amqpTemplate" channel="importEclsChannel"
exchange-name="${import.ecls.exchange}" routing-key="${import.ecls.routing.key}" />
<rabbit:listener-container
connection-factory="rabbitConnectionFactory" concurrency="${import.ecls.listener.concurrency}"
requeue-rejected="false" prefetch="1">
<rabbit:listener queues="${import.ecls.queue}"
ref="importEclsChunkHandler" method="handleChunk" />
</rabbit:listener-container>
<beans:bean id="importExchangesItemWriter"
class="com.st.batch.foundation.ImportExchangesItemWriter"
p:symfony-ref="symfony" p:replyTimeout="${import.exchanges.reply.timeout}" />
<beans:bean id="importExchangesChunkItemWriter"
class="org.springframework.batch.integration.chunk.ChunkMessageChannelItemWriter"
scope="step" p:messagingOperations-ref="importExchangesMessagingTemplate"
p:replyChannel-ref="importExchangesReplyChannel">
</beans:bean>
<beans:bean id="importExchangesChunkHandler"
class="org.springframework.batch.integration.chunk.RemoteChunkHandlerFactoryBean"
p:chunkWriter-ref="importExchangesChunkItemWriter" p:step-ref="importExchangesStep">
</beans:bean>
<rabbit:queue name="${import.exchanges.queue}" />
<rabbit:queue name="${import.exchanges.reply.queue}" />
<rabbit:direct-exchange name="${import.exchanges.exchange}">
<rabbit:bindings>
<rabbit:binding queue="${import.exchanges.queue}"
key="${import.exchanges.routing.key}" />
</rabbit:bindings>
</rabbit:direct-exchange>
I can see only one message in the queue at a time. It should send a number of messages equal to ${import.exchanges.commit.interval}, and all of them should be picked up by the concurrent listeners and processed in parallel.
I am not sure what you mean by "running locally", but you don't have any routing information on the outbound adapters; if Rabbit doesn't know how to route a message, it simply drops it.
You need to add routing-key="${import.exchanges.queue}" and routing-key="${import.exchanges.reply.queue}" to the adapters. This will use the default exchange ("") where the queues are bound using their names.
Also, you can't use the same channel name (importExchangesChannel) on both sides. If you do, the outbound adapter and the service activator will both be subscribed to the same channel, and messages will be distributed between them in round-robin fashion.
So some chunks will run locally, and others will be dropped because of the routing-key problem.
You need to fix the routing key and use a different channel on the service side.

Passing data through two steps - Custom Field Mapper and Custom Tasklet

This is my job configuration:
<batch:job id="clientesJob" job-repository="jobRepository">
<batch:step id="step1" next="renameFiles">
<tasklet>
<chunk reader="multiResourceReader" writer="sqlWriter"
commit-interval="1" />
</tasklet>
</batch:step>
<batch:step id="renameFiles">
<tasklet ref="fileRenamingTasklet" />
</batch:step>
</batch:job>
<bean id="multiResourceReader"
class=" org.springframework.batch.item.file.MultiResourceItemReader">
<property name="resources" value="file:c:/cvs/basecli*" />
<property name="delegate" ref="flatFileItemReader" />
</bean>
<bean id="flatFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
<property name="lineMapper">
<bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
<property name="fieldSetMapper" ref="clienteMapper" />
<property name="lineTokenizer" ref="tickerLineTokenizer" />
</bean>
</property>
</bean>
<bean name="tickerLineTokenizer"
class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer" />
<bean id="clienteMapper" class="com.bind.mapper.ClienteFieldSetMapper">
</bean>
<bean id="fileRenamingTasklet" class="com.bind.tasklet.FileRenamingTasklet">
<property name="directory" value="file:c:/cvs/" />
</bean>
In the first step I'm reading the folder with a MultiResourceItemReader and then writing to SQL Server.
The second step renames the files to "PROCESSFILE-{originalname}".
What I want to achieve is: if there was a problem in the first step, rename the file differently, like "PROCESSERROR-{originalname}".
So I need to know the status of the first step in my FileRenamingTasklet.
I read about putting data in the stepExecutionContext, but I can't access it in ClienteFieldSetMapper.
I also tried using listeners, but I couldn't pass the data through that way.
To decide how to rename, I need the file name and the status.
Any ideas?
Make your fileRenamingTasklet a StepExecutionListener and listen to step1's afterStep result; in StepExecutionListener.afterStep(StepExecution stepExecution), check stepExecution.getExitStatus() and you will be able to rename your files accordingly.
To add the listener, modify your XML as follows:
<batch:step id="step1" next="renameFiles">
<tasklet>
<chunk reader="multiResourceReader" writer="sqlWriter" commit-interval="1" />
</tasklet>
<listeners>
<listener ref="fileRenamingTasklet" />
</listeners>
</batch:step>
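A minimal sketch of what the tasklet could then look like (the renaming logic itself is omitted, and the exit-status check is an assumption about how you want to distinguish success from failure):
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;

// The same bean is the listener on step1 and the tasklet of renameFiles,
// so afterStep() runs first and remembers step1's exit status.
public class FileRenamingTasklet implements Tasklet, StepExecutionListener {

    private String directory;
    private ExitStatus step1Status;

    @Override
    public void beforeStep(StepExecution stepExecution) {
        // nothing to do before step1
    }

    @Override
    public ExitStatus afterStep(StepExecution stepExecution) {
        // invoked at the end of step1 because the bean is registered as its listener
        this.step1Status = stepExecution.getExitStatus();
        return stepExecution.getExitStatus();
    }

    @Override
    public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
        boolean step1Ok = step1Status != null
                && ExitStatus.COMPLETED.getExitCode().equals(step1Status.getExitCode());
        String prefix = step1Ok ? "PROCESSFILE-" : "PROCESSERROR-";
        // rename every file in 'directory' with the chosen prefix (omitted)
        return RepeatStatus.FINISHED;
    }

    public void setDirectory(String directory) {
        this.directory = directory;
    }
}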

Spring Batch File Writer Exception handling

I have a Spring Batch process which has following kind of code.
<step id="step1" xmlns="http://www.springframework.org/schema/batch">
<tasklet allow-start-if-complete="true">
<chunk reader="reader1" writer="writer1" commit-interval="10"/>
</tasklet>
</step>
<bean id="writer1" class="org.springframework.batch.item.file.FlatFileItemWriter">
<property name="resource" ref="resourceFlatFile" />
<property name="shouldDeleteIfExists" value="true" />
<property name="transactional" value="true" />
<property name = "lineAggregator">
<bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator" >
<property name="delimiter" value=""/>
<property name ="fieldExtractor">
<bean class="com.path.MyExtractor" />
</property>
</bean>
</property>
</bean>
Basically my reader returns a set of records from the database, and my writer (writer1) writes them to a flat file. If there is any problem writing a record to the file, I would like to mark that record's status as failed in the database. How should I handle this kind of scenario? Any help is appreciated.
Thanks
My question is if I get any kind of exception
I would recommend you look into using an ItemWriteListener and update the status of the failed records in the onWriteError implementation.
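A minimal sketch of such a listener, assuming the Spring Batch 4.x listener signatures and a hypothetical MyRecord item type and status table (register it in a <listeners> element of the step):
import java.util.List;

import org.springframework.batch.core.ItemWriteListener;
import org.springframework.jdbc.core.JdbcTemplate;

// Hypothetical item type produced by reader1.
class MyRecord {
    private long id;
    public long getId() { return id; }
}

// Marks all records of a failed chunk as FAILED in the database.
public class FailedRecordWriteListener implements ItemWriteListener<MyRecord> {

    private JdbcTemplate jdbcTemplate;

    @Override
    public void beforeWrite(List<? extends MyRecord> items) {
        // nothing to do
    }

    @Override
    public void afterWrite(List<? extends MyRecord> items) {
        // nothing to do
    }

    @Override
    public void onWriteError(Exception exception, List<? extends MyRecord> items) {
        // the failed chunk is rolled back as a whole, so flag every item in it
        for (MyRecord record : items) {
            jdbcTemplate.update("UPDATE my_table SET status = 'FAILED' WHERE id = ?", record.getId());
        }
    }

    public void setJdbcTemplate(JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }
}
Note that because the writer is transactional, the whole chunk is rolled back when one record fails, so onWriteError sees the entire chunk that was being written rather than just the single bad item.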

Making Spring Batch ItemReader restartable

The Spring Batch program I am working on reads data from a table using the org.springframework.batch.item.database.JdbcCursorItemReader ItemReader. The original plan was to alter the table, add a PROCESSED_INDICATOR flag, and prepopulate it with status 'PENDING'. Once a record was processed, the writer would update the PROCESSED_INDICATOR flag to 'PROCESSED'. This was meant to support restartability: for example, if the batch picks up 1 million records and dies after half a million, then on restart it should pick up where it left off.
Unfortunately, management didn't approve this solution, so I am digging into ways to make the ItemReader itself restartable. As per the Spring documentation: "Most ItemReaders have much more sophisticated restart logic. The JdbcCursorItemReader, for example, stores the row id of the last processed row in the Cursor."
Does anyone have a sample of such a custom reader that extends JdbcCursorItemReader and stores the last processed row in the cursor?
https://docs.spring.io/spring-batch/trunk/reference/html/readersAndWriters.html
==FULL XML CONFIGURATION==
<import resource="classpath:/batch/utility/skip/batch_skip.xml" />
<import resource="classpath:/batch/config/context-postgres.xml" />
<import resource="classpath:/batch/config/oracle-database.xml" />
<context:property-placeholder
location="classpath:/batch/jobs/TPF-1001-DD-01/TPF-1001-DD-01.properties" />
<bean id="gridSizePartitioner"
class="com.tpf.partitioner.GridSizePartitioner" />
<task:executor id="taskExecutor" pool-size="${pool.size}" />
<batch:job id="XYZJob" job-repository="jobRepository"
restartable="true">
<batch:step id="XYZSTEP">
<batch:description>Convert TIF files to PDF</batch:description>
<batch:partition partitioner="gridSizePartitioner">
<batch:handler task-executor="taskExecutor"
grid-size="${pool.size}" />
<batch:step>
<batch:tasklet allow-start-if-complete="true">
<batch:chunk commit-interval="${commit.interval}"
skip-limit="${job.skip.limit}">
<batch:reader>
<bean id="timeReader"
class="org.springframework.batch.item.database.JdbcCursorItemReader"
scope="step">
<property name="dataSource" ref="oracledataSource" />
<property name="sql">
<value>
select TIME_ID as timesheetId,count(*),max(CREATION_DATETIME) as creationDateTime , ILN_NUMBER as ilnNumber
from TS_FAKE_NAME
where creation_datetime >= '#{jobParameters['creation_start_date1']} 12.00.00.000000000 AM'
and creation_datetime < '#{jobParameters['creation_start_date2']} 11.59.59.999999999 PM'
and mod(time_id,${pool.size})=#{stepExecutionContext['partition.id']}
group by time_id ,ILN_NUMBER
</value>
</property>
<property name="rowMapper">
<bean
class="org.springframework.jdbc.core.BeanPropertyRowMapper">
<property name="mappedClass"
value="com.tpf.model.Time" />
</bean>
</property>
</bean>
</batch:reader>
<batch:processor>
<bean id="compositeItemProcessor"
class="org.springframework.batch.item.support.CompositeItemProcessor">
<property name="delegates">
<list>
<ref bean="timeProcessor" />
</list>
</property>
</bean>
</batch:processor>
<batch:writer>
<bean id="compositeItemWriter"
class="org.springframework.batch.item.support.CompositeItemWriter">
<property name="delegates">
<list>
<ref bean="timeWriter" />
</list>
</property>
</bean>
</batch:writer>
<batch:skippable-exception-classes>
<batch:include
class="com.utility.skip.BatchSkipException" />
</batch:skippable-exception-classes>
<batch:listeners>
<batch:listener ref="batchSkipListener" />
</batch:listeners>
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:partition>
</batch:step>
<batch:validator>
<bean
class="org.springframework.batch.core.job.DefaultJobParametersValidator">
<property name="requiredKeys">
<list>
<value>batchRunNumber</value>
<value>creation_start_date1</value>
<value>creation_start_date2</value>
</list>
</property>
</bean>
</batch:validator>
</batch:job>
<bean id="timesheetWriter" class="com.tpf.writer.TimeWriter"
scope="step">
<property name="dataSource" ref="dataSource" />
</bean>
<bean id="timeProcessor"
class="com.tpf.processor.TimeProcessor" scope="step">
<property name="dataSource" ref="oracledataSource" />
</bean>
Does anyone have a sample of such a custom reader that extends JdbcCursorItemReader and stores the last processed row in the cursor?
The JdbcCursorItemReader already does that; see the Javadoc, here is an excerpt:
ExecutionContext: The current row is returned as restart data,
and when restored from that same data, the cursor is opened and the current row
set to the value within the restart data.
So you don't need a custom reader.
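For illustration, here is a plain Java-configured equivalent of your reader (a sketch with a placeholder SQL string, reusing the Time class from your configuration). saveState is true by default, so the reader records its current row position in the ExecutionContext under its name and resumes from there on restart:
import javax.sql.DataSource;

import org.springframework.batch.item.database.JdbcCursorItemReader;
import org.springframework.jdbc.core.BeanPropertyRowMapper;

import com.tpf.model.Time;

public class TimeReaderFactory {

    // The out-of-the-box reader tracks the current row itself; no custom subclass needed.
    public static JdbcCursorItemReader<Time> timeReader(DataSource dataSource) {
        JdbcCursorItemReader<Time> reader = new JdbcCursorItemReader<>();
        reader.setName("timeReader");          // key under which restart data is stored
        reader.setDataSource(dataSource);
        reader.setSql("SELECT ... FROM TS_FAKE_NAME WHERE ..."); // placeholder for the real query
        reader.setRowMapper(new BeanPropertyRowMapper<>(Time.class));
        reader.setSaveState(true);             // default: current row position goes into the ExecutionContext
        return reader;
    }
}
Since your reader is step-scoped inside a partitioned step, each partition has its own StepExecution and ExecutionContext, so the restart position is tracked per partition.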