ignite client is taking long time to start when we are connecting to multiple nodes - cluster-analysis

Scenario: I have two server nodes at the beginning, and when we try to connect client nodes, a client takes 15+ minutes to start. Please find the server configuration below; the only change for the other server node is the IP address. On the console I am getting the error below. Thanks in advance.
[12:42:10] Possible failure suppressed accordingly to a configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=SYSTEM_WORKER_BLOCKED, err=class o.a.i.IgniteException: GridWorker [name=tcp-comm-worker, igniteInstanceName=null, finished=false, heartbeatTs=1600672317715]]] [12:42:40,486][SEVERE][tcp-disco-msg-worker-[5023dc59 172.16.0.189:48510]-#2][G] Blocked system-critical thread has been detected. This can lead to cluster-wide undefined behaviour [workerName=tcp-comm-worker, threadName=tcp-comm-worker-#1, blockedFor=18s] [12:42:40] Possible failure suppressed accordingly to a configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=SYSTEM_WORKER_BLOCKED, err=class o.a.i.IgniteException: GridWorker [name=tcp-comm-worker, igniteInstanceName=null, finished=false, heartbeatTs=1600672341604]]] [12:42:49,498][SEVERE][tcp-disco-msg-worker-[5023dc59 172.16.0.189:48510]-#2][G] Blocked system-critical thread has been detected. 
This can lead to cluster-wide undefined behaviour [workerName=tcp-comm-worker, threadName=tcp-comm-worker-#1, blockedFor=27s] [12:42:49] Possible failure suppressed accordingly to a configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=SYSTEM_WORKER_BLOCKED, err=class o.a.i.IgniteException: GridWorker [name=tcp-comm-worker, igniteInstanceName=null, finished=false, heartbeatTs=1600672341604]]] [12:43:01,603][SEVERE][tcp-disco-msg-worker-[5023dc59 172.16.0.189:48510]-#2][G] Blocked system-critical thread has been detected. This can lead to cluster-wide undefined behaviour [workerName=tcp-comm-worker, threadName=tcp-comm-worker-#1, blockedFor=39s]
``
-->
-->
<!-- <property name="consistentId" value="#{ systemEnvironment['IGNITE_CONSISTENT_ID'] }" /> -->
<!-- Enable task execution events for examples. -->
<!-- Data storage: the default data region has persistence enabled. -->
<property name="dataStorageConfiguration">
<bean class="org.apache.ignite.configuration.DataStorageConfiguration">
<property name="defaultDataRegionConfiguration">
<bean class="org.apache.ignite.configuration.DataRegionConfiguration">
<property name="persistenceEnabled" value="true" />
<!-- SpEL expressions below evaluate to byte counts: 4 GiB maximum, 1 GiB initial. -->
<property name="maxSize" value="#{4L * 1024 * 1024 * 1024}"/>
<property name="initialSize" value="#{1L * 1024 * 1024 * 1024}"/>
</bean>
</property>
</bean>
</property>
<!-- Explicitly configure TCP discovery SPI to provide list of initial nodes. -->
<property name="discoverySpi">
<bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
<!-- Local discovery port of this node. -->
<property name="localPort" value="48510"/>
<property name="ipFinder">
<!--
Ignite provides several options for automatic discovery that can be used
instead of static IP based discovery. For information on all options refer
to our documentation: http://apacheignite.readme.io/docs/cluster-config
-->
<!-- Static IP finder is active: initial nodes are looked up in the fixed address list below. -->
<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
<!-- Multicast alternative, currently disabled: -->
<!-- <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder"> -->
<property name="addresses">
<list>
<!-- In distributed environment, replace with actual host IP address. -->
<!-- Each entry is host:firstPort..lastPort; both entries span ports 48510 to 48512. -->
<value>127.0.0.1:48510..48512</value>
<value>X.16.0.X:48510..48512</value>
</list>
</property>
</bean>
</property>
</bean>
</property>
<!-- TCP communication SPI: this node's communication port is set to 48110. -->
<property name="communicationSpi">
<bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
<property name="localPort" value="48110"/>
<!-- Port range override is disabled, so the SPI's own default range applies. -->
<!-- <property name="localPortRange" value="1000"/> -->
</bean>
</property>
<!-- Client connector configuration: port 10801 is configured here. -->
<!-- NOTE(review): presumably the port JDBC thin clients connect to; the client code's fallback URL uses 10801. Confirm. -->
<property name="clientConnectorConfiguration">
<bean class="org.apache.ignite.configuration.ClientConnectorConfiguration">
<property name="port" value="10801"/>
</bean>
</property>
<!-- Custom node attributes: this node is tagged with a ROLE attribute. -->
<property name="userAttributes">
<map>
<entry key="ROLE" value="SecindNode" />
</map>
</property>
</bean>
``
Client Code
``
/**
 * Process-wide holder for the application's Ignite client node and its JDBC thin-driver settings.
 *
 * <p>{@link #init()} starts a client-mode node with static IP discovery, activates the cluster,
 * enables baseline auto-adjust and loads the JDBC thin driver. {@link #getInstance()} also
 * re-initialises the node when it is missing or was stopped on segmentation.
 */
public final class IgniteConnectionUtil {
    private static final Logger logger = Logger.getLogger(IgniteConnectionUtil.class);

    /** Cache name used when the caller does not supply one. */
    private static final String CACHE_NAME = "CollectionCache";

    /** JDBC thin endpoint used when no JDBC_THIN_HOST value has been loaded. */
    private static final String DEFAULT_JDBC_THIN_HOST = "172.16.0.189:10801";

    /** Client failure detection timeout (ms) used when the configured value is not positive. */
    private static final long DEFAULT_CLIENT_FAILURE_DETECTION_TIMEOUT_MS = 30000L;

    private static IgniteConnectionUtil instance;
    private static Ignite ignite;
    private static String jdbcThinHost = null;

    /**
     * Starts the client node if needed and optionally clears the in-memory table store,
     * depending on the CLEAR_REDIS_MAP configuration flag.
     */
    private IgniteConnectionUtil() {
        if (ignite == null) {
            init();
        }
        try {
            boolean clearRedisMap = ConfigurationManager.getInstance().getPropertyAsBoolean("CLEAR_REDIS_MAP",
                    "IN_MEMORY_DB", "CONFIG");
            if (clearRedisMap) {
                InMemoryTableStore.getInstance().clearStore();
            }
        } catch (Exception e) {
            // Best effort: a failed clear must not prevent the singleton from being created.
            logger.info("Unable to clear ignite-redis map", e);
        }
    }

    /**
     * Starts the Ignite client node unless Ignite is disabled or a node is already held.
     * Any failure is logged and swallowed, leaving the held node {@code null}.
     */
    public static synchronized void init() {
        try {
            if (!isIgniteEnabled() || ignite != null) {
                return;
            }
            logger.info("Ignite Client starting");
            Ignition.setClientMode(true);

            DataStorageConfiguration storageCfg = new DataStorageConfiguration();
            storageCfg.setWalMode(WALMode.BACKGROUND);

            IgniteConfiguration cfg = new IgniteConfiguration();
            cfg.setDataStorageConfiguration(storageCfg);
            cfg.setPeerClassLoadingEnabled(true);

            // NOTE(review): discovery addresses are hard-coded here; the SERVER_ADDRESS
            // configuration property is not consulted.
            TcpDiscoveryVmIpFinder ipFinder = new TcpDiscoveryVmIpFinder();
            ipFinder.setAddresses(
                    Arrays.asList("127.0.0.1:48510", "127.0.0.1:48511", "127.0.0.1:48512",
                            "X.16.0.189:48510", "X.16.0.X:48511", "X.16.0.X:48512"));

            TcpDiscoverySpi discoverySpi = new TcpDiscoverySpi();
            discoverySpi.setLocalPort(48510);
            // timeout for which client node will try to connect to ignite servers
            // it will throw exception and exit if server can not be found
            long discoveryTimeout = ConfigurationManager.getInstance()
                    .getPropertyAsLong("DISCOVERY_TIMEOUT", "IN_MEMORY_DB", "CONFIG");
            discoverySpi.setIpFinder(ipFinder).setJoinTimeout(discoveryTimeout);

            TcpCommunicationSpi commSpi = new TcpCommunicationSpi();
            long communicationTimeout = ConfigurationManager.getInstance()
                    .getPropertyAsLong("COMMUNICATION_TIMEOUT", "IN_MEMORY_DB", "CONFIG");
            commSpi.setConnectTimeout(communicationTimeout).setLocalPort(48110);

            // this timeout is used to reconnect client to server if server has failed/restarted;
            // the configured value is honoured, with the previous fixed value as a fallback
            long clientFailureDetectionTimeout = ConfigurationManager.getInstance()
                    .getPropertyAsLong("CLIENT_FAILURE_DETECTION_TIMEOUT", "IN_MEMORY_DB", "CONFIG");
            cfg.setClientFailureDetectionTimeout(clientFailureDetectionTimeout > 0
                    ? clientFailureDetectionTimeout
                    : DEFAULT_CLIENT_FAILURE_DETECTION_TIMEOUT_MS);

            cfg.setDiscoverySpi(discoverySpi);
            cfg.setCommunicationSpi(commSpi);

            ignite = Ignition.start(cfg);
            ignite.cluster().active(true);
            ignite.cluster().baselineAutoAdjustEnabled(true);
            ignite.cluster().baselineAutoAdjustTimeout(30000);
            initializeJDBCThinDriver();
            logger.info("Ignite Client started");
        } catch (Exception e) {
            logger.error("Error in starting ignite cluster", e);
        }
    }

    /**
     * Returns the singleton, creating it on first use. On later calls the held node is checked:
     * a missing node triggers {@link #init()}, a node stopped on segmentation is dropped and
     * re-initialised, and an inactive cluster is activated.
     *
     * @return the singleton instance (never {@code null})
     */
    public static synchronized IgniteConnectionUtil getInstance() {
        if (instance == null) {
            instance = new IgniteConnectionUtil();
            return instance;
        }
        try {
            if (ignite == null || ignite.cluster() == null) {
                logger.error("Illegal Ignite state. Will try to restart ignite clinet.");
                init();
            } else if (Ignition.state().equals(IgniteState.STOPPED_ON_SEGMENTATION)) {
                logger.error("Reconnecting to Ignite");
                ignite = null;
                init();
            } else if (!ignite.cluster().active()) {
                ignite.cluster().active(true);
            }
        } catch (Exception e) {
            logger.error("Ignite Exception. Please restart ignite server.", e);
        }
        return instance;
    }

    /** Loads the Ignite JDBC thin driver class and reads the JDBC_THIN_HOST setting. */
    public static void initializeJDBCThinDriver() {
        try {
            Class.forName("org.apache.ignite.IgniteJdbcThinDriver");
            jdbcThinHost = ConfigurationManager.getInstance()
                    .getPropertyAsString("JDBC_THIN_HOST", "IN_MEMORY_DB", "CONFIG");
        } catch (ClassNotFoundException e) {
            logger.error("Error in loading IgniteJdbcThinDriver class", e);
        }
    }

    /**
     * Opens a JDBC thin connection to the configured host, or to the built-in default endpoint
     * when no host has been loaded (previously this produced a URL containing "null").
     *
     * @return an open connection, or {@code null} if the connection attempt failed
     */
    public Connection getJDBCConnection() {
        String host = (jdbcThinHost == null || jdbcThinHost.trim().isEmpty())
                ? DEFAULT_JDBC_THIN_HOST
                : jdbcThinHost;
        try {
            return DriverManager.getConnection("jdbc:ignite:thin://" + host + "/");
        } catch (SQLException e) {
            logger.error("Error in getting Ignite JDBC connection", e);
            return null;
        }
    }

    /**
     * Gets or creates the named cache with the standard partitioned configuration.
     *
     * @param cacheName name of the cache; {@code null} or empty falls back to the default name
     * @return the cache
     * @throws IgniteSQLException if no Ignite node is available
     */
    public IgniteCache<?, ?> getOrCreateCache(String cacheName) {
        String effectiveName = (cacheName == null || cacheName.isEmpty()) ? CACHE_NAME : cacheName;
        return createPartitionedCache(effectiveName);
    }

    /**
     * Gets or creates the default cache with the standard partitioned configuration.
     *
     * @return the cache
     * @throws IgniteSQLException if no Ignite node is available
     */
    public IgniteCache<?, ?> getOrCreateCache() {
        return createPartitionedCache(CACHE_NAME);
    }

    /** Builds the shared cache configuration and asks the held node for the cache. */
    private static IgniteCache<?, ?> createPartitionedCache(String name) {
        CacheConfiguration<?, ?> cacheConfig = new CacheConfiguration<>(name);
        cacheConfig.setCacheMode(CacheMode.PARTITIONED);
        cacheConfig.setBackups(1);
        cacheConfig.setRebalanceMode(CacheRebalanceMode.ASYNC);
        cacheConfig.setAtomicityMode(CacheAtomicityMode.ATOMIC);
        cacheConfig.setWriteSynchronizationMode(CacheWriteSynchronizationMode.PRIMARY_SYNC);
        cacheConfig.setReadFromBackup(true);
        cacheConfig.setCopyOnRead(true);
        cacheConfig.setOnheapCacheEnabled(true);
        cacheConfig.setSqlSchema("PUBLIC");
        if (ignite == null) {
            throw new IgniteSQLException("Internal Server Error Please contact support");
        }
        return ignite.getOrCreateCache(cacheConfig);
    }

    /**
     * Closes the held node, if any, and always forgets it afterwards.
     *
     * @throws Exception wrapping the {@code IgniteException} raised while closing
     */
    public static synchronized void shutdown() throws Exception {
        try {
            if (ignite != null) {
                ignite.close();
            }
        } catch (IgniteException ie) {
            throw new Exception(ie);
        } finally {
            ignite = null;
        }
    }

    /**
     * Reads the ENABLED flag of the IN_MEMORY_DB configuration section.
     *
     * @return the configured flag
     * @throws Exception if the configuration lookup fails
     */
    public static boolean isIgniteEnabled() throws Exception {
        return ConfigurationManager.getInstance().getPropertyAsBoolean("ENABLED",
                "IN_MEMORY_DB");
    }
}
``

Blocked system-critical thread has been detected. This can lead to cluster-wide undefined behaviour [workerName=tcp-comm-worker, threadName=tcp-comm-worker-#1, blockedFor=18s]
This most likely means that the server node cannot connect to the client's communication port (47100), or vice versa. In 2.8.1 or earlier, the connection needs to be traversable in both directions. In 2.9, a new operation mode will be introduced in which the server never tries to connect to the client; only the client connects to the server.

Related

Is It possible to support sync and async Application Events in Spring[5]

<!-- Custom event multicaster backed by a fixed-size pool of 10 threads. -->
<bean id="applicationEventMulticaster" class="com.test.listener.CustomApplicationEventMulticaster">
    <property name="taskExecutor">
        <bean class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
            <property name="corePoolSize" value="10"/>
            <property name="maxPoolSize" value="10"/>
            <!-- On shutdown, wait up to 200 seconds for submitted tasks to complete. -->
            <property name="waitForTasksToCompleteOnShutdown" value="true"/>
            <property name="awaitTerminationSeconds" value="200"/>
        </bean>
    </property>
</bean>
public class CustomApplicationEventMulticaster extends SimpleApplicationEventMulticaster {
#Override
public void multicastEvent(final ApplicationEvent event, ResolvableType eventType) {
boolean async = (event instanceof AbstractApplicationEvent) ? ((AbstractApplicationEvent) event).isAsyncEvent()
: true;
final SecurityContext sc = SecurityContextHolder.getContext();
ResolvableType defaultEventType = ResolvableType.forInstance(event);
for (final ApplicationListener listener : getApplicationListeners(event, defaultEventType)) {
Executor executor = getTaskExecutor();
if (async && executor != null) {
executor.execute(() -> {
try {
SecurityContextHolder.setContext(sc);
listener.onApplicationEvent(event);
} finally {
SecurityContextHolder.clearContext();
}
});
} else {
listener.onApplicationEvent(event);
}
}
}
}
In Application, I am trying to trigger sync and async event.
Is this fine to do?
executor.execute(() -> {
    try {
        // Give the worker thread its own context carrying the caller's authentication.
        SecurityContext emptyContext = SecurityContextHolder.createEmptyContext();
        emptyContext.setAuthentication(sc.getAuthentication());
        // Without this call the new context is never installed and the listener
        // runs without the propagated authentication.
        SecurityContextHolder.setContext(emptyContext);
        listener.onApplicationEvent(event);
    } finally {
        // Pool threads are reused; clear the context once the listener returns.
        SecurityContextHolder.clearContext();
    }
});

Hibernate Search not releasing db session

We use Hibernate Search (version 3.1) and Lucene (version 2.4) to index content in an application that runs on JBoss 7.2. The database team has reported a huge spike in database sessions. They reported that the DB sessions go idle after serving a few requests. Here is the code:
/**
 * Updates the search index for the given entity primary keys: entities still present in the
 * database are re-indexed, entities that are gone are purged from the index.
 *
 * <p>When {@code session} is {@code null} the method begins its own transaction, obtains a
 * full-text session from the entity manager, and commits at the end (or rolls back on failure).
 * A failure on a single entity is logged and does not stop the remaining entities.
 *
 * <p>NOTE(review): this method never closes the entity manager or session it obtains itself;
 * whether {@code commit(...)} releases them is not visible here. Confirm.
 *
 * @param indexMessage  describes the entity type and the agency code / source number
 * @param entityPKs     primary keys of the entities to synchronise
 * @param session       an existing full-text session, or {@code null} to open one here
 * @param isSelfRebuild when {@code false}, records may additionally be tracked as unindexed
 *                      data while the related index needs sync tracking
 * @throws AAException  a {@code SyncIndexException} wrapping any failure outside the per-entity loop
 */
public void updateDocumentByIds(IndexMessage indexMessage, java.io.Serializable[] entityPKs,FullTextSession session, boolean isSelfRebuild) throws AAException
{
    if (indexMessage.getServProvCode() == null && indexMessage.getSourceNumber() == null)
    {
        logger.error("Agency Code and Source Number are null!");
        return;
    }
    TransactionManager transactionManager = null;
    boolean isNewTransaction = false;
    FullTextSession searchSession = session;
    try
    {
        IIndexAdapter indexAdapter = IndexAdapter.getIndexAdapter(indexMessage.getEntityType());
        IndexDirectoryManager directoryManager = IndexDirectoryManager.getInstance();
        String specifyIndexName = directoryManager.getSpecifyIndexName(indexMessage.getServProvCode());
        if (searchSession == null)
        {
            transactionManager = getTransactionManager();
            transactionManager.begin();
            entityManager = getEntityManager();
            Session hibernateSession = (Session) entityManager.getDelegate();
            searchSession = SwitchSession.getFullTextSession(hibernateSession, specifyIndexName);
            searchSession.setFlushMode(FlushMode.MANUAL); // disable flush operations
            searchSession.setCacheMode(CacheMode.IGNORE); // disable 2nd level cache operations
            isNewTransaction = true;
        }

        // A null agency code together with a null source number was already rejected by the
        // guard at the top, so only the agency lookup needs handling here.
        AgencyModel agencyModel = null;
        if (indexMessage.getServProvCode() != null)
        {
            agencyModel = getAgencyByAgencyCode(searchSession, indexMessage.getServProvCode().toUpperCase());
            if (agencyModel == null)
            {
                // An open transaction, if any, is rolled back by the finally block.
                logger.warn("No such agency:" + indexMessage.getServProvCode());
                return;
            }
        }

        directoryManager.chooseSyncOrSearchDirectory(specifyIndexName, indexMessage.getEntityType(),
            ActionType.SYNC);

        for (java.io.Serializable entityPK : entityPKs)
        {
            try
            {
                logger.info("========Start Update Index==========");
                logger.info("Entity Type: " + indexMessage.getEntityType());
                // String concatenation is null-safe, unlike an explicit toString() call.
                logger.info("Primary Key: " + entityPK);
                Object object = indexAdapter.getObjectByPK(searchSession, entityPK, agencyModel);
                if (object == null)
                {
                    // Remove index when the data was deleted from DB
                    searchSession.purge(EntityMapHelper.getEntityClass(indexMessage.getEntityType()), entityPK);
                }
                else
                {
                    // Update Index when this entity can be found in DB
                    searchSession.index(object);
                }
                // While the related index is rebuilding, this record need be tracked into a LOG table.
                if (!isSelfRebuild
                    && directoryManager.needTrackForSync(searchSession, specifyIndexName,
                        indexMessage.getEntityType()))
                {
                    saveToUnindexedData(indexMessage, indexMessage.getServProvCode(), entityPK);
                }
                logger.info("============ End Now =================\n");
            }
            catch (Exception e)
            {
                // Keep going with the next key; the handler itself must not throw on a null key.
                logger.error("Exception occured during update index for " + specifyIndexName + "/"
                    + entityPK, e);
            }
        }

        searchSession.flushToIndexes();
        searchSession.clear();
        if (isNewTransaction)
        {
            commit(entityManager, searchSession, transactionManager);
            // Committed: prevent the finally block from rolling back.
            transactionManager = null;
        }
    }
    catch (Exception e)
    {
        throw new SyncIndexException("", e);
    }
    finally
    {
        if (transactionManager != null)
        {
            try
            {
                transactionManager.rollback();
            }
            catch (Exception e)
            {
                // Do not mask the primary failure, but leave a trace of the failed rollback.
                logger.error("Failed to roll back index update transaction", e);
            }
        }
        IndexDirectoryThreadLocal.remove();
    }
}
Note: This below Hibernate EntityManagerFactory configuration used for unit test
<!-- JPA EntityManagerFactory for persistence unit AAPU (Hibernate with Hibernate Search, JTA). -->
<bean id="entityManagerFactory" class="org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean">
    <property name="packagesToScan" value="com.comp" />
    <property name="persistenceProviderClass" value="com.comp.orm.hibernate3.compHibernatePersistence" />
    <property name="dataSource" ref="dataSource" />
    <property name="persistenceUnitName" value="AAPU"/>
    <property name="jpaPropertyMap">
        <map>
            <!-- SQL logging / formatting -->
            <entry key="hibernate.show_sql" value="false"/>
            <entry key="hibernate.format_sql" value="true"/>
            <!-- Key corrected: Hibernate settings carry the "hibernate." prefix. -->
            <entry key="hibernate.use_sql_comments" value="true"/>
            <entry key="hibernate.bytecode.use_reflection_optimizer" value="true"/>
            <!-- Fetching and JDBC batching -->
            <entry key="hibernate.max_fetch_depth" value="1"/>
            <entry key="hibernate.default_batch_fetch_size" value="30"/>
            <entry key="hibernate.jdbc.fetch_size" value="30"/>
            <entry key="hibernate.jdbc.batch_size" value="15"/>
            <entry key="hibernate.cache.use_second_level_cache" value="false"/>
            <entry key="hibernate.jdbc.use_scrollable_resultset" value="true"/>
            <entry key="hibernate.temp.use_jdbc_metadata_defaults" value="false"/>
            <!-- Declared once; the original map repeated this entry further down. -->
            <entry key="hibernate.jdbc.factory_class" value="org.hibernate.jdbc.BatchingBatcherFactory"/>
            <!--
                c3p0 pool settings.
                NOTE(review): a dataSource is injected above, so Hibernate presumably takes its
                connections from that DataSource and these c3p0 settings may have no effect.
                Confirm against the actual pool in use.
            -->
            <entry key="hibernate.c3p0.min_size" value="50"/>
            <entry key="hibernate.c3p0.max_size" value="100"/>
            <entry key="hibernate.c3p0.timeout" value="120"/>
            <entry key="hibernate.c3p0.max_statements" value="100"/>
            <entry key="hibernate.c3p0.idle_test_period" value="3000"/>
            <!-- Hibernate Search -->
            <entry key="hibernate.search.default.directory_provider" value="com.comp.aa.globalsearch.directory.IndexDirectoryProvider"/>
            <entry key="hibernate.search.default.indexBase" value="/index"/>
            <entry key="hibernate.search.reader.strategy" value="com.comp.aa.globalsearch.directory.SwitchReaderProvider"/>
            <!-- JTA integration -->
            <entry key="hibernate.transaction.manager_lookup_class" value="org.hibernate.transaction.JBossTransactionManagerLookup"/>
            <entry key="hibernate.current_session_context_class" value="jta"/>
        </map>
    </property>
    <property name="persistenceUnitPostProcessors">
        <list>
            <bean class="com.comp.orm.util.JtaPersistenceUnitPostProcessor">
                <property name="jtaDataSource" ref="dataSource"/>
            </bean>
        </list>
    </property>
</bean>
In some forums, I saw that hibernate.c3p0.idle_test_period should be less than or equal to hibernate.c3p0.timeout. We changed both values to 5 minutes, but the problem still exists.
Any idea why db sessions are idle?

Auto-retry Failed Jobs In Spring XD

I'm looking for a standard pattern for automatically retrying failed jobs within Spring XD for a configured number of times and after a specified delay. Specifically, I have an HTTP item reader job that is triggered periodically from a cron stream. Occasionally we see the HTTP item reader fail due to network blips so we want the job to automatically try again.
I've tried with a JobExecutionListener which picks up when a job has failed but the tricky bit is actually retrying the failed job. I can do it by triggering a HTTP PUT to the XD admin controller (e.g. http://xd-server:9393/jobs/executions/2?restart=true)
which successfully retries the job. However, I want to be able to:
Specify a delay before retrying
Have some sort of audit within XD to indicate the job will be retried in X seconds.
Adding the delay can be done within the JobExecutionListener, but it involves spinning off a thread with a delay, which isn't really traceable from the XD container, so it's difficult to see whether a job is about to be retried or not.
It appears that you need to have a specific job definition that does delayed job retries for you to be able to get any trace of it from the XD container.
Can anyone suggest a pattern for this?
So here's the solution I went for in the end:
Created a job execution listener
public class RestartableBatchJobExecutionListener extends JobExecutionListener {
private Logger logger = LoggerFactory.getLogger(this.getClass());
public final static String JOB_RESTARTER_NAME = "jobRestarter";
/**
* A list of valid exceptions that are permissible to restart the job on
*/
private List<Class<Throwable>> exceptionsToRestartOn = new ArrayList<Class<Throwable>>();
/**
* The maximum number of times the job can be re-launched before failing
*/
private int maxRestartAttempts = 0;
/**
* The amount of time to wait in milliseconds before restarting a job
*/
private long restartDelayMs = 0;
/**
* Map of all the jobs against how many times they have been attempted to restart
*/
private HashMap<Long,Integer> jobInstanceRestartCount = new HashMap<Long,Integer>();
#Autowired(required=false)
#Qualifier("aynchJobLauncher")
JobLauncher aynchJobLauncher;
#Autowired(required=false)
#Qualifier("jobRegistry")
JobLocator jobLocator;
/*
* (non-Javadoc)
* #see org.springframework.batch.core.JobExecutionListener#afterJob(org.springframework.batch.core.JobExecution)
*/
#Override
public void afterJob(JobExecution jobExecution) {
super.afterJob(jobExecution);
// Check if we can restart if the job has failed
if( jobExecution.getExitStatus().equals(ExitStatus.FAILED) )
{
applyRetryPolicy(jobExecution);
}
}
/**
* Executes the restart policy if one has been specified
*/
private void applyRetryPolicy(JobExecution jobExecution)
{
String jobName = jobExecution.getJobInstance().getJobName();
Long instanceId = jobExecution.getJobInstance().getInstanceId();
if( exceptionsToRestartOn.size() > 0 && maxRestartAttempts > 0 )
{
// Check if the job has failed for a restartable exception
List<Throwable> failedOnExceptions = jobExecution.getAllFailureExceptions();
for( Throwable reason : failedOnExceptions )
{
if( exceptionsToRestartOn.contains(reason.getClass()) ||
exceptionsToRestartOn.contains(reason.getCause().getClass()) )
{
// Get our restart count for this job instance
Integer restartCount = jobInstanceRestartCount.get(instanceId);
if( restartCount == null )
{
restartCount = 0;
}
// Only restart if we haven't reached our limit
if( ++restartCount < maxRestartAttempts )
{
try
{
reLaunchJob(jobExecution, reason, restartCount);
jobInstanceRestartCount.put(instanceId, restartCount);
}
catch (Exception e)
{
String message = "The following error occurred while attempting to re-run job " + jobName + ":" + e.getMessage();
logger.error(message,e);
throw new RuntimeException( message,e);
}
}
else
{
logger.error("Failed to successfully execute jobInstanceId {} of job {} after reaching the maximum restart limit of {}. Abandoning job",instanceId,jobName,maxRestartAttempts );
try
{
jobExecution.setStatus(BatchStatus.ABANDONED);
}
catch (Exception e)
{
throw new RuntimeException( "The following error occurred while attempting to abandon job " + jobName + ":" + e.getMessage(),e);
}
}
break;
}
}
}
}
/**
* Re-launches the configured job with the current job execution details
* #param jobExecution
* #param reason
* #throws JobParametersInvalidException
* #throws JobInstanceAlreadyCompleteException
* #throws JobRestartException
* #throws JobExecutionAlreadyRunningException
*/
private void reLaunchJob( JobExecution jobExecution, Throwable reason, int restartCount ) throws JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException
{
try
{
Job jobRestarter = jobLocator.getJob(JOB_RESTARTER_NAME);
JobParameters jobParameters =new JobParametersBuilder().
addLong("delay",(long)restartDelayMs).
addLong("jobExecutionId", jobExecution.getId()).
addString("jobName", jobExecution.getJobInstance().getJobName())
.toJobParameters();
logger.info("Re-launching job with name {} due to exception {}. Attempt {} of {}", jobExecution.getJobInstance().getJobName(), reason, restartCount, maxRestartAttempts);
aynchJobLauncher.run(jobRestarter, jobParameters);
}
catch (NoSuchJobException e)
{
throw new RuntimeException("Failed to find the job restarter with name=" + JOB_RESTARTER_NAME + " in container context",e);
}
}
}
Then in the module definition, I add this job listener to the job:
<!-- Single-step chunk job; the restart listener is attached at job level. -->
<batch:job id="job">
    <batch:listeners>
        <batch:listener ref="jobExecutionListener"/>
    </batch:listeners>
    <batch:step id="doReadWriteStuff">
        <batch:tasklet>
            <!-- Items are read and written in chunks of 3 per commit. -->
            <batch:chunk reader="itemReader" writer="itemWriter" commit-interval="3"/>
        </batch:tasklet>
    </batch:step>
</batch:job>
<!-- Job execution listener that attempts to restart failed jobs: limit of 3, 60000 ms delay. -->
<bean id="jobExecutionListener" class="com.mycorp.RestartableBatchJobExecutionListener">
    <property name="maxRestartAttempts" value="3"/>
    <property name="restartDelayMs" value="60000"/>
    <!-- Only failures of these exception types trigger a restart. -->
    <property name="exceptionsToRestartOn">
        <list>
            <value>com.mycorp.ExceptionIWantToRestartOn</value>
        </list>
    </property>
</bean>
<!--
    Job launcher that restarts jobs in a separate thread, backed by the pool below.
    This is important as the delayedRestartJob fails on the HTTP call otherwise!
-->
<bean id="executor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
    <property name="maxPoolSize" value="10"/>
</bean>
<bean id="aynchJobLauncher" class="com.mycorp.AsyncJobLauncher">
    <property name="jobRepository" ref="jobRepository"/>
    <property name="taskExecutor" ref="executor"/>
</bean>
AsyncJobLauncher:
public class AsyncJobLauncher extends SimpleJobLauncher
{
#Override
#Async
public JobExecution run(final Job job, final JobParameters jobParameters)
throws JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException,
JobParametersInvalidException
{
return super.run(job, jobParameters);
}
}
I then have a separate processor module purely for restarting jobs after a delay (this allows us audit from the spring XD ui or db):
delayedJobRestart.xml:
<!-- Two-step job: sleep for the requested delay, then issue the HTTP restart request. -->
<batch:job id="delayedRestartJob">
<batch:step id="sleep" next="restartJob">
<batch:tasklet ref="sleepTasklet" />
</batch:step>
<batch:step id="restartJob">
<batch:tasklet ref="jobRestarter" />
</batch:step>
</batch:job>
<!-- Step-scoped: the delay comes from the 'delay' job parameter, falling back to the ${delay} placeholder. -->
<bean id="sleepTasklet" class="com.mycorp.SleepTasklet" scope="step">
<property name="delayMs" value="#{jobParameters['delay'] != null ? jobParameters['delay'] : '${delay}'}" />
</bean>
<!-- Step-scoped: sends a PUT to the XD admin executions endpoint with restart=true for the given job execution id. -->
<bean id="jobRestarter" class="com.mycorp.HttpRequestTasklet" init-method="init" scope="step">
<property name="uri" value="http://${xd.admin.ui.host}:${xd.admin.ui.port}/jobs/executions/#{jobParameters['jobExecutionId'] != null ? jobParameters['jobExecutionId'] : '${jobExecutionId}'}?restart=true" />
<property name="method" value="PUT" />
</bean>
delayedJobProperties:
# Module options for the delayed restart job.
# Job execution ID (no default is declared here)
options.jobExecutionId.type=Long
options.jobExecutionId.description=The job execution ID of the job to be restarted
# Job name (no default is declared here)
options.jobName.type=String
options.jobName.description=The name of the job to be restarted. This is more for monitoring purposes
# Delay in milliseconds; defaults to 10000
options.delay.type=Long
options.delay.description=The delay in milliseconds this job will wait until triggering the restart
options.delay.default=10000
and accompanying helper beans:
SleepTasklet:
public class SleepTasklet implements Tasklet
{
private static Logger logger = LoggerFactory.getLogger(SleepTasklet.class);
#Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception
{
logger.debug("Pausing current job for {}ms",delayMs);
Thread.sleep( delayMs );
return RepeatStatus.FINISHED;
}
private long delayMs;
public long getDelayMs()
{
return delayMs;
}
public void setDelayMs(long delayMs)
{
this.delayMs = delayMs;
}
}
HttpRequestTasklet:
public class HttpRequestTasklet implements Tasklet
{
private HttpClient httpClient = null;
private static final Logger LOGGER = LoggerFactory.getLogger(HttpRequestTasklet.class);
private String uri;
private String method;
/**
* Initialise HTTP connection.
* #throws Exception
*/
public void init() throws Exception
{
// Create client
RequestConfig config = RequestConfig.custom()
.setCircularRedirectsAllowed(true)
.setRedirectsEnabled(true)
.setExpectContinueEnabled(true)
.setRelativeRedirectsAllowed(true)
.build();
httpClient = HttpClientBuilder.create()
.setRedirectStrategy(new LaxRedirectStrategy())
.setDefaultRequestConfig(config)
.setMaxConnTotal(1)
.build();
}
#Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception
{
if (LOGGER.isDebugEnabled()) LOGGER.debug("Attempt HTTP {} from '" + uri + "'...",method);
HttpUriRequest request = null;
switch( method.toUpperCase() )
{
case "GET":
request = new HttpGet(uri);
break;
case "POST":
request = new HttpPost(uri);
break;
case "PUT":
request = new HttpPut(uri);
break;
default:
throw new RuntimeException("Http request method " + method + " not supported");
}
HttpResponse response = httpClient.execute(request);
// Check response status and, if valid wrap with InputStreamReader
StatusLine status = response.getStatusLine();
if (status.getStatusCode() != HttpStatus.SC_OK)
{
throw new Exception("Failed to get data from '" + uri + "': " + status.getReasonPhrase());
}
if (LOGGER.isDebugEnabled()) LOGGER.debug("Successfully issued request");
return RepeatStatus.FINISHED;
}
public String getUri()
{
return uri;
}
public void setUri(String uri)
{
this.uri = uri;
}
public String getMethod()
{
return method;
}
public void setMethod(String method)
{
this.method = method;
}
public HttpClient getHttpClient()
{
return httpClient;
}
public void setHttpClient(HttpClient httpClient)
{
this.httpClient = httpClient;
}
}
And finally when all is built and deployed, create your jobs as a pair (note, the restarter should be defined as "jobRestarter"):
job create --name myJob --definition "MyJobModule " --deploy true
job create --name jobRestarter --definition "delayedRestartJob" --deploy true
A little convoluted, but it seems to work.

Quartz threadpool reconfiguration

I'm using Quartz and want to change its thread pool size via a remote JMX call, but unfortunately I couldn't find a proper solution. Is it possible to change the configuration of a running job programmatically?
I used Quartz with spring. In my web.xml I created a spring ContextListener. My app starts the Quartz job and exposes 2 JMX methods to start and stop on demand.
<!-- web.xml: register the context listener that also registers the Quartz JMX MBean. -->
<listener>
<listener-class>za.co.lance.admin.infrastructure.ui.util.MBeanContextListener</listener-class>
</listener>
The MBeanContextListener class like this.
public class MBeanContextListener extends ContextLoaderListener {
private ObjectName objectName;
private static Logger logger = LoggerFactory.getLogger(MBeanContextListener.class);
#Override
public void contextDestroyed(final ServletContextEvent sce) {
super.contextDestroyed(sce);
logger.debug("=============> bean context listener destroy");
final MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
try {
mbeanServer.unregisterMBean(objectName);
logger.info("=============> QuartzJmx unregisterMBean ok");
} catch (final Exception e) {
e.printStackTrace();
}
}
#Override
public void contextInitialized(final ServletContextEvent sce) {
super.contextInitialized(sce);
logger.debug("=============> bean context listener started");
final MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
try {
final QuartzJmx processLatestFailedDocumentsMbean = new QuartzJmx();
Scheduler scheduler = (Scheduler) ContextLoader.getCurrentWebApplicationContext().getBean("runProcessLatestFailedDocumentsScheduler");
processLatestFailedDocumentsMbean.setScheduler(scheduler);
objectName = new ObjectName("za.co.lance.admin.infrastructure.jmx.mbeans:type=QuartzJmxMBean");
mbeanServer.registerMBean(processLatestFailedDocumentsMbean, objectName);
logger.info("=============> QuartzJmx registerMBean ok");
} catch (final Exception e) {
e.printStackTrace();
}
}
}
The QuartzJmx class. PLEASE NOTE! Any MBean class (QuartzJmx) must have an interface ending with MBean (QuartzJmxMBean ).
#Component
public class QuartzJmx implements QuartzJmxMBean {
private Scheduler scheduler;
private static Logger LOG = LoggerFactory.getLogger(QuartzJmx.class);
#Override
public synchronized void suspendRunProcessLatestFailedDocumentsJob() {
LOG.info("Suspending RunProcessLatestFailedDocumentsJob");
if (scheduler != null) {
try {
if (scheduler.isStarted()) {
scheduler.standby();
LOG.info("RunProcessLatestFailedDocumentsJob suspended");
} else {
LOG.info("RunProcessLatestFailedDocumentsJob already suspended");
throw new SchedulerException("RunProcessLatestFailedDocumentsJob already suspended");
}
} catch (SchedulerException e) {
LOG.error(e.getMessage());
}
} else {
LOG.error("Cannot suspend RunProcessLatestFailedDocumentsJob. Scheduler = null");
throw new IllegalArgumentException("Cannot suspend RunProcessLatestFailedDocumentsJob. Scheduler = null");
}
}
#Override
public synchronized void startRunProcessLatestFailedDocumentsJob() {
LOG.info("Starting RunProcessLatestFailedDocumentsJob");
if (scheduler != null) {
try {
if (scheduler.isInStandbyMode()) {
scheduler.start();
LOG.info("RunProcessLatestFailedDocumentsJob started");
} else {
LOG.info("RunProcessLatestFailedDocumentsJob already started");
throw new SchedulerException("scheduler already started");
}
} catch (SchedulerException e) {
LOG.error(e.getMessage());
}
} else {
LOG.error("Cannot start RunProcessLatestFailedDocumentsJob. Scheduler = null");
throw new IllegalArgumentException("Cannot start RunProcessLatestFailedDocumentsJob. Scheduler = null");
}
}
#Override
public void setScheduler(Scheduler scheduler) {
this.scheduler = scheduler;
}
And lastly, the Spring context:
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
       http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">

    <!-- Service bean handed to the job through its job data map -->
    <bean id="runProcessLatestFailedDocumentsTask"
          class="za.co.lance.admin.infrastructure.service.vbs.process.ProcessDocumentServiceImpl" />

    <!-- Quartz job definition -->
    <bean name="runProcessLatestFailedDocumentsJob" class="org.springframework.scheduling.quartz.JobDetailBean">
        <property name="jobClass"
                  value="za.co.lance.admin.infrastructure.service.quartz.RunProcessLatestFailedDocuments" />
        <property name="jobDataAsMap">
            <map>
                <entry key="processDocumentService" value-ref="runProcessLatestFailedDocumentsTask" />
            </map>
        </property>
    </bean>

    <!-- Cron trigger for the job above -->
    <bean id="processLatestFailedDocumentsTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
        <property name="jobDetail" ref="runProcessLatestFailedDocumentsJob" />
        <!-- Cron expression fields, separated by spaces: -->
        <!-- Seconds Minutes Hours Day-of-Month Month Day-of-Week Year(optional) -->
        <!-- Fires on the hour, every hour from 09:00 through 18:00, Monday to Saturday -->
        <property name="cronExpression" value="0 0 9-18 ? * MON-SAT" />
    </bean>

    <!-- Scheduler that owns the job and its trigger; looked up by id from MBeanContextListener -->
    <bean id="runProcessLatestFailedDocumentsScheduler"
          class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
        <property name="jobDetails">
            <list>
                <ref bean="runProcessLatestFailedDocumentsJob" />
            </list>
        </property>
        <property name="triggers">
            <list>
                <ref bean="processLatestFailedDocumentsTrigger" />
            </list>
        </property>
    </bean>
</beans>

In HornetQ, is the consumer invoked automatically?

I looked through all of the HornetQ examples, but I couldn't find one in which the consumer is automatically invoked whenever a message arrives from the producer.
Please point me to some example code or give me a hint.
thanks in advance.
Use DefaultMessageListenerContainer. You can register a listener to it and consume messages asynchronously. Follow this link for more information about tuning MessageListenerContainer: http://bsnyderblog.blogspot.se/2010/05/tuning-jms-message-consumption-in.html.
HornetQ dependencies you need (I used a standalone hornetq-2.3.0.CR2; you also need some Spring jars):
<dependencies>
    <!-- HornetQ client libraries and the Netty transport: begin -->
    <dependency>
        <groupId>org.jboss.netty</groupId>
        <artifactId>netty</artifactId>
        <version>3.2.7.Final</version>
    </dependency>
    <dependency>
        <groupId>org.hornetq</groupId>
        <artifactId>hornetq-jms-client</artifactId>
        <version>2.3.0.CR2</version>
    </dependency>
    <dependency>
        <groupId>org.hornetq</groupId>
        <artifactId>hornetq-core-client</artifactId>
        <version>2.3.0.CR2</version>
    </dependency>
    <!-- HornetQ client libraries and the Netty transport: end -->
</dependencies>
The beans you should use in your applicationContext.xml (I didn't use JNDI to obtain the ConnectionFactory and destinations; for that, you can follow this question):
<!-- ConnectionFactory used to connect to HornetQ; 5445 is the HornetQ acceptor port -->
<bean name="connectionFactory" class="messaging.jms.CustomHornetQJMSConnectionFactory">
    <constructor-arg index="0" name="ha" value="false" />
    <constructor-arg index="1" name="commaSepratedServerUrls" value="127.0.0.1:5445" />
</bean>

<!-- Abstract template for destinations; pubSubDomain=false means a queue (the default) -->
<bean id="destinationParent" class="messaging.jms.JmsDestinationFactoryBean" abstract="true">
    <property name="pubSubDomain" value="false" />
</bean>

<!-- Concrete destination: the queue named "example" -->
<bean id="exampleDestination" parent="destinationParent">
    <property name="destinationName" value="example" />
</bean>

<!-- Listener that receives the messages -->
<bean id="messageHandler" class="messaging.consumer.MessageHandler">
</bean>

<!-- Listener container that wires the destination, listener and connection factory together -->
<bean id="paymentListenerContainer" class="org.springframework.jms.listener.DefaultMessageListenerContainer">
    <property name="destination" ref="exampleDestination" />
    <property name="messageListener" ref="messageHandler" />
    <property name="connectionFactory" ref="connectionFactory" />
    <property name="sessionTransacted" value="true" />
    <property name="concurrentConsumers" value="1" />
    <property name="maxConcurrentConsumers" value="10" />
    <property name="idleConsumerLimit" value="2" />
    <property name="idleTaskExecutionLimit" value="5" />
    <property name="receiveTimeout" value="3000" />
</bean>
CustomHornetQJMSConnectionFactory:
/**
 * HornetQ JMS connection factory that is configured from a comma-separated
 * list of {@code host:port} server addresses instead of ready-made
 * {@link TransportConfiguration} objects, which makes it easy to declare as a
 * Spring bean with plain string constructor arguments.
 */
public class CustomHornetQJMSConnectionFactory extends org.hornetq.jms.client.HornetQJMSConnectionFactory
{
    private static final long serialVersionUID = 1L;

    /**
     * @param ha                      passed through unchanged to the superclass constructor
     * @param commaSepratedServerUrls comma-separated {@code host:port} entries, e.g. {@code "127.0.0.1:5445"}
     * @throws IllegalArgumentException if the list is null, blank or contains a malformed entry
     */
    public CustomHornetQJMSConnectionFactory(boolean ha, String commaSepratedServerUrls)
    {
        super(ha, converToTransportConfigurations(commaSepratedServerUrls));
    }

    /**
     * Converts a comma-separated list of {@code host:port} entries into one Netty
     * {@link TransportConfiguration} per entry, in the order given. Whitespace
     * around entries, hosts and ports is ignored.
     *
     * @param commaSepratedServerUrls comma-separated {@code host:port} entries
     * @return one transport configuration per entry
     * @throws IllegalArgumentException if the list is null, blank or an entry is not of the form {@code host:port}
     */
    public static TransportConfiguration[] converToTransportConfigurations(String commaSepratedServerUrls)
    {
        if (commaSepratedServerUrls == null || commaSepratedServerUrls.trim().isEmpty())
        {
            throw new IllegalArgumentException("Server URL list must not be null or empty");
        }
        String[] serverUrls = commaSepratedServerUrls.split(",");
        TransportConfiguration[] transportconfigurations = new TransportConfiguration[serverUrls.length];
        for (int i = 0; i < serverUrls.length; i++)
        {
            String serverUrl = serverUrls[i].trim();
            String[] urlParts = serverUrl.split(":");
            if (urlParts.length < 2)
            {
                // Fail with a message naming the bad entry instead of an ArrayIndexOutOfBoundsException.
                throw new IllegalArgumentException(
                        "Invalid server URL '" + serverUrl + "', expected host:port");
            }
            HashMap<String, Object> map = new HashMap<String, Object>();
            map.put(TransportConstants.HOST_PROP_NAME, urlParts[0].trim());
            map.put(TransportConstants.PORT_PROP_NAME, urlParts[1].trim());
            transportconfigurations[i] = new TransportConfiguration(NettyConnectorFactory.class.getName(), map);
        }
        return transportconfigurations;
    }
}
JmsDestinationFactoryBean (Used in destinationParent bean):
public class JmsDestinationFactoryBean implements FactoryBean<Destination>
{
private String destinationName;
private boolean pubSubDomain = false;
public void setDestinationName(String destinationName) {
this.destinationName = destinationName;
}
public void setPubSubDomain(boolean pubSubDomain) {
this.pubSubDomain = pubSubDomain;
}
#Override
public Class<?> getObjectType()
{
return Destination.class;
}
#Override
public boolean isSingleton()
{
return true;
}
#Override
public Destination getObject() throws Exception
{
if(pubSubDomain)
{
return HornetQJMSClient.createTopic(destinationName);
}
else
{
return HornetQJMSClient.createQueue(destinationName);
}
}
}
MessageHandler (received messages are passed to the onMessage method for processing; for simplicity, you can implement javax.jms.MessageListener instead of SessionAwareMessageListener):
public class MessageHandler implements org.springframework.jms.listener.SessionAwareMessageListener<Message>
{
#Override
public void onMessage(Message msg, Session session) throws JMSException
{
if(msg instanceof TextMessage)
{
System.out.println(((TextMessage)msg).getText());
session.commit();
}
else
{
session.rollback(); // send message back to the queue
}
}