Java-Spark-Mongo: filter(dataset.col(newTime).$greater(oldTime)) not running on full data set - mongodb

I have written Java Spark code that uses the MongoDB connector. It is supposed to fetch all rows from MongoDB where the column createdDate is greater than the previous run's high-water mark (the max createdDate seen in each run, which I store in Oracle; initially the high-water-mark value in Oracle is 1900-01-01 00:00:00.000).
This createdDate column is of ISODate type in MongoDB.
In my MongoDB data, the max value stored for this column createdDate is 2018-04-11 01:43:20.165.
But the filter in the code is not working as desired: on the first run it sometimes fetches only up to 2018-03-30 21:48:59.519, and only on the second or third run does it fetch up to the max value (2018-04-11 01:43:20.165).
Ideally this should happen in the first run itself, since the initial high-water-mark value is 1900-01.....
Here is the code:
package mongo;
import java.net.URI;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.sql.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.bson.Document;
import static org.apache.spark.sql.functions.*;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.rdd.api.java.JavaMongoRDD;
import java.sql.Timestamp;
public final class MongoRead
{
private static Connection con=null;
private static String readHighWaterMark(String table, String oraConn, String oraUser, String oraPswd) throws Exception
{
String highWaterMarkValue = "";
try
{
con=DriverManager.getConnection(oraConn,oraUser,oraPswd);
Statement stmt=con.createStatement();
ResultSet rs=stmt.executeQuery("select * from difa.HIGH_WATER_MARK_TABLE where table_nm='"+table+"'");
while(rs.next()){
highWaterMarkValue = rs.getString(3);
}
}
catch(Exception e){
e.printStackTrace();
con.close();
}
return highWaterMarkValue;
}
private static void setHighWaterMark(String key, String value) throws Exception
{
PreparedStatement pStmt=con.prepareStatement("UPDATE high_water_mark_table SET high_water_mark_VALUE='"+value+"' where table_nm='"+key+"'");
int i=pStmt.executeUpdate();
System.out.println(i+" records updated");
}
public static void main(final String[] args) throws Exception {
if(args.length<8){
System.out.println("Please provide correct inputs");
System.exit(1);
}
String mongoAddress = args[0];
String clusterAddress = args[1];
String oraConn = args[2];
String oraUser = args[3];
String oraPswd = args[4];
String tableNm = args[5];
String highWaterCol = args[6];
String loadType = args[7];
SparkSession spark = SparkSession.builder()
.master("local")
.appName("MongoSparkRecordReader")
.config("spark.mongodb.input.uri", mongoAddress)
.config("spark.mongodb.output.uri", mongoAddress)
.getOrCreate();
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
try{
FileSystem fs = FileSystem.get(new URI(clusterAddress),jsc.hadoopConfiguration());
fs.delete(new Path(clusterAddress),true);
}
catch(Exception e){
e.printStackTrace();
}
/* ********Read data from MongoDB******* */
Dataset<Row> dataset = MongoSpark.load(jsc).toDF();
if(loadType.equalsIgnoreCase("I")){
String highWaterMark = readHighWaterMark(tableNm,oraConn,oraUser,oraPswd);
System.out.println("============HIGH_WATER_MARK_VALUE: "+highWaterMark);
Timestamp oldTime = Timestamp.valueOf(highWaterMark.replace("T"," ").replace("Z", ""));
//Fetch records where createdDate is greater than the previous high-water mark.
Dataset<Row> filtered = dataset.filter(dataset.col(highWaterCol).$greater(oldTime)).persist();
filtered.toJSON().write().text(clusterAddress);
//Calculating the MAX(createdDate) in the fetched dataset.
Dataset<Row> maxHighWaterRow = filtered.agg(max(filtered.col(highWaterCol)).alias("newHighWater")).persist();
List<Timestamp> newHighWaterValue = maxHighWaterRow.select("newHighWater").as(Encoders.TIMESTAMP()).collectAsList();
Timestamp maxHighWaterMarkValue = newHighWaterValue.iterator().next();
SimpleDateFormat dtFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
Timestamp oldDate = Timestamp.valueOf(highWaterMark.replace('T', ' ').replace("Z",""));
//Setting HIGH_WATER_MARK_VALUE if a greater value is detected.
if(maxHighWaterMarkValue !=null && maxHighWaterMarkValue.after(oldDate)){
setHighWaterMark(tableNm,dtFormat.format(maxHighWaterMarkValue).replace(" ", "T").concat("Z"));
}
}
else{
dataset.toJSON().write().text(clusterAddress);
}
con.close();
jsc.close();
}
}
Any idea why the filter with $greater is not fetching the records correctly?

I fixed this by adding .persist() for the Dataset:
/* ********Read data from MongoDB******* */
Dataset<Row> dataset = MongoSpark.load(jsc).toDF().persist();
....
..
...
Dataset<Row> filtered = dataset.filter(dataset.col(highWaterCol).$greater(oldTime)).persist();
I don't know why without persist() the filter was not running on the whole dataset.
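For reference, one way to sidestep Spark's re-evaluation of the Mongo source altogether is to push the date comparison down to MongoDB with an aggregation pipeline, so the ISODate filtering happens server-side before Spark sees the data. This is only a sketch under the assumption that the connector's withPipeline API and the extended-JSON $date literal work in your connector/driver versions; "createdDate" and the date literal are placeholders for highWaterCol and the Oracle high-water mark:
import static java.util.Collections.singletonList;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.bson.Document;
import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.rdd.api.java.JavaMongoRDD;
// Sketch only: filter inside MongoDB before the data reaches Spark.
// "createdDate" and the ISO-8601 literal stand in for highWaterCol and the stored high-water mark.
JavaMongoRDD<Document> rdd = MongoSpark.load(jsc);
JavaMongoRDD<Document> filteredRdd = rdd.withPipeline(singletonList(
    Document.parse("{ $match: { createdDate: { $gt: { $date: \"1900-01-01T00:00:00.000Z\" } } } }")));
Dataset<Row> filtered = filteredRdd.toDF();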

Related

Adding file to GitHub using java client - org.eclipse.egit.github.core

I am trying to add a file to a repository using the code below, but I am getting the error below. I just want to add a file for now.
org.eclipse.egit.github.core.client.RequestException: Invalid request.
For 'properties/email', nil is not a string. For 'properties/name',
nil is not a string. For 'properties/email', nil is not a string. For
'properties/name', nil is not a string. (422) at
org.eclipse.egit.github.core.client.GitHubClient.createException(GitHubClient.java:552)
at
org.eclipse.egit.github.core.client.GitHubClient.sendJson(GitHubClient.java:643)
at
org.eclipse.egit.github.core.client.GitHubClient.post(GitHubClient.java:757)
at
org.eclipse.egit.github.core.service.DataService.createCommit(DataService.java:397)
I sense that it is expecting some properties, but how to supply them is not clear. What is it that I am missing?
Referring to https://gist.github.com/Detelca/2337731
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import org.eclipse.egit.github.core.Blob;
import org.eclipse.egit.github.core.Commit;
import org.eclipse.egit.github.core.CommitUser;
import org.eclipse.egit.github.core.Reference;
import org.eclipse.egit.github.core.Repository;
import org.eclipse.egit.github.core.RepositoryCommit;
import org.eclipse.egit.github.core.Tree;
import org.eclipse.egit.github.core.TreeEntry;
import org.eclipse.egit.github.core.TypedResource;
import org.eclipse.egit.github.core.User;
import org.eclipse.egit.github.core.client.GitHubClient;
import org.eclipse.egit.github.core.service.CommitService;
import org.eclipse.egit.github.core.service.DataService;
import org.eclipse.egit.github.core.service.RepositoryService;
import org.eclipse.egit.github.core.service.UserService;
public class GHWriter {
public static void main(String[] args) {
try {
new GHWriter().writeFile("test_two.txt", "test content");
} catch (IOException e) {
e.printStackTrace();
}
}
//https://gist.github.com/Detelca/2337731
public boolean writeFile(String fileName, String fileContent) throws IOException{
// initialize github client
GitHubClient client = new GitHubClient();
//TextView password = (TextView)findViewById(R.id.textViewPassword);
client.setCredentials("username", "password");
// create needed services
RepositoryService repositoryService = new RepositoryService();
CommitService commitService = new CommitService(client);
DataService dataService = new DataService(client);
// get some sha's from current state in git
Repository repository = repositoryService.getRepository("username", "repositoryName");
String baseCommitSha = repositoryService.getBranches(repository).get(0).getCommit().getSha();
RepositoryCommit baseCommit = commitService.getCommit(repository, baseCommitSha);
String treeSha = baseCommit.getSha();
// create new blob with data
Blob blob = new Blob();
blob.setContent("[\"" + System.currentTimeMillis() + "\"]").setEncoding(Blob.ENCODING_UTF8);
String blob_sha = dataService.createBlob(repository, blob);
Tree baseTree = dataService.getTree(repository, treeSha);
// create new tree entry
TreeEntry treeEntry = new TreeEntry();
treeEntry.setPath("testfile.txt");
treeEntry.setMode(TreeEntry.MODE_BLOB);
treeEntry.setType(TreeEntry.TYPE_BLOB);
treeEntry.setSha(blob_sha);
treeEntry.setSize(blob.getContent().length());
Collection<TreeEntry> entries = new ArrayList<TreeEntry>();
entries.add(treeEntry);
Tree newTree = dataService.createTree(repository, entries, baseTree.getSha());
// create commit
Commit commit = new Commit();
commit.setMessage("first commit at " + new Date(System.currentTimeMillis()).toLocaleString());
commit.setTree(newTree);
UserService userService = new UserService( client );
User user = userService.getUser();
CommitUser author = new CommitUser();
author.setName( user.getName() );
Calendar now = Calendar.getInstance();
author.setDate(now.getTime());
commit.setAuthor(author);
commit.setCommitter(author);
List<Commit> listOfCommits = new ArrayList<Commit>();
listOfCommits.add(new Commit().setSha(baseCommitSha));
// listOfCommits.containsAll(base_commit.getParents());
commit.setParents(listOfCommits);
// commit.setSha(base_commit.getSha());
Commit newCommit = dataService.createCommit(repository, commit);
// create resource
TypedResource commitResource = new TypedResource();
commitResource.setSha(newCommit.getSha());
commitResource.setType(TypedResource.TYPE_COMMIT);
commitResource.setUrl(newCommit.getUrl());
// get master reference and update it
Reference reference = dataService.getReference(repository, "heads/master");
reference.setObject(commitResource);
dataService.editReference(repository, reference, true);
System.out.println("Committed URL: "+ newCommit.getUrl());
return false;
}
}
Thanks
After some debugging, I found that the email and name values are coming back as null, which is the source of the issue.
Adding the two lines below will solve the issue:
author.setName( userName );
author.setEmail(email);
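To put that in context, here is a minimal sketch of how the author block inside writeFile could be populated so neither property is nil; the fallback literals are placeholders (not values from the original post), and using user.getEmail() assumes the authenticated user exposes an email via the API:
// Populate the commit author explicitly so the GitHub API never receives nil name/email.
// The fallback strings are placeholders; substitute real values or configuration.
User user = userService.getUser();
CommitUser author = new CommitUser();
author.setName(user.getName() != null ? user.getName() : "userName");
author.setEmail(user.getEmail() != null ? user.getEmail() : "email@example.com");
author.setDate(Calendar.getInstance().getTime());
commit.setAuthor(author);
commit.setCommitter(author);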

Reading data from kafka topic via golden gate

I have Spring Boot code that reads data from a Kafka topic. The code works as expected when data is fed to the topic via the Kafka console producer. When I try to push data into the Kafka topic via GoldenGate, the code doesn't read the data from the topic, although I can see that GoldenGate is able to write the data into the topic. Can anyone suggest why this change in behavior?
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.bson.Document;
import org.json.JSONArray;
import org.json.JSONObject;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
public class VideoConsumer implements Runnable {
private ObjectMapper objectMapper;
private KafkaStream<byte[], byte[]> kafkaStream;
private int threadNumber;
public VideoConsumer(KafkaStream<byte[], byte[]> kafkaStream, int threadNumber) {
this.threadNumber = threadNumber;
this.kafkaStream = kafkaStream;
this.objectMapper = new ObjectMapper();
}
@Override
public void run() {
ConsumerIterator<byte[], byte[]> it = kafkaStream.iterator();
while (it.hasNext()) {
byte[] messageData = it.next().message();
try {
//String videoFromMessage = objectMapper.readValue(messageData, String.class);
//byte[] videoFromMessage = it.next().message();
//System.out.print("got message");
String streamData = new String(messageData);
System.out.print("Thread:" + threadNumber + ".Consuming video: " + streamData + "\n");
String changed=streamData.toString();
int pos=changed.lastIndexOf("}}");
String change=changed.substring(0,pos );
change=change.replace("}}", "}},");
String res=change.concat("}}");
String result="[" +res+ "]";
System.out.println(result);
JSONArray json;
json = new JSONArray(result);
Map<String, List<JSONObject>> orderMongo = new HashMap<>();
Map<String, List<JSONObject>> orderItemMongo = new HashMap<>();
MongoClient mongoClient = new MongoClient( "localhost" , 27017 );
MongoDatabase db = mongoClient.getDatabase("Mongotest");
MongoCollection<Document> table = db.getCollection("test1");
Document doc1=new Document();
//Gson gson=new Gson();
BasicDBObject document = new BasicDBObject();
for (int i = 0; i < json.length(); i++) {
JSONObject obj = json.getJSONObject(i);
if(obj.getString("table").equals("TEST.S_ORDER_MONGO1")){
List<JSONObject> list = orderMongo.getOrDefault(obj.getString("table").equals("TEST.S_ORDER_MONGO1"),new ArrayList<>());
list.add(obj);
orderMongo.put(obj.getJSONObject("after").getString("ROW_ID"),list);
}
else if(obj.getString("table").equals("TEST.S_ORDER_ITEM_MONGO1")){
List<JSONObject> nextlist = orderItemMongo.getOrDefault(obj.getString("table").equals("TEST.S_ORDER_ITEM_MONGO1"),new ArrayList<>());
nextlist.add(obj);
orderItemMongo.put(obj.getJSONObject("after").getString("ORDER_ID"),nextlist);
}
}
System.out.println(orderMongo);
System.out.println(orderItemMongo);
// System.out.println(orderItemMongo);
for (Entry<String, List<JSONObject>> entry : orderMongo.entrySet()) {
for(Entry<String, List<JSONObject>> entry1 : orderItemMongo.entrySet()){
if(entry.getKey().equals(entry1.getKey())){
//String gsonString=gson.toJson(entry.getValue());
//System.out.println(gsonString);
List<JSONObject> listnext = entry.getValue();
List <JSONObject> orderlineList=entry1.getValue();
for(JSONObject obj:listnext){
Document doc = new Document("STATUS_CD", obj.getJSONObject("after").getString("STATUS_CD"));
if(obj.getJSONObject("after").isNull("INTEGRATION_ID")==true){
doc.append("INTEGRATION_ID", null);}
doc.append("X_CUST_REF", obj.getJSONObject("after").getString("X_CUST_REF"));
doc.append("REQ_SHIP_DT",obj.getJSONObject("after").getString("REQ_SHIP_DT"));
if(obj.getJSONObject("after").isNull("QUOTE_ID")==true){
doc.append("QUOTE_ID",null);}
doc.append("ACCNT_ID",obj.getJSONObject("after").getString("ACCNT_ID"));
doc.append("ACTIVE_FLG",obj.getJSONObject("after").getString("ACTIVE_FLG"));
doc.append("PROCESS_TIMESTAMP",obj.getJSONObject("after").getString("PROCESS_TIMESTAMP"));
doc.append("CONTACT_ID",obj.getJSONObject("after").getString("CONTACT_ID"));
doc.append("BU_ID", obj.getJSONObject("after").getString("BU_ID"));
doc.append("SHIP_CON_ID",obj.getJSONObject("after").getString("SHIP_CON_ID"));
doc.append("LAST_UPD", obj.getJSONObject("after").getString("LAST_UPD"));
if(obj.getJSONObject("after").isNull("X_CLOSE_DT")==true){
doc.append("X_CLOSE_DT", null);}
doc.append("X_SUB_STAT", obj.getJSONObject("after").getString("X_SUB_STAT"));
doc.append("ORDER_NUM", obj.getJSONObject("after").getString("ORDER_NUM"));
doc.append("SOFT_DELETE", obj.getJSONObject("after").getString("SOFT_DELETE"));
doc.append("ROW_ID", obj.getJSONObject("after").getString("ROW_ID"));
doc.append("LAST_UPD_BY",obj.getJSONObject("after").getString("LAST_UPD_BY"));
doc.append("REV_NUM",obj.getJSONObject("after").getString("REV_NUM"));
doc.append("ORDER_DT", obj.getJSONObject("after").getString("ORDER_DT"));
for(JSONObject object:orderlineList){
if(object.getJSONObject("after").isNull("ASSET_ID")==true){
doc1.append("ASSET_ID", null);}
if(object.getJSONObject("after").isNull("SERV_ACCNT_ID")==true){
doc1.append("SERV_ACCNT_ID", null);}
doc1.append("REQ_SHIP_DT",object.getJSONObject("after").getString("REQ_SHIP_DT"));
if(object.getJSONObject("after").isNull("X_PROD_DESC")==true){
doc1.append("X_PROD_DESC",null);}
if(object.getJSONObject("after").isNull("SHIP_CON_ID")==true){
doc1.append("SHIP_CON_ID",null);}
doc1.append("X_BES_STATUS",object.getJSONObject("after").getString("X_BES_STATUS"));
doc1.append("ROW_ID",object.getJSONObject("after").getString("ROW_ID"));
doc1.append("STATUS_CD",object.getJSONObject("after").getString("STATUS_CD"));
doc1.append("ORDER_ID",object.getJSONObject("after").getString("ORDER_ID"));
if(object.getJSONObject("after").isNull("COMPLETED_DT")==true){
doc1.append("COMPLETED_DT",null);}
doc1.append("LAST_UPD",object.getJSONObject("after").getString("LAST_UPD"));
doc1.append("SOFT_DELETE",object.getJSONObject("after").getString("SOFT_DELETE"));
doc1.append("INTEGRATION_ID",object.getJSONObject("after").getString("INTEGRATION_ID"));
doc1.append("X_CDD",object.getJSONObject("after").getString("X_CDD"));
doc1.append("ACTION_CD",object.getJSONObject("after").getString("ACTION_CD"));
doc1.append("X_ORDER_ITEM_SUBSTATUS",object.getJSONObject("after").getString("X_ORDER_ITEM_SUBSTATUS"));
if(object.getJSONObject("after").isNull("X_APPT_REF")==true){
doc1.append("X_APPT_REF",null);}
if(object.getJSONObject("after").isNull("X_CANCELLED_DT")==true){
doc1.append("X_CANCELLED_DT",null);}
doc1.append("PROD_ID",object.getJSONObject("after").getString("PROD_ID"));
if(object.getJSONObject("after").isNull("SERVICE_NUM")==true){
doc1.append("SERVICE_NUM",null);}
if(object.getJSONObject("after").isNull("MUST_DLVR_BY_DT")==true){
doc1.append("MUST_DLVR_BY_DT",null);}
doc1.append("ROLLUP_FLG",object.getJSONObject("after").getString("ROLLUP_FLG"));
doc1.append("ROOT_ORDER_ITEM_ID",object.getJSONObject("after").getString("ROOT_ORDER_ITEM_ID"));
doc1.append("BILL_ACCNT_ID",object.getJSONObject("after").getString("BILL_ACCNT_ID"));
doc1.append("PROCESS_TIMESTAMP",object.getJSONObject("after").getString("PROCESS_TIMESTAMP"));
doc1.append("QTY_REQ",object.getJSONObject("after").getString("QTY_REQ"));
}
doc.append("ORDERLINE", doc1);
table.insertOne(doc);
}
}
}
}
}
catch (Exception e) {
e.printStackTrace();
}
System.out.println("Shutting down Thread: " + kafkaStream);
}
}
}

Need access to job launcher parameters when creating job

I am invoking my batch service via the command line and supplying a few parameters for the job. I need access to these parameters when creating the job, as I need to look up data from the DB for a 'site name' that is supplied as a parameter and dynamically create multiple steps. The issue is in the 'createJob' method. I've hard-coded the site id currently, but there is an exception for the itemizedReader method:
Error creating bean with name 'scopedTarget.itemizedReader' defined in billing.BillingConfig: Unsatisfied dependency expressed through method 'itemizedReader' parameter 1: No qualifying bean of type [java.lang.String]
Spring configuration
package billing;
import billing.components.AspiviaFieldSetter;
import billing.components.AspiviaPrepStatementSetter;
import billing.components.SummaryProcessor;
import billing.mapper.ItemizedCostingMapper;
import billing.model.BillingItem;
import billing.model.ItemizedCosting;
import billing.tasklet.SummaryOutputTasklet;
import billing.batch.common.AppProps;
import billing.batch.common.SqlConst;
import billing.batch.common.model.ItemizedPartner;
import billing.batch.common.repo.PartnerBillingRepo;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.sql.DataSource;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.core.job.builder.SimpleJobBuilder;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.database.JdbcBatchItemWriter;
import org.springframework.batch.item.database.JdbcCursorItemReader;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.FlatFileItemWriter;
import org.springframework.batch.item.file.mapping.DefaultLineMapper;
import org.springframework.batch.item.file.transform.DelimitedLineAggregator;
import org.springframework.batch.item.file.transform.DelimitedLineTokenizer;
import org.springframework.batch.item.file.transform.FieldExtractor;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.EnableAspectJAutoProxy;
import org.springframework.context.annotation.Profile;
import org.springframework.context.annotation.PropertySource;
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;
import org.springframework.core.io.FileSystemResource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
@ComponentScan(basePackages = {"billing", "billing.batch.common"})
@Configuration
@EnableBatchProcessing
@EnableAspectJAutoProxy
@PropertySource("classpath:/app.properties")
public class BillingConfig {
private static final Logger LOG = LogManager.getLogger();
@Autowired
private AppProps appProps;
@Autowired
private PartnerBillingRepo billingRepo;
@Bean
@Profile("prod")
public DataSource datasource() {
final HikariConfig cfg = new HikariConfig();
cfg.setJdbcUrl(appProps.getPartnerBillingUrl());
cfg.setUsername(appProps.getPartnerBillingUsername());
cfg.setPassword(appProps.getPartnerBillingPassword());
cfg.addDataSourceProperty("cachePrepStmts", appProps.getCachePrepStatements());
cfg.addDataSourceProperty("prepStmtCacheSize", appProps.getPrepStatementCacheSize());
cfg.addDataSourceProperty("prepStmtCacheSqlLimit", appProps.getPrepStatementCacheSqlLimit());
HikariDataSource ds = new HikariDataSource(cfg);
return ds;
}
@Bean
public JdbcTemplate template(DataSource ds) {
return new JdbcTemplate(ds);
}
@Bean
@StepScope
public FlatFileItemReader billingFileReader(@Value("#{jobParameters['input.file']}") String inputFile) {
DefaultLineMapper lineMapper = new DefaultLineMapper();
lineMapper.setFieldSetMapper(new BillingFieldSetter());
lineMapper.setLineTokenizer(new DelimitedLineTokenizer());
FlatFileItemReader reader = new FlatFileItemReader();
reader.setLineMapper(lineMapper);
reader.setResource(new FileSystemResource(inputFile));
return reader;
}
@Bean
@StepScope
public JdbcBatchItemWriter BillingWriter(DataSource ds, BillingPrepStatementSetter setter) {
JdbcBatchItemWriter writer = new JdbcBatchItemWriter();
writer.setDataSource(ds);
writer.setItemPreparedStatementSetter(setter);
writer.setSql(SqlConst.INSERT_INTO_BILLING);
return writer;
}
@Bean
@StepScope
public BillingPrepStatementSetter prepStatementSetter() {
return new BillingPrepStatementSetter();
}
@Bean
@StepScope
public SummaryProcessor summaryProc() {
return new SummaryProcessor();
}
@Bean
@StepScope
public SummaryOutputTasklet summaryTask() {
return new SummaryOutputTasklet();
}
@Bean
@StepScope
public ItemReader<ItemizedCosting> itemizedReader(@Value("#{jobParameters['site.id']}") Integer siteId, String accountCodes,
@Value("#{jobParameter['start.date']") String startDate, @Value("#{jobParameters['end.date']") String endDate) {
JdbcCursorItemReader reader = new JdbcCursorItemReader();
reader.setDataSource(datasource());
reader.setSql(SqlConst.SELECT_ITEMIZED_BILLING_FOR_ACCOUNT_CODES);
reader.setRowMapper(new ItemizedCostingMapper());
reader.setPreparedStatementSetter((ps) -> {
try {
SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
ps.setTimestamp(0, new Timestamp(formatter.parse(startDate).getTime()));
ps.setTimestamp(1, new Timestamp(formatter.parse(endDate).getTime()));
} catch (Exception err) {
LOG.error("Unable to parse dates, start: {} end: {}", startDate, endDate);
}
ps.setString(2, accountCodes);
ps.setInt(3, siteId);
});
return reader;
}
@Bean
@StepScope
public ItemWriter<ItemizedCosting> itemizedWriter(@Value("start.date") String startDate,
String partnerName) {
DelimitedLineAggregator lineAgg = new DelimitedLineAggregator();
FieldExtractor<ItemizedCosting> extractor = (f) -> {
Object[] output = new Object[9];
output[0] = f.getExtension();
output[1] = f.getPbxCallTime();
output[2] = f.getDuration();
output[3] = f.getAccountCode();
output[4] = f.getDigits();
output[5] = f.getCost();
output[6] = f.getDestination();
output[7] = f.getCarrier();
output[8] = f.getAttribute();
return output;
};
lineAgg.setFieldExtractor(extractor);
Timestamp start = null;
try {
SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
start = new Timestamp(formatter.parse(startDate).getTime());
} catch (Exception e) {
LOG.error("Unable to parse date: {}", startDate);
}
FlatFileItemWriter<ItemizedCosting> writer = new FlatFileItemWriter<>();
writer.setEncoding("UTF-8");
writer.setLineAggregator(lineAgg);
writer.setResource(new FileSystemResource(String.format("%s/%2$tY-%2$tm_%s_",
appProps.getItemizedBillingOutputPath(), start, partnerName)));
return writer;
}
@Bean
public Job createJob(JobBuilderFactory jobBuilder, StepBuilderFactory stepBuilders, DataSource ds, FlatFileItemReader reader)
throws Exception {
Step findSiteIdStep = stepBuilders.get("find.site.id").tasklet((contribution, chunkContext) -> {
String siteName
= (String) chunkContext.getStepContext().getJobParameters().get(BillingConst.PARAM_SITE);
Integer siteId = billingRepo.findSiteIdByName(siteName);
chunkContext.getStepContext().getStepExecution().getJobExecution().getExecutionContext().put(
BillingConst.SITE_ID, siteId);
return RepeatStatus.FINISHED;
}).build();
Step processFileStep = stepBuilders.get("process.file").<BillingItem, BillingItem>chunk(appProps.getChunkSize())
.reader(reader)
.processor(summaryProc())
.writer(aspiviaWriter(ds, prepStatementSetter())).build();
Step outputSummary = stepBuilders.get("output.summary").tasklet(summaryTask()).build();
SimpleJobBuilder builder = jobBuilder.get("process.aspivia").incrementer(new RunIdIncrementer())
.start(findSiteIdStep)
.next(processFileStep)
.next(outputSummary);
List<ItemizedPartner> partners = billingRepo.findPartnersForSite("CPT");
Integer siteId = billingRepo.findSiteIdByName("CPT");
Map<String, String> partnerAccCodes = new HashMap<>();
partners.stream().forEach(i -> {
if (!partnerAccCodes.containsKey(i.getPartnerName())) {
partnerAccCodes.put(i.getPartnerName(), "");
}
String accCodes = partnerAccCodes.get(i.getPartnerName());
accCodes += i.getAccountCode().toString() + ", ";
partnerAccCodes.put(i.getPartnerName(), accCodes);
});
partnerAccCodes.forEach((k, v) -> {
Step itemizedReport = stepBuilders.get("itemized." + k).<ItemizedCosting, ItemizedCosting>chunk(appProps.getChunkSize())
.reader(itemizedReader(siteId, v, null, null))
.writer(itemizedWriter(null, k)).build();
builder.next(itemizedReport);
});
return builder.build();
}
@Bean
public static PropertySourcesPlaceholderConfigurer propCfg() {
return new PropertySourcesPlaceholderConfigurer();
}
@Bean
public DataSourceTransactionManager transactionManager(DataSource datasource) {
return new DataSourceTransactionManager(datasource);
}
}
The issue is due to how the Spring Batch lifecycle works. If the bean is decorated with @StepScope, the job parameters are only available once the job has been launched.
final Job loadAspiviaDataJob = context.getBean(Job.class);
final JobLauncher launcher = context.getBean(JobLauncher.class);
JobParametersBuilder paramBuilder = new JobParametersBuilder();
paramBuilder.addString(AspiviaConst.PARAM_INPUT_FILE, inputFile);
paramBuilder.addString(AspiviaConst.PARAM_SITE, site);
paramBuilder.addString(AspiviaConst.PARAM_OUTPUT_FILE_PATH, summaryFile);
JobExecution runStatus = launcher.run(loadAspiviaDataJob, paramBuilder.toJobParameters());
In the code above we retrieve the Job, which is set up via the createJob bean method in my configuration. At this point the job parameters are not available.
What I have done to get access to the values I need is as follows:
Added an extra @PropertySource("classpath:cli-runtime.properties")
The Application.java that launches the Spring Batch job saves the properties we need to cli-runtime.properties. When the Job is created in the @Configuration class, the values are loaded from the property file and I can create the additional steps in the job that I require.
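A minimal sketch of that workaround, under the assumption that the properties file lands on the classpath before the context is refreshed; the output path and the site.name key are illustrative, not taken from the original code:
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Properties;
// Step 1 (in Application.java, before the Spring context is built): persist the CLI values.
// The target path and the "site.name" key are assumptions for illustration.
public final class CliRuntimePropertiesWriter {
    public static void write(String siteName) throws Exception {
        Properties props = new Properties();
        props.setProperty("site.name", siteName);
        try (OutputStream out = new FileOutputStream("target/classes/cli-runtime.properties")) {
            props.store(out, "values captured from the command line");
        }
    }
}
// Step 2 (in the @Configuration class, which also declares
// @PropertySource("classpath:cli-runtime.properties")): inject the value normally;
// it is already resolvable while createJob(...) builds the dynamic steps.
//
//     @Value("${site.name}")
//     private String siteName;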

how to create workflow model programmatically in aem

I would like to create an AEM workflow model programmatically due to a business requirement, and I used the code below to implement it, but it throws an exception. This problem has tortured me for a week. Could you please give some hints? Thanks in advance.
The code is below!
package com.sample.mms.workflow;
import java.util.Iterator;
import java.util.List;
import javax.jcr.RepositoryException;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.jackrabbit.api.security.user.User;
import org.apache.jackrabbit.api.security.user.UserManager;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.framework.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sample.mms.service.ConfigurationService;
import com.sample.mms.service.TopicOwnerBizService;
import com.sample.mms.util.WorkflowUtil;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowService;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.exec.WorkflowData;
import com.day.cq.workflow.exec.WorkflowProcess;
import com.day.cq.workflow.metadata.MetaDataMap;
import com.day.cq.workflow.model.WorkflowModel;
import com.day.cq.workflow.model.WorkflowNode;
import com.day.cq.workflow.model.WorkflowTransition;
@Component
@Service
@Properties({
@Property(name = Constants.SERVICE_DESCRIPTION, value = "general topic owner mark and approval each topic page step"),
@Property(name = Constants.SERVICE_VENDOR, value = "Someone"),
@Property(name = "process.label", value = "SAMPLE MMS NL - General Topic Owner Approval Process Step") })
public class TopicOwnerHandleProcessStep implements WorkflowProcess {
protected final Logger logger = LoggerFactory.getLogger(this.getClass());
@Reference
ResourceResolverFactory resourceResolverFactory;
@Reference
private ConfigurationService configurationService;
@Reference
private TopicOwnerBizService topicOwnerBizService;
@Reference
private WorkflowService workflowService;
@Override
public void execute(WorkItem workItem, WorkflowSession workflowSession, MetaDataMap metaDataMap)
throws WorkflowException {
final WorkflowData workflowData = workItem.getWorkflowData();
final String payLoadType = workflowData.getPayloadType();
if(!StringUtils.equals(payLoadType, "JCR_PATH")){
return;
}
final String payLoad = workflowData.getPayload().toString();
String topicOwnerGroup = configurationService.getTopic_owner_participant_group();
ResourceResolver resourceResolver = null;
UserManager userManager = null;
try {
resourceResolver = WorkflowUtil.getResourceResolver(resourceResolverFactory);
userManager = resourceResolver.adaptTo(UserManager.class);
List<User> list = WorkflowUtil.getUsersByGroup(userManager, topicOwnerGroup);
User user = null;
//create a model for each topic owner approval
WorkflowModel wm = workflowSession.createNewModel("sample mms topic owner review each topic page_" + System.currentTimeMillis());
WorkflowData wd = workflowSession.newWorkflowData("JCR_PATH", payLoad);
//get start node
WorkflowNode startNode = wm.getRootNode();
//wm.createNode("start",WorkflowNode.TYPE_START,"");
//get end node
WorkflowNode endNode = wm.getEndNode();
//wm.createNode("end",WorkflowNode.TYPE_END,"");
//create and split node
WorkflowNode andSplitNode = wm.createNode("and split",WorkflowNode.TYPE_AND_SPLIT,null);
//create and join node
WorkflowNode andJoinNode = wm.createNode("and join",WorkflowNode.TYPE_AND_JOIN,"");
wm.validate();
//create transition between start node and split node
wm.createTransition();
//wm.createTransition(startNode,andSplitNode,null);
//create transition between and-join node and end node
wm.createTransition(andJoinNode,endNode,null);
for(int i=0;i<list.size();i++){
user = list.get(i);
Iterator<Resource> resources = topicOwnerBizService.getResourceByTopicOwner(resourceResolver, payLoad, user.getID());
if(resources.hasNext()){
// ResourceResolver resourceResolver1 = WorkflowUtil.getResourceResolver(resourceResolverFactory);
// Session session = resourceResolver1.adaptTo(Session.class);
// WorkflowSession workflowSession1 = workflowService.getWorkflowSession(session);
// WorkflowModel wm = workflowSession1.getModel(configurationService.getTopicOwnerHandleSubsequentWorkflow());
// WorkflowData wd = workflowSession1.newWorkflowData("JCR_PATH", payLoad);
// wd.getMetaDataMap().put("topicOwnerParticipant", user.getID());
// workflowSession1.startWorkflow(wm, wd);
//create branch node for and split node
WorkflowNode topicOwnerParticipantNode = wm.createNode("topic owner participant",WorkflowNode.TYPE_PARTICIPANT,"");
//{timeoutMillis=0, timeoutHandler=com.sample.mms.workflow.TopicOwnerTimeoutHandler, PARTICIPANT=hunter.liang}
topicOwnerParticipantNode.getMetaDataMap().put("timeoutMillis", 0L);
topicOwnerParticipantNode.getMetaDataMap().put("timeoutHandler", "com.sample.mms.workflow.TopicOwnerTimeoutHandler");
topicOwnerParticipantNode.getMetaDataMap().put("PARTICIPANT", user.getID());
WorkflowNode orSplitNode = wm.createNode("or split",WorkflowNode.TYPE_OR_SPLIT,"");
WorkflowNode orJoinNode = wm.createNode("or join",WorkflowNode.TYPE_OR_JOIN,"");
//{PROCESS_AUTO_ADVANCE=true, PROCESS=com.sample.mms.workflow.TopicOwnerApprovalProcessStep, PROCESS_ARGS=approval}
WorkflowNode topicOwnerApprovalNode = wm.createNode("topic owner approval",WorkflowNode.TYPE_PROCESS,"");
topicOwnerApprovalNode.getMetaDataMap().put("PROCESS_AUTO_ADVANCE", true);
topicOwnerApprovalNode.getMetaDataMap().put("PROCESS", "com.sample.mms.workflow.TopicOwnerApprovalProcessStep");
topicOwnerApprovalNode.getMetaDataMap().put("PROCESS_ARGS", "approval");
//{PROCESS_AUTO_ADVANCE=true, PROCESS=com.sample.mms.workflow.TopicOwnerApprovalProcessStep, PROCESS_ARGS=reject}
WorkflowNode topicOwnerRejectNode = wm.createNode("topic owner reject",WorkflowNode.TYPE_PROCESS,"");
topicOwnerRejectNode.getMetaDataMap().put("PROCESS_AUTO_ADVANCE", true);
topicOwnerRejectNode.getMetaDataMap().put("PROCESS", "com.sample.mms.workflow.TopicOwnerApprovalProcessStep");
topicOwnerRejectNode.getMetaDataMap().put("PROCESS_ARGS", "reject");
WorkflowNode timeoutNode = wm.createNode("time out join",WorkflowNode.TYPE_PROCESS,"");
//wm.createTransition(andSplitNode,orSplitNode,"");
wm.createTransition(orSplitNode,topicOwnerApprovalNode,null);
wm.createTransition(orSplitNode,topicOwnerRejectNode,null);
WorkflowTransition orSplitAndTimeOutTransition = wm.createTransition(orSplitNode,timeoutNode,null);
orSplitAndTimeOutTransition.setRule("function check(){return false;}");
wm.createTransition(topicOwnerApprovalNode,orJoinNode,null);
wm.createTransition(topicOwnerRejectNode,orJoinNode,null);
wm.createTransition(timeoutNode,orJoinNode,null);
wm.createTransition(andSplitNode,orSplitNode,null);
wm.createTransition(orJoinNode,andJoinNode,null);
}
}
workflowSession.startWorkflow(wm, wd);
} catch (LoginException e) {
e.printStackTrace();
} catch (RepositoryException e) {
e.printStackTrace();
}
}
}
The error log is below!
20.04.2016 17:35:24.054 *INFO* [JobHandler: /etc/workflow/instances/2016-04-20/model_27918689599044:/content/samplemms/2016/02/index] com.adobe.granite.workflow.core.WorkflowSessionImpl Workflow model deployed: /etc/workflow/models/sample_mms_topic_owner_175(Version: 1.0)
20.04.2016 17:35:36.015 *ERROR* [JobHandler: /etc/workflow/instances/2016-04-20/model_27918689599044:/content/samplemms/2016/02/index] com.day.cq.workflow.compatibility.CQWorkflowProcessRunner Process execution resulted in an error: null
java.lang.NullPointerException: null
at com.adobe.granite.workflow.core.model.WorkflowModelImpl.createTransition(WorkflowModelImpl.java:155)
at com.adobe.granite.workflow.core.model.WorkflowModelImpl.createTransition(WorkflowModelImpl.java:149)
at com.day.cq.workflow.impl.model.CQWorkflowModelWrapper.createTransition(CQWorkflowModelWrapper.java:145)
at com.sample.mms.workflow.TopicOwnerHandleProcessStep.execute(TopicOwnerHandleProcessStep.java:105)
at com.day.cq.workflow.compatibility.CQWorkflowProcessRunner.execute(CQWorkflowProcessRunner.java:93)
at com.adobe.granite.workflow.core.job.HandlerBase.executeProcess(HandlerBase.java:215)
at com.adobe.granite.workflow.core.job.JobHandler.process(JobHandler.java:140)
at org.apache.sling.event.jobs.JobUtil$1.run(JobUtil.java:365)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
20.04.2016 17:35:36.015 *ERROR* [JobHandler: /etc/workflow/instances/2016-04-20/model_27918689599044:/content/samplemms/2016/02/index] com.adobe.granite.workflow.core.job.JobHandler Process execution resulted in an error
com.adobe.granite.workflow.WorkflowException: Process execution resulted in an error
at com.adobe.granite.workflow.core.job.HandlerBase.executeProcess(HandlerBase.java:225)
at com.adobe.granite.workflow.core.job.JobHandler.process(JobHandler.java:140)
at org.apache.sling.event.jobs.JobUtil$1.run(JobUtil.java:365)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: com.adobe.granite.workflow.WorkflowException: Failed to execute process
at com.day.cq.workflow.compatibility.CQWorkflowProcessRunner.execute(CQWorkflowProcessRunner.java:108)
at com.adobe.granite.workflow.core.job.HandlerBase.executeProcess(HandlerBase.java:215)
... 5 common frames omitted
Caused by: java.lang.NullPointerException: null
at com.adobe.granite.workflow.core.model.WorkflowModelImpl.createTransition(WorkflowModelImpl.java:155)
at com.adobe.granite.workflow.core.model.WorkflowModelImpl.createTransition(WorkflowModelImpl.java:149)
at com.day.cq.workflow.impl.model.CQWorkflowModelWrapper.createTransition(CQWorkflowModelWrapper.java:145)
at com.sample.mms.workflow.TopicOwnerHandleProcessStep.execute(TopicOwnerHandleProcessStep.java:105)
at com.day.cq.workflow.compatibility.CQWorkflowProcessRunner.execute(CQWorkflowProcessRunner.java:93)
... 6 common frames omitted
Instead of the com.day.cq.workflow APIs, can you try using the com.adobe.granite.workflow APIs?
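For illustration only, here is a rough sketch of the same start/and-split/and-join wiring against the Granite API; the method names mirror the CQ wrapper used above, but treat the exact signatures as assumptions to verify against your AEM version rather than a confirmed fix:
import org.apache.sling.api.resource.ResourceResolver;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.model.WorkflowModel;
import com.adobe.granite.workflow.model.WorkflowNode;
// Sketch: adapt the ResourceResolver to the Granite WorkflowSession instead of using com.day.cq.workflow.
WorkflowSession graniteSession = resourceResolver.adaptTo(WorkflowSession.class);
WorkflowModel wm = graniteSession.createNewModel("topic owner review " + System.currentTimeMillis());
WorkflowNode startNode = wm.getRootNode();
WorkflowNode endNode = wm.getEndNode();
WorkflowNode andSplit = wm.createNode("and split", WorkflowNode.TYPE_AND_SPLIT, null);
WorkflowNode andJoin = wm.createNode("and join", WorkflowNode.TYPE_AND_JOIN, null);
// Wire every transition explicitly between known nodes instead of relying on the no-argument createTransition().
wm.createTransition(startNode, andSplit, null);
wm.createTransition(andJoin, endNode, null);
graniteSession.deployModel(wm);
WorkflowData wd = graniteSession.newWorkflowData("JCR_PATH", payLoad);
graniteSession.startWorkflow(wm, wd);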

Cassandra / Eclipse - Can't make connection

So I'm using Cassandra in my project and I have to make a connection between Eclipse and the database. I tried to use a JDBC-compliant driver that I found on code.google.com, but I'm getting this exception:
Exception in thread "main" java.lang.StringIndexOutOfBoundsException: String index out of range: -1
This is my code:
package cassandrasampledriver;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.cassandra.cql.jdbc.DriverResolverException;
import org.apache.cassandra.cql.jdbc.InvalidUrlException;
public class CassandraDriver
{
public static void main(String[] args) {
Connection con = null;
String KS = "cassandrademocql";
try
{
Class.forName("org.apache.cassandra.cql.jdbc.CassandraDriver");
con = DriverManager.getConnection("jdbc:cassandra://localhost:9160/" + KS);
Statement stmt = con.createStatement();
String query = "DROP KEYSPACE cassandrademocql;";
ResultSet result = stmt.executeQuery(query);
}
catch (ClassNotFoundException e) {
e.printStackTrace();
}
catch (SQLException e) {
e.printStackTrace();
}
}
}
Thanks in advance :)
I ran your code using this CQL driver (version 1.1.2, compatible with Cassandra 1.2) on JDK 6 without an error. Maybe you are missing references in your classpath? The only problem with the code is that you are assigning a variable to a method call that won't return anything (ResultSet result = stmt.executeQuery(query);), so a java.sql.SQLNonTransientException will be thrown.
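If the goal is only to run the DDL, a small adjustment (a plain JDBC sketch, not specific to this driver) is to call execute() instead of executeQuery(), since DROP KEYSPACE produces no result set:
// DDL such as DROP KEYSPACE returns no rows, so use execute() rather than executeQuery().
Statement stmt = con.createStatement();
boolean returnedResultSet = stmt.execute("DROP KEYSPACE cassandrademocql;");
System.out.println("Returned a result set: " + returnedResultSet); // expected: false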
You said you are new to Cassandra; just some friendly advice: I would do some research on the available APIs for Cassandra before making a choice.
These are the JARs I used:
apache-cassandra-1.2.0-rc1-SNAPSHOT.jar
apache-cassandra-clientutil-1.2.0-rc1-SNAPSHOT.jar
apache-cassandra-thrift-1.2.0-rc1-SNAPSHOT.jar
cassandra-jdbc-1.1.2.jar
libthrift-0.9.0.jar
log4j-1.2.16.jar
log4j-over-slf4j-1.7.2.jar
selenium-server-standalone-2.21.0.jar
slf4j-api-1.7.2.jar
slf4j-simple-1.7.2.jar
Full code that I ran:
package cassandrasampledriver;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class CassandraDriver {
public static void main(String[] args) {
Connection con = null;
String KS = "cassandrademocql";
try
{
Class.forName("org.apache.cassandra.cql.jdbc.CassandraDriver");
con = DriverManager.getConnection("jdbc:cassandra://localhost:9160/" + KS);
Statement stmt = con.createStatement();
String query = "DROP KEYSPACE cassandrademocql;";
// Because you are dropping the KS this will not return anything
// So the result set will be null and a java.sql.SQLNonTransientException exception will be thrown every time.
ResultSet result = stmt.executeQuery(query);
}
catch (Exception ex) {
ex.printStackTrace();
}
}
}