How to use a sequence in a multi-threaded environment - OrientDB

I am trying to create multiple vertices in parallel:
public static void main(String[] args) throws InterruptedException {
    // create db and seq
    ODatabaseDocumentTx db = new ODatabaseDocumentTx("memory:/TestDB");
    db.create();
    OSequenceLibrary seqLib = db.getMetadata().getSequenceLibrary();
    seqLib.createSequence("testSeq",
            OSequence.SEQUENCE_TYPE.ORDERED,
            new OSequence.CreateParams().setStart(0L).setIncrement(1)
    );
    OrientGraphFactory factory = new OrientGraphFactory("memory:/TestDB", "admin", "admin").setupPool(1, 8);
    // mt
    Executor executor = Executors.newFixedThreadPool(8);
    CountDownLatch latch = new CountDownLatch(1000);
    for (int i = 1; i <= 1000; i++) {
        executor.execute(() -> {
            OrientGraph g = factory.getTx();
            try {
                OSequence seq = g.getRawGraph().getMetadata().getSequenceLibrary().getSequence("testSeq");
                OrientVertex v = g.addVertex("TestClass");
                v.setProperty("seq", seq.next());
                latch.countDown();
            } finally {
                g.shutdown();
            }
        });
    }
    latch.await(5, TimeUnit.SECONDS);
    System.exit(0);
}
and I receive lots of exceptions:
com.orientechnologies.orient.core.exception.OConcurrentModificationException:
Cannot UPDATE the record #7:0 because the version is not the latest.
Probably you are updating an old record or it has been modified by
another user (db=v2 your=v1)
How do I use a sequence properly in a multi-threaded environment?

OrientDB is entirely based on an optimistic approach with few or no locks. For this reason you should catch the exception and retry. Example:
OrientGraph g = factory.getTx();
try {
    for (int retry = 0; retry < 100; ++retry) {
        try {
            OSequence seq = g.getRawGraph().getMetadata().getSequenceLibrary().getSequence("testSeq");
            OrientVertex v = g.addVertex("TestClass");
            v.setProperty("seq", seq.next());
            latch.countDown();
            break;
        } catch (ONeedRetryException e) {
            // concurrent update detected: loop and try again
        }
    }
} finally {
    g.shutdown();
}
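If every task needs the same policy, the retry loop can be factored into a small helper. A minimal sketch, assuming the unit of work is safe to re-run from the top (the name withRetry is illustrative, not an OrientDB API):

import com.orientechnologies.common.concur.ONeedRetryException;

static void withRetry(int maxRetries, Runnable work) {
    for (int retry = 0; retry < maxRetries; ++retry) {
        try {
            work.run();
            return; // success, stop retrying
        } catch (ONeedRetryException e) {
            // another thread updated the same record first; re-run the unit of work
        }
    }
    throw new IllegalStateException("gave up after " + maxRetries + " retries");
}

Each executor task would then wrap its body in withRetry(100, () -> { ... }).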

Curator ServiceCacheListener is triggered three times when a service is added

I am learning ZooKeeper and trying out the Curator framework for service discovery. However, I am facing a weird issue that I am having difficulty figuring out. The problem is that when I register an instance via serviceDiscovery, the cacheChanged event of the serviceCache gets triggered three times. When I remove an instance, it is only triggered once, which is the expected behavior. Please see the code below:
public class DiscoveryExample {
    private static String PATH = "/base";
    static ServiceDiscovery<InstanceDetails> serviceDiscovery = null;

    public static void main(String[] args) throws Exception {
        CuratorFramework client = null;
        try {
            // this is the ip address of my VM
            client = CuratorFrameworkFactory.newClient("192.168.149.129:2181", new ExponentialBackoffRetry(1000, 3));
            client.start();
            JsonInstanceSerializer<InstanceDetails> serializer = new JsonInstanceSerializer<InstanceDetails>(
                    InstanceDetails.class);
            serviceDiscovery = ServiceDiscoveryBuilder.builder(InstanceDetails.class)
                    .client(client)
                    .basePath(PATH)
                    .serializer(serializer)
                    .build();
            serviceDiscovery.start();
            ServiceCache<InstanceDetails> serviceCache = serviceDiscovery.serviceCacheBuilder()
                    .name("product")
                    .build();
            serviceCache.addListener(new ServiceCacheListener() {
                @Override
                public void stateChanged(CuratorFramework curator, ConnectionState state) {
                    System.out.println("State Changed to " + state.name());
                }

                // THIS IS THE PART THAT GETS TRIGGERED MULTIPLE TIMES
                @Override
                public void cacheChanged() {
                    System.out.println("Cache Changed ");
                    List<ServiceInstance<InstanceDetails>> list = serviceCache.getInstances();
                    Iterator<ServiceInstance<InstanceDetails>> it = list.iterator();
                    while (it.hasNext()) {
                        System.out.println(it.next().getAddress());
                    }
                }
            });
            serviceCache.start();
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            System.out.print("> ");
            String line = in.readLine();
        } finally {
            CloseableUtils.closeQuietly(serviceDiscovery);
            CloseableUtils.closeQuietly(client);
        }
    }
}
AND
public class RegisterApplicationServer {
    final static String PATH = "/base";
    static ServiceDiscovery<InstanceDetails> serviceDiscovery = null;

    public static void main(String[] args) throws Exception {
        CuratorFramework client = null;
        try {
            client = CuratorFrameworkFactory.newClient("192.168.149.129:2181", new ExponentialBackoffRetry(1000, 3));
            client.start();
            JsonInstanceSerializer<InstanceDetails> serializer = new JsonInstanceSerializer<InstanceDetails>(
                    InstanceDetails.class);
            serviceDiscovery = ServiceDiscoveryBuilder.builder(InstanceDetails.class).client(client).basePath(PATH)
                    .serializer(serializer).build();
            serviceDiscovery.start();
            // SOME OTHER CODE THAT TAKES CARE OF USER INPUT...
        } finally {
            CloseableUtils.closeQuietly(serviceDiscovery);
            CloseableUtils.closeQuietly(client);
        }
    }

    private static void addInstance(String[] args, CuratorFramework client, String command,
            ServiceDiscovery<InstanceDetails> serviceDiscovery) throws Exception {
        // simulate a new instance coming up
        // in a real application, this would be a separate process
        if (args.length < 2) {
            System.err.println("syntax error (expected add <name> <description>): " + command);
            return;
        }
        StringBuilder description = new StringBuilder();
        for (int i = 1; i < args.length; ++i) {
            if (i > 1) {
                description.append(' ');
            }
            description.append(args[i]);
        }
        String serviceName = args[0];
        ApplicationServer server = new ApplicationServer(client, PATH, serviceName, description.toString());
        server.start();
        serviceDiscovery.registerService(server.getThisInstance());
        System.out.println(serviceName + " added");
    }

    private static void deleteInstance(String[] args, String command, ServiceDiscovery<InstanceDetails> serviceDiscovery) throws Exception {
        // in a real application, this would occur due to normal operation, a
        // crash, maintenance, etc.
        if (args.length != 2) {
            System.err.println("syntax error (expected delete <name>): " + command);
            return;
        }
        final String serviceName = args[0];
        Collection<ServiceInstance<InstanceDetails>> set = serviceDiscovery.queryForInstances(serviceName);
        Iterator<ServiceInstance<InstanceDetails>> it = set.iterator();
        while (it.hasNext()) {
            ServiceInstance<InstanceDetails> si = it.next();
            if (si.getPayload().getDescription().indexOf(args[1]) != -1) {
                serviceDiscovery.unregisterService(si);
            }
        }
        System.out.println("Removed an instance of: " + serviceName);
    }
}
I would appreciate it if anyone could point out what I am doing wrong and perhaps share some good materials/examples I can refer to. The official website and the examples on GitHub do not help much.

Why is Apache Storm KafkaSpout emitting so many items from Kafka topic?

I'm having issues with Kafka and Storm. I'm not sure at this point whether it's a problem with the KafkaSpout config I am setting up, or whether I am not ACKing properly, or what.
I enqueued 50 items onto my Kafka topic, but my spout has emitted over 1300 (and counting) tuples. Also, the spout reports that almost all of them have "failed." The topology is not actually failing, it is writing to a database successfully, but I just don't know why it is apparently replaying everything so much (if that's what it's doing).
The big question is:
Why is it emitting so many tuples when I only passed 50 to Kafka?
Here is how I am setting up the topology and the KafkaSpout
public static void main(String[] args) {
    try {
        String databaseServerIP = "";
        String kafkaZookeepers = "";
        String kafkaTopicName = "";
        int numWorkers = 1;
        int numAckers = 1;
        int numSpouts = 1;
        int numBolts = 1;
        int messageTimeOut = 10;
        String topologyName = "";
        if (args == null || args[0].isEmpty()) {
            System.out.println("Args cannot be null or empty. Exiting");
            return;
        } else {
            if (args.length == 8) {
                for (String arg : args) {
                    if (arg == null) {
                        System.out.println("Parameters cannot be null. Exiting");
                        return;
                    }
                }
                databaseServerIP = args[0];
                kafkaZookeepers = args[1];
                kafkaTopicName = args[2];
                numWorkers = Integer.valueOf(args[3]);
                numAckers = Integer.valueOf(args[4]);
                numSpouts = Integer.valueOf(args[5]);
                numBolts = Integer.valueOf(args[6]);
                topologyName = args[7];
            } else {
                System.out.println("Bad parameters: found " + args.length + ", required = 8");
                return;
            }
        }
        Config conf = new Config();
        conf.setNumWorkers(numWorkers);
        conf.setNumAckers(numAckers);
        conf.setMessageTimeoutSecs(messageTimeOut);
        conf.put("databaseServerIP", databaseServerIP);
        conf.put("kafkaZookeepers", kafkaZookeepers);
        conf.put("kafkaTopicName", kafkaTopicName);
        /**
         * Now would put kafkaSpout instance below instead of TemplateSpout()
         */
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(topologyName + "-flatItems-from-kafka-spout", getKafkaSpout(kafkaZookeepers, kafkaTopicName), numSpouts);
        builder.setBolt(topologyName + "-flatItem-Writer-Bolt", new ItemWriterBolt(), numBolts).shuffleGrouping(topologyName + "-flatItems-from-kafka-spout");
        StormTopology topology = builder.createTopology();
        StormSubmitter.submitTopology(topologyName, conf, topology);
    } catch (Exception e) {
        System.out.println("There was a problem starting the topology. Check parameters.");
        e.printStackTrace();
    }
}

private static KafkaSpout getKafkaSpout(String zkHosts, String topic) throws Exception {
    //String topic = "FLAT-ITEMS";
    String zkNode = "/" + topic + "-subscriber-pipeline";
    String zkSpoutId = topic + "subscriberpipeline";
    KafkaTopicInZkCreator.createTopic(topic, zkHosts);
    SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHosts), topic, zkNode, zkSpoutId);
    spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
    // spoutConfig.useStartOffsetTimeIfOffsetOutOfRange = true;
    // spoutConfig.startOffsetTime = System.currentTimeMillis();
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new KafkaSpout(spoutConfig);
}
And here is the creation of the topic, in case that matters:
public static void createTopic(String topicName, String zookeeperHosts) throws Exception {
    ZkClient zkClient = null;
    ZkUtils zkUtils = null;
    try {
        int sessionTimeOutInMs = 15 * 1000; // 15 secs
        int connectionTimeOutInMs = 10 * 1000; // 10 secs
        zkClient = new ZkClient(zookeeperHosts, sessionTimeOutInMs, connectionTimeOutInMs, ZKStringSerializer$.MODULE$);
        zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperHosts), false);
        int noOfPartitions = 1;
        int noOfReplication = 1;
        Properties topicConfiguration = new Properties();
        boolean topicExists = AdminUtils.topicExists(zkUtils, topicName);
        if (!topicExists) {
            AdminUtils.createTopic(zkUtils, topicName, noOfPartitions, noOfReplication, topicConfiguration, RackAwareMode.Disabled$.MODULE$);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        if (zkClient != null) {
            zkClient.close();
        }
    }
}
You need to check whether the messages failed in the bolt as well. If they all failed there too, you probably didn't ack the messages in the bolt, or an exception is being thrown in the bolt code.
If the bolt messages were acked, it is more likely a timeout: increasing the topology's message timeout or the parallelism should fix the problem.
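For comparison, here is a minimal sketch of explicit acking, assuming the writer bolt extends BaseRichBolt (a BaseBasicBolt acks automatically); writeToDatabase is a hypothetical placeholder and the package names follow Storm 1.x:

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class ItemWriterBolt extends BaseRichBolt {
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        try {
            writeToDatabase(tuple); // hypothetical stand-in for the real DB write
            collector.ack(tuple);   // without this ack, every tuple times out and is replayed
        } catch (Exception e) {
            collector.fail(tuple);  // fail explicitly so the spout can replay immediately
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: no output stream to declare
    }

    private void writeToDatabase(Tuple tuple) {
        // placeholder for the actual write logic
    }
}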

Query in OrientDB

I am trying to print the result of a query to the Java console, but nothing comes out. This is my code; could someone help me?
I'm new to OrientDB and I'm just learning.
The query I need finds the shortest path between two nodes and prints the result to the Java console. It gives me no errors, but nothing comes out.
public class Graph {
    private static final String DB_PATH = "C:/OrientDataBase/shortest_path";
    static OrientGraphNoTx DBGraph;
    static OrientGraphFactory factory;

    public static void main(String[] args) {
        factory = new OrientGraphFactory("plocal:" + DB_PATH);
        DBGraph = factory.getNoTx();
        HashMap<String, Vertex> nodes = new HashMap<String, Vertex>();
        for (int i = 0; i <= 1000; i++) {
            Vertex v = DBGraph.addVertex("class:V");
            v.setProperty("vertexID", i + "");
            nodes.put(i + "", v);
        }
        try (BufferedReader br = new BufferedReader(new FileReader("C:/OrientDataBase/sp1.csv"))) {
            int i = 0;
            for (String line; (line = br.readLine()) != null; ) {
                if (i == 0) {
                    i++; // skip the header line
                } else {
                    String[] vertices = line.split(",");
                    String vertex1 = vertices[0];
                    String vertex2 = vertices[1];
                    String weight = vertices[2];
                    vertex2 = vertex2.replaceAll(" ", "");
                    Vertex v1 = nodes.get(vertex1);
                    Vertex v2 = nodes.get(vertex2);
                    Edge eLives = DBGraph.addEdge(null, v1, v2, "belongs");
                    eLives.setProperty("weight", weight);
                    System.out.println(v1 + "," + v2 + "," + weight);
                    String query = "select expand(shortestPath) from (select shortestPath(#10:0,#10:2,BOTH))";
                    Iterable<OrientVertex> res = DBGraph.command(new OCommandSQL(query)).execute();
                    for (OrientVertex v : res) {
                        System.out.println("rid: " + v.getId().toString() + "\tn:" + v.getProperty("n"));
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
I tried your code: you have to put quotes around the direction when you run the query, so it becomes:
String query = "select expand(shortestPath) from (select shortestPath(#10:0,#10:2,'BOTH'))";
I used this csv file.
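As a side note, the posted code runs the shortest-path query once per CSV line; it only needs to run once, after the load loop finishes. A sketch, keeping the hard-coded #10:0 and #10:2 record ids from the question:

String query = "select expand(shortestPath) from (select shortestPath(#10:0, #10:2, 'BOTH'))";
Iterable<OrientVertex> res = DBGraph.command(new OCommandSQL(query)).execute();
for (OrientVertex v : res) {
    System.out.println("rid: " + v.getId());
}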
Hope it helps.
Regards

Implementing resource queue in rx

I have a hot observable Observable<Resource> resources that represents consumable resources and I want to queue up consumers Action1<Resource> for these resources. A Resource can be used by at most 1 consumer. It should not be used at all once a new value is pushed from resources. If my consumers were also wrapped in a hot observable then the marble-diagram of what I'm after would be
--A--B--C--D--E--
----1----2--34---
----A----C--D-E--
----1----2--3-4--
I've managed a naive implementation using a PublishSubject and zip, but this only works if each resource is consumed before a new resource is published (i.e. instead of the required sequence [A1, C2, D3, E4] this implementation will actually produce [A1, B2, C3, D4]).
This is my first attempt at using Rx and I've had a play around with both delay and join, but can't quite seem to get what I'm after. I've also read that ideally Subjects should be avoided, but I can't see how else I would implement this.
public class ResourceQueue<Resource> {
    private final PublishSubject<Action1<Resource>> consumers = PublishSubject.create();

    public ResourceQueue(Observable<Resource> resources) {
        resources.zipWith(this.consumers, new Func2<Resource, Action1<Resource>, Object>() {
            @Override
            public Object call(Resource resource, Action1<Resource> consumer) {
                consumer.call(resource);
                return null;
            }
        }).publish().connect();
    }

    public void queue(final Action1<Resource> consumer) {
        consumers.onNext(consumer);
    }
}
Is there a way to achieve what I'm after? Is there a more 'rx-y' approach to the solution?
EDIT: replaced the withLatestFrom suggestion with combineLatest.
The only solution I can think of is to use combineLatest to get all the possible combinations, and manually exclude the ones that you do not need:
final ExecutorService executorService = Executors.newCachedThreadPool();
final Observable<String> resources = Observable.create(s -> {
    Runnable r = new Runnable() {
        @Override
        public void run() {
            final List<Integer> sleepTimes = Arrays.asList(200, 200, 200, 200, 200);
            for (int i = 0; i < sleepTimes.size(); i++) {
                try {
                    Thread.sleep(sleepTimes.get(i));
                } catch (Exception e) {
                    e.printStackTrace();
                }
                String valueOf = String.valueOf((char) (i + 97));
                System.out.println("new resource " + valueOf);
                s.onNext(valueOf);
            }
            s.onCompleted();
        }
    };
    executorService.submit(r);
});
final Observable<Integer> consumers = Observable.create(s -> {
    Runnable r = new Runnable() {
        @Override
        public void run() {
            final List<Integer> sleepTimes = Arrays.asList(300, 400, 200, 0);
            for (int i = 0; i < sleepTimes.size(); i++) {
                try {
                    Thread.sleep(sleepTimes.get(i));
                } catch (Exception e) {
                    e.printStackTrace();
                }
                System.out.println("new consumer " + (i + 1));
                s.onNext(i + 1);
            }
            s.onCompleted();
        }
    };
    executorService.submit(r);
});
final LatestValues latestValues = new LatestValues();
final Observable<String> combineLatest = Observable.combineLatest(consumers, resources, (c, r) -> {
    if (latestValues.alreadyProcessedAnyOf(c, r)) {
        return "";
    }
    System.out.println("consumer " + c + " will consume resource " + r);
    latestValues.updateWithValues(c, r);
    return c + "_" + r;
});
combineLatest.subscribe();
executorService.shutdown();
executorService.awaitTermination(10, TimeUnit.SECONDS);
The class holding the latest consumers and resources.
static class LatestValues {
    Integer latestConsumer = Integer.MAX_VALUE;
    String latestResource = "";

    public boolean alreadyProcessedAnyOf(Integer c, String r) {
        return latestConsumer.equals(c) || latestResource.equals(r);
    }

    public void updateWithValues(Integer c, String r) {
        latestConsumer = c;
        latestResource = r;
    }
}
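Since excluded combinations are emitted as empty strings, a subscriber that only wants the real consumer/resource pairs can simply filter them out, for example:

combineLatest
        .filter(pair -> !pair.isEmpty()) // drop the "" markers for excluded combinations
        .subscribe(System.out::println); // prints pairs such as "1_a"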

How to implement boolean retrieval using HitCollector in the scenario below

I am running my code on TREC documents and have implemented a scoring scheme to get the number of relevant documents. However, now I want to implement boolean retrieval, and I am trying to use a HitCollector.
Below is my code:
public class BatchSearch {

    private BatchSearch() {}

    /** Simple command-line based search demo. */
    public static void main(String[] args) throws Exception {
        String usage =
                "Usage:\tjava BatchSearch [-index dir] [-simfn similarity] [-field f] [-queries file]";
        if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
            System.out.println(usage);
            System.out.println("Supported similarity functions:\ndefault: DefaultSimilarity (tfidf)\n");
            System.exit(0);
        }
        String index = "index";
        String field = "contents";
        String queries = null;
        String simstring = "default";
        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i])) {
                index = args[i + 1];
                i++;
            } else if ("-field".equals(args[i])) {
                field = args[i + 1];
                i++;
            } else if ("-queries".equals(args[i])) {
                queries = args[i + 1];
                i++;
            } else if ("-simfn".equals(args[i])) {
                simstring = args[i + 1];
                i++;
            }
        }
        Similarity simfn = null;
        if ("default".equals(simstring)) {
            simfn = new DefaultSimilarity();
        } else if ("bm25".equals(simstring)) {
            simfn = new BM25Similarity();
        } else if ("dfr".equals(simstring)) {
            simfn = new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2());
        } else if ("lm".equals(simstring)) {
            simfn = new LMDirichletSimilarity();
        }
        if (simfn == null) {
            System.out.println(usage);
            System.out.println("Supported similarity functions:\ndefault: DefaultSimilarity (tfidf)");
            System.out.println("bm25: BM25Similarity (standard parameters)");
            System.out.println("dfr: Divergence from Randomness model (PL2 variant)");
            System.out.println("lm: Language model, Dirichlet smoothing");
            System.exit(0);
        }
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(simfn);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
        BufferedReader in = null;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(new FileInputStream("queries"), "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_41, field, analyzer);
        while (true) {
            String line = in.readLine();
            if (line == null) {
                break;
            }
            line = line.trim();
            if (line.length() == 0) {
                break;
            }
            String[] pair = line.split(" ", 2);
            Query query = parser.parse(pair[1]);
            doBatchSearch(in, searcher, pair[0], query, simstring);
        }
        reader.close();
    }

    /**
     * This function performs a top-1000 search for the query as a basic TREC run.
     */
    public static void doBatchSearch(BufferedReader in, IndexSearcher searcher, String qid, Query query, String runtag)
            throws IOException {
        // Collect enough docs to show 5 pages
        TopDocs results = searcher.search(query, 1000);
        ScoreDoc[] hits = results.scoreDocs;
        HashMap<String, String> seen = new HashMap<String, String>(1000);
        int numTotalHits = results.totalHits;
        int start = 0;
        int end = Math.min(numTotalHits, 1000);
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            String docno = doc.get("docno");
            // There are duplicate document numbers in the FR collection, so only output a given
            // docno once.
            if (seen.containsKey(docno)) {
                continue;
            }
            seen.put(docno, docno);
            System.out.println(qid + " Q0 " + docno + " " + i + " " + hits[i].score + " " + runtag);
        }
    }
}
The scoring is done in doBatchSearch, and now I want to implement a HitCollector here.
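For reference, in recent Lucene versions (including the 4.1 API used above) the old HitCollector has been replaced by Collector, so a boolean (unscored) run can be expressed as a custom Collector. A minimal sketch; the class name BooleanCollector and the BitSet representation are illustrative choices, not part of the original code:

import java.io.IOException;
import java.util.BitSet;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

public class BooleanCollector extends Collector {
    private final BitSet hits = new BitSet();
    private int docBase;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // scores are ignored: in boolean retrieval a document either matches or it does not
    }

    @Override
    public void collect(int doc) throws IOException {
        hits.set(docBase + doc); // record the index-wide document id
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        docBase = context.docBase; // collect() receives ids relative to each segment
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true; // order is irrelevant for an unranked result set
    }

    public BitSet getHits() {
        return hits;
    }
}

Instead of the top-1000 call in doBatchSearch, the searcher would be invoked as searcher.search(query, collector), and the matching docnos read back from the returned BitSet.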