Esper lossless event processing - complex-event-processing

I'm evaluating Esper as a system for lossless processing of billing data. The system is expected to handle ~20000 events per second and run ~400 statements with continuous aggregation (without storing events in memory).
To reach the expected performance I started sending events from multiple threads and found that Esper often loses data.
Here is a simple example that shows the data loss:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import com.espertech.esper.client.Configuration;
import com.espertech.esper.client.EPAdministrator;
import com.espertech.esper.client.EPRuntime;
import com.espertech.esper.client.EPServiceProvider;
import com.espertech.esper.client.EPServiceProviderManager;
import com.espertech.esper.client.EPStatement;

public class Example {

    public static void main(String[] args) throws Exception {
        new Example().run();
    }

    public void run() throws Exception {
        // use the default configuration
        Configuration config = new Configuration();
        EPServiceProvider epService = EPServiceProviderManager.getDefaultProvider(config);
        EPAdministrator epAdministrator = epService.getEPAdministrator();
        // simple schema
        epAdministrator.getConfiguration().addEventType(LogLine.class);
        // event for terminating a context partition
        createEPL(epAdministrator, "create schema TerminateEvent()");
        // start a context partition on LogLine and terminate it on TerminateEvent
        createEPL(epAdministrator, "create context InitCtx start LogLine end TerminateEvent");
        // statement that aggregates count(*) and sum(bytes) per context partition
        EPStatement statement = createEPL(epAdministrator, "context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated");
        // register a listener that prints all property values of the new events
        statement.addListener((newEvents, oldEvents) -> {
            String resultEvents = Arrays.stream(newEvents).map((event) -> {
                return Arrays.stream(event.getEventType().getPropertyNames()).map((prop) -> {
                    return prop + "=" + event.get(prop);
                }).collect(Collectors.joining(", "));
            }).collect(Collectors.joining("]; ["));
            System.out.println("=== results: [" + resultEvents + "]");
        });
        // let's use 4 threads for sending data
        ExecutorService myexecutor = Executors.newFixedThreadPool(4);
        List<CompletableFuture<Void>> listOfTasks = new ArrayList<>();
        // get the data to be processed
        List<LogLine> list = getData();
        for (int i = 1; i <= list.size(); i++) {
            // concurrently send each logline
            final LogLine logLine = list.get(i - 1);
            CompletableFuture<Void> task = CompletableFuture.runAsync(() -> {
                epService.getEPRuntime().sendEvent(logLine);
                System.out.println("== sending data " + logLine);
            }, myexecutor);
            listOfTasks.add(task);
            if (i % 4 == 0) {
                // terminate the context partition after every 4 events
                sendTerminateEvent(listOfTasks, epService.getEPRuntime());
            }
        }
        // terminate the context partition at the end of the execution
        sendTerminateEvent(listOfTasks, epService.getEPRuntime());
        // shut down all services
        myexecutor.shutdown();
        epService.destroy();
    }

    private void sendTerminateEvent(List<CompletableFuture<Void>> listOfTasks, EPRuntime epRuntime) throws Exception {
        // wait for all submitted tasks to finish
        CompletableFuture[] array = listOfTasks.toArray(new CompletableFuture[listOfTasks.size()]);
        CompletableFuture.allOf(array).get(1, TimeUnit.MINUTES);
        listOfTasks.clear();
        System.out.println("== sending terminate event.");
        // send the partition-terminating event
        epRuntime.sendEvent(Collections.emptyMap(), "TerminateEvent");
    }

    private List<LogLine> getData() {
        List<LogLine> dataEventsList = new ArrayList<>();
        dataEventsList.add(new LogLine(0, 1));
        dataEventsList.add(new LogLine(0, 2));
        dataEventsList.add(new LogLine(0, 3));
        dataEventsList.add(new LogLine(0, 4));
        dataEventsList.add(new LogLine(0, 5));
        dataEventsList.add(new LogLine(1, 1));
        dataEventsList.add(new LogLine(1, 2));
        dataEventsList.add(new LogLine(1, 3));
        dataEventsList.add(new LogLine(1, 4));
        dataEventsList.add(new LogLine(1, 5));
        return dataEventsList;
    }

    private EPStatement createEPL(EPAdministrator admin, String statement) {
        System.out.println("creating EPL: " + statement);
        return admin.createEPL(statement);
    }

    public static class LogLine {
        int account_id;
        int bytes;

        public LogLine(int account_id, int bytes) {
            this.account_id = account_id;
            this.bytes = bytes;
        }

        public int getAccount_id() {
            return account_id;
        }

        public int getBytes() {
            return bytes;
        }

        @Override
        public String toString() {
            return "[account_id=" + account_id + ", bytes=" + bytes + "]";
        }
    }
}
Execution output:
creating EPL: create schema TerminateEvent()
creating EPL: create context InitCtx start LogLine end TerminateEvent
creating EPL: context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated
== data [account_id=0, bytes=3] was send
== data [account_id=0, bytes=1] was send
== data [account_id=0, bytes=4] was send
== data [account_id=0, bytes=2] was send
== sending terminate event.
=== results: [partition_id=0, count(*)=4, sum(bytes)=10]
== data [account_id=1, bytes=2] was send
== data [account_id=1, bytes=3] was send
== data [account_id=0, bytes=5] was send
== data [account_id=1, bytes=1] was send
== sending terminate event.
=== results: [partition_id=1, count(*)=2, sum(bytes)=6]
== data [account_id=1, bytes=5] was send
== data [account_id=1, bytes=4] was send
== sending terminate event.
=== results: [partition_id=2, count(*)=1, sum(bytes)=4]
The results for the first partition are correct; the next two partitions output invalid results:
// OK
actual [partition_id=0, count(*)=4, sum(bytes)=10]
expected [partition_id=0, count(*)=4, sum(bytes)=10]
// LOSS
actual [partition_id=1, count(*)=2, sum(bytes)=6]
expected [partition_id=1, count(*)=4, sum(bytes)=11]
// LOSS
actual [partition_id=2, count(*)=1, sum(bytes)=4]
expected [partition_id=2, count(*)=2, sum(bytes)=9]
What's wrong with this example code?
Enabling prioritized execution order didn't help either:
creating EPL: create schema TerminateEvent()
creating EPL: @Priority(1) create context InitCtx start LogLine end TerminateEvent
creating EPL: @Priority(0) context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated
== data [account_id=0, bytes=3] was send
== data [account_id=0, bytes=4] was send
== data [account_id=0, bytes=1] was send
== data [account_id=0, bytes=2] was send
== sending terminate event.
=== results: [partition_id=0, count(*)=4, sum(bytes)=10]
== data [account_id=1, bytes=2] was send
== data [account_id=1, bytes=3] was send
== data [account_id=0, bytes=5] was send
== data [account_id=1, bytes=1] was send
== sending terminate event.
=== results: [partition_id=1, count(*)=2, sum(bytes)=6]
== data [account_id=1, bytes=5] was send
== data [account_id=1, bytes=4] was send
== sending terminate event.
=== results: [partition_id=2, count(*)=1, sum(bytes)=4]

This question is a more elaborate DUPLICATE of Esper data loss when inbound threading is enabled
If your Esper EPL requires ordered execution, you must develop your code so that it processes events in an ordered fashion. Esper cannot magically enforce some ordering: the JVM can pause any thread at any time, so you must design your code accordingly.
For example, let's assume you have two kinds of events: A events can be processed in parallel, while B events must be processed exactly in the order provided in the example below.
Let's say events come in and you want B to be processed after A1 and A2 but before A3 and A4:
A1 A2 B1 A3 A4
If you just add all A and B events to a queue backed by a thread pool with, say, 5 threads, B can be processed first, in the middle, or last. Every run can give a different result, as the JVM does not enforce an order. Esper cannot enforce an order either, because your application drives Esper and not the other way around.
What you can do, for example, is add the first set of A events (A1, A2) to a queue. When B comes in, wait for the queue to empty, then add B to the queue and wait for B to complete. Then add the next set of A events (A3, A4) to the queue. Thus you achieve ordered processing with respect to A and B, with all A events being processed in parallel.
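To make that concrete, here is a minimal Java sketch of the batch-and-barrier idea (the helper name sendBatchThenBarrier and the use of Esper's EPRuntime are my own illustration, not code from the question): the order-insensitive A events of one batch are submitted to a pool, the caller waits for the whole batch to drain, and only then sends the order-sensitive B event itself.
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.espertech.esper.client.EPRuntime;

public class OrderedSendSketch {
    private final ExecutorService pool = Executors.newFixedThreadPool(4);

    // Sends one batch of order-insensitive "A" events in parallel, then blocks
    // until they are all processed before the caller sends the "B" event itself.
    void sendBatchThenBarrier(EPRuntime runtime, List<?> aEvents, Object bEvent) throws Exception {
        List<Future<?>> inFlight = new ArrayList<>();
        for (Object a : aEvents) {
            inFlight.add(pool.submit(() -> runtime.sendEvent(a)));
        }
        for (Future<?> f : inFlight) {
            f.get(); // barrier: every A event of this batch has been fully processed
        }
        runtime.sendEvent(bEvent); // B now runs after A1..An and before the next batch
    }
}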
CORRECTION:
I see now that you only have one event type and no A+B. In that case, make sure you are running the most recent version. Also make sure that "create context" does not get a lower priority, otherwise context partitions get created last. I have run your code around 10 times and did not see invalid output with 7.1.0. I'm on JDK 1.8.0_121 (Oracle).
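As a reminder, @Priority annotations are only honored when prioritized execution is enabled in the engine defaults. The snippet below is a sketch of that setup, assuming the same Esper 7.x client API and reusing the Example.LogLine bean from the question; it is an illustration, not a confirmed fix.
import com.espertech.esper.client.Configuration;
import com.espertech.esper.client.EPAdministrator;
import com.espertech.esper.client.EPServiceProvider;
import com.espertech.esper.client.EPServiceProviderManager;

public class PrioritySetupSketch {
    public static void main(String[] args) {
        Configuration config = new Configuration();
        // @Priority is ignored unless prioritized execution is switched on
        config.getEngineDefaults().getExecution().setPrioritized(true);
        config.addEventType(Example.LogLine.class); // LogLine bean from the question above

        EPServiceProvider epService = EPServiceProviderManager.getDefaultProvider(config);
        EPAdministrator admin = epService.getEPAdministrator();
        admin.createEPL("create schema TerminateEvent()");
        // the context-creating statement must not get the lower priority,
        // otherwise partitions are created after the event has already been routed
        admin.createEPL("@Priority(1) create context InitCtx start LogLine end TerminateEvent");
        admin.createEPL("@Priority(0) context InitCtx select context.id as partition_id, count(*), sum(bytes) "
                + "from LogLine output last when terminated");
    }
}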

Related

Kafka: KafkaConsumer not able to pull all records

I'm pretty new to Kafka.
For the purpose of stress-testing my cluster and building operational experience, I created two simple Java applications: one that repeatedly publishes messages to a topic (a sequence of integers) and another that loads the entire topic (all records) and verifies that the sequence is complete. The expectation is that no messages get lost due to operations on the cluster (restarting a node, replacing a node, topic partition reconfigurations, etc.).
The topic "sequence" has two partitions and a replication factor of 3. The cluster is made of 3 virtual nodes (it's for testing purposes, hence they run on the same machine). The topic is configured to retain all messages (retention.ms set to -1).
I currently have two issues that I have difficulty figuring out:
If I use bin/kafka-console-consumer.sh --bootstrap-server kafka-test-server:9090,kafka-test-server:9091,kafka-test-server:9092 --topic sequence --from-beginning I see ALL messages (even though not ordered, as expected) loaded in the console. On the other hand, if I use the consumer application that I wrote, I see different results being loaded at each cycle: https://i.stack.imgur.com/tMK10.png - In the console output, the first line after the divider is a call to records.partitions(), so records are only sometimes pulled from both partitions. Why, and why is the Java app not behaving like bin/kafka-console-consumer.sh?
When the topic gets too big, bin/kafka-console-consumer.sh is still able to show all messages, while the application is able to load only about 18'000 messages. I have tried playing around with the various consumer-side configurations, with no progress. Again, the question is: why is there a difference?
Thank you in advance for any hint!
Here, for reference, are the two apps discussed:
package ch.demo.toys;

import java.io.FileInputStream;
import java.util.Properties;
import java.util.concurrent.Future;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class SequenceProducer {
    public static void main(String[] args) throws Exception {
        Properties properties = new Properties();
        properties.load(new FileInputStream("toy.properties"));
        properties.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("acks", "1");
        properties.put("retries", "3");
        properties.put("compression.type", "snappy");
        properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 1);

        for (Integer sequence_i = 0; true; sequence_i++) {
            try (Producer<Integer, String> producer = new KafkaProducer<>(properties)) {
                ProducerRecord<Integer, String> record = new ProducerRecord<>("sequence", sequence_i, "Sequence number: " + String.valueOf(sequence_i));
                Future<RecordMetadata> sendFuture = producer.send(record, (metadata, exception) -> {
                    System.out.println("Adding " + record.key() + " to partition " + metadata.partition());
                    if (exception != null) {
                        exception.printStackTrace();
                    }
                });
            }
            Thread.sleep(200);
        }
    }
}
package ch.demo.toys;

import java.io.FileInputStream;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class CarthusianConsumer {

    private static Properties getProperties() throws Exception {
        Properties properties = new Properties();
        properties.load(new FileInputStream("toy.properties"));
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.IntegerDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
        properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, Integer.MAX_VALUE);
        properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 60 * 1000);
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "carthusian-consumer");
        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, 60 * 1000);
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG, 1024 * 1024 * 1024);
        return properties;
    }

    private static boolean checkConsistency(List<Integer> sequence) {
        Collections.sort(sequence);
        Iterator<Integer> iterator = sequence.iterator();
        int control = 0;
        while (iterator.hasNext()) {
            int value = iterator.next();
            if (value != control) {
                System.out.println("");
                System.out.println("Gap found:");
                System.out.println("\tSequence: " + value);
                System.out.println("\tControl: " + control);
                return false;
            }
            control++;
        }
        System.out.print(".");
        return true;
    }

    public static void main(String[] args) throws Exception {
        // Step 1: create a base consumer object
        Consumer<Integer, String> consumer = new KafkaConsumer<>(getProperties());
        // Step 2: load topic configuration and build list of TopicPartitions
        List<TopicPartition> topicPartitions = consumer
            .partitionsFor("sequence")
            .stream()
            .parallel()
            .map(partitionInfo -> new TopicPartition(partitionInfo.topic(), partitionInfo.partition()))
            .collect(Collectors.toList());

        while (true) {
            List<Integer> sequence = new ArrayList<>();
            for (TopicPartition topicPartition : topicPartitions) {
                // Step 3. specify the topic-partition to "read" from
                // System.out.println("Partition specified: " + topicPartition);
                consumer.assign(Arrays.asList(topicPartition));
                // Step 4. set offset at the beginning
                consumer.seekToBeginning(Arrays.asList(topicPartition));
                // Step 5. get all records from topic-partition
                ConsumerRecords<Integer, String> records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE));
                // System.out.println("\tCount: " + records.count());
                // System.out.println("\tPartitions: " + records.partitions());
                records.forEach(record -> { sequence.add(record.key()); });
            }
            System.out.println(sequence.size());
            checkConsistency(sequence);
            Thread.sleep(2500);
        }
    }
}
Thank you Mickael-Maison, here is my answer:
On the producer: thank you for the comment. I admit I took the example from the book and modified it directly without performance considerations.
On the consumer: as mentioned in the comments above, subscription was the first approach I attempted, which unfortunately yielded the same result described in my question: results from individual partitions, and only rarely from both partitions in the same call. I'd also love to understand the reason for this apparently random behavior!
More on the consumer: I rewind to the beginning of the topic at every cycle because the purpose is to continuously verify that the sequence did not break (hence no messages were lost). At every cycle I load all the messages and check them.
Because the single call based on topic subscription yielded apparently random behavior (it was unclear when the full content of the topic would be returned), I had to read from each individual partition and join the lists of records manually before checking them - which is not what I wanted to do initially!
Are my approaches wrong?
There are a few things you should change in your clients' logic.
Producer:
You are creating a new producer for each record you send. This is terrible in terms of performance, as each producer first has to bootstrap before sending a record. Also, since each producer sends only a single record, no batching happens. Finally, compression on a single record is also nonexistent.
You should first create a Producer and use it to send all records, i.e. move the creation out of the loop, something like:
try (Producer<Integer, String> producer = new KafkaProducer<>(properties)) {
    for (int sequence_i = 18310; true; sequence_i++) {
        ProducerRecord<Integer, String> record = new ProducerRecord<>("sequence", sequence_i, "Sequence number: " + String.valueOf(sequence_i));
        producer.send(record, (metadata, exception) -> {
            System.out.println("Adding " + record.key() + " to partition " + metadata.partition());
            if (exception != null) {
                exception.printStackTrace();
            }
        });
        Thread.sleep(200L);
    }
}
Consumer:
At every iteration of the for loop, you change the assignment and seek back to the beginning of the partition, so at best you will reconsume the same messages every time!
To begin, you should probably use the subscribe() API (like kafka-console-consumer.sh), so you don't have to fiddle with partitions. For example:
try (Consumer<Integer, String> consumer = new KafkaConsumer<>(properties)) {
    consumer.subscribe(Collections.singletonList("topic"));
    while (true) {
        List<Integer> sequence = new ArrayList<>();
        ConsumerRecords<Integer, String> records = consumer.poll(Duration.ofSeconds(1L));
        records.forEach(record -> {
            sequence.add(record.key());
        });
        System.out.println(sequence.size());
        checkConsistency(sequence);
        Thread.sleep(2500L);
    }
}

Async request/response in Proto.Actor?

I'm new to Proto.Actor/actor programming and I'm wondering whether it's possible to achieve this behavior:
Actor A asks actor B via an async command; it should await the response to achieve a request/response model, but using tasks.
Actor B performs an HTTP request, so it is an async IO operation, and I don't want B to be blocked for other actors during that time. When 10 actors ask it at the same time, each request is queued, but while the first request is waiting to be processed the second should get a chance to proceed. Once the first request finishes, it should have priority in the queue and its response should go back to actor A.
How can I get this flow?
For example, I have 3 clients that ask the service for some data; the service call takes 5 seconds and most of that time the service spends in IO. With the current implementation all requests take 15 seconds in total, but I would like it to take ~5-6 seconds.
public static class ProtoTest
{
    public static PID Service;

    public static async Task Start()
    {
        var context = new RootContext();
        var props = Props.FromProducer(() => new ClientActor());
        var serviceProps = Props.FromProducer(() => new ServiceActor());
        Service = context.Spawn(serviceProps);
        var jobs = new List<Task>();
        for (int i = 0; i < 3; i++)
        {
            string actorName = $"Actor_{i}";
            jobs.Add(Task.Run(() =>
            {
                var client = context.SpawnNamed(props, actorName);
                context.Send(client, new Command());
            }));
        }
        Console.ReadLine();
    }
}

public class ClientActor : IActor
{
    public virtual async Task ReceiveAsync(IContext context)
    {
        if (context.Message is Command)
        {
            Console.WriteLine($"{DateTime.Now.ToLongTimeString()} START processing by {context.Self.Id}");
            var result = await context.RequestAsync<string>(ProtoTest.Service, new Query());
            Console.WriteLine($"{DateTime.Now.ToLongTimeString()} End processing by {context.Self.Id}");
        }
        return;
    }
}

public class ServiceActor : IActor
{
    public async virtual Task ReceiveAsync(IContext context)
    {
        if (context.Message is Query)
        {
            // this operation is taking long time so actor could handle others in this time
            await Task.Delay(5000);
            context.Respond("result");
        }
        return;
    }
}
One of the core principles of an actor is that it does not perform multiple operations in parallel. If I understand your problem correctly, what you can do instead is create a new actor for each operation that you want to run in parallel (actors are cheap, so creating many is not an issue). So if actor A needs to send N commands to be processed asynchronously and receive each result as it comes in, it could spawn N actors B1, B2, ..., Bn (one for each command) and send a Request to each of them. The B actors await the result and then Respond back to actor A. Each response is then delivered as a message to actor A's mailbox and processed sequentially in the order the operations complete.
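This is not Proto.Actor API, but a rough plain-Java analogy of that layout (a BlockingQueue standing in for actor A's mailbox, one async task per command; all names here are hypothetical) makes the timing point visible: three 5-second calls complete in roughly 5 seconds overall, while the results are still consumed one at a time.
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class FanOutSketch {
    public static void main(String[] args) throws Exception {
        int commands = 3;
        ExecutorService workers = Executors.newFixedThreadPool(commands);
        // stands in for actor A's mailbox: responses are consumed strictly one at a time
        BlockingQueue<String> mailbox = new ArrayBlockingQueue<>(commands);
        long start = System.currentTimeMillis();
        for (int i = 0; i < commands; i++) {
            final int id = i;
            // stands in for one short-lived "B" actor spawned per command
            CompletableFuture.runAsync(() -> {
                try {
                    Thread.sleep(5000);           // the slow IO-bound service call
                    mailbox.put("result " + id);  // "Respond" back to A's mailbox
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }, workers);
        }
        for (int i = 0; i < commands; i++) {
            // A processes each response sequentially, in completion order
            System.out.println(mailbox.take());
        }
        System.out.println("took ~" + (System.currentTimeMillis() - start) + " ms");
        workers.shutdown();
    }
}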

How to process all events emitted by RX Java regardless of error?

I'm using the vertx.io web framework to send a list of items to a downstream HTTP server.
records.records() emits 4 records, and I have deliberately set the web client to connect to the wrong IP/port.
Processing... prints 4 times.
Exception outer! prints 3 times.
If I put back the proper IP/port, then Subscribe outer! prints 4 times.
io.reactivex.Flowable
    .fromIterable(records.records())
    .flatMap(inRecord -> {
        System.out.println("Processing...");
        // Do stuff here....
        Observable<Buffer> bodyBuffer = Observable.just(Buffer.buffer(...));
        Single<HttpResponse<Buffer>> request = client
            .post(..., ..., ...)
            .rxSendStream(bodyBuffer);
        return request.toFlowable();
    })
    .subscribe(record -> {
        System.out.println("Subscribe outer!");
    }, ex -> {
        System.out.println("Exception outer! " + ex.getMessage());
    });
UPDATE:
I now understand that on error RX stops right away. Is there a way to continue and process all records regardless, and get an error for each?
Given this article: https://medium.com/@jagsaund/5-not-so-obvious-things-about-rxjava-c388bd19efbc
I have come up with this... Unless you see something wrong with this?
io.reactivex.Flowable
    .fromIterable(records.records())
    .flatMap(inRecord -> {
        Observable<Buffer> bodyBuffer = Observable.just(Buffer.buffer(inRecord.toString()));
        Single<HttpResponse<Buffer>> request = client
            .post("xxxxxx", "xxxxxx", "xxxxxx")
            .rxSendStream(bodyBuffer);
        // So we can capture how long each request took.
        final long startTime = System.currentTimeMillis();
        return request.toFlowable()
            .doOnNext(response -> {
                // Capture total time and print it with the logs. Removed below for brevity.
                long processTimeMs = System.currentTimeMillis() - startTime;
                int status = response.statusCode();
                if (status == 200)
                    logger.info("Success!");
                else
                    logger.error("Failed!");
            }).doOnError(ex -> {
                long processTimeMs = System.currentTimeMillis() - startTime;
                logger.error("Failed! Exception.", ex);
            }).doOnTerminate(() -> {
                // Do some extra stuff here...
            }).onErrorResumeNext(Flowable.empty()); // This will allow us to continue.
    })
    .subscribe(); // Don't handle here. We subscribe to the inner events.
Is there a way to continue and process all records regardless and get an error for each?
According to the docs, the observable is terminated once it encounters an error, so you can't get each individual error in onError.
You can use onErrorReturn or onErrorResumeNext() on the inner stream to tell it what to do when it encounters an error (e.g. emit a default value or continue with Flowable.empty()).
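A minimal sketch of that per-record handling (callService is a hypothetical stand-in for the rxSendStream() HTTP call; the rest is plain RxJava 2, as in the question's update):
import java.util.Arrays;

import io.reactivex.Flowable;
import io.reactivex.Single;

public class PerItemErrorsSketch {
    // hypothetical stand-in for the real HTTP request
    static Single<String> callService(String record) {
        return Single.fromCallable(() -> {
            if (record.startsWith("bad")) {
                throw new IllegalStateException("connection refused");
            }
            return "response for " + record;
        });
    }

    public static void main(String[] args) {
        Flowable.fromIterable(Arrays.asList("a", "bad-b", "c", "d"))
                .flatMap(record -> callService(record)
                        .toFlowable()
                        .doOnNext(resp -> System.out.println("OK: " + resp))
                        // handle the per-record error here; the outer subscriber's onError
                        // would terminate the whole stream
                        .doOnError(ex -> System.out.println("Failed: " + record + " -> " + ex.getMessage()))
                        // swallow the error so the remaining records still get processed
                        .onErrorResumeNext(Flowable.empty()))
                .subscribe();
    }
}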

Unique Transactional IDs for Kafka Producer in distributed running mode

I have a big data application that is based on the process Consume -> Process -> Produce. I am using Kafka in my ingestion pipeline and I am using the transactional producer for producing messages. All pieces of my application run fine; however, there is a small problem in generating the IDs for the transactional producer. Scenario:
Say my application is running on one machine and I instantiate 2 consumers, each of which has its own producer; so, for example, let's say:
Producer 1 has the transactional ID -> Consumer-0-Producer
Producer 2 has the transactional ID -> Consumer-1-Producer
Now transactions initiated by these two producers will not interfere with each other, which is what I want. The pseudo-code looks something like this:
ExecutorService executorService; // responsible for starting my consumers
for (int i = 0; i < 2; i++) {
    prod_trans_id = "consumer-" + str(i) + "-producer"
    Custom_Consumer consumer = new Custom_Consumer(prod_trans_id)
    executorService.submit(consumer)
}
This works perfectly fine if my application runs on a single machine. However, this is not the case, as the application needs to run on multiple machines, so when the same code runs on machine 2 the producers instantiated by the consumers there will have the same transactional IDs as on machine 1. I want transactional IDs to be generated in a way that they don't conflict with one another and are also reproducible, meaning that if the application crashes/stops (say someone does service application stop and then service application start) and comes back online, it should use the same transactional IDs that were being used previously. I thought of a UUID-based approach; however, UUIDs are random and will not be the same when the application on one machine dies and comes back online.
private final static String HOSTNAME_COMMAND = "hostname";

public static String getHostName() {
    BufferedReader inputStreamReader = null;
    BufferedReader errorStreamReader = null;
    try {
        Process process = Runtime.getRuntime().exec(HOSTNAME_COMMAND);
        inputStreamReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
        errorStreamReader = new BufferedReader(new InputStreamReader(process.getErrorStream()));
        if (errorStreamReader.readLine() != null) {
            throw new RuntimeException(String.format("Failed to get the hostname, exception message: %s",
                    errorStreamReader.readLine()));
        }
        return inputStreamReader.readLine();
    } catch (IOException e) {
        try {
            if (inputStreamReader != null) {
                inputStreamReader.close();
            }
            if (errorStreamReader != null) {
                errorStreamReader.close();
            }
        } catch (IOException e1) {
            LogExceptionTrace.logExceptionStackTrace(e1);
            throw new RuntimeException(e1);
        }
        LogExceptionTrace.logExceptionStackTrace(e);
        throw new RuntimeException(e);
    }
}
And then use the hostname as follows:
final String producerTransactionalID = String.format("%s_producer", this.consumerName);
Where the consumer name is set as follows:
for (int i = 0; i < NUMBER_OF_CONSUMERS; i++) {
    String consumerName = String.format("%s-worker-%d", hostName, i);
    Executor executor = new Executor(
        Configuration, consumerName
    );
    Executors.add(executor);
    futures.add(executorService.submit(executor));
}
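As a side note, if shelling out to the hostname command is not a hard requirement, the same stable identifier can be obtained with InetAddress. The following is a sketch under that assumption, mirroring the "<host>-worker-<i>" plus "_producer" naming used above (the class and method names are mine):
import java.net.InetAddress;
import java.net.UnknownHostException;

public class TransactionalIdSketch {
    // builds a "<host>-worker-<i>_producer" style id, stable across restarts
    // because it depends only on the host name and the consumer index
    static String transactionalId(int consumerIndex) {
        try {
            String hostName = InetAddress.getLocalHost().getHostName();
            return String.format("%s-worker-%d_producer", hostName, consumerIndex);
        } catch (UnknownHostException e) {
            throw new RuntimeException("Failed to resolve local host name", e);
        }
    }

    public static void main(String[] args) {
        for (int i = 0; i < 2; i++) {
            System.out.println(transactionalId(i));
        }
    }
}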

How can I retrieve the first-completed Actor in a group of Actors in Scala?

I have a moderate number of long-running Actors and I wish to write a synchronous function that returns the first one of these that completes. I can do it with a spin-wait on futures (e.g.:
while (!fs.exists(f => f.isSet)) {
  Thread.sleep(100)
}
val completeds = fs.filter(f => f.isSet)
completeds.head()
), but that seems very "un-Actor-y"
The scala.actors.Futures class has two methods awaitAll() and awaitEither() that seem awfully close; if there were an awaitAny() I'd jump on it. Am I missing a simple way to do this or is there a common pattern that is applicable?
A more "actorish" way of waiting for completion is creating an actor in charge of handling completed result (lets call it ResultHandler)
Instead of replying, workers send their answer to ResultHandler in fire-and-forget manner. The latter will continue processing the result while other workers complete their job.
The key for me was the discovery that every (?) Scala object is, implicitly, an Actor, so you can use Actor.react{ } to block. Here is my source code:
import scala.actors._
import scala.actors.Actor._

// Top-level class that wants to return the first-completed result from some long-running actors
class ConcurrentQuerier() {
  // Synchronous function; perhaps fulfilling some legacy interface
  def synchronousQuery : String = {
    // Instantiate and start the monitoring Actor
    val progressReporter = new ProgressReporter(self) // All (?) objects are Actors
    progressReporter.start()
    // Instantiate the long-running Actors, giving each a handle to the monitor
    val lrfs = List(
      new LongRunningFunction(0, 2000, progressReporter), new LongRunningFunction(1, 2500, progressReporter), new LongRunningFunction(3, 1500, progressReporter),
      new LongRunningFunction(4, 1495, progressReporter), new LongRunningFunction(5, 1500, progressReporter), new LongRunningFunction(6, 5000, progressReporter) )
    // Start 'em
    lrfs.map{ lrf =>
      lrf.start()
    }
    println("All actors started...")
    val start = System.currentTimeMillis()
    /*
      This blocks until it receives a String in the Inbox.
      Who sends the string? A: the progressReporter, which is monitoring the LongRunningFunctions
    */
    val s = receive {
      case s:String => s
    }
    println("Received " + s + " after " + (System.currentTimeMillis() - start) + " ms")
    s
  }
}

/*
  An Actor that reacts to a message that is a tuple ("COMPLETED", someResult) and sends the
  result to this Actor's owner. Not strictly necessary (the LongRunningFunctions could post
  directly to the owner's mailbox), but I like the idea that monitoring is important enough
  to deserve its own object
*/
class ProgressReporter(val owner : Actor) extends Actor {
  def act() = {
    println("progressReporter awaiting news...")
    react {
      case ("COMPLETED", s) =>
        println("progressReporter received a completed signal " + s);
        owner ! s
      case s =>
        println("Unexpected message: " + s ); act()
    }
  }
}

/*
  Some long running function
*/
class LongRunningFunction(val id : Int, val timeout : Int, val supervisor : Actor) extends Actor {
  def act() = {
    // Do the long-running query
    val s = longRunningQuery()
    println(id.toString + " finished, sending results")
    // Send the results back to the monitoring Actor (the progressReporter)
    supervisor ! ("COMPLETED", s)
  }

  def longRunningQuery() : String = {
    println("Starting Agent " + id + " with timeout " + timeout)
    Thread.sleep(timeout)
    "Query result from agent " + id
  }
}
val cq = new ConcurrentQuerier()
//I don't think the Actor semantics guarantee that the result is absolutely, positively the first to have posted the "COMPLETED" message
println("Among the first to finish was : " + cq.synchronousQuery)
Typical results look like:
scala ActorsNoSpin.scala
progressReporter awaiting news...
All actors started...
Starting Agent 1 with timeout 2500
Starting Agent 5 with timeout 1500
Starting Agent 3 with timeout 1500
Starting Agent 4 with timeout 1495
Starting Agent 6 with timeout 5000
Starting Agent 0 with timeout 2000
4 finished, sending results
progressReporter received a completed signal Query result from agent 4
Received Query result from agent 4 after 1499 ms
Among the first to finish was : Query result from agent 4
5 finished, sending results
3 finished, sending results
0 finished, sending results
1 finished, sending results
6 finished, sending results