Understanding Kafka ZOOKEPER_AUTO_RESET - apache-kafka

I still have doubts about Kafka ZOOKEPER_AUTO_RESET. I have seen a lot of questions asked in this regard; kindly excuse me if this is a duplicate query.
I have a high-level Java consumer which keeps on consuming.
I have multiple topics, and each topic has a single partition.
My concern is the following.
I started the consumerkafka.jar with the consumer group name "ncdev1" and ZOOKEPER_AUTO_RESET = smallest. I could observe that the init offset is set to -1. Then I stopped/started the jar after some time. At this point, it picks up the latest offset assigned to the consumer group (ncdev1), i.e. 36. I restarted again after some time, and then the init offset was set to 39, which is the latest value.
Then I changed the group name to ZOOKEPER_GROUP_ID = ncdev2 and restarted the jar file; this time the offset is again set to -1. On further restarts, it jumped to the latest value, i.e. 39.
Then I set ZOOKEPER_AUTO_RESET = largest and ZOOKEPER_GROUP_ID = ncdev3 and tried restarting the jar file with group name ncdev3. There is no difference in the way it picks up the offset when it restarts: it picks 39 on restart, which is the same as with the previous configuration.
Any idea why it is not picking up the offset from the beginning? Is there any other configuration to be done to make it read from the beginning? (My understanding of largest and smallest comes from What determines Kafka consumer offset?)
Thanks in Advance
Code added:
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
// PropertyUtils used below is the asker's own property-loading helper (not shown)
public class ConsumerForKafka {
private final ConsumerConnector consumer;
private final String topic;
private ExecutorService executor;
ServerSocket soketToWrite;
Socket s_Accept ;
OutputStream s1out ;
DataOutputStream dos;
static boolean logEnabled ;
static File fileName;
private static final Logger logger = Logger.getLogger(ConsumerForKafka.class);
public ConsumerForKafka(String a_zookeeper, String a_groupId, String a_topic,String session_timeout,String auto_reset,String a_commitEnable) {
consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
createConsumerConfig(a_zookeeper, a_groupId,session_timeout,auto_reset,a_commitEnable));
this.topic =a_topic;
}
public void run(int a_numThreads,String a_zookeeper, String a_topic) throws InterruptedException, IOException {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(a_numThreads));
Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
Socket socks = new Socket(socketURL,socketPort);
//****
String keeper = a_zookeeper;
String topic = a_topic;
long millis = new java.util.Date().getTime();
//****
PrintWriter outWriter = new PrintWriter(socks.getOutputStream(), true);
List<KafkaStream<byte[], byte[]>> streams = null;
// now create an object to consume the messages
//
int threadNumber = 0;
// System.out.println("going to forTopic value is "+topic);
boolean keepRunningThread =false;
boolean chcek = false;
logger.info("logged");
BufferedWriter bw = null;
FileWriter fw = null;
if(logEnabled){
fw = new FileWriter(fileName, true);
bw = new BufferedWriter(fw);
}
for (;;) {
streams = consumerMap.get(topic);
keepRunningThread =true;
for (final KafkaStream stream : streams) {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while(keepRunningThread)
{
try{
if (it.hasNext()){
if(logEnabled){
String data = new String(it.next().message())+""+"\n";
bw.write(data);
bw.flush();
outWriter.print(data);
outWriter.flush();
consumer.commitOffsets();
logger.info("Explicit commit ......");
}else{
outWriter.print(new String(it.next().message())+""+"\n");
outWriter.flush();
}
}
// logger.info("running");
} catch(ConsumerTimeoutException ex) {
keepRunningThread =false;
break;
}catch(NullPointerException npe ){
keepRunningThread =true;
npe.printStackTrace();
}catch(IllegalStateException ile){
keepRunningThread =true;
ile.printStackTrace();
}
}
}
}
}
private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId,String session_timeout,String auto_reset,String commitEnable) {
Properties props = new Properties();
props.put("zookeeper.connect", a_zookeeper);
props.put("group.id", a_groupId);
props.put("zookeeper.session.timeout.ms", session_timeout);
props.put("zookeeper.sync.time.ms", "2000");
props.put("auto.offset.reset", auto_reset);
props.put("auto.commit.interval.ms", "60000");
props.put("consumer.timeout.ms", "30");
props.put("auto.commit.enable",commitEnable);
//props.put("rebalance.max.retries", "4");
return new ConsumerConfig(props);
}
public static void main(String[] args) throws InterruptedException {
String zooKeeper = PropertyUtils.getProperty("ZOOKEEPER_URL_PORT");
String groupId = PropertyUtils.getProperty("ZOOKEPER_GROUP_ID");
String session_timeout = PropertyUtils.getProperty("ZOOKEPER_SESSION_TIMOUT_MS"); //6400
String auto_reset = PropertyUtils.getProperty("ZOOKEPER_AUTO_RESET"); //smallest
String enableLogging = PropertyUtils.getProperty("ENABLE_LOG");
String directoryPath = PropertyUtils.getProperty("LOG_DIRECTORY");
String log4jpath = PropertyUtils.getProperty("LOG_DIR");
String commitEnable = PropertyUtils.getProperty("ZOOKEPER_COMMIT"); //false
PropertyConfigurator.configure(log4jpath);
String socketURL = PropertyUtils.getProperty("SOCKET_CONNECT_HOST");
int socketPort = Integer.parseInt(PropertyUtils.getProperty("SOCKET_CONNECT_PORT"));
try {
Socket socks = new Socket(socketURL,socketPort);
boolean connected = socks.isConnected() && !socks.isClosed();
if(connected){
//System.out.println("Able to connect ");
}else{
logger.info("Not able to conenct to socket ..Exiting...");
System.exit(0);
}
} catch (UnknownHostException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch(java.net.ConnectException cne){
logger.info("Not able to conenct to socket ..Exitring...");
System.exit(0);
}
catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// String zooKeeper = args[0];
// String groupId = args[1];
String topic = args[0];
int threads = 1;
logEnabled = Boolean.parseBoolean(enableLogging);
if(logEnabled)
createDirectory(topic,directoryPath);
ConsumerForKafka example = new ConsumerForKafka(zooKeeper, groupId, topic, session_timeout,auto_reset,commitEnable);
try {
example.run(threads,zooKeeper,topic);
} catch(java.net.ConnectException cne){
cne.printStackTrace();
System.exit(0);
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private static void createDirectory(String topic,String d_Path) {
try{
File file = new File(d_Path);
if (!file.exists()) {
if (file.mkdir()) {
logger.info("Directory Created" +file.getPath());
} else {
logger.info("Directory Creation failed");
}
}
fileName = new File(d_Path + topic + ".log");
if (!fileName.exists()) {
fileName.createNewFile();
}
}catch(IOException IOE){
//logger.info("IOException occured during Directory or During File creation ");
}
}
}

After rereading your post carefully, I think what you ran into is expected behavior.
I started the consumerkafka.jar with consumer group name "ncdev1" and ZOOKEPER_AUTO_RESET = smallest. Could observe that the init offset is set as -1. Then I stopped/started the jar after some time. At this time, it picks the latest offset assigned to the consumer group (ncdev1), i.e. 36.
auto.offset.reset only applies when there is no initial offset for the group or when the stored offset is out of range. Since you only have 36 messages in the log, it is possible for the consumer group to read all of those records very quickly; that is why you see the consumer group pick up the latest offset every time it is restarted.
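If you do want to re-read a topic from the beginning with this high-level consumer, a minimal sketch (assuming the same ZooKeeper-based consumer API used in the question; the group name is arbitrary) is to start a consumer group that has no committed offsets yet and set auto.offset.reset to smallest:
Properties props = new Properties();
props.put("zookeeper.connect", "localhost:2181"); // assumed ZooKeeper address
props.put("group.id", "ncdev-fresh");             // a group with no committed offsets yet
props.put("auto.offset.reset", "smallest");       // only takes effect because the group has no stored offset
ConsumerConnector consumer =
        kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));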

Related

How do I set Kafka to not consume from where it left off?

I have a Kafka consumer in Golang. I don't want to consume from where I left off last time, but only current messages. How can I do it?
You can set enable.auto.commit to false and auto.offset.reset to latest for your consumer group ID. This means Kafka will not automatically commit your offsets.
With auto commit disabled, your consumer group's progress is not saved (unless you commit manually). So whenever the consumer is restarted, for whatever reason, it does not find any saved progress and resets to the latest offset.
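As a Java sketch (the question is about Go, but the same configuration keys exist in librdkafka-based Go clients; the broker address and group ID below are assumptions):
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // assumed broker
props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-group");                  // assumed group id
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");           // progress is never committed
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");           // so every restart resets to the latest offset
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);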
Alternatively, set a new group.id for your consumer.
Then use auto.offset.reset to define the behavior of this new consumer group, in your case: latest.
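For example (a sketch; the group name is made up):
props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-app-" + UUID.randomUUID()); // fresh group with no committed offsets
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");             // the new group therefore starts at the end of the log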
The Apache Kafka consumer API provides a method called kafkaConsumer.seekToEnd() which can be used to ignore the existing messages and only consume messages published after the consumer has started, without changing the consumer's current group ID.
Below is an implementation of this. The program takes 3 arguments: topic name, group ID, and starting offset (0 to start from the beginning, -1 to receive only messages published after the consumer has started; any other value tells the consumer to consume from that offset).
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import java.util.*;
public class Consumer {
private static Scanner in;
public static void main(String[] argv)throws Exception{
if (argv.length != 3) {
System.err.printf("Usage: %s <topicName> <groupId> <startingOffset>\n",
Consumer.class.getSimpleName());
System.exit(-1);
}
in = new Scanner(System.in);
String topicName = argv[0];
String groupId = argv[1];
final long startingOffset = Long.parseLong(argv[2]);
ConsumerThread consumerThread = new ConsumerThread(topicName,groupId,startingOffset);
consumerThread.start();
String line = "";
while (!line.equals("exit")) {
line = in.next();
}
consumerThread.getKafkaConsumer().wakeup();
System.out.println("Stopping consumer .....");
consumerThread.join();
}
private static class ConsumerThread extends Thread{
private String topicName;
private String groupId;
private long startingOffset;
private KafkaConsumer<String,String> kafkaConsumer;
public ConsumerThread(String topicName, String groupId, long startingOffset){
this.topicName = topicName;
this.groupId = groupId;
this.startingOffset=startingOffset;
}
public void run() {
Properties configProperties = new Properties();
configProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
configProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); // String keys to match KafkaConsumer<String, String>
configProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
configProperties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
configProperties.put(ConsumerConfig.CLIENT_ID_CONFIG, "offset123");
configProperties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,false);
configProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
//Figure out where to start processing messages from
kafkaConsumer = new KafkaConsumer<String, String>(configProperties);
kafkaConsumer.subscribe(Arrays.asList(topicName), new ConsumerRebalanceListener() {
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
System.out.printf("%s topic-partitions are revoked from this consumer\n", Arrays.toString(partitions.toArray()));
}
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
System.out.printf("%s topic-partitions are assigned to this consumer\n", Arrays.toString(partitions.toArray()));
Iterator<TopicPartition> topicPartitionIterator = partitions.iterator();
while(topicPartitionIterator.hasNext()){
TopicPartition topicPartition = topicPartitionIterator.next();
System.out.println("Current offset is " + kafkaConsumer.position(topicPartition) + " committed offset is ->" + kafkaConsumer.committed(topicPartition) );
if(startingOffset == -2) {
System.out.println("Leaving it alone");
}else if(startingOffset ==0){
System.out.println("Setting offset to begining");
kafkaConsumer.seekToBeginning(topicPartition);
}else if(startingOffset == -1){
System.out.println("Setting it to the end ");
kafkaConsumer.seekToEnd(topicPartition);
}else {
System.out.println("Resetting offset to " + startingOffset);
kafkaConsumer.seek(topicPartition, startingOffset);
}
}
}
});
//Start processing messages
try {
while (true) {
ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.value());
}
if(startingOffset == -2)
kafkaConsumer.commitSync();
}
}catch(WakeupException ex){
System.out.println("Exception caught " + ex.getMessage());
}finally{
kafkaConsumer.close();
System.out.println("After closing KafkaConsumer");
}
}
public KafkaConsumer<String,String> getKafkaConsumer(){
return this.kafkaConsumer;
}
}
}

Why am I getting a FailedToSendMessageException when sending a message for the first time?

When sending a message for the first time I get the exception, but the second message sends properly.
Here is my Producer Configuration:
public void Init()
{
_logger.info("Initializing KAFKA server context...");
Properties ProducerProperties = new Properties();
ProducerProperties.put("metadata.broker.list", "localhost:9092");
ProducerProperties.put("serializer.class", "kafka.serializer.StringEncoder");
ProducerProperties.put("producer.type", "async");
_kafkaProducerConfig = new ProducerConfig(ProducerProperties);
_logger.info("KAFKA configuration done ...!!");
}
I am sending Message using this method:
public Event Send(WSSession ws, JSONObject obj) throws JSONException
{
String roomId = obj.getString("RoomId");
Room room = _roomList.get(roomId);
String message = "";
if (_hmRooms.containsKey(room.getRoomId()))
{
String msg = new KafkaMessage(obj.getString("ReqId"), obj.getString("ReqType"), ws.getUser().getTaskName(), obj.getString("Message")).toString();
kafka.javaapi.producer.Producer<String, String> producer = new kafka.javaapi.producer.Producer<String, String>(_kafkaProducerConfig);
KeyedMessage<String, String> km = new KeyedMessage<String, String>(room.getRoomId(), msg);
producer.send(km);
producer.close();
message = "sent";
}
else
{
message = "failled";
}
EventSuccess evt = new EventSuccess(obj);
evt.setMessage(message);
return evt;
}

Searching for a very simple EsperIO Kafka example

I'm desperately looking for example code for an Esper CEP Kafka adapter. I've already installed Kafka and written data to a Kafka topic using a producer, and now I want to process it with Esper CEP. Unfortunately, the Esper documentation for the Kafka adapter is not very helpful. Does anyone have a very simple example?
Edit:
So far I have added an adapter and it seems to work. However, I don't know how to read from the adapter, nor how to link a CEP pattern to it. This is my code so far:
config.addImport(KafkaOutputDefault.class);
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
props.put(ConsumerConfig.GROUP_ID_CONFIG, "group.id");
props.put(EsperIOKafkaConfig.INPUT_SUBSCRIBER_CONFIG, EsperIOKafkaInputSubscriberByTopicList.class.getName());
props.put(EsperIOKafkaConfig.TOPICS_CONFIG, "test123");
props.put(EsperIOKafkaConfig.INPUT_PROCESSOR_CONFIG, EsperIOKafkaInputProcessorDefault.class.getName());
props.put(EsperIOKafkaConfig.INPUT_TIMESTAMPEXTRACTOR_CONFIG, EsperIOKafkaInputTimestampExtractorConsumerRecord.class.getName());
Configuration config2 = new Configuration();
config2.addPluginLoader("KafkaInput", EsperIOKafkaInputAdapterPlugin.class.getName(), props, null);
EsperIOKafkaInputAdapter adapter = new EsperIOKafkaInputAdapter(props, "default");
adapter.start();
I've had the same problem. I created a sample project you could have a look at, especially the plain-esper branch.
An even more simplified version would be:
public class KafkaExample implements Runnable {
private String runtimeURI;
public KafkaExample(String runtimeURI) {
this.runtimeURI = runtimeURI;
}
public static void main(String[] args){
new KafkaExample("KafkaExample").run();
}
@Override
public void run() {
Configuration configuration = new Configuration();
configuration.getCommon().addImport(KafkaOutputDefault.class);
configuration.getCommon().addEventType(String.class);
Properties consumerProps = new Properties();
// Kafka Consumer Properties
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.EARLIEST.toString().toLowerCase());
// EsperIO Kafka Input Adapter Properties
consumerProps.put(EsperIOKafkaConfig.INPUT_SUBSCRIBER_CONFIG, Consumer.class.getName());
consumerProps.put(EsperIOKafkaConfig.INPUT_PROCESSOR_CONFIG, InputProcessor.class.getName());
consumerProps.put(EsperIOKafkaConfig.INPUT_TIMESTAMPEXTRACTOR_CONFIG, EsperIOKafkaInputTimestampExtractorConsumerRecord.class.getName());
configuration.getRuntime().addPluginLoader("KafkaInput", EsperIOKafkaInputAdapterPlugin.class.getName(), consumerProps, null);
String stmt = "#name('sampleQuery') select * from String";
EPCompiled compiled;
try {
compiled = EPCompilerProvider.getCompiler().compile(stmt, new CompilerArguments(configuration));
} catch (EPCompileException ex) {
throw new RuntimeException(ex);
}
EPRuntime runtime = EPRuntimeProvider.getRuntime(runtimeURI, configuration);
EPDeployment deployment;
try {
deployment = runtime.getDeploymentService().deploy(compiled, new DeploymentOptions().setDeploymentId(UUID.randomUUID().toString()));
} catch (EPDeployException ex) {
throw new RuntimeException(ex);
}
EPStatement statement = runtime.getDeploymentService().getStatement(deployment.getDeploymentId(), "sampleQuery");
statement.addListener((newData, oldData, sta, run) -> {
for (EventBean nd : newData) {
System.out.println(nd.getUnderlying());
}
});
while (true) {}
}
}
public class Consumer implements EsperIOKafkaInputSubscriber {
@Override
public void subscribe(EsperIOKafkaInputSubscriberContext context) {
Collection<String> collection = new ArrayList<String>();
collection.add("input");
context.getConsumer().subscribe(collection);
}
}
public class InputProcessor implements EsperIOKafkaInputProcessor {
private EPRuntime runtime;
@Override
public void init(EsperIOKafkaInputProcessorContext context) {
this.runtime = context.getRuntime();
}
@Override
public void process(ConsumerRecords<Object, Object> records) {
for (ConsumerRecord record : records) {
if (record.value() != null) {
try {
runtime.getEventService().sendEventBean(record.value().toString(), "String");
} catch (Exception e) {
throw e;
}
}
}
}
public void close() {}
}
Sample code follows. This code assumes there are already some messages in the topic. This does not loop and wait for more messages.
Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, ip);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "mygroup");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerProps);
consumer.subscribe(Collections.singletonList("mytopic")); // the consumer must subscribe before polling; topic name assumed
ConsumerRecords<String, String> rows = consumer.poll(1000);
Iterator<ConsumerRecord<String, String>> it = rows.iterator();
while (it.hasNext()) {
ConsumerRecord<String, String> row = it.next();
MyEvent event = new MyEvent(row.value()); // transform string to event
// process event
runtime.sendEvent(event);
}

How to check whether multiple Kafka topics exist via a Kafka client (consumer or producer) [duplicate]

I am trying to create a topic in Kafka 0.8.2 by using:
AdminUtils.createTopic(zkClient, myTopic, 2, 1, properties);
If I run the code more than once locally for testing, this fails because the topic was already created. Is there a way to check whether the topic exists before creating it? The TopicCommand API doesn't seem to return anything for listTopics or describeTopic.
You can use AdminClient from kafka-clients version 0.11.0.0.
Sample code:
Properties config = new Properties();
config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9091");
AdminClient admin = AdminClient.create(config);
ListTopicsResult listTopics = admin.listTopics();
Set<String> names = listTopics.names().get();
boolean contains = names.contains("TEST_6");
if (!contains) {
List<NewTopic> topicList = new ArrayList<NewTopic>();
Map<String, String> configs = new HashMap<String, String>();
int partitions = 5;
short replication = 1;
NewTopic newTopic = new NewTopic("TEST_6", partitions, replication).configs(configs);
topicList.add(newTopic);
admin.createTopics(topicList);
}
For this purpose, you can use the method AdminUtils.topicExists(ZkUtils zkClient, String topic); it will return true if the topic already exists, and false otherwise.
Your code would then be something like this:
if (!AdminUtils.topicExists(zkClient, myTopic)){
AdminUtils.createTopic(zkClient, myTopic, 2, 1, properties);
}
public static void createKafkaTopic(String sourceTopicName, String sinkTopicName, String responseTopicName, String kafkaUrl) {
try {
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaUrl);
AdminClient kafkaAdminClient = KafkaAdminClient.create(properties);
ListTopicsResult topics = kafkaAdminClient.listTopics();
Set <String> names = topics.names().get();
boolean containsSourceTopic = names.contains(sourceTopicName);
boolean containsSinkTopic = names.contains(sinkTopicName);
boolean containsResponseTopic = names.contains(responseTopicName);
if (!containsResponseTopic && !containsSinkTopic && !containsSourceTopic) {
CreateTopicsResult result = kafkaAdminClient.createTopics(
Stream.of(sourceTopicName, sinkTopicName, responseTopicName).map(
name -> new NewTopic(name, 1, (short) 1)
).collect(Collectors.toList())
);
result.all().get();
LOG.info("new sourceTopicName: {}, sinkTopicName: {}, responseTopicName: {} are created",
sourceTopicName, sinkTopicName, responseTopicName);
}
} catch (ExecutionException | InterruptedException e) {
LOG.info("Error message {}", e.getMessage());
}
}

Why is Apache Storm KafkaSpout emitting so many items from Kafka topic?

I'm having issues with Kafka and Storm. I'm not sure at this point if it's a problem with the KafkaSpout config I am setting up, or if I am not ACKing properly or what.
I enqueued 50 items onto my Kafka topic, but my spout has emitted over 1,300 (and counting) tuples. Also, the spout reports that almost all of them have "failed." The topology is not actually failing; it is writing to a database successfully, but I just don't know why it is apparently replaying everything so much (if that's what it's doing).
The big question is:
Why is it emitting so many tuples when I only passed 50 to Kafka?
Here is how I am setting up the topology and the KafkaSpout
public static void main(String[] args) {
try {
String databaseServerIP = "";
String kafkaZookeepers = "";
String kafkaTopicName = "";
int numWorkers = 1;
int numAckers = 1;
int numSpouts = 1;
int numBolts = 1;
int messageTimeOut = 10;
String topologyName = "";
if (args == null || args[0].isEmpty()) {
System.out.println("Args cannot be null or empty. Exiting");
return;
} else {
if (args.length == 8) {
for (String arg : args) {
if (arg == null) {
System.out.println("Parameters cannot be null. Exiting");
return;
}
}
databaseServerIP = args[0];
kafkaZookeepers = args[1];
kafkaTopicName = args[2];
numWorkers = Integer.valueOf(args[3]);
numAckers = Integer.valueOf(args[4]);
numSpouts = Integer.valueOf(args[5]);
numBolts = Integer.valueOf(args[6]);
topologyName = args[7];
} else {
System.out.println("Bad parameters: found " + args.length + ", required = 8");
return;
}
}
Config conf = new Config();
conf.setNumWorkers(numWorkers);
conf.setNumAckers(numAckers);
conf.setMessageTimeoutSecs(messageTimeOut);
conf.put("databaseServerIP", databaseServerIP);
conf.put("kafkaZookeepers", kafkaZookeepers);
conf.put("kafkaTopicName", kafkaTopicName);
/**
* Now would put kafkaSpout instance below instead of TemplateSpout()
*/
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout(topologyName + "-flatItems-from-kafka-spout", getKafkaSpout(kafkaZookeepers, kafkaTopicName), numSpouts);
builder.setBolt(topologyName + "-flatItem-Writer-Bolt", new ItemWriterBolt(), numBolts).shuffleGrouping(topologyName + "-flatItems-from-kafka-spout");
StormTopology topology = builder.createTopology();
StormSubmitter.submitTopology(topologyName, conf, topology);
} catch (Exception e) {
System.out.println("There was a problem starting the topology. Check parameters.");
e.printStackTrace();
}
}
private static KafkaSpout getKafkaSpout(String zkHosts, String topic) throws Exception {
//String topic = "FLAT-ITEMS";
String zkNode = "/" + topic + "-subscriber-pipeline";
String zkSpoutId = topic + "subscriberpipeline";
KafkaTopicInZkCreator.createTopic(topic, zkHosts);
SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHosts), topic, zkNode, zkSpoutId);
spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
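// LatestTime() makes the spout start from the end of the topic when no offset has been committed to ZooKeeper yet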
// spoutConfig.useStartOffsetTimeIfOffsetOutOfRange = true;
//spoutConfig.startOffsetTime = System.currentTimeMillis();
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
return new KafkaSpout(spoutConfig);
}
And here is the creation of the topic, in case that matters:
public static void createTopic(String topicName, String zookeeperHosts) throws Exception {
ZkClient zkClient = null;
ZkUtils zkUtils = null;
try {
int sessionTimeOutInMs = 15 * 1000; // 15 secs
int connectionTimeOutInMs = 10 * 1000; // 10 secs
zkClient = new ZkClient(zookeeperHosts, sessionTimeOutInMs, connectionTimeOutInMs, ZKStringSerializer$.MODULE$);
zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperHosts), false);
int noOfPartitions = 1;
int noOfReplication = 1;
Properties topicConfiguration = new Properties();
boolean topicExists = AdminUtils.topicExists(zkUtils, topicName);
if (!topicExists) {
AdminUtils.createTopic(zkUtils, topicName, noOfPartitions, noOfReplication, topicConfiguration, RackAwareMode.Disabled$.MODULE$);
}
} catch (Exception ex) {
ex.printStackTrace();
} finally {
if (zkClient != null) {
zkClient.close();
}
}
}
You need to check whether the messages failed in the bolt as well.
If they all failed there too, you probably didn't ack the messages in the bolt, or there is an exception in the bolt code.
If the bolt messages were acked, it is more likely a timeout: increasing the topology message timeout or the parallelism should fix the problem.
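As a minimal sketch of a bolt that acks every tuple (package names assume Storm 1.x, and the class is only a stand-in for the question's ItemWriterBolt; the database write is a placeholder):
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
public class AckingItemWriterBolt extends BaseRichBolt {
    private OutputCollector collector;
    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }
    @Override
    public void execute(Tuple tuple) {
        try {
            String message = tuple.getString(0); // the StringScheme in the spout emits a single string field
            // ... write the message to the database here ...
            collector.ack(tuple);  // tell Storm the tuple succeeded, so the spout does not replay it
        } catch (Exception e) {
            collector.fail(tuple); // explicit failure; the spout will replay this tuple
        }
    }
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // nothing is emitted downstream
    }
}
The topology in the question also sets conf.setMessageTimeoutSecs(10); if the database writes can take longer than roughly 10 seconds, raising that value stops Storm from marking slow tuples as failed and replaying them.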