Firstly I have started zookeeper, then kafka server and I have created a topic. Then I read tweets and try to store them into MongoDB but nothing is written to my collection although server is establishing connection.
Here is the java code:
package kafka;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;
import twitter4j.*;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.conf.ConfigurationBuilder;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.log4j.Logger;
import org.bson.Document;
import org.mortbay.util.ajax.JSON;
import com.mongodb.*;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
public class KafkaTwitterProducer {
#SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
final LinkedBlockingQueue<Status> queue = new LinkedBlockingQueue<Status>(1000);
if (args.length < 4) {
System.out.println(
"Usage: KafkaTwitterProducer <twitter-consumer-key> <twitter-consumer-secret> <twitter-access-token> <twitter-access-token-secret> <topic-name> <twitter-search-keywords>");
return;
}
String consumerKey = args[0].toString();
String consumerSecret = args[1].toString();
String accessToken = args[2].toString();
String accessTokenSecret = args[3].toString();
String topicName = args[4].toString();
String[] arguments = args.clone();
String[] keyWords = Arrays.copyOfRange(arguments, 5, arguments.length);
// Set twitter oAuth tokens in the configuration
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true).setOAuthConsumerKey(consumerKey).setOAuthConsumerSecret(consumerSecret)
.setOAuthAccessToken(accessToken).setOAuthAccessTokenSecret(accessTokenSecret).setJSONStoreEnabled(true);
// Create twitterstream using the configuration
TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
StatusListener listener = new StatusListener() {
public void onStatus(Status status) {
queue.offer(status);
}
public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
}
public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
}
public void onScrubGeo(long userId, long upToStatusId) {
System.out.println("Got scrub_geo event userId:" + userId + "upToStatusId:" + upToStatusId);
}
public void onStallWarning(StallWarning warning) {
System.out.println("Got stall warning:" + warning);
}
public void onException(Exception ex) {
ex.printStackTrace();
}
};
twitterStream.addListener(listener);
// Filter keywords
FilterQuery query = new FilterQuery().track(keyWords);
twitterStream.filter(query);
// Thread.sleep(5000);
// Add Kafka producer config settings
Properties props = new Properties();
props.put("metadata.broker.list", "localhost:9092");
props.put("bootstrap.servers", "localhost:9092");
props.put("acks", "all");
props.put("retries", 0);
props.put("batch.size", 16384);
props.put("linger.ms", 1);
props.put("buffer.memory", 33554432);
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<String, String>(props);
int i = 0;
int j = 0;
// Mongodb initialization parameters.
int port_no = 27017;
String host_name = "localhost", db_name = "bigdata", db_coll_name = "tweets";
// Mongodb connection string.
String client_url = "mongodb://" + host_name + ":" + port_no + "/" + db_name;
MongoClientURI uri = new MongoClientURI(client_url);
// Connecting to the mongodb server using the given client uri.
MongoClient mongo_client = new MongoClient(uri);
// Fetching the database from the mongodb.
MongoDatabase db = mongo_client.getDatabase(db_name);
// Fetching the collection from the mongodb.
MongoCollection<Document> coll = db.getCollection(db_coll_name);
//List<Document> docs = new ArrayList<Document>();
// poll for new tweets in the queue. If new tweets are added, send them
// to the topic
for(Status status:queue) {
String json = TwitterObjectFactory.getRawJSON(status);
BasicDBObject dbObject = (BasicDBObject) JSON.parse(json);
coll.insertOne(new Document(dbObject));
}
}
}
This is what mongod command prompt is showing me:
2019-05-26T19:26:29.181+0300 I NETWORK [initandlisten] waiting for connections on port 27017
2019-05-26T19:28:05.466+0300 I NETWORK [listener] connection accepted from 127.0.0.1:54232 #1 (1 connection now open)
2019-05-26T19:28:05.475+0300 I NETWORK [conn1] received client metadata from 127.0.0.1:54232 conn1: { driver: { name: "mongo-java-driver", version: "3.5.0" }, os: { type: "Windows", name: "Windows 10", architecture: "amd64", version: "10.0" }, platform: "Java/Oracle Corporation/1.8.0_152-b16" }
but no tweets are written to the database. Can someone please explain where is the error?
Related
This is follow-up to this question.
My code can initiate connection, session etc., however messages are not consumed. I don't see any exceptions in logs.
This test reproduces the problem:
import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageProducer;
import javax.jms.Queue;
import javax.jms.Session;
import javax.naming.Context;
import java.io.File;
import java.util.Hashtable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.impl.ConfigurationImpl;
import org.apache.activemq.artemis.core.server.embedded.EmbeddedActiveMQ;
import org.apache.activemq.artemis.core.settings.impl.AddressSettings;
import org.apache.activemq.artemis.jndi.ActiveMQInitialContextFactory;
import org.junit.After;
import org.junit.Before;
public class Test {
EmbeddedActiveMQ jmsServer;
final String QUEUE_NAME = "myQueue";
#Before
public void setUp() throws Exception {
final String baseDir = File.separator + "tmp";
final EmbeddedActiveMQ embeddedActiveMQ = new EmbeddedActiveMQ();
final Configuration config = new ConfigurationImpl();
config.setPersistenceEnabled(true);
config.setBindingsDirectory(baseDir + File.separator + "bindings");
config.setJournalDirectory(baseDir + File.separator + "journal");
config.setPagingDirectory(baseDir + File.separator + "paging");
config.setLargeMessagesDirectory(baseDir + File.separator + "largemessages");
config.setSecurityEnabled(false);
AddressSettings adr = new AddressSettings();
adr.setDeadLetterAddress(new SimpleString("DLQ"));
adr.setExpiryAddress(new SimpleString("ExpiryQueue"));
config.addAddressSetting("#", adr);
config.addAcceptorConfiguration("invmConnectionFactory", "vm://0");
embeddedActiveMQ.setConfiguration(config);
this.jmsServer = embeddedActiveMQ;
this.jmsServer.start();
System.out.println("creating queue");
final boolean isSuccess = jmsServer.getActiveMQServer().createQueue(new QueueConfiguration(QUEUE_NAME)) != null;
if(isSuccess) {
System.out.println(QUEUE_NAME + "queue created");
}
}
#After
public void tearDown() {
try {
this.jmsServer.stop();
} catch(Exception e) {
// ignore
}
}
#org.junit.Test
public void simpleTest() throws Exception {
Hashtable d = new Hashtable();
d.put(Context.INITIAL_CONTEXT_FACTORY, "org.apache.activemq.artemis.jndi.ActiveMQInitialContextFactory");
d.put("connectionFactory.invmConnectionFactory", "vm://0");
final ActiveMQInitialContextFactory activeMQInitialContextFactory = new ActiveMQInitialContextFactory();
Context initialContext = activeMQInitialContextFactory.getInitialContext(d);
ConnectionFactory connectionFactory = (ConnectionFactory) initialContext.lookup("invmConnectionFactory");
Connection connection = connectionFactory.createConnection();
Session session = connection.createSession(true, Session.SESSION_TRANSACTED);
Queue queue = session.createQueue(QUEUE_NAME);
MessageProducer producer = session.createProducer(queue);
MessageConsumer consumer = session.createConsumer(queue);
CountDownLatch latch = new CountDownLatch(1);
consumer.setMessageListener(message -> {
System.out.println("=== " + message);
try {
message.acknowledge();
session.commit();
latch.countDown();
} catch(JMSException e) {
e.printStackTrace();
}
});
connection.start();
producer.send(session.createMessage());
session.commit();
if(!latch.await(2, TimeUnit.SECONDS)) {
throw new IllegalStateException();
}
connection.close();
}
}
The problem with this code is subtle but important. When configuring the broker you're creating a queue like so:
...
final String QUEUE_NAME = "myQueue";
...
jmsServer.getActiveMQServer().createQueue(new QueueConfiguration(QUEUE_NAME))
...
This is perfectly valid in and of itself, but for this use-case involving a JMS queue it's important to note that this will result in an address named myQueue and a multicast queue named myQueue since the default routing type is MULTICAST and you didn't specify any routing type on your QueueConfiguration. This is not the kind of configuration you want for a JMS queue. You want an address and an ANYCAST queue of the same name (i.e. myQueue in this case) as noted in the documentation. Therefore, you should use:
...
import org.apache.activemq.artemis.api.core.RoutingType;
...
jmsServer.getActiveMQServer().createQueue(new QueueConfiguration(QUEUE_NAME).setRoutingType(RoutingType.ANYCAST))
When you use the multicast queue the message sent by the JMS client will not actually be routed because it is sent with the anycast routing type.
Another option would be to not create the queue explicitly at all and allow it to be auto-created.
I have a kafka topic of Strings with an arbitrary key. I want to create a topic of characters in string : value pairs, e.g:
input("key","value") -> outputs (["v","value"],["a","value"],...)
To keep it simple, my input topic has a single partition, and thus the KTable code should be receiving all messages to a single instance.
I have created the following sandbox code, which builds the new table just fine , but doesn't update when a new item is put into the original topic:
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
public class Sandbox
{
private final static String kafkaBootstrapServers = "192.168.1.254:9092";
private final static String kafkaGlobalTablesDirectory = "C:\\Kafka\\tmp\\kafka-streams-global-tables\\";
private final static String topic = "sandbox";
private static KafkaStreams streams;
public static void main(String[] args)
{
// 1. set up the test data
Properties producerProperties = new Properties();
producerProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBootstrapServers);
producerProperties.put(ProducerConfig.CLIENT_ID_CONFIG, Sandbox.class.getName() + "_testProducer");
producerProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
producerProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
Producer<String, String> sandboxProducer = new KafkaProducer<>(producerProperties);
sandboxProducer.send(new ProducerRecord<String, String>(topic,"uvw","uvw"));
// 2. read the test data and check it's working
ReadOnlyKeyValueStore<String, String> store = getStore();
printStore(store.all());
System.out.println("-------------ADDING NEW VALUE----------------");
sandboxProducer.send(new ProducerRecord<String, String>(topic,"xyz","xyz"));
System.out.println("-------------ADDED NEW VALUE----------------");
printStore(store.all());
sandboxProducer.close();
streams.close();
}
private static void printStore(KeyValueIterator<String, String> i)
{
System.out.println("-------------PRINT START----------------");
while (i.hasNext())
{
KeyValue<String, String> n = i.next();
System.out.println(n.key + ":" + String.join(",", n.value));
}
System.out.println("-------------PRINT END----------------");
}
private static ReadOnlyKeyValueStore<String, String> getStore()
{
ReadOnlyKeyValueStore<String, String> store = null;
String storeString = "sandbox_store";
StreamsBuilder builder = new StreamsBuilder();
builder.stream(topic
, Consumed.with(Serdes.String(),Serdes.String()))
.filter((k,v)->v!=null)
.flatMap((k,v)->{
Set<KeyValue<String, String>> results = new LinkedHashSet<>();
if (v != null)
{
for (char subChar : v.toCharArray())
{
results.add(KeyValue.<String, String>pair(new String(new char[] {subChar}), v));
}
}
return results;
})
.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
.aggregate(()->new String()
, (key, value, agg) -> {
agg = agg + value;
return agg;
}
,Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeString)
.withKeySerde(Serdes.String())
.withValueSerde(Serdes.String()));
final Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "sandbox");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, Sandbox.class.getName());
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBootstrapServers);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, kafkaGlobalTablesDirectory + "Sandbox");
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.setUncaughtExceptionHandler((Thread thread, Throwable throwable) -> {
System.out.println("Exception on thread " + thread.getName() + ":" + throwable.getLocalizedMessage());
});
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
streams.cleanUp(); // clear any old streams data - forces a rebuild of the local caches.
streams.start(); // hangs until the global table is built
StoreQueryParameters<ReadOnlyKeyValueStore<String, String>> storeSqp
= StoreQueryParameters.fromNameAndType(storeString
,QueryableStoreTypes.<String, String>keyValueStore());
// this while loop gives time for Kafka Streams to start up properly before creating the store
while (store == null)
{
try {
TimeUnit.SECONDS.sleep(1);
store = streams.store(storeSqp);
System.out.println("Store " + storeString + " Created successfully.");
} catch (InterruptedException e) {
}
catch (Exception e) {
System.out.println("Exception creating store " + storeString + ". Will try again in 1 second. Message: " + e.getLocalizedMessage());
}
}
return store;
}
}
The output I am getting is as follows:
Store sandbox_store Created successfully.
-------------PRINT START----------------
u:uvw
v:uvw
w:uvw
-------------PRINT END----------------
-------------ADDING NEW VALUE----------------
-------------ADDED NEW VALUE----------------
-------------PRINT START----------------
u:uvw
v:uvw
w:uvw
-------------PRINT END----------------
Note that the xyz I added has gone missing!
(p.s. I know I could use reduce instead of aggregate, but in practise the new value would be a different type, not a string, so it wouldn't work for my actual use-case)
Now, if I add a 10 second pause before printing the second time; or if I then restart the Sandbox class without clearing the topic, the first xyz shows up then. So it's clearly that there's a time delay somewhere in the system. And in practise I'm dealing with 300mb+ of messages all going onto the input topic at once, once an hour; and so the delay is even longer than just a few seconds.
How can I help speed things up?
I have a Spring Boot code which reads data from Kafka topic. The code works as expected when data is feed to the topic via Kafka Producer Console. When I try to push data into the kafka topic via Golden Gate, the code doesn't reads the data from the topic, although I can see the golden gate is able to write the data into the kafka topic. Can anyone suggest why this change in behavior?
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.bson.Document;
import org.json.JSONArray;
import org.json.JSONObject;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
public class VideoConsumer implements Runnable {
private ObjectMapper objectMapper;
private KafkaStream<byte[], byte[]> kafkaStream;
private int threadNumber;
public VideoConsumer(KafkaStream<byte[], byte[]> kafkaStream, int threadNumber) {
this.threadNumber = threadNumber;
this.kafkaStream = kafkaStream;
this.objectMapper = new ObjectMapper();
}
#Override
public void run() {
ConsumerIterator<byte[], byte[]> it = kafkaStream.iterator();
while (it.hasNext()) {
byte[] messageData = it.next().message();
try {
//String videoFromMessage = objectMapper.readValue(messageData, String.class);
//byte[] videoFromMessage = it.next().message();
//System.out.print("got message");
String streamData = new String(messageData);
System.out.print("Thread:" + threadNumber + ".Consuming video: " + streamData + "\n");
String changed=streamData.toString();
int pos=changed.lastIndexOf("}}");
String change=changed.substring(0,pos );
change=change.replace("}}", "}},");
String res=change.concat("}}");
String result="[" +res+ "]";
System.out.println(result);
JSONArray json;
json = new JSONArray(result);
Map<String, List<JSONObject>> orderMongo = new HashMap<>();
Map<String, List<JSONObject>> orderItemMongo = new HashMap<>();
MongoClient mongoClient = new MongoClient( "localhost" , 27017 );
MongoDatabase db = mongoClient.getDatabase("Mongotest");
MongoCollection<Document> table = db.getCollection("test1");
Document doc1=new Document();
//Gson gson=new Gson();
BasicDBObject document = new BasicDBObject();
for (int i = 0; i < json.length(); i++) {
JSONObject obj = json.getJSONObject(i);
if(obj.getString("table").equals("TEST.S_ORDER_MONGO1")){
List<JSONObject> list = orderMongo.getOrDefault(obj.getString("table").equals("TEST.S_ORDER_MONGO1"),new ArrayList<>());
list.add(obj);
orderMongo.put(obj.getJSONObject("after").getString("ROW_ID"),list);
}
else if(obj.getString("table").equals("TEST.S_ORDER_ITEM_MONGO1")){
List<JSONObject> nextlist = orderItemMongo.getOrDefault(obj.getString("table").equals("TEST.S_ORDER_ITEM_MONGO1"),new ArrayList<>());
nextlist.add(obj);
orderItemMongo.put(obj.getJSONObject("after").getString("ORDER_ID"),nextlist);
}
}
System.out.println(orderMongo);
System.out.println(orderItemMongo);
// System.out.println(orderItemMongo);
for (Entry<String, List<JSONObject>> entry : orderMongo.entrySet()) {
for(Entry<String, List<JSONObject>> entry1 : orderItemMongo.entrySet()){
if(entry.getKey().equals(entry1.getKey())){
//String gsonString=gson.toJson(entry.getValue());
//System.out.println(gsonString);
List<JSONObject> listnext = entry.getValue();
List <JSONObject> orderlineList=entry1.getValue();
for(JSONObject obj:listnext){
Document doc = new Document("STATUS_CD", obj.getJSONObject("after").getString("STATUS_CD"));
if(obj.getJSONObject("after").isNull("INTEGRATION_ID")==true){
doc.append("INTEGRATION_ID", null);}
doc.append("X_CUST_REF", obj.getJSONObject("after").getString("X_CUST_REF"));
doc.append("REQ_SHIP_DT",obj.getJSONObject("after").getString("REQ_SHIP_DT"));
if(obj.getJSONObject("after").isNull("QUOTE_ID")==true){
doc.append("QUOTE_ID",null);}
doc.append("ACCNT_ID",obj.getJSONObject("after").getString("ACCNT_ID"));
doc.append("ACTIVE_FLG",obj.getJSONObject("after").getString("ACTIVE_FLG"));
doc.append("PROCESS_TIMESTAMP",obj.getJSONObject("after").getString("PROCESS_TIMESTAMP"));
doc.append("CONTACT_ID",obj.getJSONObject("after").getString("CONTACT_ID"));
doc.append("BU_ID", obj.getJSONObject("after").getString("BU_ID"));
doc.append("SHIP_CON_ID",obj.getJSONObject("after").getString("SHIP_CON_ID"));
doc.append("LAST_UPD", obj.getJSONObject("after").getString("LAST_UPD"));
if(obj.getJSONObject("after").isNull("X_CLOSE_DT")==true){
doc.append("X_CLOSE_DT", null);}
doc.append("X_SUB_STAT", obj.getJSONObject("after").getString("X_SUB_STAT"));
doc.append("ORDER_NUM", obj.getJSONObject("after").getString("ORDER_NUM"));
doc.append("SOFT_DELETE", obj.getJSONObject("after").getString("SOFT_DELETE"));
doc.append("ROW_ID", obj.getJSONObject("after").getString("ROW_ID"));
doc.append("LAST_UPD_BY",obj.getJSONObject("after").getString("LAST_UPD_BY"));
doc.append("REV_NUM",obj.getJSONObject("after").getString("REV_NUM"));
doc.append("ORDER_DT", obj.getJSONObject("after").getString("ORDER_DT"));
for(JSONObject object:orderlineList){
if(object.getJSONObject("after").isNull("ASSET_ID")==true){
doc1.append("ASSET_ID", null);}
if(object.getJSONObject("after").isNull("SERV_ACCNT_ID")==true){
doc1.append("SERV_ACCNT_ID", null);}
doc1.append("REQ_SHIP_DT",object.getJSONObject("after").getString("REQ_SHIP_DT"));
if(object.getJSONObject("after").isNull("X_PROD_DESC")==true){
doc1.append("X_PROD_DESC",null);}
if(object.getJSONObject("after").isNull("SHIP_CON_ID")==true){
doc1.append("SHIP_CON_ID",null);}
doc1.append("X_BES_STATUS",object.getJSONObject("after").getString("X_BES_STATUS"));
doc1.append("ROW_ID",object.getJSONObject("after").getString("ROW_ID"));
doc1.append("STATUS_CD",object.getJSONObject("after").getString("STATUS_CD"));
doc1.append("ORDER_ID",object.getJSONObject("after").getString("ORDER_ID"));
if(object.getJSONObject("after").isNull("COMPLETED_DT")==true){
doc1.append("COMPLETED_DT",null);}
doc1.append("LAST_UPD",object.getJSONObject("after").getString("LAST_UPD"));
doc1.append("SOFT_DELETE",object.getJSONObject("after").getString("SOFT_DELETE"));
doc1.append("INTEGRATION_ID",object.getJSONObject("after").getString("INTEGRATION_ID"));
doc1.append("X_CDD",object.getJSONObject("after").getString("X_CDD"));
doc1.append("ACTION_CD",object.getJSONObject("after").getString("ACTION_CD"));
doc1.append("X_ORDER_ITEM_SUBSTATUS",object.getJSONObject("after").getString("X_ORDER_ITEM_SUBSTATUS"));
if(object.getJSONObject("after").isNull("X_APPT_REF")==true){
doc1.append("X_APPT_REF",null);}
if(object.getJSONObject("after").isNull("X_CANCELLED_DT")==true){
doc1.append("X_CANCELLED_DT",null);}
doc1.append("PROD_ID",object.getJSONObject("after").getString("PROD_ID"));
if(object.getJSONObject("after").isNull("SERVICE_NUM")==true){
doc1.append("SERVICE_NUM",null);}
if(object.getJSONObject("after").isNull("MUST_DLVR_BY_DT")==true){
doc1.append("MUST_DLVR_BY_DT",null);}
doc1.append("ROLLUP_FLG",object.getJSONObject("after").getString("ROLLUP_FLG"));
doc1.append("ROOT_ORDER_ITEM_ID",object.getJSONObject("after").getString("ROOT_ORDER_ITEM_ID"));
doc1.append("BILL_ACCNT_ID",object.getJSONObject("after").getString("BILL_ACCNT_ID"));
doc1.append("PROCESS_TIMESTAMP",object.getJSONObject("after").getString("PROCESS_TIMESTAMP"));
doc1.append("QTY_REQ",object.getJSONObject("after").getString("QTY_REQ"));
}
doc.append("ORDERLINE", doc1);
table.insertOne(doc);
}
}
}
}
}
catch (Exception e) {
e.printStackTrace();
}
System.out.println("Shutting down Thread: " + kafkaStream);
}
}
}
This is the StBolt.java class.
package com.storm.cassandra;
import java.util.Map;
import net.sf.json.JSONObject;
import net.sf.json.JSONSerializer;
import org.apache.log4j.Logger;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.IBasicBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
public class StBolt implements IBasicBolt {
private static final long serialVersionUID = 1L;
private static final Logger logger = Logger
.getLogger(StBolt.class);
private static Session session = null;
private Cluster cluster = null;
String cassandraURL;
JSONObject eventJson = null;
String topicname = null;
String ip = null;
String menu = null;
String product = null;
Row row = null;
com.datastax.driver.core.ResultSet viewcount = null;
com.datastax.driver.core.ResultSet segmentlistResult = null;
com.datastax.driver.core.ResultSet newCountUpdatedResult = null;
public StBolt(String topicname) {
this.topicname = topicname;
}
public void prepare(Map stormConf, TopologyContext topologyContext) {
cluster = Cluster.builder().addContactPoint("127.0.0.1").build();
System.out.println("load cassandra ip");
session = cluster.connect();
System.out.println("CassandraCounterBolt prepare method ended");
}
public void execute(Tuple input, BasicOutputCollector collector) {
System.out.println("Execute");
Fields fields = input.getFields();
try {
eventJson = (JSONObject) JSONSerializer.toJSON((String) input
.getValueByField(fields.get(0)));
topicname = (String) eventJson.get("topicName");
ip = (String) eventJson.get("ip");
menu = (String) eventJson.get("menu");
product = (String) eventJson.get("product");
String ievent = "ievent";
String install = "install";
viewcount = session
.execute("update webapp.viewcount set count=count+1 where topicname='"+topicname+
"'and ip= '"+ip+"'and menu='"+menu+"'and product='"+product+"'" );
} catch (Exception e) {
e.printStackTrace();
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
public Map<String, Object> getComponentConfiguration() {
return null;
}
public void cleanup() {
}
}
Here is the StTopology.java class
package com.storm.cassandra;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
public class StTopology {
public static void main(String[] args) throws Exception {
if (args.length == 4) {
BrokerHosts hosts = new ZkHosts("localhost:2181");
//System.out
//.println("Insufficent Arguements - topologyName kafkaTopic ZKRoot ID");
SpoutConfig kafkaConf1 = new SpoutConfig(hosts, args[1], args[2],
args[3]);
//System.out
//.println("Insufficent Arguements - topologyName kafkaTopic ZKRoot ID");
//kafkaConf1.forceFromStart = false;
kafkaConf1.zkRoot = args[2];
kafkaConf1.scheme = new SchemeAsMultiScheme(new StringScheme());
KafkaSpout kafkaSpout1 = new KafkaSpout(kafkaConf1);
StBolt countbolt = new StBolt(args[1]);
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("kafkaspout", kafkaSpout1, 1);
builder.setBolt("counterbolt", countbolt, 1).shuffleGrouping(
"kafkaspout");
Config config = new Config();
config.setDebug(true);
config.put(Config.TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS, 1);
config.setNumWorkers(1);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology(args[0], config, builder.createTopology());
// StormSubmitter.submitTopology(args[0], config,
// builder.createTopology());
} else {
System.out
.println("Insufficent Arguements - topologyName kafkaTopic ZKRoot ID");
}
}
}
I am trying to get JSON data from the Kafka console producer, process it in Storm and store it into Cassandra.
For some reason, there is no response from the bolt when I run the code with parameters viewcount usercount /kafkastorm webapp1.
I have Kafka getting data from the console producer as topic usercount, and the correct table in Cassandra.
The code compiles and runs without any error but the console shows terminated.
I have no activity anywhere, despite providing the right JSON input to the Kafka console producer multiple times {"topicname":"usercount","ip":"127.0.0.1","menu":"dress","product":"tshirt"}.
There is no topology shown as being created in the Storm UI's Topology Summary either.
I believe I have all the Kafka, Storm and Cassandra dependencies in place.
Please point me in the right direction with this issue. Thanks.
I'm trying to embed a kafkaserver in my code. I've used the following example code to try to learn how to do so but for some reason, my producer can't send messages to the embedded server (it times out after 60 secs). I'm using kafka 0.8.2.2. Can someone tell me what I'm doing wrong?
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;
import kafka.producer.ProducerConfig;
import kafka.server.KafkaConfig;
import kafka.server.KafkaServer;
import kafka.utils.Time;
import kafka.utils.Utils;
import org.apache.commons.collections.functors.ExceptionPredicate;
import org.apache.curator.test.TestingServer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.io.File;
import java.nio.ByteBuffer;
import java.util.Properties;
public class KafkaLocalBroker {
public static final String TEST_TOPIC = "test-topic";
public KafkaConfig kafkaConfig;
public KafkaServer kafkaServer;
public TestingServer zookeeper;
public KafkaLocalBroker() throws Exception{
zookeeper = new TestingServer(true);
Properties props = new Properties();
props.put("zookeeper.connect", zookeeper.getConnectString());
props.put("broker.id", 0);
kafkaConfig = new KafkaConfig(props);
kafkaServer = new KafkaServer(kafkaConfig, new Time() {
public long nanoseconds() {
return System.nanoTime();
}
public long milliseconds() {
return System.currentTimeMillis();
}
public void sleep(long ms) {
try {
Thread.sleep(ms);
} catch(InterruptedException e){
// Do Nothing
}
}
});
kafkaServer.startup();
System.out.println("embedded kafka is up");
}
public void stop(){
kafkaServer.shutdown();
System.out.println("embedded kafka stop");
}
/**
* a main that tests the embedded kafka
* #param args
*/
public static void main(String[] args) {
KafkaLocalBroker kafkaLocalBroker = null;
//init kafka server and start it:
try {
kafkaLocalBroker = new KafkaLocalBroker();
} catch (Exception e){
}
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092");
props.put("acks", "all");
props.put("retries", 1);
props.put("batch.size", 16384);
props.put("linger.ms", 1);
props.put("buffer.memory", 33554432);
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
//send one message to local kafka server:
for (int i=0; i<10; i++){
ProducerRecord<String, String> data = new ProducerRecord<String, String>(TEST_TOPIC, "test-message" + i);
producer.send(data, (metadata, exception) -> {
if (exception != null) {
System.out.println("Failed to write log message: " + exception.getMessage());
} else {
System.out.println("Successful write to offset {} in partition {} on topic {}: " +
metadata.offset() + ", " + metadata.partition() + ", "+ metadata.topic());
}
});
}
//consume messages from Kafka:
SimpleConsumer consumer = new SimpleConsumer("localhost", 9092, 10000, 1024000, "clientId");
long offset = 0L;
while (offset < 160) { //this is an exit criteria just for this test so we are not stuck in enless loop
// create a fetch request for topic “test”, partition 0, current offset, and fetch size of 1MB
FetchRequest fetchRequest = new FetchRequestBuilder().addFetch(TEST_TOPIC, 0, offset, 100000).build();//new FetchRequest(TEST_TOPIC, 0, offset, 1000000);
// get the message set from the consumer and print them out
FetchResponse messages = consumer.fetch(fetchRequest);
for(MessageAndOffset msg : messages.messageSet(TEST_TOPIC, 0)) {
ByteBuffer payload = msg.message().payload();
byte[] bytes = new byte[payload.limit()];
payload.get(bytes);
try {
System.out.println(new String(bytes, "UTF-8"));
} catch (Exception e){
}
// advance the offset after consuming each message
offset = msg.offset();
}
}
producer.close();
//close the consumer
consumer.close();
//stop the kafka broker:
if(kafkaLocalBroker != null) {
kafkaLocalBroker.stop();
}
}
}
EDIT: I've included the exception returned from the producer below:
org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
The properties used to create kafka producer is not valid for 0.8. Go through producerconfig and change the properties. or update kafka version