Spark rookie here. I'm trying to write some unit tests for my Spark job, but I keep getting an exception when creating a new StreamingContext in the test, on the line ssc = new StreamingContext(conf, batchDuration).
The code looks like:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.mock.MockitoSugar
import org.scalatest.{BeforeAndAfter, FlatSpec}
import org.slf4j.Logger // assumption: slf4j; use whichever Logger the job itself uses

@RunWith(classOf[JUnitRunner])
class SparkJobTest extends FlatSpec with MockitoSugar with BeforeAndAfter {

  private val master = "local[2]"
  private val appName = "TestingAppName"
  private val batchDuration = Seconds(1)

  private val loggerMock = mock[Logger]

  private var ssc: StreamingContext = _
  private var sc: SparkContext = _

  before {
    val conf = new SparkConf()
      .setMaster(master)
      .setAppName(appName)
    ssc = new StreamingContext(conf, batchDuration)
  }

  after {
    if (ssc != null) {
      ssc.stop()
    }
  }

  "This" should "always work" in {
    assert(1 == 1)
  }
}
Exception I got:
An exception or error caused a run to abort: Bad return type
Exception Details:
Location:
org/apache/spark/streaming/StreamingContext.fileStream(Ljava/lang/String;Lscala/Function1;ZLscala/reflect/ClassTag;Lscala/reflect/ClassTag;Lscala/reflect/ClassTag;)Lorg/apache/spark/streaming/dstream/InputDStream; #23: areturn
Reason:
Type 'org/apache/spark/streaming/dstream/FileInputDStream' (current frame, stack[0]) is not assignable to 'org/apache/spark/streaming/dstream/InputDStream' (from method signature)
Current Frame:
bci: #23
flags: { }
locals: { 'org/apache/spark/streaming/StreamingContext', 'java/lang/String', 'scala/Function1', integer, 'scala/reflect/ClassTag', 'scala/reflect/ClassTag', 'scala/reflect/ClassTag' }
stack: { 'org/apache/spark/streaming/dstream/FileInputDStream' }
Bytecode:
0000000: bb01 9959 2a2b 2c1d b201 9eb6 01a6 1904
0000010: 1905 1906 b701 a9b0
java.lang.VerifyError: Bad return type
Exception Details:
Location:
org/apache/spark/streaming/StreamingContext.fileStream(Ljava/lang/String;Lscala/Function1;ZLscala/reflect/ClassTag;Lscala/reflect/ClassTag;Lscala/reflect/ClassTag;)Lorg/apache/spark/streaming/dstream/InputDStream; #23: areturn
Reason:
Type 'org/apache/spark/streaming/dstream/FileInputDStream' (current frame, stack[0]) is not assignable to 'org/apache/spark/streaming/dstream/InputDStream' (from method signature)
Current Frame:
bci: #23
flags: { }
locals: { 'org/apache/spark/streaming/StreamingContext', 'java/lang/String', 'scala/Function1', integer, 'scala/reflect/ClassTag', 'scala/reflect/ClassTag', 'scala/reflect/ClassTag' }
stack: { 'org/apache/spark/streaming/dstream/FileInputDStream' }
Bytecode:
0000000: bb01 9959 2a2b 2c1d b201 9eb6 01a6 1904
0000010: 1905 1906 b701 a9b0
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply$mcV$sp(SparkJobTest.scala:25)
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply(SparkJobTest.scala:21)
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply(SparkJobTest.scala:21)
at org.scalatest.BeforeAndAfter$class.runTest(BeforeAndAfter.scala:195)
at com.appnexus.data.spark.job.SparkJobTest.runTest(SparkJobTest.scala:13)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1714)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1714)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:390)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:427)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
at org.scalatest.FlatSpecLike$class.runTests(FlatSpecLike.scala:1714)
at org.scalatest.FlatSpec.runTests(FlatSpec.scala:1683)
at org.scalatest.Suite$class.run(Suite.scala:1424)
at org.scalatest.FlatSpec.org$scalatest$FlatSpecLike$$super$run(FlatSpec.scala:1683)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1760)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1760)
at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
at org.scalatest.FlatSpecLike$class.run(FlatSpecLike.scala:1760)
at com.appnexus.data.spark.job.SparkJobTest.org$scalatest$BeforeAndAfter$$super$run(SparkJobTest.scala:13)
at org.scalatest.BeforeAndAfter$class.run(BeforeAndAfter.scala:241)
at com.appnexus.data.spark.job.SparkJobTest.run(SparkJobTest.scala:13)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
at org.scalatest.tools.Runner$.run(Runner.scala:883)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
Process finished with exit code 0
I googled a lot; several answers said it's a version issue, so I tried Spark 1.2, 1.3 and 1.4, but no luck.
I'm running my test on Mac OS X, Java 1.8.
EDIT: the Spark dependencies in my pom:
<properties>
<!-- maven specific properties -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.10.4</scala.version>
<kafka.version>0.8.2.0</kafka.version>
<spark.version>1.3.1</spark.version>
<hadoop.version>2.6.0-cdh5.4.0</hadoop.version>
<samza.version>0.8.x-hadoop${hadoop.version}-kafka${kafka.version}</samza.version>
<assembly.configuration>src/main/assembly/src.xml</assembly.configuration>
<data.deps.scope>compile</data.deps.scope>
<spray.version>1.3.1</spray.version>
</properties>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalamock</groupId>
<artifactId>scalamock-scalatest-support_2.11</artifactId>
<version>3.2</version>
<scope>test</scope>
</dependency>
<!-- spark dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
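(A side note on the version angle: the pom above mixes Scala binary versions — scalamock-scalatest-support_2.11 sits next to scala-library 2.10.4 and _2.10 Spark/ScalaTest artifacts — and mixing 2.10 and 2.11 artifacts on one classpath is a common source of exactly this kind of binary-incompatibility error. A sketch of an aligned test dependency, assuming the project stays on Scala 2.10; the exact ScalaMock version published for 2.10 is an assumption:
<!-- sketch: keep every Scala artifact on the same binary version (2.10 here) -->
<dependency>
    <groupId>org.scalamock</groupId>
    <artifactId>scalamock-scalatest-support_2.10</artifactId>
    <version>3.2</version>
    <scope>test</scope>
</dependency>
)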
Related
I am able to insert data into an HBase table using the Put method. Now I want to read data from a CSV file and write it to the HBase table.
I wrote this code:
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object Read_CSV_Method2 {

  case class HotelBooking(hotelName: String, is_cancelled: String, reservation_status: String,
                          reservation_status_date: String)

  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.ERROR)

    val sparkConf = new SparkConf().setAppName("Spark_Hbase_Connection").setMaster("local[*]")
    val sc = new SparkContext(sparkConf)

    val conf = HBaseConfiguration.create()
    // establish a connection
    val connection: Connection = ConnectionFactory.createConnection(conf)
    // Table on which different commands have to be run.
    val tableName = connection.getTable(TableName.valueOf("hotelTable"))

    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val textRDD = sc.textFile("/Users/sukritmehta/Documents/hotel_bookings.csv")

    // NOTE: this counter is serialized into each partition's closure, so every
    // partition starts again at 1 and row keys can collide across partitions.
    var c = 1
    textRDD.foreachPartition {
      iter =>
        val hbaseConf = HBaseConfiguration.create()
        val connection: Connection = ConnectionFactory.createConnection(hbaseConf)
        val myTable = connection.getTable(TableName.valueOf("hotelTable"))
        iter.foreach {
          f =>
            val col = f.split(",")
            val insertData = new Put(Bytes.toBytes(c.toString()))
            insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("hotelName"), Bytes.toBytes(col(0).toString()))
            insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("is_canceled"), Bytes.toBytes(col(1).toString()))
            insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("reservation_status"), Bytes.toBytes(col(30).toString()))
            insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("reservation_status_date"), Bytes.toBytes(col(31).toString()))
            c = c + 1
            myTable.put(insertData)
        }
    }
  }
}
But after running this code, I am getting the following error:
Exception in thread "dag-scheduler-event-loop" java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/InputSplitWithLocationInfo
at org.apache.spark.rdd.HadoopRDD.getPreferredLocations(HadoopRDD.scala:356)
at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:275)
at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:275)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:274)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:2003)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply$mcVI$sp(DAGScheduler.scala:2014)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply(DAGScheduler.scala:2013)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply(DAGScheduler.scala:2013)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:2013)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:2011)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:2011)
at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1977)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:1112)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:1110)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1110)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:1069)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:1013)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2065)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.InputSplitWithLocationInfo
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 32 more
pom.xml file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>Spark_Hbase</groupId>
<artifactId>Spark_Hbase</artifactId>
<version>0.0.1-SNAPSHOT</version>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<!-- Scala and Spark dependencies -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.11</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-avro_2.11</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<!-- <dependency> <groupId>org.codehaus.janino</groupId> <artifactId>commons-compiler</artifactId>
<version>3.0.7</version> </dependency> <dependency> <groupId>org.apache.spark</groupId>
<artifactId>spark-network-common_2.11</artifactId> <version>2.1.1</version>
</dependency> -->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.9</version>
</dependency>
<!-- <dependency> <groupId>org.mongodb.spark</groupId> <artifactId>mongo-spark-connector_2.10</artifactId>
<version>2.1.1</version> </dependency> -->
<!-- <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-mllib_2.10</artifactId>
<version>2.1.1</version> </dependency> -->
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-spark</artifactId>
<version>2.0.0-alpha4</version> <!-- Hortonworks Latest -->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-mapreduce -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-mapreduce</artifactId>
<version>2.2.4</version>
</dependency>
</dependencies>
</project>
It would be great if anyone could help me out in resolving this issue.
I think you are missing
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.5</version>
<scope>provided</scope>
</dependency>
Hi, I am trying to build an application using Kotlin and Spring Boot, with a Postgres image running on Docker, but I keep getting this error:
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'entityManagerFactory' defined in class path resource [org/springframework/boot/autoconfigure/orm/jpa/HibernateJpaConfiguration.class]: Invocation of init method failed; nested exception is java.lang.NoSuchMethodError: javax.persistence.spi.PersistenceUnitInfo.getValidationMode()Ljavax/persistence/ValidationMode;
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1702) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:579) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:501) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.lambda$doGetBean$0(AbstractBeanFactory.java:317) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:228) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:315) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:199) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.getBean(AbstractApplicationContext.java:1089) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:859) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:550) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext.refresh(ServletWebServerApplicationContext.java:140) ~[spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.refresh(SpringApplication.java:759) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.refreshContext(SpringApplication.java:395) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:327) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:1255) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
.....
This is my entity:
@Entity
data class User(
    @Id
    @GeneratedValue
    val id: Long,
    val firstName: String,
    val lastName: String,
    val age: Int,
    val gender: String
)
This is my repository:
import org.springframework.data.jpa.repository.JpaRepository
import org.springframework.stereotype.Repository
@Repository
interface UserRepository : JpaRepository<User, Long>
And this is my controller:
@RestController
class UserRestController {

    @Autowired
    lateinit var userRepository: UserRepository

    @RequestMapping(value = "/addUser")
    fun save(): User {
        return userRepository.save(User(id = 1, firstName = "chaima", lastName = "ennar", age = 20, gender = "female"))
    }
}
These are my properties:
server.port=${PORT:9000}
spring.cache.cache-names=googleFeeds,internationalFeeds
spring.cache.caffeine.spec=maximumSize=100,expireAfterAccess=1800s
spring.datasource.url=jdbc:postgresql://localhost:5433/irooldb
spring.datasource.username=root
spring.datasource.password=root
spring.jpa.hibernate.ddl-auto=update
spring.jpa.generate-ddl=true
And these are my dependencies:
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-rest</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-kotlin</artifactId>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib-jdk8</artifactId>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-reflect</artifactId>
</dependency>
<dependency>
<groupId>com.rometools</groupId>
<artifactId>rome</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>2.5.5</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.4-1206-jdbc42</version>
</dependency>
<dependency>
<groupId>javax.persistence</groupId>
<artifactId>persistence-api</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
</dependencies>
My question is: what does this error mean, and what am I doing wrong?
This seems to be a duplicate of this issue: java.lang.NoSuchMethodError: javax.persistence.spi.PersistenceUnitInfo.getValidationMode()Ljavax/persistence/ValidationMode;
It appears that you are picking up a JPA v1 library from your runtime classpath, but the method that you are attempting to invoke was only defined in JPA v2.
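Concretely, the javax.persistence:persistence-api:1.0.2 entry in the dependency list above is a JPA 1.0 API jar, while spring-boot-starter-data-jpa already brings in Hibernate and the JPA 2.x API where getValidationMode() exists. A minimal sketch of the fix, assuming nothing else in the project needs the 1.0 jar, is to drop that dependency and rely on the starter:
<!-- remove this JPA 1.0 API jar ...
<dependency>
    <groupId>javax.persistence</groupId>
    <artifactId>persistence-api</artifactId>
    <version>1.0.2</version>
</dependency>
... and keep the starter, which provides the JPA 2.x API -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>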
I am trying to add Elasticsearch as a sink for my DataStream using this code:
val config = new HashMap[String, String]
config.put("bulk.flush.max.actions", "1")
config.put("cluster.name", "cluster")
val transports = new ArrayList[InetSocketAddress]
transports.add(new InetSocketAddress(InetAddress.getByName("127.0.0.1"), 9300))
// testing simple setup
val input:DataStream[String] = timedStream.map( _.language.toString)
input.print()
input.addSink(new ElasticsearchSink(config, transports, new ElasticsearchSinkFunction[String] {
def createIndexRequest(element: String): IndexRequest = {
val json = new java.util.HashMap[String, AnyRef]
// Map stream fields to JSON properties, format:
// json.put("json-property-name", streamField)
json.put("data", element)
Requests.indexRequest.index("test").`type`("test").source(json)
}
override def process(element: String, ctx: RuntimeContext, indexer: RequestIndexer) {
indexer.add(createIndexRequest(element))
}
}))
and my POM dependencies are:
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-twitter_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch2_2.10</artifactId>
<version>1.3.2</version>
</dependency>
</dependencies>
I also have Elasticsearch 2.4.6 installed on my system. During execution of my program I am getting the following error:
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:933)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:876)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:876)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NoSuchFieldError: LUCENE_4_10_4
at org.elasticsearch.Version.<clinit>(Version.java:230)
at org.elasticsearch.client.transport.TransportClient$Builder.build(TransportClient.java:129)
at org.apache.flink.streaming.connectors.elasticsearch2.Elasticsearch2ApiCallBridge.createClient(Elasticsearch2ApiCallBridge.java:65)
at org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase.open(ElasticsearchSinkBase.java:272)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:36)
at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.open(AbstractUdfStreamOperator.java:111)
at org.apache.flink.streaming.runtime.tasks.StreamTask.openAllOperators(StreamTask.java:376)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:253)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:702)
at java.lang.Thread.run(Thread.java:745)
Where is the problem? I have tried including Elasticsearch 2.4.6 in the pom, but it didn't work.
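(A hedged pointer rather than a definite answer: a NoSuchFieldError on LUCENE_4_10_4 raised from org.elasticsearch.Version.<clinit> usually indicates that the Elasticsearch and Lucene jars ending up on the classpath are from different versions than the ones the Flink connector was built against. One way to see which versions Maven actually resolves is:
mvn dependency:tree -Dincludes=org.elasticsearch,org.apache.lucene
Any conflicting versions reported there would need to be aligned with what flink-connector-elasticsearch2_2.10 1.3.2 expects.)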
I've been experimenting with multiple consumer instances in a Kafka consumer group but it always fails with kafka.common.NotLeaderForPartitionException.
My Kafka cluster consists of 3 brokers, and the topic has PartitionCount:2 and ReplicationFactor:3.
SparkConsumer.java
public class SparkConsumer {
private static Function2<Integer, Integer, Integer> MyReducerFunc = (a, b) -> a + b;
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: SparkConsumer <brokers> <topics>\n" +
" <brokers> is a list of one or more Kafka brokers\n" +
" <topics> is a list of one or more kafka topics to consume from\n\n");
System.exit(1);
}
//StreamingExamples.setStreamingLogLevels();
String brokers = args[0];
String topics = args[1];
SparkConf sparkConf = new SparkConf().setMaster("local[5]").setAppName("SparkConsumer").set("spark.driver.host", "localhost");
JavaSparkContext sc = new JavaSparkContext(sparkConf);
// Create a StreamingContext with a 2 second batch size
JavaStreamingContext jssc = new JavaStreamingContext(sc, Durations.seconds(2));
Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", brokers);
//kafkaParams.put("auto.offset.reset", "smallest");
kafkaParams.put("group.id", "SparkConsumerGrp");
kafkaParams.put("zookeeper.connect", "localhost:2181");
// Create direct kafka stream with brokers and topics
JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
jssc,
String.class,
String.class,
StringDecoder.class,
StringDecoder.class,
kafkaParams,
topicsSet
);
//Aggregate data every 30 sec
JavaPairDStream<String, String> messages2 =
messages.window(Durations.seconds(30), Durations.seconds(30));
messages2.foreachRDD(rdd -> {
long numHits = rdd.count();
if(numHits == 0)
System.out.println("No new data fetched in last 30 sec");
//Do Processing
else{
System.out.println("\n\n----------------------------------Data fetched in the last 30 seconds: " + rdd.partitions().size()
+ " partitions and " + numHits + " records------------------\n\n");
//Convert to java log object
JavaRDD<ApacheAccessLog> logs = rdd.map(x-> x._2)
.map(ApacheAccessLog::parseFromLogLine)
.cache();
//Find the bot ip addresses
JavaRDD<String> iprdd = logs.mapToPair(ip-> new Tuple2<>(ip.getIpAddress(),1))
.reduceByKey(MyReducerFunc)
.filter(botip-> botip._2 > 50)
.keys();
//If we find something, we store it in results dir on hdfs
if(iprdd.count() > 0)
{
sc.hadoopConfiguration().set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
sc.hadoopConfiguration().set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
//sc.hadoopConfiguration().set("mapreduce.output.basename", timeStamp);
iprdd.coalesce(1).saveAsTextFile("hdfs://quickstart.cloudera:8020/results/"+timeStamp);
JobConf jobConf=new JobConf();
System.out.println("\n\n-------------Resuts successfully written to /results on hdfs-------------\n");
}
}
});
jssc.start();
jssc.awaitTermination();
}
}
My understanding from the documentation was that we can start another consumer process with the same group.id to add that instance to an existing group, so I run this code in two separate terminals. However, this is the error that I always get:
At the first instance:
17/06/06 00:53:26 ERROR DirectKafkaInputDStream: ArrayBuffer(kafka.common.NotLeaderForPartitionException, org.apache.spark.SparkException: Couldn't find leader offsets for Set([topic5,1]))
17/06/06 00:53:26 ERROR DirectKafkaInputDStream: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
17/06/06 00:53:27 ERROR JobScheduler: Error generating jobs for time 1496735582000 ms
org.apache.spark.SparkException: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.latestLeaderOffsets(DirectKafkaInputDStream.scala:133)
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.compute(DirectKafkaInputDStream.scala:158)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:900)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:899)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:899)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:877)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:877)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:870)
at org.apache.spark.streaming.dstream.WindowedDStream.compute(WindowedDStream.scala:65)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.ForEachDStream.generateJob(ForEachDStream.scala:48)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:117)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:116)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.DStreamGraph.generateJobs(DStreamGraph.scala:116)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:249)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:247)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.JobGenerator.generateJobs(JobGenerator.scala:247)
at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:183)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:89)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:88)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Exception in thread "main" org.apache.spark.SparkException: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.latestLeaderOffsets(DirectKafkaInputDStream.scala:133)
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.compute(DirectKafkaInputDStream.scala:158)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:900)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:899)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:899)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:877)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:877)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:870)
at org.apache.spark.streaming.dstream.WindowedDStream.compute(WindowedDStream.scala:65)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.ForEachDStream.generateJob(ForEachDStream.scala:48)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:117)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:116)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.DStreamGraph.generateJobs(DStreamGraph.scala:116)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:249)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:247)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.JobGenerator.generateJobs(JobGenerator.scala:247)
at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:183)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:89)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:88)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
At the second instance (this seems to occur when I call rdd.count(), right after the first instance crashes):
Exception in thread "streaming-job-executor-0" java.lang.Error: java.lang.InterruptedException
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1148)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
at scala.concurrent.impl.Promise$DefaultPromise.tryAwait(Promise.scala:202)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:218)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:153)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:619)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
at org.apache.spark.rdd.RDD.count(RDD.scala:1158)
at org.apache.spark.api.java.JavaRDDLike$class.count(JavaRDDLike.scala:455)
at org.apache.spark.api.java.AbstractJavaRDDLike.count(JavaRDDLike.scala:45)
at hadoopTest.hadoopTest.SparkConsumer.lambda$1(SparkConsumer.java:85)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:256)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:256)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:256)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:255)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
... 2 more
This is my pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>hadoopTest</groupId>
<artifactId>hadoopTest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>hadoopTest</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.8.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.8.2.1</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<artifactId>jmxri</artifactId>
<groupId>com.sun.jmx</groupId>
</exclusion>
<exclusion>
<artifactId>jms</artifactId>
<groupId>javax.jms</groupId>
</exclusion>
<exclusion>
<artifactId>jmxtools</artifactId>
<groupId>com.sun.jdmk</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.1</version>
<exclusions>
<exclusion>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-tags_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.8</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>2.10.2</version>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
<version>1.3</version>
</dependency>
</dependencies>
</project>
I have been trying to debug this the whole day and am at a loss. It says it cannot find the leaders even though all my brokers remain up. I'd appreciate any help. Thanks!
Turns out it was because I was using spark-streaming-kafka-0-8_2.11 instead of spark-streaming-kafka-0-10_2.11. You also need to change the kafka-clients version to 0.10.2.0 to match it.
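In pom terms that change would look roughly like this (a sketch only; note that the 0-10 connector also has a different KafkaUtils.createDirectStream API based on ConsumerStrategies/LocationStrategies, so the consumer code has to be adjusted as well):
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.10.2.0</version>
</dependency>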
I am using Weld, RestEasy and DeltaSpike Data for my project. The project's dependency description is as follows.
<properties>
<resteasy.version>3.0.10.Final</resteasy.version>
<deltaspike.version>1.2.1</deltaspike.version>
</properties>
<dependencies>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-jaxrs</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-client</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-multipart-provider</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-cache-core</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>async-http-servlet-3.0</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-cdi</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-jackson2-provider</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.weld.servlet</groupId>
<artifactId>weld-servlet</artifactId>
<version>2.2.9.Final</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-bean-validation-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.cdictrl</groupId>
<artifactId>deltaspike-cdictrl-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-data-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-data-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-jpa-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-jpa-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-servlet-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-servlet-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-core</artifactId>
<version>4.3.6.Final</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-entitymanager</artifactId>
<version>4.3.6.Final</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.33</version>
</dependency>
<dependency>
<groupId>org.jboss</groupId>
<artifactId>jandex</artifactId>
<version>1.2.3.Final</version>
</dependency>
</dependencies>
We used to use Spring for projects like this, so there was no need for a persistence file, but we need one here, so we placed it in META-INF. When I call the REST service I get this exception:
java.lang.NullPointerException
at org.bitbucket.infovillafoundation.manmyanmar.hotel.cdiproducers.EntityManagerProducer.create(EntityManagerProducer.java:21)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.jboss.weld.injection.StaticMethodInjectionPoint.invoke(StaticMethodInjectionPoint.java:89)
at org.jboss.weld.injection.StaticMethodInjectionPoint.invoke(StaticMethodInjectionPoint.java:79)
at org.jboss.weld.injection.producer.ProducerMethodProducer.produce(ProducerMethodProducer.java:95)
at org.jboss.weld.injection.producer.AbstractMemberProducer.produce(AbstractMemberProducer.java:151)
at org.jboss.weld.bean.AbstractProducerBean.create(AbstractProducerBean.java:183)
at org.jboss.weld.context.unbound.DependentContextImpl.get(DependentContextImpl.java:69)
at org.jboss.weld.manager.BeanManagerImpl.getReference(BeanManagerImpl.java:744)
at org.jboss.weld.bean.builtin.InstanceImpl.getBeanInstance(InstanceImpl.java:178)
at org.jboss.weld.bean.builtin.InstanceImpl.get(InstanceImpl.java:98)
at org.apache.deltaspike.data.impl.handler.EntityManagerLookup.lookupFor(EntityManagerLookup.java:58)
at org.apache.deltaspike.data.impl.handler.QueryHandler.createContext(QueryHandler.java:104)
at org.apache.deltaspike.data.impl.handler.QueryHandler.invoke(QueryHandler.java:77)
at com.sun.proxy.$Proxy74.findAll(Unknown Source)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.service.RoomTypeService.getAllRoomTypes(RoomTypeService.java:22)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.service.RoomTypeService$Proxy$_$$_WeldClientProxy.getAllRoomTypes(Unknown Source)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.restservice.FrontOfficeRestService.hello(FrontOfficeRestService.java:23)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.restservice.FrontOfficeRestService$Proxy$_$$_WeldClientProxy.hello(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.jboss.resteasy.core.MethodInjectorImpl.invoke(MethodInjectorImpl.java:137)
at org.jboss.resteasy.core.ResourceMethodInvoker.invokeOnTarget(ResourceMethodInvoker.java:296)
at org.jboss.resteasy.core.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:250)
at org.jboss.resteasy.core.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:237)
at org.jboss.resteasy.core.SynchronousDispatcher.invoke(SynchronousDispatcher.java:356)
at org.jboss.resteasy.core.SynchronousDispatcher.invoke(SynchronousDispatcher.java:179)
at org.jboss.resteasy.plugins.server.servlet.ServletContainerDispatcher.service(ServletContainerDispatcher.java:220)
at org.jboss.resteasy.plugins.server.servlet.HttpServletDispatcher.service(HttpServletDispatcher.java:56)
at org.jboss.resteasy.plugins.server.servlet.HttpServletDispatcher.service(HttpServletDispatcher.java:51)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:848)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:684)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1496)
at org.apache.deltaspike.servlet.impl.event.EventBridgeFilter.doFilter(EventBridgeFilter.java:59)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1484)
at org.apache.deltaspike.servlet.impl.produce.RequestResponseHolderFilter.doFilter(RequestResponseHolderFilter.java:63)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1476)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:501)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:533)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1086)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:429)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1020)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:370)
at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:971)
at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:1033)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:644)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
at org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Thread.java:745)
In case you need it, here is the code for the producer:
package org.bitbucket.infovillafoundation.manmyanmar.hotel.cdiproducers;

import javax.enterprise.inject.Disposes;
import javax.enterprise.inject.Produces;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.PersistenceContext;

/**
 * Created by Sandah Aung on 27/2/15.
 */
public class EntityManagerProducer {

    @PersistenceContext
    private EntityManagerFactory emf;

    @Produces
    public EntityManager create() {
        return emf.createEntityManager();
    }

    public void close(@Disposes EntityManager em) {
        if (em.isOpen()) {
            em.close();
        }
    }
}
Edit: beans.xml
<beans xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/beans_1_0.xsd">
<interceptors>
<class>org.apache.deltaspike.jpa.impl.transaction.TransactionalInterceptor</class>
</interceptors>
</beans>
What have I done wrong?
You didn't give any details about your beans.xml, so this is just a guess:
Your producer class lacks a bean defining annotation, so it might be ignored by the CDI container.
Try adding @Dependent to your EntityManagerProducer class, or check which bean discovery mode your application is using.
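A minimal sketch of that suggestion (assuming javax.enterprise.context.Dependent is the scope you want; the rest of the producer stays as posted):
import javax.enterprise.context.Dependent;

@Dependent // bean-defining annotation so the producer is picked up during bean discovery
public class EntityManagerProducer {
    // ... @Produces / @Disposes methods as in the question ...
}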