Exception on Spark Job Test - scala

Spark rookie here. I was trying to write some unit tests for my Spark job, but I keep getting an exception when I try to create a new StreamingContext in the test, on the line ssc = new StreamingContext(conf, batchDuration).
The code looks like this:
@RunWith(classOf[JUnitRunner])
class SparkJobTest extends FlatSpec with MockitoSugar with BeforeAndAfter {

  private val master = "local[2]"
  private val appName = "TestingAppName"
  private val batchDuration = Seconds(1)

  private val loggerMock = mock[Logger]

  private var ssc: StreamingContext = _
  private var sc: SparkContext = _

  before {
    val conf = new SparkConf()
      .setMaster(master)
      .setAppName(appName)
    ssc = new StreamingContext(conf, batchDuration)
  }

  after {
    if (ssc != null) {
      ssc.stop()
    }
  }

  "This" should "always work" in {
    assert(1 == 1)
  }
}
The exception I got:
An exception or error caused a run to abort: Bad return type
Exception Details:
Location:
org/apache/spark/streaming/StreamingContext.fileStream(Ljava/lang/String;Lscala/Function1;ZLscala/reflect/ClassTag;Lscala/reflect/ClassTag;Lscala/reflect/ClassTag;)Lorg/apache/spark/streaming/dstream/InputDStream; #23: areturn
Reason:
Type 'org/apache/spark/streaming/dstream/FileInputDStream' (current frame, stack[0]) is not assignable to 'org/apache/spark/streaming/dstream/InputDStream' (from method signature)
Current Frame:
bci: #23
flags: { }
locals: { 'org/apache/spark/streaming/StreamingContext', 'java/lang/String', 'scala/Function1', integer, 'scala/reflect/ClassTag', 'scala/reflect/ClassTag', 'scala/reflect/ClassTag' }
stack: { 'org/apache/spark/streaming/dstream/FileInputDStream' }
Bytecode:
0000000: bb01 9959 2a2b 2c1d b201 9eb6 01a6 1904
0000010: 1905 1906 b701 a9b0
java.lang.VerifyError: Bad return type
Exception Details:
Location:
org/apache/spark/streaming/StreamingContext.fileStream(Ljava/lang/String;Lscala/Function1;ZLscala/reflect/ClassTag;Lscala/reflect/ClassTag;Lscala/reflect/ClassTag;)Lorg/apache/spark/streaming/dstream/InputDStream; #23: areturn
Reason:
Type 'org/apache/spark/streaming/dstream/FileInputDStream' (current frame, stack[0]) is not assignable to 'org/apache/spark/streaming/dstream/InputDStream' (from method signature)
Current Frame:
bci: #23
flags: { }
locals: { 'org/apache/spark/streaming/StreamingContext', 'java/lang/String', 'scala/Function1', integer, 'scala/reflect/ClassTag', 'scala/reflect/ClassTag', 'scala/reflect/ClassTag' }
stack: { 'org/apache/spark/streaming/dstream/FileInputDStream' }
Bytecode:
0000000: bb01 9959 2a2b 2c1d b201 9eb6 01a6 1904
0000010: 1905 1906 b701 a9b0
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply$mcV$sp(SparkJobTest.scala:25)
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply(SparkJobTest.scala:21)
at com.appnexus.data.spark.job.SparkJobTest$$anonfun$1.apply(SparkJobTest.scala:21)
at org.scalatest.BeforeAndAfter$class.runTest(BeforeAndAfter.scala:195)
at com.appnexus.data.spark.job.SparkJobTest.runTest(SparkJobTest.scala:13)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1714)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1714)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:390)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:427)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
at org.scalatest.FlatSpecLike$class.runTests(FlatSpecLike.scala:1714)
at org.scalatest.FlatSpec.runTests(FlatSpec.scala:1683)
at org.scalatest.Suite$class.run(Suite.scala:1424)
at org.scalatest.FlatSpec.org$scalatest$FlatSpecLike$$super$run(FlatSpec.scala:1683)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1760)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1760)
at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
at org.scalatest.FlatSpecLike$class.run(FlatSpecLike.scala:1760)
at com.appnexus.data.spark.job.SparkJobTest.org$scalatest$BeforeAndAfter$$super$run(SparkJobTest.scala:13)
at org.scalatest.BeforeAndAfter$class.run(BeforeAndAfter.scala:241)
at com.appnexus.data.spark.job.SparkJobTest.run(SparkJobTest.scala:13)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
at org.scalatest.tools.Runner$.run(Runner.scala:883)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
Process finished with exit code 0
I googled a lot; some said it's a version issue. I tried Spark 1.2, 1.3, and 1.4, but no luck.
I'm running the test on Mac OS X with Java 1.8.
EDIT: Spark dependencies in the pom:
<properties>
<!-- maven specific properties -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.10.4</scala.version>
<kafka.version>0.8.2.0</kafka.version>
<spark.version>1.3.1</spark.version>
<hadoop.version>2.6.0-cdh5.4.0</hadoop.version>
<samza.version>0.8.x-hadoop${hadoop.version}-kafka${kafka.version}</samza.version>
<assembly.configuration>src/main/assembly/src.xml</assembly.configuration>
<data.deps.scope>compile</data.deps.scope>
<spray.version>1.3.1</spray.version>
</properties>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalamock</groupId>
<artifactId>scalamock-scalatest-support_2.11</artifactId>
<version>3.2</version>
<scope>test</scope>
</dependency>
<!-- spark dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.10</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
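One thing worth checking (just an observation from the posted pom, not a confirmed fix): the scalamock-scalatest-support artifact is the _2.11 build while scala-library, scalatest and all the Spark artifacts are _2.10, and a VerifyError like this usually means two incompatible builds of the same classes ended up on the (test) classpath. A sketch of an aligned test dependency, assuming ScalaMock 3.2 is also published for Scala 2.10:
<dependency>
    <groupId>org.scalamock</groupId>
    <artifactId>scalamock-scalatest-support_2.10</artifactId>
    <version>3.2</version>
    <scope>test</scope>
</dependency>
Running mvn dependency:tree -Dverbose can confirm whether mixed Scala or Spark versions are actually being resolved.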

Related

Read CSV file in Spark and insert it to HBase using created RDD

I am able to insert data into an HBase table using the Put method. Now I want to read data from a CSV file and write it to the HBase table.
I wrote this code:
object Read_CSV_Method2 {

  case class HotelBooking(hotelName: String, is_cancelled: String, reservation_status: String,
                          reservation_status_date: String)

  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.ERROR)

    val sparkConf = new SparkConf().setAppName("Spark_Hbase_Connection").setMaster("local[*]")
    val sc = new SparkContext(sparkConf)

    val conf = HBaseConfiguration.create()
    // establish a connection
    val connection: Connection = ConnectionFactory.createConnection(conf)
    // Table on which different commands have to be run.
    val tableName = connection.getTable(TableName.valueOf("hotelTable"))

    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val textRDD = sc.textFile("/Users/sukritmehta/Documents/hotel_bookings.csv")

    var c = 1
    textRDD.foreachPartition { iter =>
      val hbaseConf = HBaseConfiguration.create()
      val connection: Connection = ConnectionFactory.createConnection(hbaseConf)
      val myTable = connection.getTable(TableName.valueOf("hotelTable"))
      iter.foreach { f =>
        val col = f.split(",")
        val insertData = new Put(Bytes.toBytes(c.toString()))
        insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("hotelName"), Bytes.toBytes(col(0).toString()))
        insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("is_canceled"), Bytes.toBytes(col(1).toString()))
        insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("reservation_status"), Bytes.toBytes(col(30).toString()))
        insertData.addColumn(Bytes.toBytes("hotelFam"), Bytes.toBytes("reservation_status_date"), Bytes.toBytes(col(31).toString()))
        c = c + 1
        myTable.put(insertData)
      }
    }
  }
}
But after running this code, I am getting the following error:
Exception in thread "dag-scheduler-event-loop" java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/InputSplitWithLocationInfo
at org.apache.spark.rdd.HadoopRDD.getPreferredLocations(HadoopRDD.scala:356)
at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:275)
at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:275)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:274)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:2003)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply$mcVI$sp(DAGScheduler.scala:2014)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply(DAGScheduler.scala:2013)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$1.apply(DAGScheduler.scala:2013)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:2013)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:2011)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:2011)
at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1977)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:1112)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:1110)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1110)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:1069)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:1013)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2065)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.InputSplitWithLocationInfo
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 32 more
pom.xml file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>Spark_Hbase</groupId>
<artifactId>Spark_Hbase</artifactId>
<version>0.0.1-SNAPSHOT</version>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<!-- Scala and Spark dependencies -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.11</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-avro_2.11</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<!-- <dependency> <groupId>org.codehaus.janino</groupId> <artifactId>commons-compiler</artifactId>
<version>3.0.7</version> </dependency> <dependency> <groupId>org.apache.spark</groupId>
<artifactId>spark-network-common_2.11</artifactId> <version>2.1.1</version>
</dependency> -->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.9</version>
</dependency>
<!-- <dependency> <groupId>org.mongodb.spark</groupId> <artifactId>mongo-spark-connector_2.10</artifactId>
<version>2.1.1</version> </dependency> -->
<!-- <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-mllib_2.10</artifactId>
<version>2.1.1</version> </dependency> -->
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-spark</artifactId>
<version>2.0.0-alpha4</version> <!-- Hortonworks Latest -->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-mapreduce -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-mapreduce</artifactId>
<version>2.2.4</version>
</dependency>
</dependencies>
</project>
It would be great if anyone could help me out in resolving this issue.
I think you are missing:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.5</version>
<scope>provided</scope>
</dependency>
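Some hedged context on why that helps: org.apache.hadoop.mapred.InputSplitWithLocationInfo ships with the Hadoop 2.x MapReduce client jars, while the hadoop-core 1.2.1 artifact in the posted pom is from the Hadoop 1.x line and predates that interface. An alternative sketch is to replace hadoop-core with a Hadoop 2.x client artifact (the version below is an assumption; match it to your cluster):
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.5</version>
</dependency>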

spring boot kotlin error creating bean with name 'entityManagerFactory'

Hi, I am trying to build an application using Kotlin and Spring Boot while running a Postgres image on Docker, but I keep getting this error:
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'entityManagerFactory' defined in class path resource [org/springframework/boot/autoconfigure/orm/jpa/HibernateJpaConfiguration.class]: Invocation of init method failed; nested exception is java.lang.NoSuchMethodError: javax.persistence.spi.PersistenceUnitInfo.getValidationMode()Ljavax/persistence/ValidationMode;
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1702) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:579) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:501) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.lambda$doGetBean$0(AbstractBeanFactory.java:317) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:228) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:315) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:199) ~[spring-beans-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.getBean(AbstractApplicationContext.java:1089) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:859) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:550) ~[spring-context-5.0.5.RELEASE.jar:5.0.5.RELEASE]
at org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext.refresh(ServletWebServerApplicationContext.java:140) ~[spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.refresh(SpringApplication.java:759) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.refreshContext(SpringApplication.java:395) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:327) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:1255) [spring-boot-2.0.1.RELEASE.jar:2.0.1.RELEASE]
.....
This is my entity:
@Entity
data class User(
    @Id
    @GeneratedValue
    val id: Long,
    val firstName: String,
    val lastName: String,
    val age: Int,
    val gender: String
)
This is my repository:
import org.springframework.data.jpa.repository.JpaRepository
import org.springframework.stereotype.Repository
@Repository
interface UserRepository : JpaRepository<User, Long>
And this is my controller:
@RestController
class UserRestController {

    @Autowired
    lateinit var userRepository: UserRepository

    @RequestMapping(value = "/addUser")
    fun save(): User {
        return userRepository.save(User(id = 1, firstName = "chaima", lastName = "ennar", age = 20, gender = "female"))
    }
}
These are my properties:
server.port=${PORT:9000}
spring.cache.cache-names=googleFeeds,internationalFeeds
spring.cache.caffeine.spec=maximumSize=100,expireAfterAccess=1800s
spring.datasource.url=jdbc:postgresql://localhost:5433/irooldb
spring.datasource.username=root
spring.datasource.password=root
spring.jpa.hibernate.ddl-auto=update
spring.jpa.generate-ddl=true
And these are my dependencies:
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-rest</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-kotlin</artifactId>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib-jdk8</artifactId>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-reflect</artifactId>
</dependency>
<dependency>
<groupId>com.rometools</groupId>
<artifactId>rome</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>2.5.5</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.4-1206-jdbc42</version>
</dependency>
<dependency>
<groupId>javax.persistence</groupId>
<artifactId>persistence-api</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
My question is: what does this error mean, and what am I doing wrong?
This seems to be a duplicate of this issue: java.lang.NoSuchMethodError: javax.persistence.spi.PersistenceUnitInfo.getValidationMode()Ljavax/persistence/ValidationMode;
It appears that you are picking up a JPA v1 library from your runtime classpath, but the method that you are attempting to invoke was only defined in JPA v2.
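Looking at the posted dependencies, the likely JPA v1 source is javax.persistence:persistence-api:1.0.2, which is the JPA 1.0 API and does not define getValidationMode() (ValidationMode was added in JPA 2.0). Since spring-boot-starter-data-jpa already brings in the JPA 2.x API transitively via Hibernate, a sketch of the fix is simply to remove that dependency from the pom:
<!-- remove this: JPA 1.0 API, superseded by the JPA 2.x API
     pulled in transitively by spring-boot-starter-data-jpa -->
<dependency>
    <groupId>javax.persistence</groupId>
    <artifactId>persistence-api</artifactId>
    <version>1.0.2</version>
</dependency>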

Getting error "java.lang.NoSuchFieldError: LUCENE_4_10_4" in flink streaming

I am trying to add Elasticsearch as a sink for my DataStream using this code:
val config = new HashMap[String, String]
config.put("bulk.flush.max.actions", "1")
config.put("cluster.name", "cluster")

val transports = new ArrayList[InetSocketAddress]
transports.add(new InetSocketAddress(InetAddress.getByName("127.0.0.1"), 9300))

// testing simple setup
val input: DataStream[String] = timedStream.map(_.language.toString)
input.print()

input.addSink(new ElasticsearchSink(config, transports, new ElasticsearchSinkFunction[String] {

  def createIndexRequest(element: String): IndexRequest = {
    val json = new java.util.HashMap[String, AnyRef]
    // Map stream fields to JSON properties, format:
    // json.put("json-property-name", streamField)
    json.put("data", element)
    Requests.indexRequest.index("test").`type`("test").source(json)
  }

  override def process(element: String, ctx: RuntimeContext, indexer: RequestIndexer) {
    indexer.add(createIndexRequest(element))
  }
}))
and my POM file dependencies are:
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-twitter_2.10</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch2_2.10</artifactId>
<version>1.3.2</version>
</dependency>
</dependencies>
I also have Elasticsearch version 2.4.6 on my system. During execution of my program I am getting the following error:
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:933)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:876)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:876)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NoSuchFieldError: LUCENE_4_10_4
at org.elasticsearch.Version.<clinit>(Version.java:230)
at org.elasticsearch.client.transport.TransportClient$Builder.build(TransportClient.java:129)
at org.apache.flink.streaming.connectors.elasticsearch2.Elasticsearch2ApiCallBridge.createClient(Elasticsearch2ApiCallBridge.java:65)
at org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase.open(ElasticsearchSinkBase.java:272)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:36)
at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.open(AbstractUdfStreamOperator.java:111)
at org.apache.flink.streaming.runtime.tasks.StreamTask.openAllOperators(StreamTask.java:376)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:253)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:702)
at java.lang.Thread.run(Thread.java:745)
Where is the problem? I have tried including Elasticsearch 2.4.6 in the pom, but it didn't work.
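No accepted fix is posted here, but a NoSuchFieldError thrown from org.elasticsearch.Version.<clinit> generally indicates that the elasticsearch and lucene-core versions resolved on the classpath do not match (the connector pulls in its own Elasticsearch client transitively). A diagnostic sketch, assuming Maven:
mvn dependency:tree -Dincludes=org.apache.lucene,org.elasticsearch
Comparing the resolved versions against what flink-connector-elasticsearch2 expects is usually the quickest way to spot the conflict.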

NotLeaderForPartitionException when using multiple instances in a consumer group in Kafka+SparkStreaming

I've been experimenting with multiple consumer instances in a Kafka consumer group but it always fails with kafka.common.NotLeaderForPartitionException.
My Kafka cluster consists of 3 brokers, and the topic has PartitionCount:2 and ReplicationFactor:3.
SparkConsumer.java
public class SparkConsumer {

    private static Function2<Integer, Integer, Integer> MyReducerFunc = (a, b) -> a + b;

    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: SparkConsumer <brokers> <topics>\n" +
                    " <brokers> is a list of one or more Kafka brokers\n" +
                    " <topics> is a list of one or more kafka topics to consume from\n\n");
            System.exit(1);
        }

        //StreamingExamples.setStreamingLogLevels();
        String brokers = args[0];
        String topics = args[1];

        SparkConf sparkConf = new SparkConf().setMaster("local[5]").setAppName("SparkConsumer").set("spark.driver.host", "localhost");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // Create a StreamingContext with a 2 second batch size
        JavaStreamingContext jssc = new JavaStreamingContext(sc, Durations.seconds(2));

        Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", brokers);
        //kafkaParams.put("auto.offset.reset", "smallest");
        kafkaParams.put("group.id", "SparkConsumerGrp");
        kafkaParams.put("zookeeper.connect", "localhost:2181");

        // Create direct kafka stream with brokers and topics
        JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
                jssc,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaParams,
                topicsSet
        );

        //Aggregate data every 30 sec
        JavaPairDStream<String, String> messages2 =
                messages.window(Durations.seconds(30), Durations.seconds(30));

        messages2.foreachRDD(rdd -> {
            long numHits = rdd.count();

            if (numHits == 0)
                System.out.println("No new data fetched in last 30 sec");
            //Do Processing
            else {
                System.out.println("\n\n----------------------------------Data fetched in the last 30 seconds: " + rdd.partitions().size()
                        + " partitions and " + numHits + " records------------------\n\n");

                //Convert to java log object
                JavaRDD<ApacheAccessLog> logs = rdd.map(x -> x._2)
                        .map(ApacheAccessLog::parseFromLogLine)
                        .cache();

                //Find the bot ip addresses
                JavaRDD<String> iprdd = logs.mapToPair(ip -> new Tuple2<>(ip.getIpAddress(), 1))
                        .reduceByKey(MyReducerFunc)
                        .filter(botip -> botip._2 > 50)
                        .keys();

                //If we find something, we store it in results dir on hdfs
                if (iprdd.count() > 0) {
                    sc.hadoopConfiguration().set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
                    sc.hadoopConfiguration().set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
                    String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
                    //sc.hadoopConfiguration().set("mapreduce.output.basename", timeStamp);
                    iprdd.coalesce(1).saveAsTextFile("hdfs://quickstart.cloudera:8020/results/" + timeStamp);
                    JobConf jobConf = new JobConf();
                    System.out.println("\n\n-------------Resuts successfully written to /results on hdfs-------------\n");
                }
            }
        });

        jssc.start();
        jssc.awaitTermination();
    }
}
My understanding from the documentation was that we can start another consumer process with the same group.id to add that instance to an existing group. Hence, I run this code in two separate terminals. However, this is the error I always get:
At the first instance:
17/06/06 00:53:26 ERROR DirectKafkaInputDStream: ArrayBuffer(kafka.common.NotLeaderForPartitionException, org.apache.spark.SparkException: Couldn't find leader offsets for Set([topic5,1]))
17/06/06 00:53:26 ERROR DirectKafkaInputDStream: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
17/06/06 00:53:27 ERROR JobScheduler: Error generating jobs for time 1496735582000 ms
org.apache.spark.SparkException: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.latestLeaderOffsets(DirectKafkaInputDStream.scala:133)
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.compute(DirectKafkaInputDStream.scala:158)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:900)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:899)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:899)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:877)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:877)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:870)
at org.apache.spark.streaming.dstream.WindowedDStream.compute(WindowedDStream.scala:65)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.ForEachDStream.generateJob(ForEachDStream.scala:48)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:117)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:116)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.DStreamGraph.generateJobs(DStreamGraph.scala:116)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:249)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:247)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.JobGenerator.generateJobs(JobGenerator.scala:247)
at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:183)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:89)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:88)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Exception in thread "main" org.apache.spark.SparkException: ArrayBuffer(org.apache.spark.SparkException: Couldn't find leaders for Set([topic5,0], [topic5,1]))
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.latestLeaderOffsets(DirectKafkaInputDStream.scala:133)
at org.apache.spark.streaming.kafka.DirectKafkaInputDStream.compute(DirectKafkaInputDStream.scala:158)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:900)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2$$anonfun$apply$29.apply(DStream.scala:899)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:899)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$2.apply(DStream.scala:877)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:877)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.streaming.dstream.DStream$$anonfun$slice$1.apply(DStream.scala:871)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:701)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:264)
at org.apache.spark.streaming.dstream.DStream.slice(DStream.scala:870)
at org.apache.spark.streaming.dstream.WindowedDStream.compute(WindowedDStream.scala:65)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1$$anonfun$apply$7.apply(DStream.scala:341)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1$$anonfun$1.apply(DStream.scala:340)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:335)
at org.apache.spark.streaming.dstream.DStream$$anonfun$getOrCompute$1.apply(DStream.scala:333)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.streaming.dstream.DStream.getOrCompute(DStream.scala:330)
at org.apache.spark.streaming.dstream.ForEachDStream.generateJob(ForEachDStream.scala:48)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:117)
at org.apache.spark.streaming.DStreamGraph$$anonfun$1.apply(DStreamGraph.scala:116)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.DStreamGraph.generateJobs(DStreamGraph.scala:116)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:249)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$3.apply(JobGenerator.scala:247)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.JobGenerator.generateJobs(JobGenerator.scala:247)
at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:183)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:89)
at org.apache.spark.streaming.scheduler.JobGenerator$$anon$1.onReceive(JobGenerator.scala:88)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
At the second instance (this seems to occur when I call rdd.count(), right after the first instance crashes):
Exception in thread "streaming-job-executor-0" java.lang.Error: java.lang.InterruptedException
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1148)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
at scala.concurrent.impl.Promise$DefaultPromise.tryAwait(Promise.scala:202)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:218)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:153)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:619)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
at org.apache.spark.rdd.RDD.count(RDD.scala:1158)
at org.apache.spark.api.java.JavaRDDLike$class.count(JavaRDDLike.scala:455)
at org.apache.spark.api.java.AbstractJavaRDDLike.count(JavaRDDLike.scala:45)
at hadoopTest.hadoopTest.SparkConsumer.lambda$1(SparkConsumer.java:85)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:256)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:256)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:256)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:255)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
... 2 more
This is my pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>hadoopTest</groupId>
<artifactId>hadoopTest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>hadoopTest</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.8.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.8.2.1</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<artifactId>jmxri</artifactId>
<groupId>com.sun.jmx</groupId>
</exclusion>
<exclusion>
<artifactId>jms</artifactId>
<groupId>javax.jms</groupId>
</exclusion>
<exclusion>
<artifactId>jmxtools</artifactId>
<groupId>com.sun.jdmk</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.1</version>
<exclusions>
<exclusion>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-tags_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.8</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>2.10.2</version>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
<version>1.3</version>
</dependency>
</dependencies>
</project>
I have been trying to debug this the whole day and am at a loss. It says it cannot find the leaders even though all my brokers remain up. I'd appreciate any help. Thanks!
Turns out it was because I was using spark-streaming-kafka-0-8_2.11 instead of spark-streaming-kafka-0-10_2.11. You also need to change the kafka-clients version to 0.10.2.0 to match it.
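A hedged sketch of the dependency change described above (artifact names from the answer; the Spark version matches the posted pom):
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.10.2.0</version>
</dependency>
Note that the 0-10 integration has a different API (org.apache.spark.streaming.kafka010.KafkaUtils with ConsumerStrategies and LocationStrategies), so the createDirectStream call in SparkConsumer.java has to be adapted as well.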

EntityManagerFactory cannot be injected in CDI producer

I am using Weld, RestEasy and DeltaSpike Data for my project. The project's dependencies are as follows:
<properties>
<resteasy.version>3.0.10.Final</resteasy.version>
<deltaspike.version>1.2.1</deltaspike.version>
</properties>
<dependencies>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-jaxrs</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-client</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-multipart-provider</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-cache-core</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>async-http-servlet-3.0</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-cdi</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.resteasy</groupId>
<artifactId>resteasy-jackson2-provider</artifactId>
<version>${resteasy.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.weld.servlet</groupId>
<artifactId>weld-servlet</artifactId>
<version>2.2.9.Final</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-bean-validation-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.cdictrl</groupId>
<artifactId>deltaspike-cdictrl-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-data-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-data-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-jpa-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-jpa-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-servlet-module-api</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.apache.deltaspike.modules</groupId>
<artifactId>deltaspike-servlet-module-impl</artifactId>
<version>${deltaspike.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-core</artifactId>
<version>4.3.6.Final</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-entitymanager</artifactId>
<version>4.3.6.Final</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.33</version>
</dependency>
<dependency>
<groupId>org.jboss</groupId>
<artifactId>jandex</artifactId>
<version>1.2.3.Final</version>
</dependency>
</dependencies>
We used to use Spring for projects like this, so there really was no need for the persistence file, but we need one here, so we placed it in META-INF. This is the exception I get:
java.lang.NullPointerException
at org.bitbucket.infovillafoundation.manmyanmar.hotel.cdiproducers.EntityManagerProducer.create(EntityManagerProducer.java:21)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.jboss.weld.injection.StaticMethodInjectionPoint.invoke(StaticMethodInjectionPoint.java:89)
at org.jboss.weld.injection.StaticMethodInjectionPoint.invoke(StaticMethodInjectionPoint.java:79)
at org.jboss.weld.injection.producer.ProducerMethodProducer.produce(ProducerMethodProducer.java:95)
at org.jboss.weld.injection.producer.AbstractMemberProducer.produce(AbstractMemberProducer.java:151)
at org.jboss.weld.bean.AbstractProducerBean.create(AbstractProducerBean.java:183)
at org.jboss.weld.context.unbound.DependentContextImpl.get(DependentContextImpl.java:69)
at org.jboss.weld.manager.BeanManagerImpl.getReference(BeanManagerImpl.java:744)
at org.jboss.weld.bean.builtin.InstanceImpl.getBeanInstance(InstanceImpl.java:178)
at org.jboss.weld.bean.builtin.InstanceImpl.get(InstanceImpl.java:98)
at org.apache.deltaspike.data.impl.handler.EntityManagerLookup.lookupFor(EntityManagerLookup.java:58)
at org.apache.deltaspike.data.impl.handler.QueryHandler.createContext(QueryHandler.java:104)
at org.apache.deltaspike.data.impl.handler.QueryHandler.invoke(QueryHandler.java:77)
at com.sun.proxy.$Proxy74.findAll(Unknown Source)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.service.RoomTypeService.getAllRoomTypes(RoomTypeService.java:22)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.service.RoomTypeService$Proxy$_$$_WeldClientProxy.getAllRoomTypes(Unknown Source)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.restservice.FrontOfficeRestService.hello(FrontOfficeRestService.java:23)
at org.bitbucket.infovillafoundation.manmyanmar.hotel.frontoffice.restservice.FrontOfficeRestService$Proxy$_$$_WeldClientProxy.hello(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.jboss.resteasy.core.MethodInjectorImpl.invoke(MethodInjectorImpl.java:137)
at org.jboss.resteasy.core.ResourceMethodInvoker.invokeOnTarget(ResourceMethodInvoker.java:296)
at org.jboss.resteasy.core.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:250)
at org.jboss.resteasy.core.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:237)
at org.jboss.resteasy.core.SynchronousDispatcher.invoke(SynchronousDispatcher.java:356)
at org.jboss.resteasy.core.SynchronousDispatcher.invoke(SynchronousDispatcher.java:179)
at org.jboss.resteasy.plugins.server.servlet.ServletContainerDispatcher.service(ServletContainerDispatcher.java:220)
at org.jboss.resteasy.plugins.server.servlet.HttpServletDispatcher.service(HttpServletDispatcher.java:56)
at org.jboss.resteasy.plugins.server.servlet.HttpServletDispatcher.service(HttpServletDispatcher.java:51)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:848)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:684)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1496)
at org.apache.deltaspike.servlet.impl.event.EventBridgeFilter.doFilter(EventBridgeFilter.java:59)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1484)
at org.apache.deltaspike.servlet.impl.produce.RequestResponseHolderFilter.doFilter(RequestResponseHolderFilter.java:63)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1476)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:501)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:533)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1086)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:429)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1020)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:370)
at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:971)
at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:1033)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:644)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
at org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Thread.java:745)
In case you need it, here is the code for the producer:
package org.bitbucket.infovillafoundation.manmyanmar.hotel.cdiproducers;

import javax.enterprise.inject.Disposes;
import javax.enterprise.inject.Produces;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.PersistenceContext;

/**
 * Created by Sandah Aung on 27/2/15.
 */
public class EntityManagerProducer {

    @PersistenceContext
    private EntityManagerFactory emf;

    @Produces
    public EntityManager create() {
        return emf.createEntityManager();
    }

    public void close(@Disposes EntityManager em) {
        if (em.isOpen()) {
            em.close();
        }
    }
}
Edit: beans.xml
<beans xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/beans_1_0.xsd">
<interceptors>
<class>org.apache.deltaspike.jpa.impl.transaction.TransactionalInterceptor</class>
</interceptors>
</beans>
What have I done wrong?
You didn't give any details about your beans.xml, so this is just a guess:
Your producer class lacks a bean defining annotation, so it might be ignored by the CDI container.
Try adding @Dependent to your EntityManagerProducer class, or check which bean discovery mode your application is using.
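A minimal sketch of a producer that a CDI container will pick up, assuming the answer above is on the right track. Two hedged notes: any bean-defining annotation works for discovery (@ApplicationScoped is used here so the factory is created only once), and the NPE in the stack trace means emf was null when create() ran; in a plain Jetty + Weld setup there is no EE container to process @PersistenceContext (which, per the JPA spec, injects an EntityManager, while @PersistenceUnit is the one that injects an EntityManagerFactory), so the factory is bootstrapped manually below. The persistence unit name "hotelPU" is a placeholder for whatever is declared in persistence.xml:

import javax.enterprise.context.ApplicationScoped;
import javax.enterprise.inject.Disposes;
import javax.enterprise.inject.Produces;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;

@ApplicationScoped
public class EntityManagerProducer {

    // "hotelPU" is a placeholder; use the unit name from META-INF/persistence.xml
    private final EntityManagerFactory emf =
            Persistence.createEntityManagerFactory("hotelPU");

    @Produces
    public EntityManager create() {
        return emf.createEntityManager();
    }

    public void close(@Disposes EntityManager em) {
        if (em.isOpen()) {
            em.close();
        }
    }
}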