Flink consuming messages from Kafka in Scala: cannot flatMap - scala

I'm trying to follow this example: https://blog.knoldus.com/a-quick-demo-kafka-to-flink-to-cassandra/
I'm trying to parse my ShippingOrder JSON messages from Kafka into objects and then group them by some properties, but I get an error at the flatMap step.
My sbt file:
import Dependencies._

scalaVersion := "2.13.4"
version := "0.1.0-SNAPSHOT"
organization := "com.example"
organizationName := "example"

lazy val root = (project in file("."))
  .settings(
    name := "KafkaTest",
    libraryDependencies += scalaTest % Test,
    libraryDependencies += "org.apache.flink" % "flink-streaming-scala_2.12" % "1.12.1" % "provided",
    libraryDependencies += "org.apache.flink" % "flink-connector-kafka_2.12" % "1.12.1",
    libraryDependencies += "org.apache.flink" % "flink-clients_2.12" % "1.12.1",
    libraryDependencies += "org.json4s" %% "json4s-native" % "3.6.10",
  )
My main file:
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.scala._
import org.json4s.native.JsonMethods

import java.util.Properties

object Kafka {
  def main(args: Array[String]) {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val properties = new Properties()
    implicit lazy val formats = org.json4s.DefaultFormats

    properties.setProperty("bootstrap.servers", "broker:9092")
    properties.setProperty("group.id", "Flink")

    implicit val typeInfo = TypeInformation.of(classOf[(String)])
    implicit val typeInfo_2 = TypeInformation.of(classOf[(String, Int)])
    implicit val typeInfo_3 = TypeInformation.of(classOf[(org.json4s.JsonAST.JValue)])
    implicit val typeInfo_4 = TypeInformation.of(classOf[(ShippingOrder)])

    val consumer = new FlinkKafkaConsumer[String]("ShippingOrders", new SimpleStringSchema(), properties)
    consumer.setCommitOffsetsOnCheckpoints(true)
    consumer.setStartFromEarliest()

    val stream = env.addSource(consumer)
      .flatMap(JsonMethods.parse(_).toOption)
      .map(_.extract[ShippingOrder])

    stream.print()
    env.execute("Flink Kafka Example")
  }
}
My Order object:
case class ShippingOrder(
  Old: Data,
  New: Data,
)

case class Data(
  ID: String,
  Action: String,
  ClientID: Int,
  // Data: ???, // type unclear; an accidental import of scala.tools.nsc.doc.model.Trait was used here
  ToLocation: Location,
  ToName: String,
  ToPhone: String,
  Log: List[Log],
  IsPartialReturn: Boolean,
  Items: List[Item],
)

case class Log(
  Reason: String,
  ReasonCode: String,
  Status: String,
  // UpdatedDate: java.sql.Date,
)

case class Item(
  Code: String,
  Name: String,
  Quantity: Int,
)

case class Location(
  // Coordinates: ???,
  Type: String,
)
I got an error when running this job:
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.flink.api.java.ClosureCleaner (file:/usr/local/Cellar/apache-flink/1.12.1/libexec/lib/flink-dist_2.12-1.12.1.jar) to field java.util.Properties.serialVersionUID
WARNING: Please consider reporting this to the maintainers of org.apache.flink.api.java.ClosureCleaner
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
Job has been submitted with JobID 391088d1b7233806d15cd10da73f8660
------------------------------------------------------------
The program finished with the following exception:
org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: 391088d1b7233806d15cd10da73f8660)
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
Caused by: java.util.concurrent.ExecutionException: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: 391088d1b7233806d15cd10da73f8660)
at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:395)
at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1999)
at org.apache.flink.client.program.StreamContextEnvironment.getJobExecutionResult(StreamContextEnvironment.java:123)
at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:80)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
at org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.scala:746)
at Kafka$.main(Kafka.scala:34)
at Kafka.main(Kafka.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
... 8 more
Caused by: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: 391088d1b7233806d15cd10da73f8660)
at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:125)
at java.base/java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:642)
at java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
at java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
at org.apache.flink.client.program.rest.RestClusterClient.lambda$pollResourceAsync$22(RestClusterClient.java:665)
at java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)
at java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)
at java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
at java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:394)
at java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)
at java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)
at java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
at java.base/java.util.concurrent.CompletableFuture.postFire(CompletableFuture.java:610)
at java.base/java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:1085)
at java.base/java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:478)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:123)
... 18 more
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:118)
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:80)
at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:233)
at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:224)
at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:215)
at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:665)
at org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:89)
at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:447)
at jdk.internal.reflect.GeneratedMethodAccessor109.invoke(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:306)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:213)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:159)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
at scala.PartialFunction.applyOrElse(PartialFunction.scala:123)
at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at akka.actor.Actor.aroundReceive(Actor.scala:517)
at akka.actor.Actor.aroundReceive$(Actor.scala:515)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NoSuchMethodError: 'scala.collection.immutable.List scala.collection.immutable.List.map(scala.Function1)'
at org.json4s.ParserUtil$Buffer.substring(ParserUtil.scala:139)
at org.json4s.ParserUtil$.unquote(ParserUtil.scala:98)
at org.json4s.native.JsonParser$Parser.parseString$1(JsonParser.scala:243)
at org.json4s.native.JsonParser$Parser.nextToken(JsonParser.scala:282)
at org.json4s.native.JsonParser$.$anonfun$astParser$1(JsonParser.scala:188)
at org.json4s.native.JsonParser$.$anonfun$astParser$1$adapted(JsonParser.scala:145)
at org.json4s.native.JsonParser$.parse(JsonParser.scala:133)
at org.json4s.native.JsonParser$.parse(JsonParser.scala:71)
at org.json4s.native.JsonMethods.parse(JsonMethods.scala:10)
at org.json4s.native.JsonMethods.parse$(JsonMethods.scala:9)
at org.json4s.native.JsonMethods$.parse(JsonMethods.scala:63)
at Kafka$.$anonfun$main$1(Kafka.scala:30)
at org.apache.flink.streaming.api.scala.DataStream$$anon$6.flatMap(DataStream.scala:681)
at org.apache.flink.streaming.api.operators.StreamFlatMap.processElement(StreamFlatMap.java:47)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.pushToOperator(CopyingChainingOutput.java:71)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:46)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:26)
at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:50)
at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:28)
at org.apache.flink.streaming.api.operators.StreamSourceContexts$ManualWatermarkContext.processAndCollectWithTimestamp(StreamSourceContexts.java:322)
at org.apache.flink.streaming.api.operators.StreamSourceContexts$WatermarkContext.collectWithTimestamp(StreamSourceContexts.java:426)
at org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher.emitRecordsWithTimestamps(AbstractFetcher.java:365)
at org.apache.flink.streaming.connectors.kafka.internals.KafkaFetcher.partitionConsumerRecordsHandler(KafkaFetcher.java:183)
at org.apache.flink.streaming.connectors.kafka.internals.KafkaFetcher.runFetchLoop(KafkaFetcher.java:142)
at org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase.run(FlinkKafkaConsumerBase.java:826)
at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:110)
at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:66)
at org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:241)
I have no idea what this error means.
Please explain it and help me fix it.

I am pretty sure this is because you are not building a fat jar, and thus the Scala library is missing from your cluster. You should probably take a look here for some info on how to create fat jars in sbt.
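For reference, a minimal sketch of a fat-jar setup with the sbt-assembly plugin; the plugin version, main class and merge strategy below are illustrative assumptions, not taken from the question:

// project/plugins.sbt
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")

// build.sbt (additions)
assembly / mainClass := Some("Kafka")
assembly / assemblyMergeStrategy := {
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard // drop conflicting metadata
  case _                             => MergeStrategy.first
}

Running sbt assembly then produces a single jar (under target/scala-2.13/ for this build) that bundles the Scala library and every dependency not marked "provided", which you can submit with flink run.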

Related

Flink Scala Job Runtime Error: java.lang.NoSuchMethodError

I am trying to run a simple Flink streaming job on AWS EMR. The purpose is very simple for now:
Consume data from Kafka in Flink
Load it to another topic in Kafka.
I am using the following dependencies:
scalaVersion := "2.11.8"
val flinkVersion = "1.11.1"
libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-scala" % flinkVersion,
  "org.apache.flink" %% "flink-streaming-scala" % flinkVersion,
  "org.apache.flink" %% "flink-connector-kafka" % flinkVersion
)
The Flink code that I am using is:
private val serdeSchema = new SimpleStringSchema

val env = StreamExecutionEnvironment.getExecutionEnvironment
val stream = env
  .addSource(createKafkaConsumer(kafkaInputTopic
    , kafkaBrokers, kafkaConfig("consumerGroupId").toString
    , kafkaConfig("defaultReset").toString))

stream
  .map((s: String) => s)
  .addSink(createKafkaProducer(kafkaOutputTopic, kafkaBrokers))

env.execute(jobConfig("jobName").toString)
}

def createKafkaProducer(kafkaTopic: String, kafkaBrokers: String): FlinkKafkaProducer[String] = {
  val producer = new FlinkKafkaProducer[String](kafkaBrokers,
    kafkaTopic, serdeSchema)
  producer
}

def createKafkaConsumer(kafkaInputTopic: String
  , kafkaBrokers: String
  , consumerGroup: String
  , defaultReset: String): FlinkKafkaConsumer[String] = {
  val properties = new Properties()
  properties.setProperty("bootstrap.servers", kafkaBrokers)
  properties.setProperty("group.id", consumerGroup)
  properties.setProperty("enable.auto.commit", "false")
  properties.setProperty("auto.offset.reset", defaultReset)
  val consumer = new FlinkKafkaConsumer[String](kafkaInputTopic, serdeSchema, properties)
  consumer
}
I generate an assembly jar using sbt. I use the following command to run the job on EMR:
/bin/flink run -c com.example.FlinkConsumer flink/target/scala-2.11/flink-assembly-0.1.jar
Below is the stack trace
Caused by: org.apache.flink.client.program.ProgramInvocationException: Job failed (JobID: c458520153e875811c46c386b9ec605e)
at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:112)
at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.client.program.rest.RestClusterClient.lambda$pollResourceAsync$21(RestClusterClient.java:565)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:291)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.postFire(CompletableFuture.java:575)
at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:943)
at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:147)
at org.apache.flink.client.deployment.ClusterClientJobClientAdapter.lambda$null$6(ClusterClientJobClientAdapter.java:110)
... 19 more
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:110)
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:76)
at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:192)
at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:186)
at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:180)
at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:484)
at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:380)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:279)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:194)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NoSuchMethodError: org.apache.flink.api.common.serialization.SerializationSchema.open(Lorg/apache/flink/api/common/serialization/SerializationSchema$InitializationContext;)V
at org.apache.flink.streaming.connectors.kafka.internals.KafkaSerializationSchemaWrapper.open(KafkaSerializationSchemaWrapper.java:61)
at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.open(FlinkKafkaProducer.java:808)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:36)
at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.open(AbstractUdfStreamOperator.java:102)
at org.apache.flink.streaming.api.operators.StreamSink.open(StreamSink.java:48)
at org.apache.flink.streaming.runtime.tasks.StreamTask.initializeStateAndOpen(StreamTask.java:1007)
at org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$beforeInvoke$0(StreamTask.java:454)
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$SynchronizedStreamTaskActionExecutor.runThrowing(StreamTaskActionExecutor.java:94)
at org.apache.flink.streaming.runtime.tasks.StreamTask.beforeInvoke(StreamTask.java:449)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:461)
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:707)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:532)
at java.lang.Thread.run(Thread.java:748)
It looks like a version issue, but I have tried various versions and I don't see this open method. I think the serializer calls the open method and is unable to find it. Can someone please help? I am new to Flink.
If you're using EMR's Flink support, then most Flink libraries should be flagged as "provided" so that they're not in your jar, as they're on the classpath from the Flink installation that EMR is providing. You'll still need to explicitly include anything that's not provided by EMR (e.g. flink-connector-kafka).
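A sketch of how the dependency section from the question could look under that advice (flinkVersion as defined in the question; which extra modules you need beyond the Kafka connector depends on your job):

libraryDependencies ++= Seq(
  // already on EMR's classpath, so keep them out of the assembly jar
  "org.apache.flink" %% "flink-scala"           % flinkVersion % "provided",
  "org.apache.flink" %% "flink-streaming-scala" % flinkVersion % "provided",
  // not part of the Flink distribution on the cluster, so it must be bundled
  "org.apache.flink" %% "flink-connector-kafka" % flinkVersion
)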

Spark and Kafka integration - KafkaSourceProvider could not be instantiated

I'm working on an integration project of Kafka and Spark and I'm trying to read a Kafka topic using Spark 2.4.5, Scala 2.12.11 and Kafka 2.5.0.
My sbt file is:
name := "Test"
version := "1.0"
scalaVersion := "2.12.11"
libraryDependencies ++= Seq(
  "org.apache.spark" % "spark-sql_2.12" % "2.4.5",
  "org.apache.spark" % "spark-sql-kafka-0-10_2.12" % "2.4.5",
  "org.apache.spark" % "spark-streaming-kafka-0-10-assembly_2.12" % "2.4.5",
  "org.apache.kafka" % "kafka-clients" % "2.5.0"
)
My code is:
object Test {
  def main(args: Array[String]) = {
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession
      .builder()
      .appName("SparkTest")
      .master("local[*]")
      .getOrCreate()

    import spark.implicits._

    val df = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "test")
      .option("startingOffsets", "earliest")
      .load()

    df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
      .as[(String, String)]

    df.printSchema()
  }
}
After creating the topic in Kafka and starting ZooKeeper and Kafka itself, I launch the code with:
./spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:2.4.5 --class Test /home/luca/Projects/Test/target/scala-2.12/test_2.12-1.0.jar
I run into the following error:
20/05/06 15:40:29 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
Exception in thread "main" java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.sql.kafka010.KafkaSourceProvider could not be instantiated
at java.util.ServiceLoader.fail(ServiceLoader.java:232)
at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.filterImpl(TraversableLike.scala:247)
at scala.collection.TraversableLike$class.filter(TraversableLike.scala:259)
at scala.collection.AbstractTraversable.filter(Traversable.scala:104)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:630)
at org.apache.spark.sql.streaming.DataStreamReader.load(DataStreamReader.scala:161)
at Test$.main(projectfile.scala:24)
at Test.main(projectfile.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NoSuchMethodError: org.apache.spark.internal.Logging.$init$(Lorg/apache/spark/internal/Logging;)V
at org.apache.spark.sql.kafka010.KafkaSourceProvider.<init>(KafkaSourceProvider.scala:44)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at java.lang.Class.newInstance(Class.java:442)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
... 26 more
Can someone help me out with this?
The kafka-clients version can be one of the reasons. Otherwise, try with Spark 2.4.0 and earlier Scala 2.12 versions. It looks like a compatibility issue.
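One way to act on that, as a sketch only (not verified against your cluster): mark the Spark artifacts "provided" so the versions already on the cluster win, drop the explicit kafka-clients pin so spark-sql-kafka-0-10 pulls in the client version it was built against, and keep the Scala suffix matching the Spark build you submit to:

scalaVersion := "2.12.11"

val sparkVersion = "2.4.5" // or 2.4.0, matching the cluster

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql"            % sparkVersion % "provided",
  // brings a compatible kafka-clients transitively
  "org.apache.spark" %% "spark-sql-kafka-0-10" % sparkVersion
)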

Http Client timeout when using DataFrame returned from Hive query

This seems like a very odd and specific issue which has me stumped.
When using a DataFrame built by a spark.sql("select * from table") query on a Hive table, I get a timeout exception whenever I try to use an HTTP client in a transform or action step on that DataFrame.
Example:
import scalaj.http._
import org.apache.spark.sql.SparkSession
object Example {
  def postDoc(doc: String): Unit = {
    val resp = Http("https://example.com/endpoint")
      .postData(doc)
      .header("content-type", "application/json")
      .asString
  }

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
    import spark.implicits._

    val df = spark.sql("select id, json_doc from some_table")
    df.map(r => r.getAs[String]("json_doc")).foreach(postDoc _)
  }
}
I can however hit the service through a DataFrame I create manually; i.e. Seq((1, "{\"a\": 1}")).toDF("id", "json_doc").foreach(postDoc _).
I've also tried creating temp tables and using spark.sql to select from them, which works on a DataFrame that I create manually but not on ones sourced from a Hive table.
My partial build.sbt
scalaVersion := "2.11.12"
libraryDependencies ++= {
  val sparkVersion = "2.1.3"
  Seq(
    "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
    "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
    "org.scalaj" %% "scalaj-http" % "2.4.2"
  )
}
Stacktrace
java.net.SocketTimeoutException: Read timed out
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:1950)
at sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:1945)
at java.security.AccessController.doPrivileged(Native Method)
at sun.net.www.protocol.http.HttpURLConnection.getChainedException(HttpURLConnection.java:1944)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1514)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1498)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:352)
at scalaj.http.HttpRequest.scalaj$http$HttpRequest$$doConnection(Http.scala:367)
at scalaj.http.HttpRequest.exec(Http.scala:343)
at scalaj.http.HttpRequest.asString(Http.scala:492)
at com.gm.avalanche.collect.Collector$.postDoc(Collector.scala:34)
at com.gm.avalanche.collect.Collector$$anonfun$sqlTest$2.apply(Collector.scala:71)
at com.gm.avalanche.collect.Collector$$anonfun$sqlTest$2.apply(Collector.scala:71)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1954)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1954)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:100)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:975)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:933)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:735)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:678)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1593)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1498)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:268)
at scalaj.http.HttpRequest.scalaj$http$HttpRequest$$doConnection(Http.scala:365)
... 17 more
It turns out the socket was being closed remotely by the ingress controller running in front of the Kubernetes environment where the Elasticsearch instance is running. It was set to its default timeout of one minute.
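For anyone hitting the same symptom on the client side, a minimal sketch of raising scalaj-http's timeouts (the values are illustrative; this only helps if whatever sits in front of the service, such as the ingress above, allows the longer wait as well):

def postDoc(doc: String): Unit = {
  val resp = Http("https://example.com/endpoint")
    .postData(doc)
    .header("content-type", "application/json")
    .timeout(5000, 120000) // connect timeout 5 s, read timeout 2 min (both in ms)
    .asString
}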

groupByKey transformation not working for Spark

I am trying to perform the following operations, but I am getting an error when using the groupByKey transformation. I'm using Spark in standalone mode.
sample.sbt contains:
name := "Spark Join"
version := "1.0"
scalaVersion := "2.10.4"
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.3.0"
fork := true
My Scala code:
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import java.util.Properties

object yelpDataJoin {
  def main(args: Array[String]) {
    val reviewFile = " /home/prasad/Desktop/BigData/psp150030_HW3/data/review3.csv"
    val conf = new SparkConf().setAppName("SparkJoins")
    val sc = new SparkContext(conf)
    val reviewData = sc.textFile(reviewFile, 2)
    val groupReviewData = reviewData.map(line => line.split("::")).map(word => (word(2), (word(20), 1))).groupByKey().foreach(println)
  }
}
I'm getting the following error message:
15/07/20 16:10:48 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 32.0 KB, free 265.4 MB)
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/rdd/RDD$
at yelpDataJoin$.main(HW3_Question2.scala:14)
at yelpDataJoin.main(HW3_Question2.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.rdd.RDD$
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 9 more
Please let me know if I'm doing anything wrong here.
Thanks & Regards,
Prasad

Circumflex orm doesn't work with Play 2.1

I've updated the Play framework to version 2.1 in my application and it no longer works.
Here is a simple controller:
package controllers

import play.api._
import play.api.mvc._
import models.Country
import ru.circumflex.orm._

object Application extends Controller {
  def index = Action {
    val co = Country AS "co"
    val q = SELECT (co.*) FROM (co)
    val countries: Seq[Country] = q.list()
    Ok(views.html.index(countries))
  }
}
And here is the model of a country:
package models

import ru.circumflex.orm.{TextField, Table, Record}
import java.util.regex.Pattern

class Country extends Record[String, Country] {
  def PRIMARY_KEY = code
  def relation = Country

  def this(code: String, name: String) = {
    this()
    this.code := code
    this.name := name
  }

  val code: TextField[Country] = "code".VARCHAR(2).NOT_NULL
  val name = "name".TEXT.NOT_NULL

  def cities = inverseMany(City.country)

  override def toString = name.getOrElse("unknow")
}

object Country extends Country with Table[String, Country] {
  val codeKey = UNIQUE(code)
  val codeChk = CONSTRAINT("code_chk").CHECK("code IN ('ch', 'us', 'uk', 'fr', 'es', 'it', 'pt', 'by')")
  val idx = "country_code_idx".INDEX("LOWER(code)").USING("btree").UNIQUE

  validation.notNull(_.code)
    .notEmpty(_.code)
    .pattern(_.code, Pattern.compile("(?i:[a-z]{2})"))
}
When the page is opened I get the following error:
[error] application -
! #6d9l5j0i0 - Internal server error, for (GET) [/] ->
play.api.Application$$anon$1: Execution exception[[RuntimeException: java.lang.ExceptionInInitializerError]]
at play.api.Application$class.handleError(Application.scala:289) ~[play_2.10.jar:2.1.0]
at play.api.DefaultApplication.handleError(Application.scala:383) [play_2.10.jar:2.1.0]
at play.core.server.netty.PlayDefaultUpstreamHandler$$anonfun$12$$anonfun$apply$24.apply(PlayDefaultUpstreamHandler.scala:314) [play_2.10.jar:2.1.0]
at play.core.server.netty.PlayDefaultUpstreamHandler$$anonfun$12$$anonfun$apply$24.apply(PlayDefaultUpstreamHandler.scala:312) [play_2.10.jar:2.1.0]
at play.api.libs.concurrent.PlayPromise$$anonfun$extend1$1.apply(Promise.scala:113) [play_2.10.jar:2.1.0]
at play.api.libs.concurrent.PlayPromise$$anonfun$extend1$1.apply(Promise.scala:113) [play_2.10.jar:2.1.0]
java.lang.RuntimeException: java.lang.ExceptionInInitializerError
at play.api.mvc.ActionBuilder$$anon$1.apply(Action.scala:222) ~[play_2.10.jar:2.1.0]
at play.api.mvc.Action$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$5$$anonfun$apply$6.apply(Action.scala:109) ~[play_2.10.jar:2.1.0]
at play.api.mvc.Action$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$5$$anonfun$apply$6.apply(Action.scala:109) ~[play_2.10.jar:2.1.0]
at play.utils.Threads$.withContextClassLoader(Threads.scala:18) ~[play_2.10.jar:2.1.0]
at play.api.mvc.Action$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$5.apply(Action.scala:108) ~[play_2.10.jar:2.1.0]
at play.api.mvc.Action$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$5.apply(Action.scala:106) ~[play_2.10.jar:2.1.0]
Caused by: java.lang.ExceptionInInitializerError: null
at controllers.Application$$anonfun$index$1.apply(Application.scala:18) ~[na:na]
at controllers.Application$$anonfun$index$1.apply(Application.scala:11) ~[na:na]
at play.api.mvc.ActionBuilder$$anonfun$apply$11.apply(Action.scala:254) ~[play_2.10.jar:2.1.0]
at play.api.mvc.ActionBuilder$$anonfun$apply$11.apply(Action.scala:254) ~[play_2.10.jar:2.1.0]
at play.api.mvc.ActionBuilder$$anon$1.apply(Action.scala:217) ~[play_2.10.jar:2.1.0]
at play.api.mvc.Action$$anonfun$apply$1$$anonfun$apply$2$$anonfun$apply$5$$anonfun$apply$6.apply(Action.scala:109) ~[play_2.10.jar:2.1.0]
Caused by: java.lang.ClassNotFoundException: models.Country
at java.net.URLClassLoader$1.run(Unknown Source) ~[na:1.7.0_09]
at java.net.URLClassLoader$1.run(Unknown Source) ~[na:1.7.0_09]
at java.security.AccessController.doPrivileged(Native Method) ~[na:1.7.0_09]
at java.net.URLClassLoader.findClass(Unknown Source) ~[na:1.7.0_09]
at java.lang.ClassLoader.loadClass(Unknown Source) ~[na:1.7.0_09]
at java.lang.ClassLoader.loadClass(Unknown Source) ~[na:1.7.0_09]
Can anyone help me to solve this problem?
I fixed it using this info: reference
First, take the latest version of Circumflex from GitHub:
git clone git://github.com/inca/circumflex.git
Following the suggestion, change the line in the file orm/src/main/scala/relation.scala, line 55:
val _recordClass: Class[R] = Class.forName(
to
val _recordClass: Class[R] = this.getClass().getClassLoader().loadClass(
Now we can compile it, install it, and use it in our Play 2.1 app:
cd circumflex
mvn clean install
In case you need only one module:
cd circumflex
mvn clean install -pl <circumflex-orm> -am
Now edit your project/Build.scala file to add the dependency and the local repository:
val appDependencies = Seq(
  "pro.savant.circumflex" % "circumflex-orm" % "3.1-SNAPSHOT"
)

val main = play.Project(appName, appVersion, appDependencies).settings(defaultScalaSettings: _*).settings(
  resolvers += "Local Maven Repository" at "file:" + Path.userHome.absolutePath + "/.m2/repository"
)
Circumflex 2.x doesn't appear to be built against Scala 2.10 (which is needed for Play 2.1).
Circumflex 3.0 is being built against 2.10, but it is currently only available as a snapshot.
You can get the snapshot here:
https://oss.sonatype.org/content/repositories/snapshots/
Note that the package names have changed from ru.circumflex to pro.savant.circumflex, so you may need to refactor your import statements.
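For example, the imports in the question would change roughly like this (a sketch; adjust to the modules you actually use):

// Circumflex 2.x
import ru.circumflex.orm._

// Circumflex 3.x snapshot
import pro.savant.circumflex.orm._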