InvocationTargetException in spark job test scala - scala

I was trying to write scala test cases for my sparkJob but its not working, and giving InvocationTargetException.
I am trying to create new sqlContext and read a json file from my localbox.
I am using eclipse.
sc = new SparkContext(conf)
sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
the full exception is :
java.lang.reflect.InvocationTargetException at
java.lang.reflect.Constructor.newInstance(Constrructor.java:423) at
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:67)
at
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
at
org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
at
org.apache.spark.broadcast.TorrentBroadcast.(TorrentBroadcast.scala:80)
at
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
at
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager..scala:63)
at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1318) at
org.apache.spark.SparkContext$$anonfun$hadoopFile$1.apply(SparkContext.scala:1006)
at
org.apache.spark.SparkContext$$anonfun$hadoopFile$1.apply(SparkContext.scala:1003)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.SparkCContext.withScope(SparkContext.scala:700) at
org.apache.spark.SparkContext.hadoopFile(SparkContext.scala:1003) at
org.apache.spark.SparkContext$$anonfun$textFile$1.appply(SparkContext.scala:818)
at
org.apache.spark.SparkContext$$anonfun$textFile$1.apply(SparkContext.scala:816))
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:700) at
org.apache.spark.SparkContext.texxtFile(SparkContext.scala:816) at
com.databricks.spark.csv.util.TextFile$.withCharset(TextFile.scalaa:30)
at
com.databricks.spark.csv.DefaultSource$$anonfuun$createRelation$1.apply(DefaultSource.scala:146)
at
com.databricks.spark.csv.DefaultSource$$anonfun$createRelation$1.apply(DDefaultSource.scala:146)
at
com.databricks.spark.csv.CsvRelation.firstLine$lzycompute(CsvRelation.scalaa:265)
at
com.databricks.spark.csv.CsvRelation.firstLine(CsvRelation.scala:263)
at com.databricks.spark.csv.CsvRelation.tokenRdd(CsvRelation.scala:89)
at
com.databricks.spark.csv.CsvRRelation.buildScan(CsvRelation.scala:173)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$4.apply(DataSourceStrategy.scala:60)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$4.apply(DataSourceStrategy.scala:60)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$$$anonfun$pruneFilterProject$1.apply(DataSourceStrategy.scala:279)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$pruneFilterProject$1.apply(DataSourceStrattegy.scala:278)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProjectRaw(DataSourceStrategy.scala:3100)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProject(DataSourceStrategy.scala:274)
at
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:556)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at
org.apache.spark.sqll.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at
org.apache.spark.sql.execution.SparkStrategies$AAggregation$.apply(SparkStrategies.scala:235)
at
org.apache.spark.sql.catalyst.pllanning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlaanner.scala:58)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at
org.apache.spark.sql.SQLContext$QueryExecuution.sparkPlan$lzycompute(SQLContext.scala:920)
at
org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:918)
at
org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:924)
at
org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:924)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:53)
at
org.apache..spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1904)
at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1385)) at
org.apache.spark.sql.DataFrame.count(DataFrame.scala:14033) at
com.amazon.smt.ec.datapopulator.common.DataFrameOperationsTest$$anonfun$1.apply$mcV$sp(DataFrameOperationsTest.scala:55)
at
com.amazon.smt.ec.datapopulator.common.DataFrameOperatiionsTest$$anonfun$1.apply(DataFrameOperationsTest.scala:47)
at
com..amazon.smt.ec.datapopulator.common.DataFrameOperationsTest$$anonfun$1.apply(DataFrameOperationsTest.scala:47)
at
org.scalatest.Transformer$$anonfun$apply$1.apply(Transformer.scala:22)
at
org.scalatest.Transformer$$anonfun$apply$1.apply(Transformer.scala:22)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) at
org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at
org.scalatest.Transformer.apply(Transformer.scala:22) at
org.scalatest.Transformer.apply(Transformer.scala:20) at
org.scalatest.FunSuiteLike$$anon$1.apply(FunSuuiteLike.scala:158) at
org.scalatest.Suite$class.withFixture(Suite.scala:1121) at
org.scalatest.FunSuite.withFixture(FunSuite.scala:1559) at
org.scalatest.FuunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:155)
at
org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:167)
at
org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.sscala:167)
at orrg.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at
org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:167) at
org.scalatest.FunSuite.runTestt(FunSuite.scala:1559) at
org.scalatest..FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:200)
at
org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:200)
at
org.scalatest.SuperEngine$$anonfun$traaverseSubNodes$1$1.apply(Engine.scala:413)
at
org.scalatest.SuperEngine$$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(LList.scala:318) at
org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) at
org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) at
org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:200) at
org.scalatest.FunSuite.runTests(FunSuite.scala:15559) at
org.scalatest.Suite$class.run(Suite.scala:1423) at
org.scalatest.FunSSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1559)
at
org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:204)
at
org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:204)
at org.scalatest.SuperEngine.runImpl(Engine.scala:545) at
org.scalatest.FunSuiteLike$class.run(FuunSuiteLike.scala:204) at
com.amazon.smt.ec.datapopulator.common.DataFrameOperationsTest.org$scalatest$BeforeAndAfterAll$$super$run(DataFrameOperationsTest.scala:21)
at
org.scalatest.BBeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257)
at
org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256)
at
com.amazon.smt.ec.datapopulator.common.DattaFrameOperationsTest.run(DataFrameOperationsTest.scala:21)
at oorg.scalatest.junit.JUnitRunner.run(JUnitRunner.scala:99) Caused
by: java.lang.IllegalArgumentException at
org.apache.spaark.io.SnappyCompressionCodec.(CompressionCodec.scala:151)
Can someone help me? what am I missing here ?

Related

Intermittent org.apache.spark.SparkException: Could not find spark-version-info.properties in K8s

My application uses K8s cronjob to schedule the application run, consequently creating a pod for each occurrence.
In most of the cases, the application runs well, but in some of them, it fails with the following error:
java.lang.ExceptionInInitializerError
2022-12-23T10:45:32.899555393Z at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
2022-12-23T10:45:32.899572625Z at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
2022-12-23T10:45:32.899576309Z at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
2022-12-23T10:45:32.899590250Z at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
2022-12-23T10:45:32.899601166Z at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:600)
2022-12-23T10:45:32.899604720Z at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:678)
2022-12-23T10:45:32.899608169Z at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:737)
2022-12-23T10:45:32.899610934Z at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
2022-12-23T10:45:32.899613557Z at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
2022-12-23T10:45:32.899629628Z at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
2022-12-23T10:45:32.899633296Z at java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:497)
2022-12-23T10:45:32.899637988Z at java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:661)
[...]
2022-12-23T10:45:32.899959490Z Caused by: java.lang.ExceptionInInitializerError
2022-12-23T10:45:32.899972035Z at org.apache.spark.package$.<init>(package.scala:93)
2022-12-23T10:45:32.899985422Z at org.apache.spark.package$.<clinit>(package.scala)
2022-12-23T10:45:32.899990812Z at org.apache.spark.SparkContext.$anonfun$new$1(SparkContext.scala:183)
2022-12-23T10:45:32.899996770Z at org.apache.spark.internal.Logging.logInfo(Logging.scala:54)
2022-12-23T10:45:32.899998834Z at org.apache.spark.internal.Logging.logInfo$(Logging.scala:53)
2022-12-23T10:45:32.900029297Z at org.apache.spark.SparkContext.logInfo(SparkContext.scala:73)
2022-12-23T10:45:32.900031985Z at org.apache.spark.SparkContext.<init>(SparkContext.scala:183)
2022-12-23T10:45:32.900033999Z at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2526)
2022-12-23T10:45:32.900049879Z at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
2022-12-23T10:45:32.900055477Z at scala.Option.getOrElse(Option.scala:189)
2022-12-23T10:45:32.900061375Z at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
[...]
2022-12-23T10:45:32.900111374Z at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
2022-12-23T10:45:32.900134359Z at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1655)
2022-12-23T10:45:32.900137775Z at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
2022-12-23T10:45:32.900145849Z at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
2022-12-23T10:45:32.900159046Z at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:746)
2022-12-23T10:45:32.900178955Z at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)
2022-12-23T10:45:32.900183168Z at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)
2022-12-23T10:45:32.900206355Z at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)
2022-12-23T10:45:32.900214981Z at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)
2022-12-23T10:45:32.900227137Z at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183)
2022-12-23T10:45:32.900377715Z Caused by: org.apache.spark.SparkException: Could not find spark-version-info.properties
2022-12-23T10:45:32.900381131Z at org.apache.spark.package$SparkBuildInfo$.<init>(package.scala:62)
2022-12-23T10:45:32.900396317Z at org.apache.spark.package$SparkBuildInfo$.<clinit>(package.scala)
2022-12-23T10:45:32.900399396Z ... 25 more
How spark sesssion is being build
lazy val spark: SparkSession = SparkSession.builder
.appName("My application")
.master("local") //Error occur here
.getOrCreate()
Spark version: 2.4.8
Scala version: 2.12
Anyone already had a similar problem?

Azure Databricks, could not initialize class org.apache.spark.eventhubs.EventHubsConf

I'm pretty new to Scala and I'm trying to create a notebook to elaborate data written in an Azure Event Hub. This is my code:
import org.apache.spark.eventhubs._
val connectionString = ConnectionStringBuilder("MY-CONNECTION-STRING")
.setEventHubName("EVENT-HUB-NAME")
.build
val eventHubsConf = EventHubsConf(connectionString)
.setStartingPosition(EventPosition.fromEndOfStream)
val eventhubs = spark.readStream
.format("eventhubs")
.options(eventHubsConf.toMap)
.load()
And I get the following error: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.eventhubs.EventHubsConf$
Cluster Configuration:
Databricks Runtime Version: 7.0 (includes Apache Spark 3.0.0, Scala 2.12)
Driver & Worker Type: 14.0 GB Memory, 4 Cores, 0.75 DBU
Standard_DS3_v2
I have installed the following library:
com.microsoft.azure:azure-eventhubs-spark_2.11:2.3.17
cluster libraries
The other JAR installed is to resolve a problem with Logging
The code crashes as soon as I try to create eventHubsConf.
Complete traceback:
java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.eventhubs.EventHubsConf$
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:7)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:70)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:72)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:74)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:76)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:78)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw$$iw.<init>(command-2632683088190841:80)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw$$iw.<init>(command-2632683088190841:82)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw$$iw.<init>(command-2632683088190841:84)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$$iw.<init>(command-2632683088190841:86)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read.<init>(command-2632683088190841:88)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$.<init>(command-2632683088190841:92)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$read$.<clinit>(command-2632683088190841)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$eval$.$print$lzycompute(<notebook>:7)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$eval$.$print(<notebook>:6)
at line14a6ae940dd14957b7172a4cf8f6cdd348.$eval.$print(<notebook>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021)
at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:41)
at es Scala 2.12 but yoscala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:37)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:600)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:570)
at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:215)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:202)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:714)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:667)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:202)
at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$10(DriverLocal.scala:396)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:238)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:233)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:230)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:49)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:275)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:268)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:49)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:373)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:653)
at scala.util.Try$.apply(Try.scala:213)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:645)
at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:486)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:598)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:391)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:337)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:219)
at java.lang.Thread.run(Thread.java:748)
It seems that your Runtime includes Scala 2.12 but your package is from scala 2.11
Try installing this one
com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.17

ArrayIndexOutOfBoundsException when accessing triplets of a Graph

I'm playing a bit with GraphX and got stuck with an Exception I can't explain.
My code generates 10 random nodes on a graph (of type Point) and then connects some of them. The logic itself doesn't really matter (and actually doesn't have any meaning). I just wanted to build a somewhat connected graph and got this Exception. The code goes as follows:
import scala.util.Random
case class Point(x:Double, y:Double, z:Double)
val vertices = sc.parallelize(
(1 to 10).map(i => (i.toLong, Point(Random.nextInt(10), Random.nextInt(10), Random.nextInt(10))))
)
val tmpGroups = vertices.map(x => (Random.nextInt(5), x) )
val edges = tmpGroups.cartesian(tmpGroups)
.filter{case(x,y) => x._2._1 != y._2._1}
.filter{case(x,y) => Math.abs(x._1 - y._1) <= 1}
.map{case(x,y) => Edge(x._2._1, y._2._1, 1.)}
val graph = Graph(vertices, edges)
So far everything works, and when I collect vertices and edges they look fine:
graph.vertices.collect.foreach(println)
=> (4,Point(6.0,7.0,7.0))
(8,Point(6.0,0.0,5.0))
(1,Point(8.0,4.0,7.0))
...
graph.edges.collect.foreach(println)
=> Edge(1,2,1.0)
Edge(2,1,1.0)
Edge(1,3,1.0)
...
Their types are (as expected):
org.apache.spark.graphx.VertexRDD[Point]
org.apache.spark.graphx.EdgeRDD[Double]
But when I try to collect triplets I get the following error:
graph.triplets.collect
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 40431.0 failed 1 times, most recent failure: Lost task 2.0 in stage 40431.0 (TID 8029, localhost): java.lang.ArrayIndexOutOfBoundsException
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$afab7c86681139df3241c999f2dafc38$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:214)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$afab7c86681139df3241c999f2dafc38$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:219)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$afab7c86681139df3241c999f2dafc38$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:221)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$afab7c86681139df3241c999f2dafc38$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:223)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$afab7c86681139df3241c999f2dafc38$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:225)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3baf9f919752f0ab1f5a31ad94af9f4$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:227)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3baf9f919752f0ab1f5a31ad94af9f4$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:229)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3baf9f919752f0ab1f5a31ad94af9f4$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:231)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3baf9f919752f0ab1f5a31ad94af9f4$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:233)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3baf9f919752f0ab1f5a31ad94af9f4$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:235)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:237)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:239)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:241)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:243)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:245)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$b968e173293ba7cd5c79f2d1143fd$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:247)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$17f9c57b34a761248de8af38492ff086$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:249)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$17f9c57b34a761248de8af38492ff086$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:251)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$17f9c57b34a761248de8af38492ff086$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:253)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$17f9c57b34a761248de8af38492ff086$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:255)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$17f9c57b34a761248de8af38492ff086$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:257)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$bec1ee5c9e2e4d5af247761bdfbc3b3$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:259)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$bec1ee5c9e2e4d5af247761bdfbc3b3$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:261)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$bec1ee5c9e2e4d5af247761bdfbc3b3$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:263)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$bec1ee5c9e2e4d5af247761bdfbc3b3$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:265)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$bec1ee5c9e2e4d5af247761bdfbc3b3$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:267)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$5acc5a6ce0af8ab20753597dcc84fc0$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:269)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$5acc5a6ce0af8ab20753597dcc84fc0$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:271)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$5acc5a6ce0af8ab20753597dcc84fc0$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:273)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$5acc5a6ce0af8ab20753597dcc84fc0$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:275)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$5acc5a6ce0af8ab20753597dcc84fc0$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:277)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:279)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:281)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:283)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:285)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:287)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$33d793dde4292884a4720419646f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:289)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$725d9ae18728ec9520b65ad133e3b55$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:291)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$725d9ae18728ec9520b65ad133e3b55$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:293)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$725d9ae18728ec9520b65ad133e3b55$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:295)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$725d9ae18728ec9520b65ad133e3b55$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:297)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$725d9ae18728ec9520b65ad133e3b55$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:299)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3d99ae6e19b65c7f617b22f29b431fb$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:301)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3d99ae6e19b65c7f617b22f29b431fb$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:303)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3d99ae6e19b65c7f617b22f29b431fb$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:305)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3d99ae6e19b65c7f617b22f29b431fb$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:307)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$3d99ae6e19b65c7f617b22f29b431fb$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:309)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$ad149dbdbd963d0c9dc9b1d6f07f5e$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:311)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$ad149dbdbd963d0c9dc9b1d6f07f5e$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:313)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$ad149dbdbd963d0c9dc9b1d6f07f5e$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:315)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$ad149dbdbd963d0c9dc9b1d6f07f5e$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:317)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$ad149dbdbd963d0c9dc9b1d6f07f5e$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:319)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$6e49527b15a75f3b188beeb1837a4f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:321)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$6e49527b15a75f3b188beeb1837a4f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:323)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$6e49527b15a75f3b188beeb1837a4f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:325)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$6e49527b15a75f3b188beeb1837a4f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:327)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$6e49527b15a75f3b188beeb1837a4f1$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:329)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$93297bcd59dca476dd569cf51abed168$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:331)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$93297bcd59dca476dd569cf51abed168$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:333)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$93297bcd59dca476dd569cf51abed168$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:335)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$93297bcd59dca476dd569cf51abed168$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:337)
at $iwC$$iwC$$iwC$$iwC$$iwC$$$$93297bcd59dca476dd569cf51abed168$$$$$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:339)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:341)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:343)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:345)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:347)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:349)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:351)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:353)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:355)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:357)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:359)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:361)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:363)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:365)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:367)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:369)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:371)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:373)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:375)
at $iwC$$iwC$$iwC$$iwC$$iwC.(:377)
at $iwC$$iwC$$iwC$$iwC.(:379)
at $iwC$$iwC$$iwC.(:381)
at $iwC$$iwC.(:383)
at $iwC.(:385)
at (:387)
at .(:391)
at .()
at .(:7)
at .()
at $print()
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.zeppelin.spark.SparkInterpreter.interpretInput(SparkInterpreter.java:810)
at org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:753)
at org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:746)
at org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:94)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:341)
at org.apache.zeppelin.scheduler.Job.run(Job.java:176)
at org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:139)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ArrayIndexOutOfBoundsException
I can only guess the $iwC stuff are the anonymous functions in my code (filter and map). I've tried to test them separately and collect with/without some of them and nothing led me to a solution.
What am I missing?
EDIT:
I got it to work, but now it's even weirder...
If I collect and re-parallelize the edges RDD, it seems to work fine:
val graph = Graph(vertices, sc.parallelize(edges.collect))
graph.triplets.collect.foreach(println)
=> ((2,Point(5.0,9.0,0.0)),(3,Point(3.0,7.0,8.0)),1.0)
((2,Point(5.0,9.0,0.0)),(4,Point(2.0,5.0,4.0)),1.0)
((2,Point(5.0,9.0,0.0)),(5,Point(0.0,3.0,3.0)),1.0)
...
Can someone please explain this? I don't like voodoos...
What happens on re-parallelization? I guess partitions may change? Does that have anything to do with the original problem?
It's Spark's laziness along with the usage of Random that bites you:
Since tmpGroups and edges aren't cached and are accessed twice, the map operation that is used to create them creates random values more than once, ending up with different values each time.
One way to solve this (more elegant usable than collecting and parallelizing again) is caching. By adding .cache() at the end of the line that creates tmpGroups, the result will be created exactly once:
val tmpGroups = vertices.map(x => (Random.nextInt(5), x) ).cache()

Getting NullPointerrException while using RollingSink

I am using windows* plateform. I am reading messages from kafka and want to store in files using RollingSink. I am getting messages but when I add rolling sink to DataStream it throws Null pointer exception. Below are the Code and the stack Trace.
It is creating the folder structure but no data int it.
i.e
Folder 2016-07-13--2031 and three files in this folder
._part-0-0.in-progress.crc, _part-0-0.in-progress, _part-0-0.pending
StreamExecutionEnvironment sev = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> kafkaStream = sev.addSource(new FlinkKafkaConsumer08<String>("test", new SimpleStringSchema(), properties));
String basePath = "C:\\project\\IOT\\testData\\SinkData";
RollingSink<String> rollingSink = new RollingSink<String>(basePath);
kafkaStream.addSink(rollingSink);
07/14/2016 00:48:48 Job execution switched to status FAILED.
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:717)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:663)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:663)
at `enter code here`scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:401)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NullPointerException
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:445)
at org.apache.hadoop.util.Shell.run(Shell.java:418)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:739)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:722)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:559)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:534)
at org.apache.hadoop.fs.LocatedFileStatus.<init>(LocatedFileStatus.java:42)
at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1698)
at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1680)
at org.apache.hadoop.fs.FileSystem$5.hasNext(FileSystem.java:1733)
at org.apache.flink.streaming.connectors.fs.RollingSink.open(RollingSink.java:339)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:38)
at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.open(AbstractUdfStreamOperator.java:91)
at org.apache.flink.streaming.runtime.tasks.StreamTask.openAllOperators(StreamTask.java:317)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:215)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:559)
at java.lang.Thread.run(Thread.java:745)

Spark Streaming Stateful Network Word Count

This is the example code that came with Spark. I copied the code here and this is the link to it: https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala. However, when I was trying to run the program using command "bin/run-example org.apache.spark.examples.streaming.StatefulNetworkWordCount localhost 9999", I was given the following error:
14/07/20 11:52:57 ERROR ActorSystemImpl: Uncaught fatal error from thread [spark-akka.actor.default-dispatcher-4] shutting down ActorSystem [spark]
java.lang.NoSuchMethodError: java.util.concurrent.ConcurrentHashMap.keySet()Ljava/util/concurrent/ConcurrentHashMap$KeySetView;
at org.apache.spark.streaming.scheduler.JobScheduler.getPendingTimes(JobScheduler.scala:114)
at org.apache.spark.streaming.Checkpoint.<init>(Checkpoint.scala:43)
at org.apache.spark.streaming.scheduler.JobGenerator.doCheckpoint(JobGenerator.scala:259)
at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:167)
at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$start$1$$anon$1$$anonfun$receive$1.applyOrElse(JobGenerator.scala:76)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Exception in thread "Thread-37" org.apache.spark.SparkException: Job cancelled because SparkContext was shut down
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:639)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:638)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:638)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.postStop(DAGScheduler.scala:1215)
at akka.actor.dungeon.FaultHandling$class.akka$actor$dungeon$FaultHandling$$finishTerminate(FaultHandling.scala:201)
at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:163)
at akka.actor.ActorCell.terminate(ActorCell.scala:338)
at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:240)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
14/07/20 11:53:00 ERROR Executor: Exception in task ID 0
java.lang.IllegalStateException: cannot create children while terminating or terminated
at akka.actor.dungeon.Children$class.makeChild(Children.scala:184)
at akka.actor.dungeon.Children$class.attachChild(Children.scala:42)
at akka.actor.ActorCell.attachChild(ActorCell.scala:338)
at akka.actor.ActorSystemImpl.actorOf(ActorSystem.scala:518)
at org.apache.spark.streaming.receiver.ReceiverSupervisorImpl.<init>(ReceiverSupervisorImpl.scala:67)
at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverLauncher$$anonfun$9.apply(ReceiverTracker.scala:263)
at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverLauncher$$anonfun$9.apply(ReceiverTracker.scala:257)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1080)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1080)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
at org.apache.spark.scheduler.Task.run(Task.scala:51)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
14/07/20 11:53:06 ERROR ExecutorUncaughtExceptionHandler: Uncaught exception in thread Thread[spark-akka.actor.default-dispatcher-13,5,main]
org.apache.spark.SparkException: Error sending message to BlockManagerMaster [message = HeartBeat(BlockManagerId(<driver>, x-131-212-225-148.uofm-secure.wireless.umn.edu, 47668, 0))]
at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:251)
at org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:51)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:113)
at org.apache.spark.storage.BlockManager$$anonfun$initialize$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(BlockManager.scala:158)
at org.apache.spark.util.Utils$.tryOrExit(Utils.scala:790)
at org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:158)
at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80)
at akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241)
at akka.actor.LightArrayRevolverScheduler$TaskHolder.run(Scheduler.scala:464)
at akka.actor.LightArrayRevolverScheduler$$anonfun$close$1.apply(Scheduler.scala:281)
at akka.actor.LightArrayRevolverScheduler$$anonfun$close$1.apply(Scheduler.scala:280)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at akka.actor.LightArrayRevolverScheduler.close(Scheduler.scala:279)
at akka.actor.ActorSystemImpl.stopScheduler(ActorSystem.scala:630)
at akka.actor.ActorSystemImpl$$anonfun$_start$1.apply$mcV$sp(ActorSystem.scala:582)
at akka.actor.ActorSystemImpl$$anonfun$_start$1.apply(ActorSystem.scala:582)
at akka.actor.ActorSystemImpl$$anonfun$_start$1.apply(ActorSystem.scala:582)
at akka.actor.ActorSystemImpl$$anon$3.run(ActorSystem.scala:596)
at akka.actor.ActorSystemImpl$TerminationCallbacks$$anonfun$run$1.runNext$1(ActorSystem.scala:750)
at akka.actor.ActorSystemImpl$TerminationCallbacks$$anonfun$run$1.apply$mcV$sp(ActorSystem.scala:753)
at akka.actor.ActorSystemImpl$TerminationCallbacks$$anonfun$run$1.apply(ActorSystem.scala:746)
at akka.actor.ActorSystemImpl$TerminationCallbacks$$anonfun$run$1.apply(ActorSystem.scala:746)
at akka.util.ReentrantGuard.withGuard(LockUtil.scala:15)
at akka.actor.ActorSystemImpl$TerminationCallbacks.run(ActorSystem.scala:746)
at akka.actor.ActorSystemImpl$$anonfun$terminationCallbacks$1.apply(ActorSystem.scala:593)
at akka.actor.ActorSystemImpl$$anonfun$terminationCallbacks$1.apply(ActorSystem.scala:593)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:32)
at akka.dispatch.BatchingExecutor$Batch$$anonfun$run$1.processBatch$1(BatchingExecutor.scala:67)
at akka.dispatch.BatchingExecutor$Batch$$anonfun$run$1.apply$mcV$sp(BatchingExecutor.scala:82)
at akka.dispatch.BatchingExecutor$Batch$$anonfun$run$1.apply(BatchingExecutor.scala:59)
at akka.dispatch.BatchingExecutor$Batch$$anonfun$run$1.apply(BatchingExecutor.scala:59)
at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:72)
at akka.dispatch.BatchingExecutor$Batch.run(BatchingExecutor.scala:58)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:42)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: akka.pattern.AskTimeoutException: Recipient[Actor[akka://spark/user/BlockManagerMaster#1887396223]] had already been terminated.
at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:134)
at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:236)
****************CODE********************
object StatefulNetworkWordCount {
def main(args: Array[String]) {
if (args.length < 2) {
System.err.println("Usage: StatefulNetworkWordCount <hostname> <port>")
System.exit(1)
}
StreamingExamples.setStreamingLogLevels()
val updateFunc = (values: Seq[Int], state: Option[Int]) => {
val currentCount = values.foldLeft(0)(_ + _)
val previousCount = state.getOrElse(0)
Some(currentCount + previousCount)
}
val sparkConf = new SparkConf().setAppName("StatefulNetworkWordCount")
// Create the context with a 1 second batch size
val ssc = new StreamingContext(sparkConf, Seconds(1))
ssc.checkpoint(".")
// Create a NetworkInputDStream on target ip:port and count the
// words in input stream of \n delimited test (eg. generated by 'nc')
val lines = ssc.socketTextStream(args(0), args(1).toInt)
val words = lines.flatMap(_.split(" "))
val wordDstream = words.map(x => (x, 1))
// Update the cumulative count using updateStateByKey
// This will give a Dstream made of state (which is the cumulative count of the words)
val stateDstream = wordDstream.updateStateByKey[Int](updateFunc)
stateDstream.print()
ssc.start()
ssc.awaitTermination()
}
}
I wonder if it is because, it is trying to set up the checkpoint at my local file system by doing commands "ssc.checkpoint(".")", while the file is not a file compatible with hadoop? (the file must be compatible with hadoop in order to set the checkpoint) If it is, how could I fix it? Thanks!
what's your JRE run time release, 1.7 or 1.8 , I have the similar issue that, I compile spark-source-code in 1.8, but if I use 1.7 to run the code, this issue will happen, change back to 1.8 as run time issue will solve.
jdk 1.8, concurrenthashmap.java(line 812):
// views
private transient KeySetView<K,V> keySet;
private transient ValuesView<K,V> values;
private transient EntrySetView<K,V> entrySet;
jdk 1.7 don't have above code
Hope it will help ^_^