java.lang.NoSuchMethodError: org.apache.http.conn.ssl.SSLConnectionSocketFactory - scala

How can I list all file names of parquet files in the S3 directory in Amazon?
I found this way:
val s3 = AmazonS3ClientBuilder.standard.build()
var objs = s3.listObjects("bucketname","directory")
val summaries = objs.getObjectSummaries()
while (objs.isTruncated()) {
objs = s3.listNextBatchOfObjects(objs)
summaries.addAll(objs.getObjectSummaries())
}
val listOfFiles = summaries.toArray
But it throws the error:
java.lang.NoSuchMethodError: org.apache.http.conn.ssl.SSLConnectionSocketFactory
I added the dependency for httpclient 4.5.2 as indicated in many answers, but I still get the same error.
Also I did:
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion exclude("commons-httpclient", "commons-httpclient"),
"org.apache.spark" %% "spark-mllib" % sparkVersion exclude("commons-httpclient", "commons-httpclient"),
"org.sedis" %% "sedis" % "1.2.2",
"org.scalactic" %% "scalactic" % "3.0.0",
"org.scalatest" %% "scalatest" % "3.0.0" % "test",
"com.github.nscala-time" %% "nscala-time" % "2.14.0",
"com.amazonaws" % "aws-java-sdk-s3" % "1.11.53",
"org.apache.httpcomponents" % "httpclient" % "4.5.2",
"net.java.dev.jets3t" % "jets3t" % "0.9.3",
"org.apache.hadoop" % "hadoop-aws" % "2.6.0",
"com.github.scopt" %% "scopt" % "3.3.0"
)

Related

Exception in thread "main" java.lang.NoSuchMethodError: scala.Product.$init$(Lscala/Product;)V

I am trying to run the code in IntelliJ but I am getting the error below: "Exception in thread "main" java.lang.NoSuchMethodError: scala.Product.$init$(Lscala/Product;)". Please help me find the issue. My Scala version is 2.11.12 and Spark is 2.4.4.
Metorikku$: Starting Metorikku - Parsing configuration
ConfigurationParser$: Starting Metorikku - Parsing configuration
Exception in thread "main" java.lang.NoSuchMethodError: scala.Product.$init$(Lscala/Product;)V
at org.apache.spark.SparkConf$DeprecatedConfig.<init>(SparkConf.scala:810)
at org.apache.spark.SparkConf$.<init>(SparkConf.scala:644)
at org.apache.spark.SparkConf$.<clinit>(SparkConf.scala)
at org.apache.spark.SparkConf.set(SparkConf.scala:95)
at org.apache.spark.SparkConf.$anonfun$loadFromSystemProperties$3(SparkConf.scala:77)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:221)
at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:428)
at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:428)
at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
at org.apache.spark.SparkConf.loadFromSystemProperties(SparkConf.scala:76)
at org.apache.spark.SparkConf.<init>(SparkConf.scala:71)
at org.apache.spark.SparkConf.<init>(SparkConf.scala:58)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$5(SparkSession.scala:927)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at com.yotpo.metorikku.utils.FileUtils$.getHadoopPath(FileUtils.scala:63)
at com.yotpo.metorikku.utils.FileUtils$.readFileWithHadoop(FileUtils.scala:72)
at com.yotpo.metorikku.utils.FileUtils$.readConfigurationFile(FileUtils.scala:56)
at com.yotpo.metorikku.configuration.job.ConfigurationParser$.parse(ConfigurationParser.scala:34)
at com.yotpo.metorikku.Metorikku$.delayedEndpoint$com$yotpo$metorikku$Metorikku$1(Metorikku.scala:12)
at com.yotpo.metorikku.Metorikku$delayedInit$body.apply(Metorikku.scala:9)
at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at com.yotpo.metorikku.Metorikku$.main(Metorikku.scala:9)
at com.yotpo.metorikku.Metorikku.main(Metorikku.scala)
build.sbt file
scalaVersion := "2.11.12"
val sparkVersion = Option(System.getProperty("sparkVersion")).getOrElse("2.4.5")
val jacksonVersion = "2.9.9"
lazy val excludeJpountz = ExclusionRule(organization = "net.jpountz.lz4", name = "lz4")
lazy val excludeNetty = ExclusionRule(organization = "io.netty", name = "netty")
lazy val excludeNettyAll = ExclusionRule(organization = "io.netty", name = "netty-all")
lazy val excludeAvro = ExclusionRule(organization = "org.apache.avro", name = "avro")
lazy val excludeSpark = ExclusionRule(organization = "org.apache.spark")
lazy val excludeFasterXML = ExclusionRule(organization = "com.fasterxml.jackson.module", name= "jackson-module-scala_2.12")
lazy val excludeMetricsCore = ExclusionRule(organization = "io.dropwizard.metrics", name= "metrics-core")
lazy val excludeLog4j = ExclusionRule(organization = "org.apache.logging.log4j")
lazy val excludeParquet = ExclusionRule(organization = "org.apache.parquet")
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",
"org.apache.spark" %% "spark-hive" % sparkVersion % "provided",
"org.apache.spark" %% "spark-sql-kafka-0-10" % sparkVersion % "provided" excludeAll(excludeJpountz),
"org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
"org.apache.spark" %% "spark-avro" % sparkVersion % "provided",
"com.datastax.spark" %% "spark-cassandra-connector" % "2.4.2",
"com.holdenkarau" %% "spark-testing-base" % "2.4.3_0.12.0" % "test",
"com.github.scopt" %% "scopt" % "3.6.0",
"RedisLabs" % "spark-redis" % "0.3.2",
"org.json4s" %% "json4s-native" % "3.5.2",
"io.netty" % "netty-all" % "4.1.32.Final",
"io.netty" % "netty" % "3.10.6.Final",
"com.google.guava" % "guava" % "16.0.1",
"com.typesafe.play" %% "play-json" % "2.6.2",
"com.databricks" %% "spark-redshift" % "3.0.0-preview1" excludeAll excludeAvro,
"com.amazon.redshift" % "redshift-jdbc42" % "1.2.1.1001",
"com.segment.analytics.java" % "analytics" % "2.0.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.6",
"org.scala-lang" % "scala-compiler" % "2.11.12",
"com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion,
"com.fasterxml.jackson.dataformat" % "jackson-dataformat-cbor" % jacksonVersion,
"com.fasterxml.jackson.core" % "jackson-core" % jacksonVersion,
"com.fasterxml.jackson.core" % "jackson-annotations" % jacksonVersion,
"com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion,
"com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % jacksonVersion,
"com.groupon.dse" % "spark-metrics" % "2.0.0" excludeAll excludeMetricsCore,
"org.apache.commons" % "commons-text" % "1.6",
"org.influxdb" % "influxdb-java" % "2.14",
"org.apache.kafka" %% "kafka" % "2.2.0" % "provided",
"za.co.absa" % "abris_2.11" % "3.1.1" % "provided" excludeAll(excludeAvro, excludeSpark),
"org.apache.hudi" %% "hudi-spark-bundle" % "0.5.2-incubating" % "provided" excludeAll excludeFasterXML,
"org.apache.parquet" % "parquet-avro" % "1.10.1" % "provided",
"org.apache.avro" % "avro" % "1.8.2" % "provided",
"org.apache.hive" % "hive-jdbc" % "2.3.3" % "provided" excludeAll(excludeNetty, excludeNettyAll, excludeLog4j, excludeParquet),
"org.apache.hadoop" % "hadoop-aws" % "2.7.3" % "provided"
)

NoClassDefFoundError: kafka/api/OffsetRequest for Storm jar

I am trying to submit Storm topology to the cluster but I constantly get the same error:
Exception in thread "main" java.lang.NoClassDefFoundError: kafka/api/OffsetRequest
at org.apache.storm.kafka.KafkaConfig.<init>(KafkaConfig.java:48)
at org.apache.storm.kafka.trident.TridentKafkaConfig.<init>(TridentKafkaConfig.java:30)
at storm.StormStreaming$.main(StormStreaming.scala:41)
at storm.StormStreaming.main(StormStreaming.scala)
Caused by: java.lang.ClassNotFoundException: kafka.api.OffsetRequest
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 4 more
I submit jar file using
./storm jar /path/storm-app.jar storm.StormStreaming
How can it be fixed? I tried aligning resources versions - Kafka and Storm - but it does not seem to work.
My build.sbt file:
scalaVersion := "2.12.8"
val sparkVersion = "2.4.4"
val flinkVersion = "1.9.1"
val stormVersion = "2.1.0"
val kafkaVersion = "2.4.0"
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-core" % "2.9.6"
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.9.6"
dependencyOverrides += "com.fasterxml.jackson.module" % "jackson-module-scala_2.11" % "2.9.6"
libraryDependencies ++= Seq(
"org.apache.kafka" %% "kafka" % kafkaVersion excludeAll(
ExclusionRule("org.slf4j", "slf4j-log4j12"),
ExclusionRule("log4j", "log4j"),
ExclusionRule("org.apache.zookeeper", "zookeeper")
),
"org.apache.spark" %% "spark-sql-kafka-0-10" % sparkVersion,
"org.apache.spark" %% "spark-core" % sparkVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.spark" %% "spark-streaming" % sparkVersion,
"org.apache.spark" %% "spark-sql" % sparkVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion,
"com.typesafe" % "config" % "1.3.3",
"org.twitter4j" % "twitter4j-core" % "4.0.7",
"org.twitter4j" % "twitter4j-stream" % "4.0.7",
"org.apache.flink" % "flink-core" % flinkVersion % "provided",
"org.apache.flink" %% "flink-streaming-scala" % flinkVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.flink" %% "flink-scala" % flinkVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.flink" %% "flink-clients" % flinkVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.flink" %% "flink-connector-kafka" % flinkVersion,
"org.apache.flink" %% "flink-runtime-web" % flinkVersion,
"org.apache.flink" % "flink-avro-confluent-registry" % flinkVersion,
"org.apache.storm" % "storm-core" % stormVersion % "provided" excludeAll(
ExclusionRule("org.slf4j", "slf4j-log4j12"),
ExclusionRule("org.slf4j", "log4j-over-slf4j")
),
"org.apache.storm" % "storm-kafka-client" % stormVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.storm" % "storm-sql-core" % stormVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.storm" % "storm-sql-runtime" % stormVersion excludeAll("org.slf4j", "slf4j-log4j12"),
"org.apache.storm" % "storm-kafka" % "1.2.3" excludeAll("org.slf4j", "slf4j-log4j12")
)
You are using the wrong Kafka jar. You should depend on org.apache.kafka:kafka-clients instead of org.apache.kafka:kafka_2.xx, which is the Kafka server side jar.
The dependence on kafka/api/OffsetRequest is coming from storm-kafka, which should not be used. It's using an old Kafka client API which is no longer present in Kafka. Use storm-kafka-client instead.

Unresolved slick dependency on Scala 2.11.9

Similarly to:
Why is UNRESOLVED DEPENDENCIES error with com.typesafe.slick#slick_2.11;2.0.2: not found?
I got the next error message:
events/*:update) sbt.ResolveException: unresolved dependency: com.typesafe.slick#slick-extensions_2.11;3.1.0: not found
My scala build.sbt has:
lazy val events = (project in file("modules/events")).settings(commonSettings).
settings(Seq(libraryDependencies ++= Seq(
cache,
ws,
evolutions,
specs2,
"com.softwaremill.macwire" %% "macros" % "2.2.5" % "provided",
"com.softwaremill.macwire" %% "util" % "2.2.0",
"ch.qos.logback" % "logback-classic" % "1.1.8",
"de.svenkubiak" % "jBCrypt" % "0.4.1",
"org.scalatestplus.play" %% "scalatestplus-play" % "1.5.0" % "test",
"org.mockito" % "mockito-core" % "2.0.45-beta" % "test",
"mysql" % "mysql-connector-java" % "5.1.34",
"org.postgresql" % "postgresql" % "9.4.1207.jre7",
"com.vividsolutions" % "jts" % "1.13",
"com.typesafe.play" % "play-slick_2.11" % "2.0.2",
"com.typesafe.play" %% "play-slick-evolutions" % "2.0.0",
"com.github.tminglei" %% "slick-pg" % "0.12.1",
"com.github.tminglei" %% "slick-pg_date2" % "0.12.1",
"com.github.tminglei" %% "slick-pg_play-json" % "0.12.1",
"com.typesafe.slick" %% "slick-extensions" % "3.1.0",
"org.scalikejdbc" %% "scalikejdbc" % "2.4.2",
"org.scalikejdbc" %% "scalikejdbc-config" % "2.4.2",
"joda-time" % "joda-time" % "2.9.4",
"com.typesafe.play" %% "play-json" % "2.5.9",
"io.circe" %% "circe-core" % circeVersion,
"io.circe" %% "circe-generic" % circeVersion,
"io.circe" %% "circe-parser" % circeVersion,
"io.circe" %% "circe-jawn" % circeVersion,
"com.github.julien-truffaut" %% "monocle-core" % monocleVersion,
"com.github.julien-truffaut" %% "monocle-macro" % monocleVersion,
"com.github.julien-truffaut" %% "monocle-law" % monocleVersion % "test",
"com.microsoft.sqlserver" % "mssql-jdbc" % "7.4.1.jre8"
)))
I am also using Scala 2.11.9. I also tried adding
resolvers += "typesafe" at "http://repo.typesafe.com/typesafe/releases/"
but no luck. Any suggestions, please?
Actually slick-extensions is not located in http://repo.typesafe.com/typesafe/releases/. If you will look there you will see that com/typesafe/slick/slick-extensions_2.11/ is empty.
But I have found it here https://typesafe.bintray.com/commercial-maven-releases/com/typesafe/slick/slick-extensions_2.11/3.1.0/
And here some information about slick-extensions: https://index.scala-lang.org/slick/slick/slick-extensions/3.1.0.
They recommend using that:
libraryDependencies += "com.typesafe.slick" %% "slick-extensions" % "3.1.0"
resolvers += Resolver.bintrayRepo("typesafe", "commercial-maven-releases")

How to convert RDD[some case class] to csv file using scala?

I have an RDD[some case class] and I want to convert it to a csv file. I am using spark 1.6 and scala 2.10.5 .
stationDetails.toDF.coalesce(1).write.format("com.databricks.spark.csv").save("data/myData.csv")
gives error
Exception in thread "main" java.lang.ClassNotFoundException: Failed to find data source: com.databricks.spark.csv. Please find packages at http://spark-packages.org
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.lookupDataSource(ResolvedDataSource.scala:77)
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:219)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139)
I am not able to add the dependencies for "com.databricks.spark.csv" in my build.sbt file.
dependencies I added in build.sbt file are:
libraryDependencies ++= Seq(
"org.apache.commons" % "commons-csv" % "1.1",
"com.univocity" % "univocity-parsers" % "1.5.1",
"org.slf4j" % "slf4j-api" % "1.7.5" % "provided",
"org.scalatest" %% "scalatest" % "2.2.1" % "test",
"com.novocode" % "junit-interface" % "0.9" % "test"
)
I also tried this
stationDetails.toDF.coalesce(1).write.csv("data/myData.csv")
but it gives error : csv cannot be resolved.
Please change your build.sbt to below -
libraryDependencies ++= Seq(
"org.apache.commons" % "commons-csv" % "1.1",
"com.databricks" %% "spark-csv" % "1.4.0",
"com.univocity" % "univocity-parsers" % "1.5.1",
"org.slf4j" % "slf4j-api" % "1.7.5" % "provided",
"org.scalatest" %% "scalatest" % "2.2.1" % "test",
"com.novocode" % "junit-interface" % "0.9" % "test"
)

"Exception in thread "main" java.lang.NoSuchMethod" error while making a model in Apache Spark in scala

I am trying to make a model using Spark and Scala to predict my label, but my IDE shows me the following error:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.spark.ml.regression.LinearRegression.fit(Lorg/apache/spark/sql/Dataset;)Lorg/apache/spark/ml/PredictionModel;
Here is my TrainingDF dataframe that I want to make a model on it:
+--------------------+-----+
| features|label|
+--------------------+-----+
|[1.497325796E9,19...| 19|
|[1.497325796E9,19...| 19|
|[1.497325796E9,19...| 19|
|[1.497325796E9,10...| 10|
|[1.497325796E9,10...| 10|
|[1.497325796E9,10...| 10|
|[1.497325796E9,10...| 10|
|[1.497325796E9,10...| 10|
+--------------------+-----+
The error is on the val model1=lir.fit(TrainingDF) line, and I am completely puzzled by this error. The input of my fit function is a dataframe. Here is my code:
val final_df = Dataframe.withColumn(
"features",
toVec4(
// casting into Timestamp to parse the string, and then into Int
$"time_stamp_0".cast(TimestampType).cast(IntegerType),
$"count",
$"sender_ip_1",
$"receiver_ip_2"
)
).withColumn("label", (Dataframe("count"))).select("features", "label")
final_df.show()
val trainingTest = final_df.randomSplit(Array(0.3, 0.7))
val TrainingDF = trainingTest(0).toDF()
val TestingDF=trainingTest(1).toDF()
TrainingDF.show()
TestingDF.show()
Until this part everything works perfectly. However the TrainingDF dataframe cannot be accepted by lir.fit.
///lets create our liner regression
val lir= new LinearRegression()
.setRegParam(0.3)
.setElasticNetParam(0.8)
.setMaxIter(100)
.setTol(1E-6)
//////make model -> Error is here
val model1=lir.fit(TrainingDF)
}
}
Also here is my dependencies in my build.sbt file:
name := "untitled"
version := "1.0"
scalaVersion := "2.10.4"
retrieveManaged := true
resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/"
resolvers += "MapR Repository" at "http://repository.mapr.com/maven/"
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "1.6.1",
"org.apache.spark" %% "spark-sql" % "2.1.1",
"org.apache.spark" %% "spark-hive" % "1.6.1",
"org.apache.spark" %% "spark-streaming" % "1.6.1",
"org.apache.spark" %% "spark-streaming-kafka" % "1.6.1",
"org.apache.spark" %% "spark-streaming-flume" % "1.6.1",
"org.apache.spark" %% "spark-mllib" % "1.6.1",
"org.apache.logging.log4j" % "log4j-api" % "2.5",
"com.typesafe.play" %% "play-json" % "2.3.4",
"com.fasterxml.jackson.core" % "jackson-databind" % "2.3.3",
"com.fasterxml.jackson.module" % "jackson-module-scala_2.10" % "2.3.3",
"mysql" % "mysql-connector-java" % "5.1.31",
"com.github.scopt" %% "scopt" % "3.4.0", //OptinsParser
"org.scalatest" %% "scalatest" % "2.2.1" % "test",
"com.holdenkarau" %% "spark-testing-base" % "0.0.1" % "test"
)
I should mention that I use IntelliJ IDEA. Can you help me please?
The cause of this problem is likely a discrepancy between the versions of the Spark libraries you're using. Make sure you're using the same versions for spark-core and spark-mllib.
The issue is due to multiple version of spark dependencies. Try using
"org.apache.spark" %% "spark-core" % "2.1.1",
"org.apache.spark" %% "spark-sql" % "2.1.1",
"org.apache.spark" %% "spark-hive" % "2.1.1",
"org.apache.spark" %% "spark-streaming" % "2.1.1",
"org.apache.spark" %% "spark-streaming-kafka" % "2.1.1",
"org.apache.spark" %% "spark-streaming-flume" % "2.1.1",
"org.apache.spark" %% "spark-mllib" % "2.1.1",
"org.apache.logging.log4j" % "log4j-api" % "2.5",
"com.typesafe.play" %% "play-json" % "2.3.4",
"com.fasterxml.jackson.core" % "jackson-databind" % "2.3.3",
"com.fasterxml.jackson.module" % "jackson-module-scala_2.10" % "2.3.3",
"mysql" % "mysql-connector-java" % "5.1.31",
"com.github.scopt" %% "scopt" % "3.4.0", //OptinsParser
"org.scalatest" %% "scalatest" % "2.2.1" % "test",
"com.holdenkarau" %% "spark-testing-base" % "0.0.1" % "test"
I also suggest to update the scala version.
Hope this helps!