"datasource not a member of org.apache.phoenix" when trying to Save DataFrames to Phoenix using DataSourceV2 - scala

I am trying to Save DataFrames to Phoenix using DataSourceV2 following the below mentioned source:
Apache Spark plugin
I created a dataframe and I want to save it to phoenix in the following way:
// SparkConf and SaveMode are both used below, so they must be imported explicitly.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SaveMode
// NOTE(review): this import only resolves if the phoenix-spark jar on the classpath
// actually contains the datasource.v2 package — verify against the artifact in use.
import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource

// Set up the Spark context and the SQLContext whose implicits provide `toDF()`.
val conf = new SparkConf().setAppName("Spark sql to convert rdd to df")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._

// Convert the record sequence to a DataFrame and write it through the "phoenix" format.
val MasterDF = MasterRecordSeq.toDF()
MasterDF.write
  .format("phoenix")
  .mode(SaveMode.Overwrite)
  .options(Map("table" -> masterTableName, PhoenixDataSource.ZOOKEEPER_URL -> "phoenix-server:2181"))
  .save()
But the import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource is not being recognized. It throws the following error:
object datasource is not a member of package org.apache.phoenix.spark
I have searched extensively online, but I have not been able to find the cause.
The following are the dependencies I added in build.sbt:
libraryDependencies += "org.apache.phoenix" % "phoenix-spark" % "5.0.0-HBase-2.0"
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.5"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.5"
libraryDependencies += "org.apache.phoenix" % "phoenix-core" % "5.0.0-HBase-2.0"
The following is the complete build file:
import NativePackagerHelper._
import java.util.Properties
import com.typesafe.sbt.packager.MappingsHelper._
//import sbtrelease.ReleaseStateTransformations._
name := """gavel"""
//scapegoatVersion in ThisBuild := "1.1.0"
//version := sys.env.get("BUILD_NUMBER").getOrElse("3.0-LOCAL")
version := "3.0"
// Scala compiler version used for this build.
scalaVersion := "2.11.12"
//crossScalaVersions := Seq("2.11.11", "2.12.3")
//scapegoatVersion in ThisBuild := "1.3.5"
// NOTE(review): the binary version is forced to 2.12 while scalaVersion above is 2.11.12;
// `%%` dependencies will presumably resolve _2.12 artifacts — confirm this is intended.
scalaBinaryVersion in ThisBuild := "2.12"
javacOptions ++= Seq("-source", "1.6", "-target", "1.6")
scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature")
scalacOptions in (Compile, doc) ++= Seq("-unchecked", "-deprecation", "-diagrams", "-implicits", "-skip-packages", "samples")
lazy val root = (project in file(".")).enablePlugins(PlayScala,sbtdocker.DockerPlugin,JavaAppPackaging).settings(
watchSources ++= (baseDirectory.value / "public/frontend" ** "*").get
)
mainClass := Some("play.core.server.ProdServerStart")
fullClasspath in assembly += Attributed.blank(PlayKeys.playPackageAssets.value)
mappings in Universal ++= directory(baseDirectory.value / "public")
unmanagedBase := baseDirectory.value / "libs"
routesGenerator := InjectedRoutesGenerator
resolvers += "scalaz-bintray" at "https://dl.bintray.com/scalaz/releases"
libraryDependencies ++= Seq(
"com.typesafe" % "config" % "1.3.1",
"mysql" % "mysql-connector-java" % "5.1.34",
"com.typesafe.play" %% "play-slick" % "3.0.0",
"com.typesafe.play" %% "play-slick-evolutions" % "3.0.0",
"com.typesafe.play" %% "play-json" % "2.6.0",
"org.scalatestplus.play" %% "scalatestplus-play" % "3.0.0" % "test",
specs2 % Test,
// "io.rest-assured" % "rest-assured" % "3.0.0" % "test",
// "io.rest-assured" % "scala-support" % "3.0.0" % "test",
// "com.squareup.okhttp" % "mockwebserver" % "2.5.0" % "test",
"javax.mail" % "mail" % "1.4",
"io.swagger" %% "swagger-play2" % "1.6.1",
"com.fasterxml.jackson.core" % "jackson-databind" % "2.4.0",
"com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.4.0",
"com.google.code.gson" % "gson" % "1.7.1",
"commons-io" % "commons-io" % "2.4",
"com.typesafe.akka" %% "akka-actor" % "2.4.16",
"com.typesafe.akka" %% "akka-testkit" % "2.4.16" % "test",
"org.typelevel" %% "macro-compat" % "1.1.1",
"org.scala-lang" % "scala-reflect" % scalaVersion.value % "provided",
"org.scalatest" %% "scalatest" % "3.0.0" % "test",
compilerPlugin("org.scalamacros" %% "paradise" % "2.1.0" cross CrossVersion.full),
guice
)
libraryDependencies ++= Seq(
"com.101tec" % "zkclient" % "0.4",
"org.apache.kafka" % "kafka_2.10" % "0.8.1.1"
exclude("javax.jms", "jms")
exclude("com.sun.jdmk", "jmxtools")
exclude("com.sun.jmx", "jmxri")
)
libraryDependencies += ws
libraryDependencies += ehcache
// https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-spark
libraryDependencies += "org.apache.phoenix" % "phoenix-spark" % "5.0.0-HBase-2.0"
libraryDependencies += "com.google.protobuf" % "protobuf-java" % "2.4.0"
libraryDependencies += "org.codehaus.jackson" % "jackson-mapper-asl" % "1.9.13"
libraryDependencies += "com.google.code.gson" % "gson" % "2.3"
libraryDependencies += "org.apache.phoenix" % "phoenix-queryserver-client" % "4.13.1-HBase-1.2"
libraryDependencies += "com.github.takezoe" %% "solr-scala-client" % "0.0.19"
libraryDependencies += "com.squareup.okhttp" % "okhttp" % "2.7.0"
libraryDependencies += "org.threeten" % "threetenbp" % "1.2"
libraryDependencies += "io.gsonfire" % "gson-fire" % "1.0.1"
libraryDependencies += "au.com.bytecode" % "opencsv" % "2.4"
libraryDependencies += "org.simplejavamail" % "simple-java-mail" % "5.0.8"
libraryDependencies += "org.apache.solr" % "solr-solrj" % "6.6.2"
libraryDependencies += "com.jcraft" % "jsch" % "0.1.55"
libraryDependencies += "com.vmware" % "vijava" % "5.1"
libraryDependencies += "com.microsoft.sqlserver" % "mssql-jdbc" % "6.1.0.jre8" % Test
//libraryDependencies += "com.microsoft.sqlserver" % "sqljdbc4" % "4.0"
libraryDependencies += "org.apache.poi" % "poi" % "3.17"
libraryDependencies += "org.apache.poi" % "poi-ooxml" % "3.17"
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.5"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.5"
libraryDependencies += "org.apache.phoenix" % "phoenix-core" % "5.0.0-HBase-2.0"
crossSbtVersions := Seq("0.13.17", "1.1.6")
// Publish to Sonatype OSS: snapshot builds go to the snapshots repository,
// every other build goes to the release staging endpoint.
publishTo := {
  val snapshotBuild = isSnapshot.value
  val sonatypeBase = "https://oss.sonatype.org/"
  val target =
    if (snapshotBuild) "snapshots" at sonatypeBase + "content/repositories/snapshots"
    else "releases" at sonatypeBase + "service/local/staging/deploy/maven2"
  Some(target)
}
publishMavenStyle := true
publishArtifact in Test := false
parallelExecution in Test := false
// Defines the Dockerfile used by the `docker` task: it copies the assembled JAR
// into the image and starts it with `java -jar`.
dockerfile in docker := {
// The assembly task generates a fat JAR file
val artifact: File = assembly.value
// Destination path of the JAR inside the image.
val artifactTargetPath = s"/app/${artifact.name}"
new Dockerfile {
// NOTE(review): two FROM instructions are emitted; presumably only the last base
// image (mysql:5.7) makes up the final image — confirm a Java runtime is available there.
from("java")
from("mysql:5.7")
add(artifact, artifactTargetPath)
entryPoint("java", "-jar", artifactTargetPath)
}
}
// Setting that holds the key/value pairs loaded from ./conf/database.conf.
val appProperties = settingKey[Properties]("The application properties")
appProperties := {
  val loaded = new Properties()
  IO.load(loaded, new File("./conf/database.conf"))
  loaded
}
javaOptions in Test += "-Dconfig.file=conf/application.test.conf"
resolvers += "Sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots/"
sourceDirectories in (Compile, TwirlKeys.compileTemplates) :=
(unmanagedSourceDirectories in Compile).value
flywayDriver := "com.mysql.jdbc.Driver"
flywayUrl := appProperties.value.getProperty("slick.dbs.default.db.url").replaceAll("\"", "")
flywayUser := appProperties.value.getProperty("slick.dbs.default.db.user")
flywayPassword := appProperties.value.getProperty("slick.dbs.default.db.password").replaceAll("\"", "")
flywayLocations := Seq("filesystem:conf/db/default")
fork in run := true
//coverageEnabled := false
//coverageMinimum := 70
//coverageFailOnMinimum := true
//coverageHighlighting := true
publishArtifact in Test := false
parallelExecution in Test := false
enablePlugins(SbtProguard)
import com.lightbend.sbt.SbtProguard._
javaOptions in (Proguard, proguard) := Seq("-Xmx2G")
proguardOptions in Proguard ++= Seq("-dontnote", "-dontwarn", "-ignorewarnings")
proguardOptions in Proguard += ProguardOptions.keepMain("some.MainClass")
proguardMergeStrategies in Proguard += ProguardMerge.append("*.conf")
proguardMergeStrategies in Proguard ++= Seq(
ProguardMerge.discard("\\.zip$".r),
ProguardMerge.discard("\\.xml$".r),
ProguardMerge.discard("\\.txt$".r),
ProguardMerge.discard("\\.conf$".r),
ProguardMerge.discard("\\.jar$".r)
)
My phoenix version is 5.0. My Hbase version is 2.0.2.3.1.0.0-78. Am I missing any configuration?

I had the same error, although in my case it came from a Scala script executed by Spark on a Hortonworks Big Data cluster.
I solved it by building the phoenix-spark module from the phoenix-connectors repository on GitHub and copying the resulting jar into Spark's jars directory.
Here are the commands I ran to build the jar; I hope they help.
# Install Maven, which is needed to build the connector.
sudo yum install maven
# Download and unpack the phoenix-connectors sources (master branch).
wget https://github.com/apache/phoenix-connectors/archive/master.zip
unzip master.zip
# GitHub branch archives unpack into "<repo>-<branch>", i.e. phoenix-connectors-master.
cd phoenix-connectors-master/phoenix-spark
# Build the module and package the jar.
mvn clean compile
mvn package
# Copy the built jar into the Spark client's jars directory.
cd target/scala-2.12/
cp phoenix-spark-1.0.0-SNAPSHOT.jar /usr/hdp/current/spark2-client/jars

Related

sbt complains about JmhPlugin not found

I'm trying to use sbt JmhPlugin and I'm following the instructions found here: https://github.com/sbt/sbt-jmh
So I added the plugin to project/plugins.sbt and then I added to build.sbt the enablePlugins(JmhPlugin) line so my build files look like this:
project/plugins.sbt:
addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.4")
project/build.properties:
sbt.version = 1.8.2
build.sbt:
ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / scalaVersion := "2.13.10"
lazy val root = (project in file("."))
.settings(
name := "myproj"
)
libraryDependencies += "org.scalactic" %% "scalactic" % "3.2.15"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.15" % "test"
libraryDependencies += "org.typelevel" %% "cats-effect" % "2.5.3"
val catsVersion = "2.9.0"
libraryDependencies += "org.typelevel" %% "cats-core" % catsVersion
libraryDependencies += "org.typelevel" %% "cats-free" % catsVersion
libraryDependencies += "org.typelevel" %% "cats-laws" % catsVersion
libraryDependencies += "org.typelevel" %% "cats-mtl-core" % "0.7.1"
libraryDependencies += "org.typelevel" %% "simulacrum" % "1.0.1"
libraryDependencies += "org.scalamacros" %% "resetallattrs" % "1.0.0"
libraryDependencies += "org.scalameta" %% "munit" % "0.7.22"
libraryDependencies += "org.typelevel" %% "discipline-munit" % "1.0.6"
scalacOptions ++= Seq(
"-deprecation",
"-encoding", "UTF-8",
"-feature",
"-language:_",
"-Ymacro-annotations"
)
enablePlugins(JmhPlugin)
but when I'm running sbt build it complains that it cannot find the JmhPlugin:
error: not found: value JmhPlugin
enablePlugins(JmhPlugin)
^
What am I doing wrong here? Also, how should I debug this issue?
Thanks!

Why does Spark with Play fail with "NoClassDefFoundError: Could not initialize class org.apache.spark.SparkConf$"?

I am trying to use this project (https://github.com/alexmasselot/spark-play-activator) as an integration of Play and Spark example to do the same in my project. So, I created an object that starts Spark and a Controller that read a Json file using RDD. Below is my Object that starts Spark:
package bootstrap
import org.apache.spark.sql.SparkSession
/** Holds the SparkSession for the application, configured with master "local". */
object SparkCommons {
  val sparkSession = {
    // Configure the builder first, then obtain (or reuse) the session.
    val configured = SparkSession.builder.master("local").appName("ApplicationController")
    configured.getOrCreate()
  }
}
and my build.sbt is like this:
import play.sbt.PlayImport._
name := """crypto-miners-demo"""
version := "1.0-SNAPSHOT"
lazy val root = (project in file(".")).enablePlugins(PlayScala)
scalaVersion := "2.12.4"
libraryDependencies += guice
libraryDependencies += evolutions
libraryDependencies += jdbc
libraryDependencies += filters
libraryDependencies += ws
libraryDependencies += "com.h2database" % "h2" % "1.4.194"
libraryDependencies += "com.typesafe.play" %% "anorm" % "2.5.3"
libraryDependencies += "org.scalatestplus.play" %% "scalatestplus-play" % "3.1.0" % Test
libraryDependencies += "com.typesafe.play" %% "play-slick" % "3.0.0"
libraryDependencies += "com.typesafe.play" %% "play-slick-evolutions" % "3.0.0"
libraryDependencies += "org.xerial" % "sqlite-jdbc" % "3.19.3"
libraryDependencies += "org.apache.spark" % "spark-core_2.11" % "2.2.0"
libraryDependencies += "org.apache.spark" % "spark-sql_2.11" % "2.2.0"
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.8.7"
But when I try to call a controller that uses the RDD I get this error on Play framework:
java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.SparkConf$
I am using the RDD like this: val rdd = SparkCommons.sparkSession.read.json("downloads/tweet-json").
The application whose configuration I am trying to copy works well. The only thing I was able to add to my build.sbt was the jackson-databind library. I get an error when I copy libraryDependencies ++= Dependencies.sparkAkkaHadoop and ivyScala := ivyScala.value map { _.copy(overrideScalaVersion = true) } into my build.sbt.
I will write this 100,000 times on the blackboard and never forget it: Spark 2.2.0 still doesn't work with Scala 2.12. I switched the project to Scala 2.11 and also changed the Jackson library version. Below is my build.sbt.
import play.sbt.PlayImport._
name := """crypto-miners-demo"""
version := "1.0-SNAPSHOT"
lazy val root = (project in file(".")).enablePlugins(PlayScala)
scalaVersion := "2.11.8"
libraryDependencies += guice
libraryDependencies += evolutions
libraryDependencies += jdbc
libraryDependencies += filters
libraryDependencies += ws
libraryDependencies += "com.h2database" % "h2" % "1.4.194"
libraryDependencies += "com.typesafe.play" %% "anorm" % "2.5.3"
libraryDependencies += "org.scalatestplus.play" %% "scalatestplus-play" % "3.1.0" % Test
libraryDependencies += "com.typesafe.play" %% "play-slick" % "3.0.0"
libraryDependencies += "com.typesafe.play" %% "play-slick-evolutions" % "3.0.0"
libraryDependencies += "org.xerial" % "sqlite-jdbc" % "3.19.3"
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.2.0"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.2.0"
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.6.5"

unable to import dependency hadoop-core to build.sbt in intellij

My build.sbt looks like this:
// Basic project settings.
name := "Kafak"
version := "1.0"
scalaVersion := "2.12.2"

// Each dependency is kept on one line; the original hard-wrapped comments left
// bare tokens ("client", "core") on their own lines, which is not valid sbt syntax.
libraryDependencies += "com.google.code.gson" % "gson" % "2.8.1"
libraryDependencies += "org.apache.kafka" % "kafka-clients" % "0.10.2.1"
libraryDependencies += "org.slf4j" % "slf4j-simple" % "1.6.1"
// https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "3.0.0-alpha3"
// https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core
// NOTE(review): the "-cdh" version suffix suggests a vendor build; presumably it needs
// an additional resolver beyond the defaults — confirm where this artifact is hosted.
libraryDependencies += "org.apache.hadoop" % "hadoop-core" % "2.6.0-mr1-cdh5.9.0"
I am getting the following error while importing it:

Some SBT dependencies are ignored

I use SBT to manage the dependencies of my project, but I can't find the reason why some dependencies are randomly ignored, even though they exist in the .ivy2/cache directory. I tried deleting the contents of that directory and retrying, but I still have the same problem.
My SBT version is 0.13.15. Here is an example:
import org.scalatra.sbt._
import org.scalatra.sbt.PluginKeys._
import ScalateKeys._
val ScalatraVersion = "2.4.1"
ScalatraPlugin.scalatraSettings
scalateSettings
organization := "com.*****"
name := "****"
version := "0.1.0-SNAPSHOT"
scalaVersion := "2.10.5"
val sparkVersion = "1.6.0"
resolvers += Classpaths.typesafeReleases
libraryDependencies ++= Seq(
"org.scalatra" %% "scalatra-json" % ScalatraVersion,
"org.json4s" %% "json4s-jackson" % "3.2.11",
"org.scalatra" %% "scalatra" % ScalatraVersion,
"org.scalatra" %% "scalatra-scalate" % ScalatraVersion,
"org.scalatra" %% "scalatra-specs2" % ScalatraVersion % "test",
"ch.qos.logback" % "logback-classic" % "1.1.5" % "runtime",
"org.eclipse.jetty" % "jetty-webapp" % "9.2.15.v20160210" % "container",
"javax.servlet" % "javax.servlet-api" % "3.1.0" % "provided"
)
libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion
libraryDependencies += "org.scalaz" %% "scalaz-core" % "7.2.14"
// Scalate template configuration for the Compile scope: a single TemplateConfig
// rooted at <sourceDirectory>/webapp/WEB-INF/templates.
scalateTemplateConfig in Compile := {
// Value of `sourceDirectory in Compile`, used as the base for the template path.
val base = (sourceDirectory in Compile).value
Seq(
TemplateConfig(
base / "webapp" / "WEB-INF" / "templates",
Seq.empty, /* default imports should be added here */
Seq(
// Binds the name "context" to ScalatraRenderContext, with its members imported and marked implicit.
Binding("context", "_root_.org.scalatra.scalate.ScalatraRenderContext", importMembers = true, isImplicit = true)
), /* add extra bindings here */
Some("templates")
)
)
}
enablePlugins(JettyPlugin)
In my example, sometimes the scalatra-json dependency is ignored, and when I tried again with a new project, the Spark dependencies were ignored instead.
I finally found the solution to my issue: the project had to be converted to an Eclipse project through SBT, using the sbt eclipse command.
Initially the Scalatra project was created via SBT with this command:
sbt new scalatra/scalatra-sbt.g8
The trick is to eclipsify the project before beginning to import dependencies.

Stop SBT from including plugin jars in build

How can I tell SBT to not include JARs from SBT plugins in my project's build?
I've noticed that plugin JARs, such as graphSettings, sbt-git and the like, get packaged in my final build.
Additionally, even if I set a particular dependency as "provided", it still gets packaged in build.
Thank you all in advance.
My Build.scala file:
// sbt build definition (Build.scala style) for the "dal" project: default settings
// plus SbtOneJar settings, project metadata, resolvers and library dependencies.
object DALBuild extends Build {
// NOTE(review): this expression sits in the object body and is not part of the settings
// passed to Project below, so it presumably has no effect on the build — confirm.
sbtPlugin := true
// Shared version numbers referenced by the dependency declarations below.
val akkaVersion = "2.2.3"
val sprayVersion = "1.2.1"
val scalatraVersion = "2.2.2"
// The single project of this build, rooted at the build's base directory.
lazy val dal = Project(
id = "dal",
base = file("."),
settings = Project.defaultSettings ++ SbtOneJar.oneJarSettings ++ Seq(
name := "DAL",
organization := "com.foo.bar",
version := "0.5.28-SNAPSHOT",
scalaVersion := "2.10.4",
scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature"),
// Entry point used for the one-jar package.
mainClass in SbtOneJar.oneJar := Some("com.foo.bar.http.server.BootDAL"),
retrieveManaged := true,
parallelExecution in Test := false,
// `run` stays in the sbt JVM; tests are forked into a separate JVM.
fork in run := false,
fork in Test := true,
javaOptions in run += "-Dlogback.configurationFile=logback-dev.xml",
javaOptions in Test += "-Dlogback.configurationFile=logback-test.xml",
javaOptions in run += "-DUssdDb.env=dev",
resolvers += "Typesafe Releases" at "http://repo.typesafe.com/typesafe/releases",
resolvers += "sbt-idea-repo" at "http://mpeltonen.github.com/maven/",
resolvers += "Sonatype OSS Snapshots" at "http://oss.sonatype.org/content/repositories/snapshots/",
resolvers += "spray repo" at "http://repo.spray.io",
libraryDependencies += "com.typesafe.akka" % "akka-actor_2.10" % akkaVersion,
libraryDependencies += "com.typesafe.slick" % "slick_2.10" % "1.0.0",
libraryDependencies += "net.sourceforge.jtds" % "jtds" % "1.2.4",
libraryDependencies += "org.apache.cxf" % "cxf-rt-frontend-jaxws" % "2.7.5" exclude("commons-logging", "commons-logging"),
libraryDependencies += "org.apache.cxf" % "cxf-rt-transports-http-hc" % "2.7.5" exclude("commons-logging", "commons-logging"),
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.0.13",
libraryDependencies += "org.apache.tomcat" % "tomcat-jdbc" % "7.0.30",
libraryDependencies += "net.sourceforge.expectj" % "expectj" % "2.0.1" exclude("commons-logging", "commons-logging"),
libraryDependencies += "commons-net" % "commons-net" % "3.2",
libraryDependencies += "commons-beanutils" % "commons-beanutils" % "1.9.1" exclude("commons-logging", "commons-logging"),
// commons-logging is excluded from the transitive dependencies above and declared
// here in the "provided" configuration.
libraryDependencies += "commons-logging" % "commons-logging" % "1.1.1" % "provided",
libraryDependencies += "org.slf4j" % "jcl-over-slf4j" % "1.7.7",
libraryDependencies += "org.scalatra" %% "scalatra" % scalatraVersion,
libraryDependencies += "org.scalatra" %% "scalatra-swagger" % scalatraVersion exclude("org.slf4j", "slf4j-log4j12"),
libraryDependencies += "org.scalatra" %% "scalatra-scalate" % scalatraVersion,
libraryDependencies += "org.scalatra" %% "scalatra-json" % scalatraVersion,
libraryDependencies += "org.eclipse.jetty" % "jetty-webapp" % "9.0.2.v20130417",
libraryDependencies += "org.json4s" %% "json4s-jackson" % "3.2.4",
libraryDependencies += "org.json4s" %% "json4s-native" % "3.2.4",
libraryDependencies += "io.spray" % "spray-client" % sprayVersion,
libraryDependencies += "org.scalatest" % "scalatest_2.10" % "1.9.1" % "test",
libraryDependencies += "org.scalatra" %% "scalatra-scalatest" % scalatraVersion % "test",
libraryDependencies += "com.typesafe.akka" % "akka-testkit_2.10" % akkaVersion % "test",
// Test setup hook: reflectively calls org.slf4j.LoggerFactory.getLogger("ROOT")
// through the class loader handed to the hook.
testOptions += Setup(cl => cl.loadClass("org.slf4j.LoggerFactory").
getMethod("getLogger",cl.loadClass("java.lang.String")).
invoke(null,"ROOT"))
)
)
// Appends the graphSettings of the net.virtualvoid.sbt.graph plugin to the project.
.settings(net.virtualvoid.sbt.graph.Plugin.graphSettings: _*)
}
This looks like an issue with SbtOneJar - you may want to file an issue with them directly.
You might be able to change the settings, e.g. mappings in oneJar [1], although your mileage may vary. Alternatively, look for a plugin that supports the provided configuration [2].
[1] https://github.com/sbt/sbt-onejar/blob/master/src/main/scala/com/github/retronym/SbtOneJar.scala#L37
[2] https://github.com/sbt/sbt-assembly#-provided-configuration