Not able to run code with spark-submit - Scala

I have this Scala code which I want to run from the terminal using the spark-submit command. It runs without any problem in the IntelliJ IDE.
Code
package com.scryAnalytics.NLPAnnotationController.Work
import java.net.MalformedURLException
import java.util.{ArrayList, Arrays}
import com.scryAnalytics.NLPAnnotationController.Configuration.{VOCPConstants, VocpConfiguration}
import com.scryAnalytics.NLPAnnotationController.DAO.NLPEntitiesDAO
import com.scryAnalytics.NLPGeneric.{NLPEntities, _}
import com.vocp.ner.main.GateNERImpl
import gate.util.GateException
import org.apache.hadoop.hbase.client.{HBaseAdmin, Put}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{MultiTableOutputFormat, TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HTableDescriptor, TableName}
import org.apache.hadoop.mapreduce.Job
import org.apache.log4j.Logger
import org.apache.spark.{SparkConf, SparkContext}
class NLPProcessingLog {
var log: Logger = Logger.getLogger(classOf[NLPProcessingLog])
log.info("Logger Initialized .....")
}
object NlpProcessing {
val logger = new NLPProcessingLog
@throws(classOf[Exception])
def nlpAnnotationExtraction(conf: org.apache.hadoop.conf.Configuration, batchString: String): Int = {
logger.log.info("In Main Object..")
//Initializing Spark Context
val sc = new SparkContext(new SparkConf().setAppName("NLPAnnotationController").setMaster("local"))
val batchId =
if (batchString == "newbatch")
java.lang.Long.toString(System.currentTimeMillis())
else batchString
conf.set("batchId", batchId)
val inputCfs = Arrays.asList(conf.get(VOCPConstants.INPUTCOLUMNFAMILIES).split(","): _*)
try {
conf.set(TableInputFormat.INPUT_TABLE, conf.get(VOCPConstants.INPUTTABLE))
conf.set(TableOutputFormat.OUTPUT_TABLE, conf.get(VOCPConstants.OUTPUTTABLE))
val job: Job = Job.getInstance(conf, "NLPAnnotationJob")
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, conf.get(VOCPConstants.OUTPUTTABLE))
job.setOutputFormatClass(classOf[MultiTableOutputFormat])
val admin = new HBaseAdmin(conf)
if (!admin.isTableAvailable(conf.get(VOCPConstants.OUTPUTTABLE))) {
val tableDesc = new HTableDescriptor(TableName.valueOf(conf.get(VOCPConstants.OUTPUTTABLE)))
admin.createTable(tableDesc)
}
val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
classOf[org.apache.hadoop.hbase.client.Result])
val processedFilteredRDD = hBaseRDD.map(x => x._2).filter { result =>
val flag = Bytes.toString(result.getValue(Bytes.toBytes("f"),
Bytes.toBytes("is_processed")))
(flag == null) || (flag == "0")
}
println(processedFilteredRDD.count())
val messageRDD = processedFilteredRDD.filter { x => x != null }.map { result =>
val message = Bytes.toString(result.getValue(Bytes.toBytes("p"),
Bytes.toBytes("message")))
(Bytes.toString(result.getRow()), message)
}
println("Number of partitions " + messageRDD.getNumPartitions)
val pluginHome = conf.get(VOCPConstants.GATE_PLUGIN_ARCHIVE)
val requiredNLPEntities = new ArrayList[NLPEntities]()
requiredNLPEntities.add(NLPEntities.POS_TAGGER)
requiredNLPEntities.add(NLPEntities.VP_CHUNKER)
requiredNLPEntities.add(NLPEntities.NP_CHUNKER)
val nlpGenericRDD = messageRDD.mapPartitions { iter =>
val nlpModule = new GateGenericNLP(pluginHome, requiredNLPEntities)
iter.map { x =>
val nlpGenericJson = nlpModule.generateNLPEntities(x._2)
val genericNLPObject = Utility.jsonToGenericNLP(nlpGenericJson)
(x._1, x._2, genericNLPObject)
}
}
val requiredNEREntities = new ArrayList[String]()
requiredNEREntities.add("DRUG")
requiredNEREntities.add("SE")
requiredNEREntities.add("REG")
requiredNEREntities.add("ALT_THERAPY")
requiredNEREntities.add("ALT_DRUG")
val nlpRDD = nlpGenericRDD.mapPartitions { iter =>
val nerModule = new GateNERImpl(pluginHome, requiredNEREntities)
iter.map { x =>
val nerJson = nerModule.generateNER(x._2, Utility.objectToJson(x._3))
val nerJsonObject = Utility.jsonToGateNer(nerJson)
val nlpEntities: NLPEntitiesDAO = new NLPEntitiesDAO
nlpEntities.setToken(x._3.getToken())
nlpEntities.setSpaceToken(x._3.getSpaceToken())
nlpEntities.setSentence(x._3.getSentence())
nlpEntities.setSplit(x._3.getSplit())
nlpEntities.setVG(x._3.getVG)
nlpEntities.setNounChunk(x._3.getNounChunk)
nlpEntities.setDRUG(nerJsonObject.getDRUG())
nlpEntities.setREG(nerJsonObject.getREG())
nlpEntities.setSE(nerJsonObject.getSE())
nlpEntities.setALT_DRUG(nerJsonObject.getALT_DRUG())
nlpEntities.setALT_THERAPY(nerJsonObject.getALT_THERAPY())
(x._1, nlpEntities)
}
}
//outputRDD.foreach(println)
val newRDD = nlpRDD.map { k => convertToPut(k) }
newRDD.saveAsNewAPIHadoopDataset(job.getConfiguration())
return 0
} catch {
case e: MalformedURLException => {
e.printStackTrace()
return 1
}
case e: GateException =>
{
e.printStackTrace()
return 1
}
}
}
def convertToPut(genericNlpWithRowKey: (String, NLPEntitiesDAO)): (ImmutableBytesWritable, Put) = {
val rowkey = genericNlpWithRowKey._1
val genericNLP = genericNlpWithRowKey._2
val put = new Put(Bytes.toBytes(rowkey))
val genCFDataBytes = Bytes.toBytes("gen")
val nerCFDataBytes = Bytes.toBytes("ner")
val flagCFDataBytes = Bytes.toBytes("f")
put.add(genCFDataBytes, Bytes.toBytes("token"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getToken()))));
put.add(genCFDataBytes, Bytes.toBytes("spaceToken"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getSpaceToken()))));
put.add(genCFDataBytes, Bytes.toBytes("sentence"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getSentence()))));
put.add(genCFDataBytes, Bytes.toBytes("verbGroup"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getVG()))));
put.add(genCFDataBytes, Bytes.toBytes("split"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getSplit()))));
put.add(genCFDataBytes, Bytes.toBytes("nounChunk"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getNounChunk()))));
put.add(nerCFDataBytes, Bytes.toBytes("drug"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getDRUG()))))
put.add(nerCFDataBytes, Bytes.toBytes("sideEffect"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getSE()))))
put.add(nerCFDataBytes, Bytes.toBytes("regimen"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getREG()))))
put.add(nerCFDataBytes, Bytes.toBytes("altTherapy"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getALT_THERAPY()))))
put.add(nerCFDataBytes, Bytes.toBytes("altDrug"),
Bytes.toBytes(Utility.objectToJson((genericNLP.getALT_DRUG()))))
put.add(flagCFDataBytes, Bytes.toBytes("is_processed"),
Bytes.toBytes("1"))
put.add(flagCFDataBytes, Bytes.toBytes("dStatus"),
Bytes.toBytes("0"))
put.add(flagCFDataBytes, Bytes.toBytes("rStatus"),
Bytes.toBytes("0"))
put.add(flagCFDataBytes, Bytes.toBytes("adStatus"),
Bytes.toBytes("0"))
put.add(flagCFDataBytes, Bytes.toBytes("atStatus"),
Bytes.toBytes("0"))
(new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put)
}
def pipeLineExecute(args: Array[String]): Int = {
var batchString = ""
val usage = "Usage: NLPAnnotationController" + " -inputTable tableName -outputTable tableName" +
" -batchId batchId / -newbatch \n"
if (args.length == 0) {
System.err.println(usage)
return -1
}
val conf = VocpConfiguration.create
for (i <- 0 until args.length by 2) {
if ("-inputTable" == args(i)) {
conf.set(VOCPConstants.INPUTTABLE, args(i + 1))
} else if ("-outputTable" == args(i)) {
conf.set(VOCPConstants.OUTPUTTABLE, args(i + 1))
} else if ("-batchId" == args(i)) {
batchString = args(i + 1)
} else if ("-newbatch" == args(i)) {
batchString = "newbatch"
} else {
throw new IllegalArgumentException("arg " + args(i) + " not recognized")
}
}
val result = nlpAnnotationExtraction(conf, batchString)
result
}
def main(args: Array[String]) {
val res = pipeLineExecute(args)
System.exit(res)
}
}
I am trying to make a fat jar file to be executed using spark-submit.
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.scryAnalytics</groupId>
<artifactId>NLPAnnotationController</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>NLPAnnotationController2</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>2.6.0-cdh5.7.2</hadoop.version>
<jdk.version>1.7</jdk.version>
<sdk.version>2.10.5</sdk.version>
<hbase.version>0.98.16-hadoop2</hbase.version>
</properties>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>scala-tools.org</id>
<name>Scala-tools Maven2 Repository</name>
<url>http://scala-tools.org/repo-releases</url>
</pluginRepository>
</pluginRepositories>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.10.5</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.10</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-spark</artifactId>
<version>1.2.0-cdh5.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit-dep</artifactId>
<version>4.8.2</version>
</dependency>
<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gate-core</artifactId>
<version>8.1</version>
</dependency>
<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gate-compiler-jdt</artifactId>
<version>4.3.2-P20140317-1600</version>
</dependency>
<dependency>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.4.8</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>com.scryAnalytics</groupId>
<artifactId>NLPGeneric</artifactId>
<version>1.1</version>
</dependency>
<dependency>
<groupId>NER</groupId>
<artifactId>NER</artifactId>
<version>1.2</version>
</dependency>
</dependencies>
<build>
<finalName>NLPAnnotationController</finalName>
<plugins>
<!-- download source code in Eclipse, best practice -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.9</version>
<configuration>
<downloadSources>true</downloadSources>
<downloadJavadocs>false</downloadJavadocs>
</configuration>
</plugin>
<!-- Set a compiler level -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>${jdk.version}</source>
<target>${jdk.version}</target>
</configuration>
</plugin>
<!-- Maven Assembly Plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<configuration>
<!-- get all project dependencies -->
<descriptors>
<descriptor>src/main/assembly/hadoop-job.xml</descriptor>
</descriptors>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<!-- MainClass in mainfest make a executable jar -->
<archive>
<manifest>
<mainClass>com.scryAnalytics.NLPAnnotationController.Work.NlpProcessing</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- bind to the packaging phase -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>conf</directory>
</resource>
</resources>
</build>
</project>
Error
spark-submit target/NLPAnnotationController-job.jar -inputTable posts -outputTable posts -batchId 1
java.lang.ClassNotFoundException: com.scryAnalytics.NLPAnnotationController.Work.NlpProcessing
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:278)
at org.apache.spark.util.Utils$.classForName(Utils.scala:174)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:689)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
As I said, the code works perfectly fine in IntelliJ.
Any help would be appreciated.

Related

I cannot use a dynamic value at request time in Gatling Scala

Please help me.
I wrote this code. When I try to generate a dynamic value and use it in the request body, the value is not resolved and I cannot use it.
The request body looks like this:
body:FormUrlEncodedRequestBody{patchedContentType='null', charset=UTF-8, content=age=30&name=Test+Name&description=Test+Request&token1=%23%7Btoken1%7D&token2=%23%7Btoken2%7D}
I wrote two example feeders, userFeeder1 and userFeeder2.
My code is below:
package tests
import io.gatling.core.Predef.*
import io.gatling.core.feeder.Feeder
import io.gatling.core.scenario.Simulation
import io.gatling.http.Predef.*
class FirstTestCase extends Simulation {
private val httpProtocol = http
.baseUrl("https://test.test.com")
// .inferHtmlResources(BlackList(""".*\.js""", """.*\.css""", """.*\.gif""", """.*\.jpeg""", """.*\.jpg""", """.*\.ico""", """.*\.woff""", """.*\.woff2""", """.*\.(t|o)tf""", """.*\.png""", """.*detectportal\.firefox\.com.*"""), WhiteList())
.acceptHeader("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
.acceptEncodingHeader("gzip, deflate")
.acceptLanguageHeader("en-US,en;q=0.9,az;q=0.8,tr;q=0.7")
.contentTypeHeader("application/x-www-form-urlencoded")
.originHeader("https://test.test.com")
.upgradeInsecureRequestsHeader("1")
.userAgentHeader("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")
private val headers_0 = Map(
"sec-ch-ua" -> """Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104""",
"sec-ch-ua-mobile" -> "?0",
"sec-ch-ua-platform" -> "macOS",
"sec-fetch-dest" -> "document",
"sec-fetch-mode" -> "navigate",
"sec-fetch-site" -> "same-site",
"sec-fetch-user" -> "?1"
)
val Age = "30"
val Name = "Test Name"
val Description = "Test Request"
def generateToken: Map[String, String] = {
val tokens = (Age.length + Age + Name.length + Name + Description.length + Description).toString().toLowerCase()
Map(
"token1" -> tokens
)
}
val userFeeder1: Feeder[String] = Iterator.continually(generateToken)
val userFeeder2: Feeder[Any] =
Iterator.continually(
Map(
"token2" -> "dfngvdndksfdslfkdsergerfewrwehewrhwefnsdnf"
)
)
val search =
exec(http("request_0")
.post("/test/test")
.formParam("age", _ => Age)
.formParam("name", _ => Name)
.formParam("description", _ => Description)
.formParam("token1", _ => "#{token1}")
.formParam("token2", _ => "#{token2}")
)
val scn = scenario("Scenario Name")
.feed(userFeeder1)
.feed(userFeeder2)
.exec(search)
{
setUp(
scn.inject(rampUsers(1).during(1))
).protocols(httpProtocol)
}
}
The pom.xml is below:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.gatling.tests</groupId>
<artifactId>gatling_scala_project</artifactId>
<!-- <version>2.0-SNAPSHOT</version>-->
<!-- <properties>-->
<!-- <maven.compiler.source>1.8</maven.compiler.source>-->
<!-- <maven.compiler.target>1.8</maven.compiler.target>-->
<!-- <encoding>UTF-8</encoding>-->
<!-- <gatling.version>3.8.4</gatling.version>-->
<!-- <gatling-maven-plugin.version>4.2.7</gatling-maven-plugin.version>-->
<!-- <maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>-->
<!-- <maven-jar-plugin.version>3.2.2</maven-jar-plugin.version>-->
<!-- </properties>-->
<version>3.8.4</version>
<properties>
<!-- use the following if you're compiling with JDK 8-->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<!-- comment the 2 lines above and uncomment the line bellow if you're compiling with JDK 11 or 17 -->
<!-- <maven.compiler.release>11</maven.compiler.release>-->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<gatling.version>${project.version}</gatling.version>
<gatling-maven-plugin.version>4.2.7</gatling-maven-plugin.version>
<maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>
<maven-jar-plugin.version>3.2.2</maven-jar-plugin.version>
</properties>
<dependencies>
<dependency>
<groupId>io.gatling.highcharts</groupId>
<artifactId>gatling-charts-highcharts</artifactId>
<version>${gatling.version}</version>
</dependency>
<dependency>
<groupId>io.gatling</groupId>
<artifactId>gatling-app</artifactId>
<version>${gatling.version}</version>
</dependency>
<dependency>
<groupId>io.gatling</groupId>
<artifactId>gatling-recorder</artifactId>
<version>${gatling.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.11</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>23.0</version>
</dependency>
</dependencies>
<build>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<groupId>io.gatling</groupId>
<artifactId>gatling-maven-plugin</artifactId>
<version>${gatling-maven-plugin.version}</version>
</plugin>
</plugins>
</build>
</project>
Do I need to download any plugins?

itext java.lang.NoClassDefFoundError: DefaultAccessibilityProperties

I don't understand why I get an exception in this very basic test of iText:
package com.itextpdf.testpdf4;
import com.itextpdf.io.font.FontConstants;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.List;
import com.itextpdf.layout.element.ListItem;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.text.DocumentException;
import com.itextpdf.licensekey.LicenseKey;
import com.itextpdf.test.annotations.WrapToTest;
import java.io.File;
import java.io.IOException;
@WrapToTest
public class HelloWorld {
public static final String DEST = "result/hello.pdf";
public static void main(String[] args)
throws DocumentException, IOException {
LicenseKey.loadLicenseFile("C:\\dev\\testPDF4\\src\\main\\java\\com\\itextpdf\\testpdf4\\itextkey1544447451310_0.xml");
File file = new File(DEST);
file.getParentFile().mkdirs();
new HelloWorld().createPdf(DEST);
}
public void createPdf(String dest) throws DocumentException, IOException {
PdfWriter writer = new PdfWriter(dest);
//Initialize PDF document
PdfDocument pdf = new PdfDocument(writer);
// Initialize document
Document document = new Document(pdf);
// Create a PdfFont
PdfFont font = PdfFontFactory.createFont(FontConstants.TIMES_ROMAN);
// Add a Paragraph
document.add(new Paragraph("iText is:").setFont(font));
// Create a List
List list = new List()
.setSymbolIndent(12)
.setListSymbol("\u2022")
.setFont(font);
// Add ListItem objects
list.add(new ListItem("Never gonna give you up"))
.add(new ListItem("Never gonna let you down"))
.add(new ListItem("Never gonna run around and desert you"))
.add(new ListItem("Never gonna make you cry"))
.add(new ListItem("Never gonna say goodbye"))
.add(new ListItem("Never gonna tell a lie and hurt you"));
// Add the list
document.add(list);
//Close document
document.close();
}
}
Exception in thread "main" java.lang.NoClassDefFoundError: com/itextpdf/kernel/pdf/tagutils/DefaultAccessibilityProperties
at com.itextpdf.testpdf4.HelloWorld.createPdf(HelloWorld.java:56)
(line 56 is: document.add(new Paragraph("iText is:").setFont(font)); )
This code comes from here: https://developers.itextpdf.com/fr/content/itext-7-jump-start-tutorial/examples/chapter-1 -> C01E02_RickAstley.java
In the pom.xml:
<modelVersion>4.0.0</modelVersion>
<groupId>com.itextpdf</groupId>
<artifactId>testPDF4</artifactId>
<version>1.0</version>
(package is : package com.itextpdf.testpdf4;)
Here is the complete pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.itextpdf</groupId>
<artifactId>testPDF4</artifactId>
<version>1.0</version>
<properties>
<itext.version>7.1.4</itext.version>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<junit.version>4.12</junit.version>
</properties>
<repositories>
<repository>
<id>itext</id>
<name>iText Repository - releases</name>
<url>https://repo.itextsupport.com/releases</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>kernel</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>io</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>layout</artifactId>
<version>7.1.4</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>forms</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>pdfa</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>pdftest</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.18</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-licensekey</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-sandbox-parent</artifactId>
<version>2</version>
<type>pom</type>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<excludes>
<exclude>**/*.p12</exclude>
</excludes>
</resource>
</resources>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<excludePackageNames>com.itextpdf.xml</excludePackageNames>
</configuration>
<executions>
<execution>
<id>attach-javadocs</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>external.atlassian.jgitflow</groupId>
<artifactId>jgitflow-maven-plugin</artifactId>
<version>1.0-m5.1</version>
<configuration>
<!-- see goals wiki page for configuration options -->
<flowInitContext>
<masterBranchName>master</masterBranchName>
<developBranchName>develop</developBranchName>
<featureBranchPrefix>feature/</featureBranchPrefix>
<releaseBranchPrefix>release/</releaseBranchPrefix>
<hotfixBranchPrefix>hotfix/</hotfixBranchPrefix>
<versionTagPrefix />
</flowInitContext>
<allowUntracked>true</allowUntracked>
<autoVersionSubmodules>true</autoVersionSubmodules>
<updateDependencies>true</updateDependencies>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>public</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
<configuration>
<excludes>
<exclude>com/itextpdf/xml/**</exclude>
<exclude>**/*.p12</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>internal</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
<configuration>
<excludes>
<exclude>**/*.p12</exclude>
</excludes>
<classifier>INTERNAL</classifier>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Does anyone see something wrong? I don't.
Thanks
You're mixing different core itext artifact versions, 7.0.4 and 7.1.4.
...
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>io</artifactId>
<version>7.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>layout</artifactId>
<version>7.1.4</version>
<type>jar</type>
</dependency>
...
Don't mix these. Use the same version of all your core itext artifacts.
By the way, you put your test project into the itext group:
<groupId>com.itextpdf</groupId>
<artifactId>testPDF4</artifactId>
You shouldn't do that, especially not for projects that go to production.
Thanks a lot mkl;
Bad group ID, bad versions, and a wrong nbaction.xml

Unable To Read Messages From Kafka Topic Using Spark Streaming Kafka

The following code is meant to read messages from Kafka using spark-submit.
The code executes and terminates without errors but reads no messages (the output file is empty and the log inside rdd.foreachPartition does not print). Please indicate what I am missing.
package hive;
import java.net.URI;
import java.util.*;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.StreamingContext;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import scala.Tuple2;
public class SparkKafka1 {
private static final Logger logger = LoggerFactory.getLogger(SparkKafka1.class);
public static void main(String[] args) {
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "http://192.168.1.214:9092,http://192.168.1.214:9093");
kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//kafkaParams.put("group.id", "StreamingGroup");
kafkaParams.put("auto.offset.reset", "smallest");
kafkaParams.put("enable.auto.commit", false);
String user = "ankit";
String password = "noida#123";
Collection<String> topics = Arrays.asList("StreamingTopic");
SparkConf conf = new SparkConf().setMaster("spark://192.168.1.214:7077")
.set("spark.deploy.mode", "cluster").set("user",user)
.set("password",password).set("spark.driver.memory", "1g").set("fs.defaultFS", "hdfs://192.168.1.214:9000")
.setAppName("NetworkWordCount");
JavaStreamingContext streamingContext = new JavaStreamingContext(conf,new Duration(500));
JavaInputDStream<ConsumerRecord<String, String>> stream =
KafkaUtils.createDirectStream(
streamingContext,
LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
);
stream.mapToPair(record -> new Tuple2<>(record.key(), record.value()));
stream.foreachRDD(rdd ->{
rdd.foreachPartition(item ->{
while (item.hasNext()) {
System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>"+item.next());
logger.info("next item="+item.next());
}
});
});
logger.info("demo log="+stream.count());
stream.foreachRDD(rdd -> {
OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
rdd.foreachPartition(consumerRecords -> {
OffsetRange o = offsetRanges[TaskContext.get().partitionId()];
System.out.println(
o.topic() + " " + o.partition() + " " + o.fromOffset() + " " + o.untilOffset());
rdd.saveAsTextFile("/home/ankit/work/warehouse/Manish.txt");
logger.info("tokenizing inside processElement method");
});
});
}
}
The following is the pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>SparkTest</groupId>
<artifactId>SparkTest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>SparkTest</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.scala-lang/scala-library -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
<scope>provided </scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.1.0</version>
<scope>provided </scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>2.1.0</version>
<scope>provided </scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.0</version>
<scope>provided </scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume_2.11</artifactId>
<version>2.1.0</version>
<scope>provided </scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<!-- or whatever version you use -->
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.0.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/LICENSE</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
<filter>
<artifact>org.apache.spark:spark-streaming-kafka-0-10_2.11</artifact>
<includes> <include>org/apache/spark/streaming/kafka010/**</include>
</includes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
The following command submits the job:
./spark-submit --class hive.SparkKafka1 --master spark://192.168.1.214:6066 --deploy-mode cluster --supervise --executor-memory 2G --total-executor-cores 4 hdfs://192.168.1.214:9000/input/SparkTest-0.0.1-SNAPSHOT.jar
I haven't run this program, but it seems you are using Kafka 0.10.2, and smallest is deprecated; please use earliest instead.
You need to add these two calls:
streamingContext.start(); // start the streaming application
streamingContext.awaitTermination(); // keep the application from exiting
Also, I see you use http:// values for bootstrap.servers. Delete the http:// prefix (i.e. use "192.168.1.214:9092,192.168.1.214:9093").
By the way, if you set a Spark conf value in the code, it is useless to set the same value on the command line.
Just check it. If the error persists, please let me know.

“value $ is not a member of StringContext” - Missing Scala plugin?

I'm using Maven with the Scala archetype. I'm getting this error:
“value $ is not a member of StringContext”
I have already tried adding several things to the pom.xml, but nothing has worked very well...
My code:
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
// To see less warnings
import org.apache.log4j._
Logger.getLogger("org").setLevel(Level.ERROR)
// Start a simple Spark Session
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder().getOrCreate()
// Prepare training and test data.
val data = spark.read.option("header","true").option("inferSchema","true").format("csv").load("USA_Housing.csv")
// Check out the Data
data.printSchema()
// See an example of what the data looks like
// by printing out a Row
val colnames = data.columns
val firstrow = data.head(1)(0)
println("\n")
println("Example Data Row")
for(ind <- Range(1,colnames.length)){
println(colnames(ind))
println(firstrow(ind))
println("\n")
}
////////////////////////////////////////////////////
//// Setting Up DataFrame for Machine Learning ////
//////////////////////////////////////////////////
// A few things we need to do before Spark can accept the data!
// It needs to be in the form of two columns
// ("label","features")
// This will allow us to join multiple feature columns
// into a single column of an array of feautre values
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.linalg.Vectors
// Rename Price to label column for naming convention.
// Grab only numerical columns from the data
val df = data.select(data("Price").as("label"),$"Avg Area Income",$"Avg Area House Age",$"Avg Area Number of Rooms",$"Area Population")
// An assembler converts the input values to a vector
// A vector is what the ML algorithm reads to train a model
// Set the input columns from which we are supposed to read the values
// Set the name of the column where the vector will be stored
val assembler = new VectorAssembler().setInputCols(Array("Avg Area Income","Avg Area House Age","Avg Area Number of Rooms","Area Population")).setOutputCol("features")
// Use the assembler to transform our DataFrame to the two columns
val output = assembler.transform(df).select($"label",$"features")
// Create a Linear Regression Model object
val lr = new LinearRegression()
// Fit the model to the data
// Note: Later we will see why we should split
// the data first, but for now we will fit to all the data.
val lrModel = lr.fit(output)
// Print the coefficients and intercept for linear regression
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
// Summarize the model over the training set and print out some metrics!
// Explore this in the spark-shell for more methods to call
val trainingSummary = lrModel.summary
println(s"numIterations: ${trainingSummary.totalIterations}")
println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
trainingSummary.residuals.show()
println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
println(s"MSE: ${trainingSummary.meanSquaredError}")
println(s"r2: ${trainingSummary.r2}")
and my pom.xml is this:
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>test</groupId>
<artifactId>outrotest</artifactId>
<version>1.0-SNAPSHOT</version>
<name>${project.artifactId}</name>
<description>My wonderfull scala app</description>
<inceptionYear>2015</inceptionYear>
<licenses>
<license>
<name>My License</name>
<url>http://....</url>
<distribution>repo</distribution>
</license>
</licenses>
<properties>
<maven.compiler.source>1.6</maven.compiler.source>
<maven.compiler.target>1.6</maven.compiler.target>
<encoding>UTF-8</encoding>
<scala.version>2.11.5</scala.version>
<scala.compat.version>2.11</scala.compat.version>
</properties>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>com.databricks</groupId>
<artifactId>spark-csv_2.11</artifactId>
<version>1.5.0</version>
</dependency>
<!-- Test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.specs2</groupId>
<artifactId>specs2-junit_${scala.compat.version}</artifactId>
<version>2.4.16</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.specs2</groupId>
<artifactId>specs2-core_${scala.compat.version}</artifactId>
<version>2.4.16</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.compat.version}</artifactId>
<version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<!-- see http://davidb.github.com/scala-maven-plugin -->
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<!--<arg>-make:transitive</arg>-->
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18.1</version>
<configuration>
<useFile>false</useFile>
<disableXmlReport>true</disableXmlReport>
<!-- If you have classpath issue like NoDefClassError,... -->
<!-- useManifestOnlyJar>false</useManifestOnlyJar -->
<includes>
<include>**/*Test.*</include>
<include>**/*Suite.*</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</project>
I have no idea how to fix it. Does anybody have any idea?
Add this; it will work:
val spark = SparkSession.builder().getOrCreate()
import spark.implicits._ // << add this
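For context, a minimal sketch of the ordering, reusing the CSV load from the question (a sketch, not a drop-in fix):
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().getOrCreate()
import spark.implicits._ // brings the $"..." column syntax into scope

val data = spark.read.option("header", "true").option("inferSchema", "true").csv("USA_Housing.csv")
val df = data.select(
  data("Price").as("label"),
  $"Avg Area Income", $"Avg Area House Age",
  $"Avg Area Number of Rooms", $"Area Population"
)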
You can use the col function instead; just import it like this:
import org.apache.spark.sql.functions.col
Then change $"column" to col("column").
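For example, the select from the question could then look roughly like this (a sketch; data is the DataFrame loaded from the CSV in the question):
import org.apache.spark.sql.functions.col

val df = data.select(
  data("Price").as("label"),
  col("Avg Area Income"),
  col("Avg Area House Age"),
  col("Avg Area Number of Rooms"),
  col("Area Population")
)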
Hope it helps
@Apurva's answer initially worked for me in that the error vanished from IntelliJ,
but then it resulted in a "Could not find implicit value for spark" error during the sbt compile phase.
I found a strange workaround: importing spark.implicits._ from the SparkSession referenced by the DataFrame instead of the one obtained from getOrCreate:
import df.sparkSession.implicits._
where df is a DataFrame
This could be because my code was placed inside a case class that received an implicit val spark: SparkSession parameter, but I'm not really sure why this fix worked for me.
I'm using Spark 1.6. The above answers are great, but unfortunately they don't work in 1.6.
The way I solved it was by using df.col("column-name"):
val df = df_mid
.withColumn("dt", date_format(df_mid.col("timestamp"), "yyyy-MM-dd"))
.filter("dt != 'null'")

java.lang.NoSuchMethodError: scala.reflect.api.JavaUniverse.runtimeMirror

java.lang.NoSuchMethodError: scala.reflect.api.JavaUniverse.runtimeMirror(Ljava/lang/ClassLoader;)Lscala/reflect/api/JavaMirrors$JavaMirror;
at org.elasticsearch.spark.serialization.ReflectionUtils$.org$elasticsearch$spark$serialization$ReflectionUtils$$checkCaseClass(ReflectionUtils.scala:42)
at org.elasticsearch.spark.serialization.ReflectionUtils$$anonfun$checkCaseClassCache$1.apply(ReflectionUtils.scala:84)
It seems like a Scala version incompatibility, but according to the Spark documentation, Spark 2.1.0 with Scala 2.11.8 should be fine.
This is my pom.xml; it is just a test of writing to Elasticsearch from Spark with es-hadoop, and I have no idea how to solve this exception.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>cn.jhTian</groupId>
<artifactId>sparkLink</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>${project.artifactId}</name>
<description>My wonderfull scala app</description>
<inceptionYear>2015</inceptionYear>
<licenses>
<license>
<name>My License</name>
<url>http://....</url>
<distribution>repo</distribution>
</license>
</licenses>
<properties>
<encoding>UTF-8</encoding>
<scala.version>2.11.8</scala.version>
<scala.compat.version>2.11</scala.compat.version>
</properties>
<repositories>
<repository>
<id>ainemo</id>
<name>xylink</name>
<url>http://10.170.209.180:8081/nexus/content/groups/public/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.4</version><!-- 2.64 -->
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!--<dependency>-->
<!--<groupId>org.scala-lang</groupId>-->
<!--<artifactId>scala-compiler</artifactId>-->
<!--<version>${scala.version}</version>-->
<!--</dependency>-->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.4</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
<version>5.3.0 </version>
</dependency>
<!-- Test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.specs2</groupId>
<artifactId>specs2-core_${scala.compat.version}</artifactId>
<version>2.4.16</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.compat.version}</artifactId>
<version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
This is my code:
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
/**
* Created by jhTian on 2017/4/19.
*/
object EsWrite {
def main(args: Array[String]) {
val sparkConf = new SparkConf()
.set("es.nodes", "1.1.1.1")
.set("es.port", "9200")
.set("es.index.auto.create", "true")
.setAppName("es-spark-demo")
val sc = new SparkContext(sparkConf)
val job1 = Job("C开发工程师","http://job.c.com","c公司","10000")
val job2 = Job("C++开发工程师","http://job.c++.com","c++公司","10000")
val job3 = Job("C#开发工程师","http://job.c#.com","c#公司","10000")
val job4 = Job("Java开发工程师","http://job.java.com","java公司","10000")
val job5 = Job("Scala开发工程师","http://job.scala.com","java公司","10000")
// val numbers = Map("one" -> 1, "two" -> 2, "three" -> 3)
// val airports = Map("arrival" -> "Otopeni", "SFO" -> "San Fran")
// val rdd=sc.makeRDD(Seq(numbers,airports))
val rdd=sc.makeRDD(Seq(job1,job2,job3,job4,job5))
rdd.saveToEs("job/info")
sc.stop()
}
}
case class Job(jobName: String, jobUrl: String, companyName: String, salary: String)
Generally, NoSuchMethodError implies the caller was compiled against a different version than was found on the classpath at runtime (or that you have multiple versions on the classpath).
In your case, I'd guess that es-hadoop is built against a different version of Scala. I've not used Maven in a little while, but I think the command you need to get some useful output is mvn dependency:tree. Use the output to see which version of Scala es-hadoop is built with, and then configure your project to use the same Scala version.
To get stable/reproducible builds I'd recommend using something like the maven-enforcer-plugin:
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.4.1</version>
<executions>
<execution>
<id>enforce</id>
<configuration>
<rules>
<dependencyConvergence />
</rules>
</configuration>
<goals>
<goal>enforce</goal>
</goals>
</execution>
</executions>
</plugin>
It can be annoying initially, but once you have all your dependencies sorted out, you shouldn't get issues like this anymore.
Use a dependency like this:
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_2.11</artifactId>
<version>5.2.2</version>
</dependency>
for Spark 2.0 and Scala 2.11.