Here is the code:
package scalafx.ensemble.example.charts
import javafx.scene.chart.PieChart.Data
import scala.util.Random
import scalafx.application.JFXApp
import scalafx.scene.Scene
import scalafx.Includes._
import scalafx.collections.ObservableBuffer
import scalafx.scene.chart.PieChart
import scalafx.scene.input.MouseEvent
object BasicPie extends JFXApp {
// data level 1
val pieChartDataBuffer: ObservableBuffer[Data] = ObservableBuffer(
PieChart.Data("A", 20),
PieChart.Data("B", 30),
PieChart.Data("C", 10),
PieChart.Data("D", 40)
)
pieChartDataBuffer.foreach(
(data: Data) => data.node().onMouseClicked = {
(_: MouseEvent) => pieChart.data = pieChartDataSubBuffer(data.name())
}
)
val pieChart = new PieChart {
data = pieChartDataBuffer
title = "DrillDown Pie Chart"
}
// data level 2
val pieChartDataSubBuffer: Map[String, ObservableBuffer[Data]] = _
Array("A", "B", "C", "D").foreach(
(letter: String) => {
val subData: ObservableBuffer[Data] = _
(1 to 10).foreach(
i => {
val subName = letter + "-" + i
subData.add(PieChart.Data(subName, Random.nextInt(100)))
}
)
pieChartDataSubBuffer.updated(letter, subData)
}
)
pieChartDataSubBuffer.foreach(
(nameDataBufferTuple) => {
nameDataBufferTuple._2.foreach(
(data: Data) => {
data.node().onMouseClicked = {
(_: MouseEvent) => pieChart.data = pieChartDataBuffer
}
}
)
}
)
// set listener for one data node
def drillDownData = (pieData: Data, pie: PieChart) => {
pieData.node().onMouseClicked = (_: MouseEvent) => pie.data = pieChartDataSubBuffer(pieData.name())
}
stage = new JFXApp.PrimaryStage {
title = "Drilldown Pie Chart Example"
scene = new Scene {
root = pieChart
}
}
}
One would expect subData to be initialized with null, but instead I get an error: Unbound placeholder parameter. Why?
You cannot create an uninitialized val, because you can never assign anything useful to it afterwards.
Say you make that explicit:
val subData: ObservableBuffer[Data] = null
subData = ObservableBuffer.empty -> error: reassignment to val!
subData.add(data) -> NullPointerException
You either initialize it as a var
var subData: ObservableBuffer[Data] = _
or (especially in the case of mutable collections) initialize it as an empty buffer
val subData: ObservableBuffer[Data] = ObservableBuffer.empty // comes to mind
Your code suggests that you want the latter.
Just for the record, here is the modified version that compiles:
package scalafx.ensemble.example.charts
import javafx.scene.chart.PieChart.Data
import scala.util.Random
import scalafx.application.JFXApp
import scalafx.scene.Scene
import scalafx.Includes._
import scalafx.collections.ObservableBuffer
import scalafx.scene.chart.PieChart
import scalafx.scene.input.MouseEvent
import scala.collection.mutable.{HashMap => MMap}
object BasicPie extends JFXApp {
stage = new JFXApp.PrimaryStage {
title = "Drilldown Pie Chart Example"
scene = new Scene {
root = {
val pieChartDataBuffer = ObservableBuffer(
PieChart.Data("A", 20),
PieChart.Data("B", 30),
PieChart.Data("C", 10),
PieChart.Data("D", 40)
)
val pieChart = new PieChart {
data = pieChartDataBuffer
title = "DrillDown Pie Chart"
}
def subPieChartData: MMap[String, ObservableBuffer[Data]] = {
val subDataMap: MMap[String, ObservableBuffer[Data]] = MMap.empty
for(letter <- Array("A", "B", "C", "D")) {
val dataBuffer: ObservableBuffer[Data] = ObservableBuffer.empty
for(i <- 1 to 10) {
val subLabel = letter + i
dataBuffer.add(PieChart.Data(subLabel, Random.nextInt(100)))
}
subDataMap.update(letter, dataBuffer)
}
subDataMap
}
def drillDownData = (pie: PieChart, pieData: PieChart.Data) => {
val labelPrefix = pieData.name()
val subData = subPieChartData(pieData.name())
pieData.node().onMouseClicked = (_: MouseEvent) => pie.data = subData
subData.foreach(
(data) => climbUpData(pie, data)
)
}
def climbUpData = (pie: PieChart, pieData: PieChart.Data) => {
val node = pieData.node()
if(node != null) {
node.onMouseClicked = (_: MouseEvent) => pie.data = pieChartDataBuffer
}
}
pieChartDataBuffer.foreach((data: Data) => drillDownData(pieChart, data))
pieChart
}
}
}
}
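As a side note, here is a minimal sketch of building the same level-2 data without a mutable HashMap, reusing the ObservableBuffer and PieChart.Data calls from the answer above (the val name subPieChartData is just illustrative):
// Build an immutable Map from each letter to its sub-chart buffer in one expression.
val subPieChartData: Map[String, ObservableBuffer[Data]] =
  Seq("A", "B", "C", "D").map { letter =>
    val buffer: ObservableBuffer[Data] = ObservableBuffer.empty
    (1 to 10).foreach(i => buffer.add(PieChart.Data(letter + i, Random.nextInt(100))))
    letter -> buffer
  }.toMap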
Related
I define my vals like this:
val config = Config(args)
val product_type = config.product_type
Then I pass product_type as "AA",
and my code is this:
val scores = df.mapPartitions(iterator => {
val inputStream =
if(product_type == "AA" ) {
getClass().getClassLoader().getResourceAsStream("my_aa.hdf5")
}
else {
getClass().getClassLoader().getResourceAsStream("my_bb.hdf5")
}
val multiLayerNetwork: MultiLayerNetwork = KerasModelImport.importKerasSequentialModelAndWeights(inputStream, false)
val wrapped: ParallelInference = new ParallelInference.Builder(multiLayerNetwork).build()
val res = iterator.map(row => {
wrapped.output(row).toDoubleVector
})
res
})
But my inputStream ends up as "my_bb.hdf5", which is not correct; that value comes from the else branch. So why can't my product_type variable be read inside mapPartitions?
I print my product_type value before this code and checked it; it is "AA".
It occurs because I get this variable from an argument in spark-submit.sh,
and it cannot be read from inside mapPartitions.
It works like this:
val scores =
if (product_type == "AA") {
df.mapPartitions(iterator => {
val inputStream = getClass().getClassLoader().getResourceAsStream("AA.hdf5")
val multiLayerNetwork: MultiLayerNetwork = KerasModelImport.importKerasSequentialModelAndWeights(inputStream, false)
val wrapped: ParallelInference = new ParallelInference.Builder(multiLayerNetwork).build()
val res = iterator.map(row => {
wrapped.output(row).toDoubleVector
})
res
})
} else {
df.mapPartitions(iterator => {
val inputStream = getClass().getClassLoader().getResourceAsStream("BB.hdf5")
val multiLayerNetwork: MultiLayerNetwork = KerasModelImport.importKerasSequentialModelAndWeights(inputStream, false)
val wrapped: ParallelInference = new ParallelInference.Builder(multiLayerNetwork).build()
val res = iterator.map(row => {
wrapped.output(row).toDoubleVector
})
res
})
}
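A variant that avoids duplicating the whole mapPartitions block is to resolve the resource name on the driver and let the closure capture only that String; a minimal sketch, assuming product_type is a plain String and the same DL4J classes as above:
// Decide the resource on the driver; the closure then only captures this String,
// not the Config object built from the spark-submit arguments.
val resourceName = if (product_type == "AA") "my_aa.hdf5" else "my_bb.hdf5"
val scores = df.mapPartitions(iterator => {
  val inputStream = getClass().getClassLoader().getResourceAsStream(resourceName)
  val multiLayerNetwork: MultiLayerNetwork = KerasModelImport.importKerasSequentialModelAndWeights(inputStream, false)
  val wrapped: ParallelInference = new ParallelInference.Builder(multiLayerNetwork).build()
  iterator.map(row => wrapped.output(row).toDoubleVector)
})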
val patterns = ctx.getBroadcastState(patternStateDescriptor)
The imports I made
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.{MapStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.datastream.BroadcastStream
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
Here's the code
val env = StreamExecutionEnvironment.getExecutionEnvironment
val properties = new Properties()
properties.setProperty("bootstrap.servers","localhost:9092")
val patternStream = new FlinkKafkaConsumer010("patterns", new SimpleStringSchema, properties)
val patterns = env.addSource(patternStream)
var patternData = patterns.map {
str =>
val splitted_str = str.split(",")
PatternStream(splitted_str(0).trim, splitted_str(1).trim, splitted_str(2).trim)
}
val logsStream = new FlinkKafkaConsumer010("logs", new SimpleStringSchema, properties)
// logsStream.setStartFromEarliest()
val logs = env.addSource(logsStream)
var data = logs.map {
str =>
val splitted_str = str.split(",")
LogsTest(splitted_str.head.trim, splitted_str(1).trim, splitted_str(2).trim)
}
val keyedData: KeyedStream[LogsTest, String] = data.keyBy(_.metric)
val bcStateDescriptor = new MapStateDescriptor[Unit, PatternStream]("patterns", Types.UNIT, Types.of[PatternStream]) // first type defined is for the key and second data type defined is for the value
val broadcastPatterns: BroadcastStream[PatternStream] = patternData.broadcast(bcStateDescriptor)
val alerts = keyedData
.connect(broadcastPatterns)
.process(new PatternEvaluator())
alerts.print()
// println(alerts.getClass)
// val sinkProducer = new FlinkKafkaProducer010("output", new SimpleStringSchema(), properties)
env.execute("Flink Broadcast State Job")
}
class PatternEvaluator()
extends KeyedBroadcastProcessFunction[String, LogsTest, PatternStream, (String, String, String)] {
private lazy val patternStateDescriptor = new MapStateDescriptor("patterns", classOf[String], classOf[String])
private var lastMetricState: ValueState[String] = _
override def open(parameters: Configuration): Unit = {
val lastMetricDescriptor = new ValueStateDescriptor("last-metric", classOf[String])
lastMetricState = getRuntimeContext.getState(lastMetricDescriptor)
}
override def processElement(reading: LogsTest,
readOnlyCtx: KeyedBroadcastProcessFunction[String, LogsTest, PatternStream, (String, String, String)]#ReadOnlyContext,
out: Collector[(String, String, String)]): Unit = {
val metrics = readOnlyCtx.getBroadcastState(patternStateDescriptor)
if (metrics.contains(reading.metric)) {
val metricPattern: String = metrics.get(reading.metric)
val metricPatternValue: String = metrics.get(reading.value)
val lastMetric = lastMetricState.value()
val logsMetric = (reading.metric)
val logsValue = (reading.value)
if (logsMetric == metricPattern) {
if (metricPatternValue == logsValue) {
out.collect((reading.timestamp, reading.value, reading.metric))
}
}
}
}
override def processBroadcastElement(
update: PatternStream,
ctx: KeyedBroadcastProcessFunction[String, LogsTest, PatternStream, (String, String, String)]#Context,
out: Collector[(String, String, String)]
): Unit = {
val patterns = ctx.getBroadcastState(patternStateDescriptor)
if (update.metric == "IP") {
patterns.put(update.metric /*,update.operator*/ , update.value)
}
// else if (update.metric == "username"){
// patterns.put(update.metric, update.value)
// }
// else {
// println("No required data found")
// }
// }
}
}
Sample Data :- Logs Stream
"21/09/98","IP", "5.5.5.5"
Pattern Stream
"IP","==","5.5.5.5"
I'm unable to analyse the data and get the desired result, i.e. 21/09/98,IP,5.5.5.5.
There's no error as of now; it's just not analysing the data.
The code is reading the streams (checked).
One common source of trouble in cases like this is that the API offers no control over the order in which the patterns and the data are ingested. It could be that processElement is being called before processBroadcastElement.
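One way to cope with that ordering, sketched below, is to buffer readings whose pattern has not arrived yet in keyed state and re-evaluate them the next time an element for the same key shows up. The names bufferedReadingsDescriptor and evaluate are illustrative, the matching check is simplified to comparing the stored pattern value with the reading's value, and the snippet assumes it replaces processElement inside PatternEvaluator:
import org.apache.flink.api.common.state.{ListStateDescriptor, ReadOnlyBroadcastState}
import scala.collection.JavaConverters._
// Keyed state holding readings that arrived before their pattern.
private lazy val bufferedReadingsDescriptor =
  new ListStateDescriptor[LogsTest]("buffered-readings", classOf[LogsTest])
override def processElement(reading: LogsTest,
                            readOnlyCtx: KeyedBroadcastProcessFunction[String, LogsTest, PatternStream, (String, String, String)]#ReadOnlyContext,
                            out: Collector[(String, String, String)]): Unit = {
  val patterns = readOnlyCtx.getBroadcastState(patternStateDescriptor)
  val buffer = getRuntimeContext.getListState(bufferedReadingsDescriptor)
  if (patterns.contains(reading.metric)) {
    // The pattern is known now: evaluate anything buffered earlier, then the current reading.
    buffer.get().asScala.foreach(evaluate(_, patterns, out))
    buffer.clear()
    evaluate(reading, patterns, out)
  } else {
    // No pattern for this metric yet: keep the reading instead of silently dropping it.
    buffer.add(reading)
  }
}
private def evaluate(reading: LogsTest,
                     patterns: ReadOnlyBroadcastState[String, String],
                     out: Collector[(String, String, String)]): Unit = {
  if (patterns.get(reading.metric) == reading.value) {
    out.collect((reading.timestamp, reading.value, reading.metric))
  }
}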
I have a protobuf file ... which I transform to a Scala file using ScalaPB. This way I can then ... use it inside my Jupyter notebook* for transformation. Sadly, when I ... run the specific cell I get a type mismatch error and I don't know why.
As the protobuf file works with Python and the Scala code is generated, what is not right here? Could this be a bug?
*The notebook uses com.google.protobuf:protobuf-java:3.5.0, com.thesamet.scalapb:sparksql-scalapb_2.11:0.7.0 as imports
Sources & Error
protobuf file:
syntax = "proto2";
import "scalapb/scalapb.proto";
option (scalapb.options) = {
flat_package: true
single_file: true
};
message JSONEntry {
required uint64 ts = 1;
required string data = 2;
}
message JSONOutput {
optional string metadata = 1;
repeated JSONEntry entry = 2;
}
Scala (generated) code
// Generated by the Scala Plugin for the Protocol Buffer Compiler.
// Do not edit!
//
// Protofile syntax: PROTO2
@SerialVersionUID(0L)
final case class JSONEntry(
ts: _root_.scala.Long,
data: _root_.scala.Predef.String
) extends scalapb.GeneratedMessage with scalapb.Message[JSONEntry] with scalapb.lenses.Updatable[JSONEntry] {
@transient
private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0
private[this] def __computeSerializedValue(): _root_.scala.Int = {
var __size = 0
__size += _root_.com.google.protobuf.CodedOutputStream.computeUInt64Size(1, ts)
__size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(2, data)
__size
}
final override def serializedSize: _root_.scala.Int = {
var read = __serializedSizeCachedValue
if (read == 0) {
read = __computeSerializedValue()
__serializedSizeCachedValue = read
}
read
}
def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): Unit = {
_output__.writeUInt64(1, ts)
_output__.writeString(2, data)
}
def mergeFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): JSONEntry = {
var __ts = this.ts
var __data = this.data
var __requiredFields0: _root_.scala.Long = 0x3L
var _done__ = false
while (!_done__) {
val _tag__ = _input__.readTag()
_tag__ match {
case 0 => _done__ = true
case 8 =>
__ts = _input__.readUInt64()
__requiredFields0 &= 0xfffffffffffffffeL
case 18 =>
__data = _input__.readString()
__requiredFields0 &= 0xfffffffffffffffdL
case tag => _input__.skipField(tag)
}
}
if (__requiredFields0 != 0L) { throw new _root_.com.google.protobuf.InvalidProtocolBufferException("Message missing required fields.") }
JSONEntry(
ts = __ts,
data = __data
)
}
def withTs(__v: _root_.scala.Long): JSONEntry = copy(ts = __v)
def withData(__v: _root_.scala.Predef.String): JSONEntry = copy(data = __v)
def getFieldByNumber(__fieldNumber: _root_.scala.Int): scala.Any = {
(__fieldNumber: @_root_.scala.unchecked) match {
case 1 => ts
case 2 => data
}
}
def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = {
require(__field.containingMessage eq companion.scalaDescriptor)
(__field.number: @_root_.scala.unchecked) match {
case 1 => _root_.scalapb.descriptors.PLong(ts)
case 2 => _root_.scalapb.descriptors.PString(data)
}
}
def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToUnicodeString(this)
def companion = JSONEntry
}
object JSONEntry extends scalapb.GeneratedMessageCompanion[JSONEntry] {
implicit def messageCompanion: scalapb.GeneratedMessageCompanion[JSONEntry] = this
def fromFieldsMap(__fieldsMap: scala.collection.immutable.Map[_root_.com.google.protobuf.Descriptors.FieldDescriptor, scala.Any]): JSONEntry = {
require(__fieldsMap.keys.forall(_.getContainingType() == javaDescriptor), "FieldDescriptor does not match message type.")
val __fields = javaDescriptor.getFields
JSONEntry(
__fieldsMap(__fields.get(0)).asInstanceOf[_root_.scala.Long],
__fieldsMap(__fields.get(1)).asInstanceOf[_root_.scala.Predef.String]
)
}
implicit def messageReads: _root_.scalapb.descriptors.Reads[JSONEntry] = _root_.scalapb.descriptors.Reads{
case _root_.scalapb.descriptors.PMessage(__fieldsMap) =>
require(__fieldsMap.keys.forall(_.containingMessage == scalaDescriptor), "FieldDescriptor does not match message type.")
JSONEntry(
__fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).get.as[_root_.scala.Long],
__fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).get.as[_root_.scala.Predef.String]
)
case _ => throw new RuntimeException("Expected PMessage")
}
def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = DataProto.javaDescriptor.getMessageTypes.get(0)
def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = DataProto.scalaDescriptor.messages(0)
def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = throw new MatchError(__number)
lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_]] = Seq.empty
def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber)
lazy val defaultInstance = JSONEntry(
ts = 0L,
data = ""
)
implicit class JSONEntryLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, JSONEntry]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, JSONEntry](_l) {
def ts: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Long] = field(_.ts)((c_, f_) => c_.copy(ts = f_))
def data: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.data)((c_, f_) => c_.copy(data = f_))
}
final val TS_FIELD_NUMBER = 1
final val DATA_FIELD_NUMBER = 2
}
@SerialVersionUID(0L)
final case class JSONOutput(
metadata: scala.Option[_root_.scala.Predef.String] = None,
entry: _root_.scala.collection.Seq[JSONEntry] = _root_.scala.collection.Seq.empty
) extends scalapb.GeneratedMessage with scalapb.Message[JSONOutput] with scalapb.lenses.Updatable[JSONOutput] {
@transient
private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0
private[this] def __computeSerializedValue(): _root_.scala.Int = {
var __size = 0
if (metadata.isDefined) { __size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(1, metadata.get) }
entry.foreach(entry => __size += 1 + _root_.com.google.protobuf.CodedOutputStream.computeUInt32SizeNoTag(entry.serializedSize) + entry.serializedSize)
__size
}
final override def serializedSize: _root_.scala.Int = {
var read = __serializedSizeCachedValue
if (read == 0) {
read = __computeSerializedValue()
__serializedSizeCachedValue = read
}
read
}
def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): Unit = {
metadata.foreach { __v =>
_output__.writeString(1, __v)
};
entry.foreach { __v =>
_output__.writeTag(2, 2)
_output__.writeUInt32NoTag(__v.serializedSize)
__v.writeTo(_output__)
};
}
def mergeFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): JSONOutput = {
var __metadata = this.metadata
val __entry = (_root_.scala.collection.immutable.Vector.newBuilder[JSONEntry] ++= this.entry)
var _done__ = false
while (!_done__) {
val _tag__ = _input__.readTag()
_tag__ match {
case 0 => _done__ = true
case 10 =>
__metadata = Option(_input__.readString())
case 18 =>
__entry += _root_.scalapb.LiteParser.readMessage(_input__, JSONEntry.defaultInstance)
case tag => _input__.skipField(tag)
}
}
JSONOutput(
metadata = __metadata,
entry = __entry.result()
)
}
def getMetadata: _root_.scala.Predef.String = metadata.getOrElse("")
def clearMetadata: JSONOutput = copy(metadata = None)
def withMetadata(__v: _root_.scala.Predef.String): JSONOutput = copy(metadata = Option(__v))
def clearEntry = copy(entry = _root_.scala.collection.Seq.empty)
def addEntry(__vs: JSONEntry*): JSONOutput = addAllEntry(__vs)
def addAllEntry(__vs: TraversableOnce[JSONEntry]): JSONOutput = copy(entry = entry ++ __vs)
def withEntry(__v: _root_.scala.collection.Seq[JSONEntry]): JSONOutput = copy(entry = __v)
def getFieldByNumber(__fieldNumber: _root_.scala.Int): scala.Any = {
(__fieldNumber: @_root_.scala.unchecked) match {
case 1 => metadata.orNull
case 2 => entry
}
}
def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = {
require(__field.containingMessage eq companion.scalaDescriptor)
(__field.number: @_root_.scala.unchecked) match {
case 1 => metadata.map(_root_.scalapb.descriptors.PString).getOrElse(_root_.scalapb.descriptors.PEmpty)
case 2 => _root_.scalapb.descriptors.PRepeated(entry.map(_.toPMessage)(_root_.scala.collection.breakOut))
}
}
def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToUnicodeString(this)
def companion = JSONOutput
}
object JSONOutput extends scalapb.GeneratedMessageCompanion[JSONOutput] {
implicit def messageCompanion: scalapb.GeneratedMessageCompanion[JSONOutput] = this
def fromFieldsMap(__fieldsMap: scala.collection.immutable.Map[_root_.com.google.protobuf.Descriptors.FieldDescriptor, scala.Any]): JSONOutput = {
require(__fieldsMap.keys.forall(_.getContainingType() == javaDescriptor), "FieldDescriptor does not match message type.")
val __fields = javaDescriptor.getFields
JSONOutput(
__fieldsMap.get(__fields.get(0)).asInstanceOf[scala.Option[_root_.scala.Predef.String]],
__fieldsMap.getOrElse(__fields.get(1), Nil).asInstanceOf[_root_.scala.collection.Seq[JSONEntry]]
)
}
implicit def messageReads: _root_.scalapb.descriptors.Reads[JSONOutput] = _root_.scalapb.descriptors.Reads{
case _root_.scalapb.descriptors.PMessage(__fieldsMap) =>
require(__fieldsMap.keys.forall(_.containingMessage == scalaDescriptor), "FieldDescriptor does not match message type.")
JSONOutput(
__fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).flatMap(_.as[scala.Option[_root_.scala.Predef.String]]),
__fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).map(_.as[_root_.scala.collection.Seq[JSONEntry]]).getOrElse(_root_.scala.collection.Seq.empty)
)
case _ => throw new RuntimeException("Expected PMessage")
}
def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = DataProto.javaDescriptor.getMessageTypes.get(1)
def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = DataProto.scalaDescriptor.messages(1)
def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = {
var __out: _root_.scalapb.GeneratedMessageCompanion[_] = null
(__number: @_root_.scala.unchecked) match {
case 2 => __out = JSONEntry
}
__out
}
lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_]] = Seq.empty
def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber)
lazy val defaultInstance = JSONOutput(
)
implicit class JSONOutputLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, JSONOutput]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, JSONOutput](_l) {
def metadata: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.getMetadata)((c_, f_) => c_.copy(metadata = Option(f_)))
def optionalMetadata: _root_.scalapb.lenses.Lens[UpperPB, scala.Option[_root_.scala.Predef.String]] = field(_.metadata)((c_, f_) => c_.copy(metadata = f_))
def entry: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.collection.Seq[JSONEntry]] = field(_.entry)((c_, f_) => c_.copy(entry = f_))
}
final val METADATA_FIELD_NUMBER = 1
final val ENTRY_FIELD_NUMBER = 2
}
object DataProto extends _root_.scalapb.GeneratedFileObject {
lazy val dependencies: Seq[_root_.scalapb.GeneratedFileObject] = Seq(
scalapb.options.ScalapbProto
)
lazy val messagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_]] = Seq(
JSONEntry,
JSONOutput
)
private lazy val ProtoBytes: Array[Byte] =
scalapb.Encoding.fromBase64(scala.collection.Seq(
"""CgpkYXRhLnByb3RvGhVzY2FsYXBiL3NjYWxhcGIucHJvdG8iLwoJSlNPTkVudHJ5Eg4KAnRzGAEgAigEUgJ0cxISCgRkYXRhG
AIgAigJUgRkYXRhIkoKCkpTT05PdXRwdXQSGgoIbWV0YWRhdGEYASABKAlSCG1ldGFkYXRhEiAKBWVudHJ5GAIgAygLMgouSlNPT
kVudHJ5UgVlbnRyeUIH4j8EEAEoAQ=="""
).mkString)
lazy val scalaDescriptor: _root_.scalapb.descriptors.FileDescriptor = {
val scalaProto = com.google.protobuf.descriptor.FileDescriptorProto.parseFrom(ProtoBytes)
_root_.scalapb.descriptors.FileDescriptor.buildFrom(scalaProto, dependencies.map(_.scalaDescriptor))
}
lazy val javaDescriptor: com.google.protobuf.Descriptors.FileDescriptor = {
val javaProto = com.google.protobuf.DescriptorProtos.FileDescriptorProto.parseFrom(ProtoBytes)
com.google.protobuf.Descriptors.FileDescriptor.buildFrom(javaProto, Array(
scalapb.options.ScalapbProto.javaDescriptor
))
}
@deprecated("Use javaDescriptor instead. In a future version this will refer to scalaDescriptor.", "ScalaPB 0.5.47")
def descriptor: com.google.protobuf.Descriptors.FileDescriptor = javaDescriptor
}
Error
<console>:82: error: type mismatch;
found : JSONEntry.type
required: scalapb.GeneratedMessageCompanion[_]
def companion = JSONEntry
^
I was able to successfully compile your proto file with the following code
project/scalapb.sbt
addSbtPlugin("com.thesamet" % "sbt-protoc" % "0.99.16")
libraryDependencies += "com.thesamet.scalapb" %% "compilerplugin" % "0.7.0"
build.sbt
lazy val root = (project in file(".")).
settings(
inThisBuild(List(
organization := "com.example",
scalaVersion := "2.12.4",
version := "0.1.0-SNAPSHOT"
)),
name := "protobuf",
libraryDependencies ++= Seq(
"com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
),
PB.targets in (Compile) := Seq(
scalapb.gen() -> (sourceManaged in Compile).value
)
)
Now copy and paste your proto file into src/main/protobuf as hello.proto and run sbt clean compile.
The only thing I did differently is that I added a package to the proto file:
syntax = "proto2";
import "scalapb/scalapb.proto";
option (scalapb.options) = {
package_name: "com.abhi"
flat_package: true
single_file: true
};
message JSONEntry {
required uint64 ts = 1;
required string data = 2;
}
message JSONOutput {
optional string metadata = 1;
repeated JSONEntry entry = 2;
}
Now finally use the generated code in your app
package example
import com.abhi.JSONEntry
import java.io._
object Hello extends App {
val jsonEntry = JSONEntry(10L, "foo")
val target = new FileOutputStream(new File("foo.bin"))
jsonEntry.writeTo(target)
target.close()
}
The code compiles correctly and there is no compilation error
I know that we can use saveAsNewAPIHadoopDataset with an RDD[(ImmutableBytesWritable, Put)] to write to an HBase table using Spark.
But I have a list, i.e. an RDD[List[(ImmutableBytesWritable, Put)]], which I want to write to 2 different HBase tables.
How do I do it?
Below is the code.
package com.scryAnalytics.FeatureExtractionController
import com.scryAnalytics.FeatureExtractionController.DAO.{DocumentEntitiesDAO, NLPEntitiesDAO, SegmentFeaturesDAO}
import com.scryAnalytics.NLPGeneric.{GateGenericNLP, NLPEntities}
import com.sun.xml.bind.v2.TODO
import com.vocp.ner.main.GateNERImpl
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{HBaseAdmin, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{MultiTableOutputFormat, TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.mapreduce.Job
import com.scryAnalytics.FeatureExtraction.SegmentsFeatureExtraction
import com.scryAnalytics.FeatureExtraction.DAO.VOCPEntities
import scala.collection.JavaConversions._
import gate.FeatureMap
import java.util.Map.Entry
import scala.collection.JavaConversions
import scala.util.control.Breaks.break
import scala.util.control.ControlThrowable
/**
* Created by sahil on 1/12/16.
*/
object Main {
def main(args: Array[String]): Unit = {
val inputTableName = "posts"
val outputTableName = "drugSegmentNew1"
val pluginHome = "/home/sahil/Voice-of-Cancer-Patients/VOCP Modules/bin/plugins"
val sc = new SparkContext(new SparkConf().setAppName("HBaseRead").setMaster("local[4]"))
val conf = HBaseConfiguration.create()
conf.set(HConstants.ZOOKEEPER_QUORUM, "localhost")
conf.set(TableInputFormat.INPUT_TABLE, inputTableName)
val admin = new HBaseAdmin(conf)
if (!admin.isTableAvailable(inputTableName)) {
val tableDesc = new HTableDescriptor(TableName.valueOf(inputTableName))
admin.createTable(tableDesc)
}
val job: Job = Job.getInstance(conf, "FeatureExtractionJob")
job.setOutputFormatClass(classOf[MultiTableOutputFormat])
val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
classOf[ImmutableBytesWritable], classOf[Result])
val resultRDD = hBaseRDD.map(x => x._2)
// TODO: Add filters
val entity: VOCPEntities = VOCPEntities.DRUG
val nlpRDD = resultRDD.mapPartitions { iter =>
val nlpEntities: NLPEntitiesDAO = new NLPEntitiesDAO
iter.map {
result =>
val message = Bytes.toString(result.getValue(Bytes.toBytes("p"), Bytes.toBytes("message")))
val row_key = Bytes.toString(result.getRow)
nlpEntities.setToken(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("token")))))
nlpEntities.setSpaceToken(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("spaceToken")))))
nlpEntities.setSentence(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("sentence")))))
nlpEntities.setVG(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("verbGroup")))))
nlpEntities.setSplit(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("split")))))
nlpEntities.setNounChunk(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("gen"), Bytes.toBytes("nounChunk")))))
nlpEntities.setDrugs(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("ner"), Bytes.toBytes("drug")))))
nlpEntities.setRegimen(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("ner"), Bytes.toBytes("regimen")))))
nlpEntities.setSideEffects(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("ner"), Bytes.toBytes("sideEffect")))))
nlpEntities.setALT_DRUG(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("ner"), Bytes.toBytes("altDrug")))))
nlpEntities.setALT_THERAPY(Utility.jsonToAnnotations(Bytes.toString(
result.getValue(Bytes.toBytes("ner"), Bytes.toBytes("altTherapy")))))
(row_key, message, nlpEntities)
}
}
val featureExtractionOld: SegmentsFeatureExtraction = new SegmentsFeatureExtraction(
pluginHome, entity)
val outputRDD = nlpRDD.mapPartitions { iter =>
val featureExtraction: SegmentsFeatureExtraction = new SegmentsFeatureExtraction(
pluginHome, entity)
iter.map { x =>
val featuresJson = featureExtraction.generateFeatures(x._2, Utility.objectToJson(x._3))
val segmentFeatures: SegmentFeaturesDAO = Utility.jsonToSegmentFeatures(featuresJson)
val documentEntities: DocumentEntitiesDAO = new DocumentEntitiesDAO
documentEntities.setSystemId(x._1)
documentEntities.setToken(x._3.getToken)
documentEntities.setSpaceToken(x._3.getSpaceToken)
documentEntities.setSentence(x._3.getSentence)
documentEntities.setVG(x._3.getVG)
documentEntities.setNounChunk(x._3.getNounChunk)
documentEntities.setSplit(x._3.getSplit)
documentEntities.setDRUG(x._3.getDrugs)
documentEntities.setSE(x._3.getSideEffects)
documentEntities.setREG(x._3.getRegimen)
documentEntities.setALT_DRUG(x._3.getALT_DRUG)
documentEntities.setALT_THERAPY(x._3.getALT_THERAPY)
documentEntities.setSegment(segmentFeatures.getSegment)
documentEntities.setSegmentClass(segmentFeatures.getSegmentClass)
documentEntities.setSegmentInstance(segmentFeatures.getSegmentInstance)
(x._1, documentEntities)
}
}
val newRDD = outputRDD.map { k => convertToPut(k) }
newRDD.saveAsNewAPIHadoopDataset(job.getConfiguration())
}
def convertToPut(NlpWithRowKey: (String, DocumentEntitiesDAO)): List[(ImmutableBytesWritable, Put)] = {
val rowkey = NlpWithRowKey._1
val documentEntities = NlpWithRowKey._2
var returnList: List[(ImmutableBytesWritable, Put)] = List()
val segmentInstances = documentEntities.getSegmentInstance
val segments = documentEntities.getSegment
if(segments != null) {
var count = 0
for(segment <- segmentInstances) {
val keyString: String = documentEntities.getSystemId + "#" + Integer.toString(count)
count = count + 1
val outputKey: ImmutableBytesWritable = new ImmutableBytesWritable(keyString.getBytes())
val put = new Put(outputKey.get())
val features: FeatureMap = segment.getFeatures
val it: Iterator[Entry[Object, Object]] = features.entrySet.iterator()
var sideEffect_offset = "NULL"
var entity_offset = "NULL"
while(it.hasNext) {
val pair = it.next()
if(pair.getKey.equals("sideEffect-offset")) {
sideEffect_offset = pair.getValue().toString()
}
else if(pair.getKey.equals("drug-offset")) {
entity_offset = pair.getValue().toString()
}
else if(pair.getKey().equals("drug") || pair.getKey().equals("sideEffect")){
put.add(Bytes.toBytes("seg"), Bytes.toBytes(pair.getKey.toString), Bytes
.toBytes(pair.getValue().toString))
}
else {
put.add(Bytes.toBytes("segFeatures"), Bytes.toBytes(pair.getKey.toString), Bytes
.toBytes(pair.getValue().toString))
}
}
put.add(Bytes.toBytes("seg"), Bytes.toBytes("RelationId"),
Bytes.toBytes(documentEntities.getSystemId() + "-" + entity_offset + "-" + sideEffect_offset))
put.add(Bytes.toBytes("segInst"),Bytes.toBytes("id"), Bytes.toBytes(segment.getId()))
put.add(Bytes.toBytes("segInst"), Bytes.toBytes("type"), Bytes.toBytes(segment.getType()))
put.add(Bytes.toBytes("segInst"), Bytes.toBytes("startNodeId"), Bytes.toBytes(
segment.getStartNode().getId()))
put.add(Bytes.toBytes("segInst"), Bytes.toBytes("startNodeOffset"),
Bytes.toBytes(segment.getStartNode().getOffset()))
put.add(Bytes.toBytes("segInst"),Bytes.toBytes("endNodeId"),
Bytes.toBytes(segment.getEndNode().getId()))
put.add(Bytes.toBytes("segInst"), Bytes.toBytes("endNodeOffset"),
Bytes.toBytes(segment.getEndNode().getOffset()))
put.add(Bytes.toBytes("seg"),Bytes.toBytes("system_id"),
Bytes.toBytes(documentEntities.getSystemId()))
put.add(Bytes.toBytes("seg"), Bytes.toBytes("segmentText"),
Bytes.toBytes(segment.getAnnotatedText()))
for(segmentClassAnnots <- documentEntities.getSegmentClass) {
try {
if (segment.getId().equals(segmentClassAnnots.getFeatures().get("instance-id"))) {
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("id"),
Bytes.toBytes(segmentClassAnnots.getId()))
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("type"),
Bytes.toBytes(segmentClassAnnots.getType()))
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("startNodeId"), Bytes
.toBytes(segmentClassAnnots.getStartNode()
.getId()))
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("startNodeOffset"), Bytes
.toBytes(segmentClassAnnots.getStartNode()
.getOffset()))
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("endNodeId"), Bytes
.toBytes(segmentClassAnnots.getEndNode()
.getId()))
put.add(Bytes.toBytes("segClass"), Bytes.toBytes("endNodeOffset"), Bytes
.toBytes(segmentClassAnnots.getEndNode()
.getOffset()))
break
}
} catch {
case t: Throwable => t.printStackTrace
}
returnList = returnList:+((new ImmutableBytesWritable(Bytes.toBytes("drugSegmentNew1")), put))
}
}
}
val PUT = new Put(Bytes.toBytes(rowkey))
PUT.add(Bytes.toBytes("f"), Bytes.toBytes("dStatus"), Bytes.toBytes("1"))
returnList = returnList:+((new ImmutableBytesWritable(Bytes.toBytes("posts")), PUT))
(returnList)
}
}
Just change the line below:
val newRDD = outputRDD.map { k => convertToPut(k) }
to this line:
val newRDD = outputRDD.flatMap { k => convertToPut(k) }
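flatMap flattens each List[(ImmutableBytesWritable, Put)] into individual pairs, and MultiTableOutputFormat (already set on the Job above) then writes each Put to the table named in its key. A minimal sketch of the resulting types, assuming convertToPut exactly as defined in the question:
// outputRDD: RDD[(String, DocumentEntitiesDAO)]; convertToPut emits pairs keyed by table name
// ("drugSegmentNew1" or "posts"), so a single save call feeds both tables.
val newRDD: org.apache.spark.rdd.RDD[(ImmutableBytesWritable, Put)] =
  outputRDD.flatMap(convertToPut)
newRDD.saveAsNewAPIHadoopDataset(job.getConfiguration())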
Hope this helps!
I have attached my code below.
Application (controller)
package controllers
import play.api._
import play.api.mvc._
import play.api.data._
import play.api.data.Forms._
import models.Task
import java.io._
object Application extends Controller {
val taskForm = Form(
tuple(
"id" -> number,
"label" -> nonEmptyText(minLength = 4),
"add" -> nonEmptyText
)
)
def index = Action {
Redirect(routes.Application.tasks)
}
def tasks = Action {
Ok(views.html.index(Task.all(),taskForm))
}
def showTask= Action {
Ok(views.html.test(Task.all(), taskForm))
}
def newTask = Action { implicit request =>
taskForm.bindFromRequest.fold(
errors => BadRequest(views.html.index(Task.all(), errors)),
{
case(id,label,add) => {
Task.create(id,label,add)
Redirect(routes.Application.showTask)
}
}
)
}
def deleteTask(id: Int) = Action {
Task.delete(id)
Redirect(routes.Application.showTask)
}
}
Task(model)
package models
import anorm._
import anorm.SqlParser._
import play.api.db._
import play.api.Play.current
case class Task(id: Int, label: String,add:String)
object Task {
val task = {
get[Int]("id") ~
get[String]("label") ~
get[String]("add") map {
case id~label~add => Task(id, label,add)
}
}
def all(): List[Task] = DB.withConnection { implicit c =>
SQL("select * from task").as(task *)
}
def create(id:Int , label: String, add:String) {
DB.withConnection { implicit c =>
SQL("insert into task (id,label,add) values ({id},{label},{add})").on(
'id -> id ,
'label -> label ,
'add -> add
).executeUpdate()
}
}
def delete(id:Int) {
DB.withConnection { implicit c =>
SQL("delete from task where id = {id}").on(
'id -> id
).executeUpdate()
}
}
}
I have no idea where to declare the writer function. Please help me with the syntax as well; I need to write the form elements into a text file. Thanks in advance.
Assuming that you want to append the text whenever a new task is added (i.e. newTask is invoked by Play).
You can define a helper function in object Application and use this helper method in your newTask method.
object Application extends Controller {
//...
import java.io.FileWriter
val filePath = """ path to file """
def writingToFile(str: String) = {
val fw = new FileWriter(filePath, true)
try {
fw.write(str)
} finally {
fw.close()
}
}
def newTask = Action { implicit request =>
taskForm.bindFromRequest.fold(
errors => BadRequest(views.html.index(Task.all(), errors)),
{
case(id,label,add) => {
/* Call the helper function to append to the file */
writingToFile(s"id : $id, label : $label, add : $add \n")
Task.create(id,label,add)
Redirect(routes.Application.showTask)
}
}
)
}
//..
}
Likewise, when other methods are invoked you may append to the file in a similar fashion.
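For example, a hypothetical sketch that reuses the writingToFile helper above inside the existing deleteTask action to log deletions as well:
def deleteTask(id: Int) = Action {
  // Append a line to the same text file before removing the task (log format is illustrative).
  writingToFile(s"deleted task id : $id\n")
  Task.delete(id)
  Redirect(routes.Application.showTask)
}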
Hope it helps :)