Akka Kafka Custom Serializer - scala

I'm using Akka Kafka (Scala) and want to send custom objects.
class TweetsSerializer extends Serializer[Seq[MyCustomType]] {
override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ???
override def serialize(topic: String, data: Seq[MyCustomType]): Array[Byte] = ???
override def close(): Unit = ???
}
How can i correctly write my own serializer ? And, what should i do with field config ?

I would use the StringSerializer, I mean, I´d convert all my types to string before produce them. However that works:
case class MyCustomType(a: Int)
class TweetsSerializer extends Serializer[Seq[MyCustomType]] {
private var encoding = "UTF8"
override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = {
val propertyName = if (isKey) "key.serializer.encoding"
else "value.serializer.encoding"
var encodingValue = configs.get(propertyName)
if (encodingValue == null) encodingValue = configs.get("serializer.encoding")
if (encodingValue != null && encodingValue.isInstanceOf[String]) encoding = encodingValue.asInstanceOf[String]
}
override def serialize(topic: String, data: Seq[MyCustomType]): Array[Byte] =
try
if (data == null) return null
else return {
data.map { v =>
v.a.toString
}
.mkString("").getBytes("UTF8")
}
catch {
case e: UnsupportedEncodingException =>
throw new SerializationException("Error when serializing string to byte[] due to unsupported encoding " + encoding)
}
override def close(): Unit = Unit
}
}
object testCustomKafkaSerializer extends App {
implicit val producerConfig = {
val props = new Properties()
props.setProperty("bootstrap.servers", "localhost:9092")
props.setProperty("key.serializer", classOf[StringSerializer].getName)
props.setProperty("value.serializer", classOf[TweetsSerializer].getName)
props
}
lazy val kafkaProducer = new KafkaProducer[String, Seq[MyCustomType]](producerConfig)
// Create scala future from Java
private def publishToKafka(id: String, data: Seq[MyCustomType]) = {
kafkaProducer
.send(new ProducerRecord("outTopic", id, data))
.get()
}
val input = MyCustomType(1)
publishToKafka("customSerializerTopic", Seq(input))
}

Related

Is it possible to write a upickle Serializer for akka

I would like to implement an akka Serializer using upickle but I'm not sure its possible. To do so I would need to implement a Serializer something like the following:
import akka.serialization.Serializer
import upickle.default._
class UpickleSerializer extends Serializer {
def includeManifest: Boolean = true
def identifier = 1234567
def toBinary(obj: AnyRef): Array[Byte] = {
writeBinary(obj) // ???
}
def fromBinary(bytes: Array[Byte], clazz: Option[Class[_]]): AnyRef = {
readBinary(bytes) // ???
}
}
The problem is I cannot call writeBinary/readBinary without having the relevant Writer/Reader. Is there a way I can look these up based on the object class?
Take a look at following files, you should get some ideas!
CborAkkaSerializer.scala
LocationAkkaSerializer.scala
Note: These serializers are using cbor
I found a way to do it using reflection. I base the solution on the assumption that any object that needs to be serialized should have defined a ReadWriter in its companion object:
class UpickleSerializer extends Serializer {
private var map = Map[Class[_], ReadWriter[AnyRef]]()
def includeManifest: Boolean = true
def identifier = 1234567
def toBinary(obj: AnyRef): Array[Byte] = {
implicit val rw = getReadWriter(obj.getClass)
writeBinary(obj)
}
def fromBinary(bytes: Array[Byte], clazz: Option[Class[_]]): AnyRef = {
implicit val rw = lookup(clazz.get)
readBinary[AnyRef](bytes)
}
private def getReadWriter(clazz: Class[_]) = map.get(clazz) match {
case Some(rw) => rw
case None =>
val rw = lookup(clazz)
map += clazz -> rw
rw
}
private def lookup(clazz: Class[_]) = {
import scala.reflect.runtime._
val rootMirror = universe.runtimeMirror(clazz.getClassLoader)
val classSymbol = rootMirror.classSymbol(clazz)
val moduleSymbol = classSymbol.companion.asModule
val moduleMirror = rootMirror.reflectModule(moduleSymbol)
val instanceMirror = rootMirror.reflect(moduleMirror.instance)
val members = instanceMirror.symbol.typeSignature.members
members.find(_.typeSignature <:< typeOf[ReadWriter[_]]) match {
case Some(rw) =>
instanceMirror.reflectField(rw.asTerm).get.asInstanceOf[ReadWriter[AnyRef]]
case None =>
throw new RuntimeException("Not found")
}
}
}

Unable to deserialize a custom Serde using Kafka-Streams

I am trying to create a simple topology to upperCase a person entity using Kafka-streams.
case class Person(id: Int, name: String, age: Int)
My custom Serializer and Deserializer are like this:
class KafkaBytesSerializer[T] extends Serializer[T] {
override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = 0
override def serialize(topic: String, data: T): Array[Byte] = {
val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(stream)
oos.writeObject(data)
oos.close()
stream.toByteArray
}
override def close(): Unit = 0
}
class KafkaBytesDeserializer[T] extends Deserializer[T]{
override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = 0
override def deserialize(topic: String, data: Array[Byte]): T = {
val objIn = new ObjectInputStream(new ByteArrayInputStream(data))
val obj = objIn.readObject().asInstanceOf[T]
objIn.close
obj
}
override def close(): Unit = 0
}
The main calling code of the streaming app is this:
val personSerde: Serde[Person] =
Serdes.serdeFrom(new KafkaBytesSerializer[Person], new KafkaBytesDeserializer[Person])
val builder = new StreamsBuilder()
builder
.stream[String, Person](INPUT_TOPIC)(Consumed.`with`(Serdes.String(), personSerde))
.map[String, Person]((k,p) => (k, Person(p.id, p.name.toUpperCase(), p.age)))
.peek((k, p) => println("Key" + k + " Person: " + p))
.to(OUTPUT_TOPIC)(Produced.`with`(Serdes.String(), personSerde))
When I run the application, I am getting the class cast exception:
[MainApp-consumer-group-b45b436d-1412-494b-9733-f75a61c9b9e3-StreamThread-1] ERROR org.apache.kafka.streams.processor.internals.StreamThread - stream-thread [MainApp-consumer-group-b45b436d-1412-494b-9733-f75a61c9b9e3-StreamThread-1] Encountered the following error during processing:
java.lang.ClassCastException: [B cannot be cast to models.Person
at org.apache.kafka.streams.scala.FunctionsCompatConversions$ValueMapperFromFunction$$anon$6.apply(FunctionsCompatConversions.scala:66)
at org.apache.kafka.streams.kstream.internals.AbstractStream.lambda$withKey$1(AbstractStream.java:103)
at org.apache.kafka.streams.kstream.internals.KStreamMapValues$KStreamMapProcessor.process(KStreamMapValues.java:40)
at org.apache.kafka.streams.processor.internals.ProcessorNode.process(ProcessorNode.java:117)
at org.apache.kafka.streams.processor.internals.ProcessorContextImpl.forward(ProcessorContextImpl.java:201)
I suspect something is going wrong at the deserialization level, but not sure why?
Any pointers will be helpful.
Issue is with you ProducerApp. You set value.serializer to com.thebigscale.serdes.PersonSerializer and then try to send array of bytes. You shouldn't serialize you POJO. Kafka Serializer will do it for you - just sent Person object instance.
Below I've fixed your code with comments
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
props.put("value.serializer", "com.thebigscale.serdes.PersonSerializer")
val producer = new KafkaProducer[String, Person](props) // <-- Instead BYTE_ARRAY -> Person
val person = new Person(4, "user4", 27)
//val personSerializer = new KafkaBytesSerializer[Person]() // remove
//val bytePerson: BYTE_ARRAY = personSerializer.serialize("", person) // remove
val record = new ProducerRecord[String, Person](KafkaConf.INPUT_TOPIC, "key1", person) // instead BYTE_ARRAY -> Person, bytePerson -> person
producer.send(record, new Callback {
override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
if (exception != null ) {
println("Exception thrown by producer: " + exception)
} else {
println("Record sent successfully: " + metadata)
}
}
})

Alpakka - read Kryo-serialized objects from S3

I have Kryo-serialized binary data stored on S3 (thousands of serialized objects).
Alpakka allows to read the content as data: Source[ByteString, NotUsed]. But Kryo format doesn't use delimiters so I can't split each serialized object into a separate ByteString using data.via(Framing.delimiter(...)).
So, Kryo actually needs to read the data to understand when an object ends, and it doesn't look streaming-friendly.
Is it possible to implement this case in streaming fashion so that I get Source[MyObject, NotUsed] in the end of the day?
Here is a graph stage that does that. It handles the case when a serialized object spans two byte strings. It needs to be improved when objects are large (not my use case) and can take more than two byte strings in Source[ByteString, NotUsed].
object KryoReadStage {
def flow[T](kryoSupport: KryoSupport,
`class`: Class[T],
serializer: Serializer[_]): Flow[ByteString, immutable.Seq[T], NotUsed] =
Flow.fromGraph(new KryoReadStage[T](kryoSupport, `class`, serializer))
}
final class KryoReadStage[T](kryoSupport: KryoSupport,
`class`: Class[T],
serializer: Serializer[_])
extends GraphStage[FlowShape[ByteString, immutable.Seq[T]]] {
override def shape: FlowShape[ByteString, immutable.Seq[T]] = FlowShape.of(in, out)
override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = {
new GraphStageLogic(shape) {
setHandler(in, new InHandler {
override def onPush(): Unit = {
val bytes =
if (previousBytes.length == 0) grab(in)
else ByteString.fromArrayUnsafe(previousBytes) ++ grab(in)
Managed(new Input(new ByteBufferBackedInputStream(bytes.asByteBuffer))) { input =>
var position = 0
val acc = ListBuffer[T]()
kryoSupport.withKryo { kryo =>
var last = false
while (!last && !input.eof()) {
tryRead(kryo, input) match {
case Some(t) =>
acc += t
position = input.total().toInt
previousBytes = EmptyArray
case None =>
val bytesLeft = new Array[Byte](bytes.length - position)
val bb = bytes.asByteBuffer
bb.position(position)
bb.get(bytesLeft)
last = true
previousBytes = bytesLeft
}
}
push(out, acc.toList)
}
}
}
private def tryRead(kryo: Kryo, input: Input): Option[T] =
try {
Some(kryo.readObject(input, `class`, serializer))
} catch {
case _: KryoException => None
}
})
setHandler(out, new OutHandler {
override def onPull(): Unit = {
pull(in)
}
})
private val EmptyArray: Array[Byte] = Array.empty
private var previousBytes: Array[Byte] = EmptyArray
}
}
override def toString: String = "KryoReadStage"
private lazy val in: Inlet[ByteString] = Inlet("KryoReadStage.in")
private lazy val out: Outlet[immutable.Seq[T]] = Outlet("KryoReadStage.out")
}
Example usage:
client.download(BucketName, key)
.via(KryoReadStage.flow(kryoSupport, `class`, serializer))
.flatMapConcat(Source(_))
It uses some additional helpers below.
ByteBufferBackedInputStream:
class ByteBufferBackedInputStream(buf: ByteBuffer) extends InputStream {
override def read: Int = {
if (!buf.hasRemaining) -1
else buf.get & 0xFF
}
override def read(bytes: Array[Byte], off: Int, len: Int): Int = {
if (!buf.hasRemaining) -1
else {
val read = Math.min(len, buf.remaining)
buf.get(bytes, off, read)
read
}
}
}
Managed:
object Managed {
type AutoCloseableView[T] = T => AutoCloseable
def apply[T: AutoCloseableView, V](resource: T)(op: T => V): V =
try {
op(resource)
} finally {
resource.close()
}
}
KryoSupport:
trait KryoSupport {
def withKryo[T](f: Kryo => T): T
}
class PooledKryoSupport(serializers: (Class[_], Serializer[_])*) extends KryoSupport {
override def withKryo[T](f: Kryo => T): T = {
pool.run(new KryoCallback[T] {
override def execute(kryo: Kryo): T = f(kryo)
})
}
private val pool = {
val factory = new KryoFactory() {
override def create(): Kryo = {
val kryo = new Kryo
(KryoSupport.ScalaSerializers ++ serializers).foreach {
case ((clazz, serializer)) =>
kryo.register(clazz, serializer)
}
kryo
}
}
new KryoPool.Builder(factory).softReferences().build()
}
}

Create custom Serializer and Deserializer in kafka using scala

I am using kafka_2.10-0.10.0.1 and scala_2.10.3.
I want to write custom Serializer and Deserializer using scala.
I tried with these Serializer (from CustomType) and Deserializer (obtain a CustomType):
class CustomTypeSerializer extends Serializer[CustomType] {
private val gson: Gson = new Gson()
override def configure(configs: util.Map[String, _], isKey: Boolean):
Unit = {
// nothing to do
}
override def serialize(topic: String, data: CustomType): Array[Byte] = {
if (data == null)
null
else
gson.toJson(data).getBytes
}
override def close(): Unit = {
//nothing to do
}
}
class CustomTypeDeserializer extends Deserializer[CustomType] {
private val gson: Gson = new Gson()
override def deserialize(topic: String, bytes: Array[Byte]): CustomType = {
val offerJson = gson.toJson(bytes.toString)
val psType: Type = new TypeToken[CustomType]() {}.getType()
val ps: CustomType = gson.fromJson(offerJson, psType)
ps
}
override def configure(configs: util.Map[String, _], isKey: Boolean):
Unit = {
// nothing to do
}
override def close(): Unit = {
//nothing to do
}
}
But, I got this error:
Exception in thread "main" org.apache.kafka.common.errors.SerializationException: Error deserializing key/value for partition topic_0_1-1 at offset 26
Caused by: com.google.gson.JsonSyntaxException: java.lang.IllegalStateException: Expected BEGIN_OBJECT but was BEGIN_ARRAY at line 1 column 2 path $
at com.google.gson.internal.bind.ReflectiveTypeAdapterFactory$Adapter.read(ReflectiveTypeAdapterFactory.java:224)
at com.google.gson.Gson.fromJson(Gson.java:887)
at com.google.gson.Gson.fromJson(Gson.java:852)
at com.google.gson.Gson.fromJson(Gson.java:801)
at kafka.PSDeserializer.deserialize(PSDeserializer.scala:24)
at kafka.PSDeserializer.deserialize(PSDeserializer.scala:18)
at org.apache.kafka.clients.consumer.internals.Fetcher.parseRecord(Fetcher.java:627)
at org.apache.kafka.clients.consumer.internals.Fetcher.parseFetchedData(Fetcher.java:548)
at org.apache.kafka.clients.consumer.internals.Fetcher.fetchedRecords(Fetcher.java:354)
at org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1000)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:938)
Can you help me please
Find below the custom serializer and deserializer for case class User, User(name:String,id:Int). Replace User in code with your case class. It will work.
import java.io.{ObjectInputStream, ByteArrayInputStream}
import java.util
import org.apache.kafka.common.serialization.{Deserializer, Serializer}
class CustomDeserializer extends Deserializer[User]{
override def configure(configs: util.Map[String,_],isKey: Boolean):Unit = {
}
override def deserialize(topic:String,bytes: Array[Byte]) = {
val byteIn = new ByteArrayInputStream(bytes)
val objIn = new ObjectInputStream(byteIn)
val obj = objIn.readObject().asInstanceOf[User]
byteIn.close()
objIn.close()
obj
}
override def close():Unit = {
}
}
import java.io.{ObjectOutputStream, ByteArrayOutputStream}
import java.util
import org.apache.kafka.common.serialization.Serializer
class CustomSerializer extends Serializer[User]{
override def configure(configs: util.Map[String,_],isKey: Boolean):Unit = {
}
override def serialize(topic:String, data:User):Array[Byte] = {
try {
val byteOut = new ByteArrayOutputStream()
val objOut = new ObjectOutputStream(byteOut)
objOut.writeObject(data)
objOut.close()
byteOut.close()
byteOut.toByteArray
}
catch {
case ex:Exception => throw new Exception(ex.getMessage)
}
}
override def close():Unit = {
}
}

Convert Any type in scala to Array[Byte] and back

I have a variable value declared as Any in my program.
I want to convert this value to Array[Byte].
How can I serialize to Array[Byte] and back? I found examples related to other types such as Double or Int, but not to Any.
This should do what you need. It's pretty similar to how one would do it in Java.
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
object Serialization extends App {
def serialise(value: Any): Array[Byte] = {
val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(stream)
oos.writeObject(value)
oos.close()
stream.toByteArray
}
def deserialise(bytes: Array[Byte]): Any = {
val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
val value = ois.readObject
ois.close()
value
}
println(deserialise(serialise("My Test")))
println(deserialise(serialise(List(1))))
println(deserialise(serialise(Map(1 -> 2))))
println(deserialise(serialise(1)))
}
def anyTypeToByteArray(value: Any): Array[Byte] = {
val valueConverted :Array[Byte] = SerializationUtils.serialize(value.isInstanceOf[Serializable])
valueConverted
}
def ByteArrayToAny(value: Array[Byte]): Any = {
val valueConverted: Any = SerializationUtils.deserialize(value)
valueConverted
}