I've been playing around with Akka-Streams and I'm trying to make a custom Flow by implementing my own PushPullStage. I want the Flow to accumulate the objects it receives from upstream into a list, and group them according to some function before emitting the groups downstream when upstream completes.
It seems like quite a simple thing to implement but I can't figure out how to do it! There doesn't seem to be a way to emit multiple objects from a PushPullStage.
Here is my implementation so far:
// First attempt from the question: accumulate upstream elements into groups and
// try to emit every group at once when upstream has finished.
// NOTE: this version is the one that does not work; it is kept as posted.
class Accumulate[A] extends PushPullStage[A, List[A]] {
  // Groups accumulated so far.
  private var groups: List[List[A]] = Nil
  // Grouping logic elided by the author; returns the updated groups after adding `x`.
  private def group(x: A): List[List[A]] = ...
  // Every incoming element is folded into `groups`, then more input is requested.
  override def onPush(elem: A, ctx: Context[A]): SyncDirective = {
    groups = group(elem)
    ctx.pull()
  }
  override def onPull(ctx: Context[A]): SyncDirective =
    if (ctx.isFinishing) {
      // Calls push once per group inside a single onPull, i.e. it tries to emit
      // more elements than the one that downstream asked for.
      for(group <- groups)
        ctx.push(group) // this doesn't work
      ctx.finish()
    } else {
      ctx.pull()
    }
  // Keep the stage alive after upstream completion so onPull can still run.
  override def onUpstreamFinish(ctx: Context[A]): TerminationDirective =
    ctx.absorbTermination()
}
}
EDIT
I changed the code to account for backpressure and it's all working now. Basically I just needed to let the downstream Flows do what they're meant to do and keep pulling elements:
/**
 * Accumulates every upstream element into groups and, once upstream has
 * completed, emits the groups downstream one at a time (one group per pull),
 * so that downstream back pressure is respected.
 *
 * All groups are held in memory until upstream completes.
 */
class Accumulate[A] extends PushPullStage[A, List[A]] {
  // Groups built so far; drained from the head once upstream has finished.
  private var groups: List[List[A]] = Nil

  // Returns the updated groups after adding `x`.
  // The grouping logic was elided in the original post.
  private def group(x: A): List[List[A]] = ???

  // The stage emits List[A], so its context is a Context[List[A]].
  override def onPush(elem: A, ctx: Context[List[A]]): SyncDirective = {
    groups = group(elem)
    ctx.pull()
  }

  override def onPull(ctx: Context[List[A]]): SyncDirective =
    if (ctx.isFinishing) {
      // Upstream is done: hand out exactly one buffered group per pull.
      groups match {
        case Nil => ctx.finish()
        case head :: tail =>
          groups = tail
          ctx.push(head)
      }
    } else {
      ctx.pull()
    }

  // Keep the stage alive after upstream completion so the buffer can be drained.
  override def onUpstreamFinish(ctx: Context[List[A]]): TerminationDirective =
    ctx.absorbTermination()
}
You can't push more than was asked for, since that would violate back pressure.
Also, worth noting is that I wouldn't recommend what you are attempting to do as this will blow up with an OutOfMemoryError for large or unbounded streams.
/**
 * Buffers all upstream elements as groups and replays them downstream after
 * upstream completion, one group for each downstream pull.
 *
 * Because every element is retained until completion, memory use grows with
 * the stream; large or unbounded streams can exhaust the heap.
 */
class Accumulate[A] extends PushPullStage[A, List[A]] {
  // Pending groups; the head is the next one to be emitted.
  private var groups: List[List[A]] = Nil

  // Folds `x` into the current groups (implementation elided in the original post).
  private def group(x: A): List[List[A]] = ???

  // Output type is List[A], hence Context[List[A]].
  override def onPush(elem: A, ctx: Context[List[A]]): SyncDirective = {
    groups = group(elem)
    ctx.pull()
  }

  override def onPull(ctx: Context[List[A]]): SyncDirective =
    if (!ctx.isFinishing) {
      // Still collecting: forward the demand upstream.
      ctx.pull()
    } else {
      // Draining: never push more than the single element that was asked for.
      groups match {
        case next :: remaining =>
          groups = remaining
          ctx.push(next)
        case Nil =>
          ctx.finish()
      }
    }

  // Absorb completion so that buffered groups can still be emitted.
  override def onUpstreamFinish(ctx: Context[List[A]]): TerminationDirective =
    ctx.absorbTermination()
}
Related
So I recently came across a coding technique in Scala called factory pattern, and I would like some help. Since in the example I saw, all the private classes have the same methods. My question is would it be possible to make it so that the private classes have unique methods?
/** A bookable car category. */
abstract class Car {
  /** Price for booking one car of this category. */
  def bookingPrice: Double

  /** Brands offered in this category. */
  def Brands: List[String]

  /** Number of cars that can still be booked. */
  def availability: Int

  /** Books `noOfCars` cars, reducing [[availability]] by that amount. */
  def book(noOfCars: Int): Unit
}

/** Factory for the concrete car categories. */
object Car {
  val STANDARD = 0
  val DELUXE = 1
  val LUXURY = 2

  private class standardCar extends Car {
    private var _availability = 100
    override def bookingPrice: Double = 200000
    override def Brands: List[String] = List("Maruti", "Tata", "Hyundai")
    override def availability: Int = _availability
    override def book(noOfCars: Int): Unit = {
      _availability = _availability - noOfCars
    }
  }

  private class DeluxeCar extends Car {
    private var _availability = 50
    override def bookingPrice: Double = 500000
    override def Brands: List[String] = List("Honda", "Mahindra", "Chevrolet")
    override def availability: Int = _availability
    override def book(noOfCars: Int): Unit = {
      _availability = _availability - noOfCars
    }
    //enter unique method like def openSlidingDoors(){}
  }

  private class LuxuryCar extends Car {
    private var _availability = 5
    override def bookingPrice: Double = 900000
    override def Brands: List[String] = List("Audi", "BMW", "Mercedes")
    override def availability: Int = _availability
    override def book(noOfCars: Int): Unit = {
      _availability = _availability - noOfCars
    }
    //enter unique method like def enableFlight(){}
  }

  /**
   * Creates the car for the given category constant ([[STANDARD]], [[DELUXE]]
   * or [[LUXURY]]). Any other value falls back to a standard car.
   */
  def apply(carType: Int): Car =
    carType match {
      // Match on the named constants so the factory agrees with the values
      // callers are told to pass.
      case LUXURY => new LuxuryCar()
      case DELUXE => new DeluxeCar()
      case _      => new standardCar()
    }
}
/** Entry point: builds a car through the `Car` factory and prints its details. */
object Main {
  // Explicit `: Unit =` replaces the deprecated procedure syntax.
  def main(args: Array[String]): Unit = {
    val s = Car.apply(1)
    println(s.bookingPrice)
    println(s.availability)
    println(s.getClass)
  }
}
I tried to just slot it into the private classes and it (obviously) did not work since the methods are not part of the parent abstract class. Is there any workaround for this?
The concrete Car classes don't need to be private to the Car object. In fact it is better if they are not, because you can then match on the particular car that was returned to gain access to methods specific to that class (which is what you want to do).
/** Contract shared by every bookable car category. */
trait Car {
  def bookingPrice: Double
  def Brands: List[String]
  def availability: Int
  def book(noOfCars: Int): Unit
}

/** Entry-level category. */
case class StandardCar() extends Car {
  private var remaining = 100
  override def bookingPrice: Double = 200000
  override def Brands: List[String] = List("Maruti", "Tata", "Hyundai")
  override def availability: Int = remaining
  override def book(noOfCars: Int): Unit = remaining -= noOfCars
}

/** Mid-range category; additionally exposes `openSlidingDoors`. */
case class DeluxeCar() extends Car {
  private var remaining = 50
  override def bookingPrice: Double = 500000
  override def Brands: List[String] = List("Honda", "Mahindra", "Chevrolet")
  override def availability: Int = remaining
  override def book(noOfCars: Int): Unit = remaining -= noOfCars
  def openSlidingDoors(): Unit = println("Open doors")
}

/** Top category; additionally exposes `enableFlight` (not implemented yet). */
case class LuxuryCar() extends Car {
  private var remaining = 5
  override def bookingPrice: Double = 900000
  override def Brands: List[String] = List("Audi", "BMW", "Mercedes")
  override def availability: Int = remaining
  override def book(noOfCars: Int): Unit = remaining -= noOfCars
  def enableFlight(): Nothing = ???
}

/** Factory selecting the concrete car from a category constant. */
object Car {
  val STANDARD = 0
  val DELUXE = 1
  val LUXURY = 2

  /** Returns the car for `carType`; unknown values yield a standard car. */
  def apply(carType: Int): Car =
    carType match {
      case DELUXE => DeluxeCar()
      case LUXURY => LuxuryCar()
      case _      => StandardCar() // covers STANDARD and every unknown value
    }
}
// Create a car through the factory, print its details, then call a
// subtype-specific method when the concrete type provides one.
val s = Car(1)
println(s.bookingPrice)
println(s.availability)
println(s.getClass)
s match {
  case deluxe: DeluxeCar => deluxe.openSlidingDoors()
  case _                 => ()
}
Using var like this is pretty ugly so prefer to create a new Car each time when updating availability, or store the availability somewhere else in the system.
I have an array of Any (in real life, it's a Spark Row, but it's sufficient to isolate the problem)
/** Minimal stand-in for a Spark `Row`: an untyped, mutable buffer of values. */
object Row {
  val buffer: Array[Any] = Array[Any](42, 21, true)
}
And I want to apply some operations on its elements.
So, I've defined a simple ADT to define a compute operation on a type A
/** An operation on values of type `A` that are stored in an untyped buffer. */
trait Op[A] {
  /** Unchecked cast of a raw buffer value to `A`. */
  def cast(a: Any): A = a.asInstanceOf[A]

  /** Applies the operation to an already typed value. */
  def compute(a: A): A
}

/** Increments a counter. */
case object Count extends Op[Int] {
  override def compute(a: Int): Int = 1 + a
}

/** Returns the flag unchanged. */
case object Exist extends Op[Boolean] {
  override def compute(a: Boolean): Boolean = a
}
Given that I have a list of all operations and I know which operation is to apply to each element, let's use these operations.
// Applies an operation to each buffer slot: ops(0) to slots 0 and 1, ops(1) to slot 2.
// NOTE: as posted this object does not compile; it is the code the question is about.
object GenericsOp {
  import Row._
  // No element type is given, so it is inferred from Count and Exist together.
  // Presumably this is why the Op type argument is unknown (`_$1`) at the call sites below.
  val ops = Seq(Count, Exist)
  def compute() = {
    // Each line casts the raw slot with the op's own `cast` and feeds it to `compute`.
    buffer(0) = ops(0).compute(ops(0).cast(buffer(0)))
    buffer(1) = ops(0).compute(ops(0).cast(buffer(1)))
    buffer(2) = ops(1).compute(ops(1).cast(buffer(2)))
  }
}
By design, for a given op, types are aligned between cast and compute. But unfortunately the code above does not compile. The error is
Type mismatch, expected: _$1, actual: AnyVal
Is there a way to make it work ?
I've found a workaround by using abstract type member instead of type parameter.
/**
 * Variant of the operations using an abstract type member instead of a type
 * parameter, so that a value can be cast to the path-dependent type `op.A`.
 */
object AbstractOp extends App {
  import Row._

  /** An operation on values of its own member type `A`. */
  trait Op {
    type A
    def compute(a: A): A
  }

  case object Count extends Op {
    type A = Int
    override def compute(a: Int): Int = a + 1
  }

  case object Exist extends Op {
    type A = Boolean
    override def compute(a: Boolean): Boolean = a
  }

  val ops = Seq(Count, Exist)

  /** Applies `ops(0)` to buffer slots 0 and 1 and `ops(1)` to slot 2, in place. */
  def compute(): Unit = {
    // Bind each op to a stable identifier: `op0.A` is only meaningful relative
    // to `op0`, so the call must go through the same path as the cast.
    val op0 = ops(0)
    val op1 = ops(1)
    buffer(0) = op0.compute(buffer(0).asInstanceOf[op0.A])
    buffer(1) = op0.compute(buffer(1).asInstanceOf[op0.A])
    buffer(2) = op1.compute(buffer(2).asInstanceOf[op1.A])
  }
}
Is there a better way ?
It seems that your code can be simplified by making Op[A] extend Any => A:
/**
 * An operation on values of type `A` that can also be used directly as a
 * function from an untyped value to `A`.
 */
trait Op[A] extends (Any => A) {
  /** Unchecked cast of a raw value to `A`. */
  def cast(a: Any): A = a.asInstanceOf[A]

  /** Applies the operation to a typed value. */
  def compute(a: A): A

  /** Casts the raw value, then computes on it. */
  def apply(a: Any): A = {
    val typed = cast(a)
    compute(typed)
  }
}

case object Count extends Op[Int] {
  override def compute(a: Int): Int = 1 + a
}

case object Exist extends Op[Boolean] {
  override def compute(a: Boolean): Boolean = a
}
/** Applies to every buffer slot the operation stored at the same index. */
object AbstractOp {
  val buffer: Array[Any] = Array(42, 21, true)
  val ops: Array[Op[_]] = Array(Count, Count, Exist)

  def main(args: Array[String]): Unit = {
    // Each op is itself a function Any => A, so it can be applied to the raw slot.
    buffer.indices.foreach { slot =>
      buffer(slot) = ops(slot)(buffer(slot))
    }
    println(buffer.mkString("[", ",", "]"))
  }
}
Since it's asInstanceOf everywhere anyway, it does not make the code any less safe than what you had previously.
Update
If you cannot change the Op interface, then invoking cast and compute is a bit more cumbersome, but still possible:
/** Operation over values of type `A` kept in an untyped container. */
trait Op[A] {
  /** Unchecked downcast from `Any` to `A`. */
  def cast(a: Any): A = a.asInstanceOf[A]

  /** Runs the operation on a typed value. */
  def compute(a: A): A
}

/** Adds one to the given count. */
case object Count extends Op[Int] {
  override def compute(a: Int): Int = 1 + a
}

/** Identity on booleans. */
case object Exist extends Op[Boolean] {
  override def compute(a: Boolean): Boolean = a
}
/** Applies to every buffer slot the operation stored at the same index. */
object AbstractOp {
  val buffer: Array[Any] = Array(42, 21, true)
  val ops: Array[Op[_]] = Array(Count, Count, Exist)

  def main(args: Array[String]): Unit = {
    buffer.indices.foreach { slot =>
      // The type pattern `Op[t]` names the unknown element type, so the result
      // of `cast` is known to be what `compute` accepts.
      buffer(slot) = ops(slot) match {
        case op: Op[t] => op.compute(op.cast(buffer(slot)))
      }
    }
    println(buffer.mkString("[", ",", "]"))
  }
}
Note the ops(i) match { case op: Op[t] => ... } part with a type-parameter in the pattern: this allows us to make sure that cast returns a t that is accepted by compute.
As a more general solution than Andrey Tyukin's, you can define the method outside Op, so it works even if Op can't be modified:
// Stand-alone helper: the type parameter A ties the result of `cast` to the
// argument of `compute`, without requiring any change to Op itself.
def apply[A](op: Op[A], x: Any): A = {
  val typed = op.cast(x)
  op.compute(typed)
}
buffer(0) = apply(ops(0), buffer(0))
Say I have this situation
// Question's sketch of a pipe section; `in` is the value the author does not
// yet know how to supply.
class Pipe {
  var vel = 3.4
  var V = 300
  var a = 10.2
  var in = ??? // placeholder for the inflow that should come from another pipe
  // NOTE: TotV and out are initialised once, from the field values at construction time.
  var TotV = V+in
  var out = TotV*a/vel
}
// Two independent instances that the author wants to chain together.
val pipe1 = new Pipe
val pipe2 = new Pipe
The in variable is where my problem is. What I'd like to do is take the out variable from pipe1 and feed it in as the in variable for pipe2, effectively joining the two pipes, but I can't figure out if this is even possible within the same class. I can do it manually, but I need to know if it's possible to do inside the class.
// Manual wiring: copy the first pipe's outflow into the second pipe's inflow.
pipe2.in = pipe1.out
My attempted fix was to add an ID field and then use it to reference an instance with a higher ID, but that doesn't seem doable, i.e.
// Question's attempted fix: look up the neighbouring pipe by id.
// NOTE: this is the non-working attempt and is kept as posted.
class Pipe(id:Int) {
  var vel = 3.4
  var V = 300
  var a = 10.2
  // NOTE(review): `Pipe(id+1)` needs a companion `apply` that is not shown here; if it
  // constructs a new Pipe, every instance would build the next one with no visible end.
  var in = Pipe(id+1).out //this is the sticking point, I want to reference instances of this class and use their out value as in value for instances with a lower ID
  var TotV = V+in
  var out = TotV*a/vel
}
any help would be appreciated
You can do this by defining a companion object for the class and passing in the upstream pipe as an optional parameter to the factory method, then extracting its out value and passing it to the class constructor, as follows:
/**
 * Factory that wires a new pipe to an optional upstream pipe.
 *
 * Assumes `Pipe` has a constructor taking the inflow value (not shown here).
 */
object Pipe {
  /**
   * Creates a pipe whose inflow is the upstream pipe's `out` value, or the
   * default `0` when there is no upstream pipe.
   */
  def apply(upstreamPipe: Option[Pipe]): Pipe = {
    val inValue = upstreamPipe match {
      case Some(pipe) => pipe.out
      case None       => 0 // or whatever your default value is
    }
    new Pipe(inValue)
  }
}
You would then call
// pipe1 has no upstream pipe; pipe2 is created from pipe1 as its upstream.
val pipe1 = Pipe(None)
val pipe2 = Pipe(Some(pipe1))
Unfortunately your question is not entirely clear. Under certain assumptions, what you describe looks like what is now called "FRP", aka "Functional Reactive Programming". If you want to do it in a serious way, you should probably take a look at a mature library such as RxScala or Monix, which handle many details that are important in the real world, such as error handling, scheduling/threading and many others.
For a simple task you might roll out a simple custom implementation like this:
/** Something that subscribers can register with; registering yields a connection handle. */
trait Observable {
  def subscribe(subscriber: Subscriber): RxConnection
}
/** Handle returned by `subscribe`, used to undo the subscription. */
trait RxConnection {
  def disconnect(): Unit
}
/** Callback notified when an observed source has changed; the new value is not passed along. */
trait Subscriber {
  def onChanged(): Unit
}
/** An observable output that also exposes its current value, if it has one. */
trait RxOut[T] extends Observable {
  def currentValue: Option[T]
}
/** Fans a single change notification out to every registered subscriber. */
class MulticastObservable extends Observable with Subscriber {
  private val subscribers: mutable.Set[Subscriber] = mutable.HashSet()

  /** Notifies all currently registered subscribers. */
  override def onChanged(): Unit =
    for (listener <- subscribers) listener.onChanged()

  /** Registers `subscriber`; disconnecting the returned handle removes it again. */
  override def subscribe(subscriber: Subscriber): RxConnection = {
    subscribers += subscriber
    new RxConnection {
      override def disconnect(): Unit = subscribers -= subscriber
    }
  }
}
/**
 * Base implementation of [[RxOut]]: stores the latest value and notifies
 * subscribers every time that value is assigned.
 */
abstract class BaseRxOut[T](private var _lastValue: Option[T]) extends RxOut[T] {
  private val listeners = new MulticastObservable()

  override def subscribe(subscriber: Subscriber): RxConnection =
    listeners.subscribe(subscriber)

  override def currentValue: Option[T] = lastValue

  protected def lastValue: Option[T] = _lastValue

  // Assigning through this setter is what triggers the notification.
  protected def lastValue_=(value: Option[T]): Unit = {
    _lastValue = value
    listeners.onChanged()
  }
}
/** A mutable, observable value that always holds something. */
class RxValue[T](initValue: T) extends BaseRxOut[T](Some(initValue)) {
  /** Current value; it is always defined because only `Some` is ever stored. */
  def value: T = lastValue.get

  /** Stores `value` and notifies subscribers. */
  def value_=(value: T): Unit = {
    lastValue = Some(value)
  }
}
/** An input slot to which an [[RxOut]] source can be connected. */
trait InputConnector[T] {
  /** Connects `input` to this slot and returns a handle for the connection. */
  def connectInput(input: RxOut[T]): RxConnection
}
/**
 * Input slot that mirrors the current value of whichever source is connected
 * to it, and has no value while nothing is connected.
 */
class InputConnectorImpl[T] extends BaseRxOut[T](None) with InputConnector[T] {
  // The connected source together with the subscription made on it, if any.
  val inputHolder = new RxValue[Option[(RxOut[T], RxConnection)]](None)

  // Re-reads the value from the connected source (None when disconnected).
  private def updateValue(): Unit = {
    lastValue = inputHolder.value.flatMap { case (source, _) => source.currentValue }
  }

  override def connectInput(input: RxOut[T]): RxConnection =
    inputHolder.value match {
      // Already connected to this very source: hand back the existing subscription.
      case Some((source, subscription)) if source == input =>
        subscription
      case previous =>
        // Drop whatever was connected before, then subscribe to the new source.
        previous.foreach { case (_, oldSubscription) => oldSubscription.disconnect() }
        val subscription = input.subscribe(() => this.updateValue())
        inputHolder.value = Some((input, subscription))
        updateValue()
        new RxConnection {
          override def disconnect(): Unit = {
            // Only act if this source is still the connected one.
            if (inputHolder.value.exists { case (source, _) => source == input }) {
              inputHolder.value.foreach { case (_, active) => active.disconnect() }
              inputHolder.value = None
              updateValue()
            }
          }
        }
    }
}
/**
 * Base for calculations whose output is recomputed whenever one of the
 * registered input connectors reports a change.
 */
abstract class BaseRxCalculation[Out] extends BaseRxOut[Out](None) {
  /** Computes the output from the current inputs; None when it cannot be computed. */
  protected def calculateOutput(): Option[Out]

  /** Subscribes to every connector so that any change triggers a recalculation. */
  protected def registerConnectors(connectors: InputConnectorImpl[_]*): Unit =
    connectors.foreach { connector =>
      connector.subscribe(() => this.recalculate())
    }

  // Publishes the new output only when it differs from the stored one.
  private def recalculate(): Unit = {
    val recomputed = calculateOutput()
    if (recomputed != lastValue) lastValue = recomputed
  }
}
/** Calculation with one input: the output is `func` applied to the input's value. */
case class RxCalculation1[In1, Out](func: In1 => Out) extends BaseRxCalculation[Out] {
  private val conn1Impl = new InputConnectorImpl[In1]

  /** The input slot, exposed only through the narrower [[InputConnector]] interface. */
  def conn1: InputConnector[In1] = conn1Impl

  registerConnectors(conn1Impl)

  // Defined only while the input currently has a value.
  override protected def calculateOutput(): Option[Out] =
    conn1Impl.currentValue.map(func)
}
/** Calculation with two inputs: the output is `func` applied to both input values. */
case class RxCalculation2[In1, In2, Out](func: (In1, In2) => Out) extends BaseRxCalculation[Out] {
  private val conn1Impl = new InputConnectorImpl[In1]
  private val conn2Impl = new InputConnectorImpl[In2]

  /** First input slot, exposed only as an [[InputConnector]]. */
  def conn1: InputConnector[In1] = conn1Impl

  /** Second input slot, exposed only as an [[InputConnector]]. */
  def conn2: InputConnector[In2] = conn2Impl

  registerConnectors(conn1Impl, conn2Impl)

  // Defined only while both inputs currently have a value.
  override protected def calculateOutput(): Option[Out] =
    conn1Impl.currentValue.flatMap { first =>
      conn2Impl.currentValue.map(second => func(first, second))
    }
}
// add more RxCalculationN if needed
And you can use it like this:
/**
 * Demonstrates connecting a value to a one-input calculation, updating the
 * value, and disconnecting again, printing the calculation's output each time.
 */
def test(): Unit = {
  val pipe = new RxCalculation1((in: Double) => {
    println(s"in = $in")
    val vel = 3.4
    val V = 300
    val a = 10.2
    val TotV = V + in
    TotV * a / vel
  })
  val source = new RxValue(2.0)

  println(pipe.currentValue) // nothing connected yet
  val link = pipe.conn1.connectInput(source)
  println(pipe.currentValue) // computed from the connected value
  source.value = 3.0
  println(pipe.currentValue) // recomputed after the update
  link.disconnect()
  println(pipe.currentValue) // input gone again
}
which prints
None
in = 2.0
Some(905.9999999999999)
in = 3.0
Some(909.0)
None
Here your "pipe" is RxCalculation1 (or other RxCalculationN) which wraps a function and you can "connect" and "disconnect" other "pipes" or just "values" to various inputs and start a chain of updates.
I have Kryo-serialized binary data stored on S3 (thousands of serialized objects).
Alpakka allows to read the content as data: Source[ByteString, NotUsed]. But Kryo format doesn't use delimiters so I can't split each serialized object into a separate ByteString using data.via(Framing.delimiter(...)).
So, Kryo actually needs to read the data to understand when an object ends, and it doesn't look streaming-friendly.
Is it possible to implement this case in streaming fashion so that I get Source[MyObject, NotUsed] in the end of the day?
Here is a graph stage that does that. It handles the case when a serialized object spans two byte strings. It needs to be improved when objects are large (not my use case) and can take more than two byte strings in Source[ByteString, NotUsed].
/** Factory for the Kryo decoding stage wrapped as a `Flow`. */
object KryoReadStage {
  /**
   * Builds a flow that turns incoming byte chunks into batches of decoded
   * objects of type `T`.
   */
  def flow[T](kryoSupport: KryoSupport,
              `class`: Class[T],
              serializer: Serializer[_]): Flow[ByteString, immutable.Seq[T], NotUsed] = {
    val stage = new KryoReadStage[T](kryoSupport, `class`, serializer)
    Flow.fromGraph(stage)
  }
}
/**
 * Graph stage that decodes Kryo-serialized objects of type `T` from a stream of
 * `ByteString` chunks. For every incoming chunk it emits the list of objects it
 * managed to read; the bytes after the last successfully read object are kept
 * and prepended to the next chunk.
 */
final class KryoReadStage[T](kryoSupport: KryoSupport,
                             `class`: Class[T],
                             serializer: Serializer[_])
  extends GraphStage[FlowShape[ByteString, immutable.Seq[T]]] {
  // `in` and `out` are lazy vals declared at the bottom of the class.
  override def shape: FlowShape[ByteString, immutable.Seq[T]] = FlowShape.of(in, out)
  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = {
    new GraphStageLogic(shape) {
      setHandler(in, new InHandler {
        override def onPush(): Unit = {
          // Prepend the leftover bytes of the previous chunk, if there are any.
          val bytes =
            if (previousBytes.length == 0) grab(in)
            else ByteString.fromArrayUnsafe(previousBytes) ++ grab(in)
          Managed(new Input(new ByteBufferBackedInputStream(bytes.asByteBuffer))) { input =>
            // Offset in `bytes` just after the last object that was read successfully.
            var position = 0
            val acc = ListBuffer[T]()
            kryoSupport.withKryo { kryo =>
              // Set once a read fails, which ends the loop for this chunk.
              var last = false
              while (!last && !input.eof()) {
                tryRead(kryo, input) match {
                  case Some(t) =>
                    acc += t
                    position = input.total().toInt
                    // Everything up to `position` is consumed, so nothing is left over so far.
                    previousBytes = EmptyArray
                  case None =>
                    // Keep the bytes from `position` to the end for the next chunk.
                    val bytesLeft = new Array[Byte](bytes.length - position)
                    val bb = bytes.asByteBuffer
                    bb.position(position)
                    bb.get(bytesLeft)
                    last = true
                    previousBytes = bytesLeft
                }
              }
              // Always pushes, even when no object was completed (empty list).
              push(out, acc.toList)
            }
          }
        }
        // Reads one object; any KryoException is reported as None.
        // NOTE(review): this presumably targets "not enough bytes yet", but it also hides
        // every other Kryo failure - confirm that this is intended.
        private def tryRead(kryo: Kryo, input: Input): Option[T] =
          try {
            Some(kryo.readObject(input, `class`, serializer))
          } catch {
            case _: KryoException => None
          }
      })
      setHandler(out, new OutHandler {
        // Downstream demand is forwarded upstream one chunk at a time.
        override def onPull(): Unit = {
          pull(in)
        }
      })
      // NOTE(review): no upstream-finish handling is visible here, so bytes still held in
      // `previousBytes` at completion appear to be dropped - confirm.
      private val EmptyArray: Array[Byte] = Array.empty
      // Undecoded bytes carried over from the previous chunk.
      private var previousBytes: Array[Byte] = EmptyArray
    }
  }
  override def toString: String = "KryoReadStage"
  private lazy val in: Inlet[ByteString] = Inlet("KryoReadStage.in")
  private lazy val out: Outlet[immutable.Seq[T]] = Outlet("KryoReadStage.out")
}
Example usage:
// Download the bytes, decode them into batches, then flatten each batch into single elements.
client.download(BucketName, key)
  .via(KryoReadStage.flow(kryoSupport, `class`, serializer))
  .flatMapConcat(Source(_))
It uses some additional helpers below.
ByteBufferBackedInputStream:
/**
 * `InputStream` view over the remaining bytes of a `ByteBuffer`.
 * Reading advances the buffer's position.
 *
 * @param buf the buffer to read from
 */
class ByteBufferBackedInputStream(buf: ByteBuffer) extends InputStream {
  /** Returns the next byte as a value in 0..255, or -1 when the buffer is exhausted. */
  override def read(): Int =
    if (buf.hasRemaining) buf.get & 0xFF else -1

  /**
   * Copies up to `len` bytes into `bytes` starting at `off`.
   *
   * @return the number of bytes copied, 0 when `len` is 0, or -1 when the
   *         buffer is exhausted
   * @throws IndexOutOfBoundsException if `off`/`len` do not describe a valid
   *                                   range of `bytes`
   */
  override def read(bytes: Array[Byte], off: Int, len: Int): Int =
    if (off < 0 || len < 0 || len > bytes.length - off)
      throw new IndexOutOfBoundsException(s"off=$off, len=$len, length=${bytes.length}")
    else if (len == 0) 0 // the InputStream contract requires 0 here, even at end of stream
    else if (!buf.hasRemaining) -1
    else {
      val count = math.min(len, buf.remaining)
      buf.get(bytes, off, count)
      count
    }
}
Managed:
/** Loan pattern: runs an operation on a resource and always closes the resource afterwards. */
object Managed {
  /** Evidence that a `T` can be viewed as an `AutoCloseable`. */
  type AutoCloseableView[T] = T => AutoCloseable

  /**
   * Applies `op` to `resource` and closes the resource afterwards.
   *
   * If `op` throws, that exception is the one propagated; a non-fatal failure
   * of `close()` is then attached to it as a suppressed exception instead of
   * replacing it. If only `close()` fails, its exception is propagated.
   *
   * @param resource the resource to use and close
   * @param op       the operation to run on the resource
   * @return the result of `op`
   */
  def apply[T: AutoCloseableView, V](resource: T)(op: T => V): V = {
    import scala.util.control.NonFatal

    // Remembers the failure of `op`, if any, so that `finally` can tell the two cases apart.
    var failure: Throwable = null
    try op(resource)
    catch {
      case t: Throwable =>
        failure = t
        throw t
    } finally {
      def close(): Unit = implicitly[AutoCloseableView[T]].apply(resource).close()
      if (failure eq null) close()
      else
        try close()
        catch {
          // Keep the original failure as the primary exception.
          case NonFatal(closeFailure) if closeFailure ne failure =>
            failure.addSuppressed(closeFailure)
        }
    }
  }
}
KryoSupport:
/** Provides access to a `Kryo` instance for the duration of a function call. */
trait KryoSupport {
  /** Runs `f` with a `Kryo` instance and returns its result. */
  def withKryo[T](f: Kryo => T): T
}
/**
 * [[KryoSupport]] backed by a pool of `Kryo` instances. Every instance created
 * for the pool has `KryoSupport.ScalaSerializers` plus the given `serializers`
 * registered.
 */
class PooledKryoSupport(serializers: (Class[_], Serializer[_])*) extends KryoSupport {
  /** Runs `f` with a `Kryo` instance supplied by the pool. */
  override def withKryo[T](f: Kryo => T): T = {
    val callback = new KryoCallback[T] {
      override def execute(kryo: Kryo): T = f(kryo)
    }
    pool.run(callback)
  }

  private val pool = {
    val factory = new KryoFactory() {
      // Called by the pool whenever it needs a fresh instance.
      override def create(): Kryo = {
        val instance = new Kryo
        val registrations = KryoSupport.ScalaSerializers ++ serializers
        registrations.foreach {
          case ((clazz, serializer)) =>
            instance.register(clazz, serializer)
        }
        instance
      }
    }
    new KryoPool.Builder(factory).softReferences().build()
  }
}
I want something like this:
// Wished-for API, not working code: `mutable.Option` is the hypothetical type being asked about.
private val cachedResponse = mutable.Option.empty[A]
// Return the cached model, loading and caching it on first access.
def get: A = cachedResponse getOrElseUpdate db.findModel()
// Update the model and invalidate the cache.
def update: Unit = {
  db.updateModel
  cachedResponse.empty() // set it to None/Option.empty
}
I am not looking for a generic HashMap based memoization like this. I tried implementing it using a var Option[A] but it did not look very idiomatic to me:
// Cached model; None means "not loaded yet" or "invalidated by update".
private var cachedResponse: Option[A] = None

/** Returns the cached model, loading it from `db` and caching it on a miss. */
def get: A = cachedResponse.getOrElse {
  val model = db.findModel()
  // Option(...) leaves the cache empty for a null result, which is then simply
  // returned instead of failing on `None.get`.
  cachedResponse = Option(model)
  model
}

/** Updates the model in `db` and invalidates the cache. */
def update: Unit = {
  db.updateModel
  cachedResponse = None
}
There isn't one built into the standard library.
Using a var containing an immutable Option is the idiomatic way to do it (assuming you can't rewrite this to not use state at all).
Otherwise, you should build your own. Here's the core of an implementation:
/**
 * A mutable counterpart of `Option`: a cell that is either empty or holds one
 * value and can be filled, transformed and cleared in place.
 *
 * Not thread-safe.
 */
class MutableOpt[A] {
  private[this] var myValue: A = _
  private[this] var loaded = false

  // Lets `equals` compare against another instance's object-private value.
  private def valueEquals(o: Any) = myValue == o

  /** Returns the held value.
   *  @throws NoSuchElementException if the cell is empty */
  def get: A = if (loaded) myValue else throw new NoSuchElementException("MutableOpt")

  /** Stores `a`, replacing any previous value. */
  def set(a: A): this.type = { loaded = true; myValue = a; this }

  /** Returns the held value, first storing `a` if the cell is empty.
   *  `a` is evaluated only when the cell is empty. */
  def getOrSet(a: => A): A = {
    if (!loaded) {
      myValue = a
      loaded = true
    }
    myValue
  }

  def isEmpty: Boolean = !loaded
  def nonEmpty: Boolean = loaded

  /** Applies `f` to the held value, if there is one. */
  def foreach[U](f: A => U): Unit = if (loaded) f(myValue)

  /** Replaces the held value with `f(value)`; does nothing when empty. */
  def transform(f: A => A): this.type = { if (loaded) myValue = f(myValue); this }

  /** Empties the cell and drops the reference so the old value can be collected. */
  def clear(): this.type = {
    loaded = false
    myValue = null.asInstanceOf[A]
    this
  }

  def toOption: Option[A] = if (loaded) Some(myValue) else None

  // String interpolation renders a held null as "null" instead of throwing.
  override def toString: String = if (loaded) s"MutableOpt($myValue)" else "MutableOpt()"

  // `##` is null-safe and agrees with `==`, which `equals` uses
  // (e.g. 1 == 1.0 and 1.## == 1.0.##).
  override def hashCode: Int = if (loaded) myValue.## else 1751

  override def equals(o: Any): Boolean = o match {
    case m: MutableOpt[_] =>
      (isEmpty && m.isEmpty) || (nonEmpty && m.nonEmpty && m.valueEquals(myValue))
    case _ => false
  }
}

object MutableOpt {
  /** Creates a cell holding the option's value, or an empty cell for `None`. */
  def from[A](o: Option[A]): MutableOpt[A] = {
    val m = new MutableOpt[A]
    o.foreach(a => m.set(a))
    m
  }
}
Define together with :paste if using the REPL.