Akka HTTP - max-open-requests and substreams? - scala

I'm writing an app using Scala 2.13 with Akka HTTP 10.2.4 and Akka Stream 2.6.15. I'm trying to query a web service in a parallel manner, like so:
package com.example
import akka.actor.typed.scaladsl.ActorContext
import akka.http.scaladsl.Http
import akka.http.scaladsl.client.RequestBuilding.Get
import akka.http.scaladsl.model.HttpResponse
import akka.http.scaladsl.unmarshalling.Unmarshal
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import akka.stream.scaladsl.{Flow, JsonFraming, Sink, Source}
import spray.json.DefaultJsonProtocol
import spray.json.DefaultJsonProtocol.jsonFormat2
import scala.util.Try
case class ClientStockPortfolio(id: Long, symbol: String)
case class StockTicker(symbol: String, price: Double)

trait SprayFormat extends SprayJsonSupport with DefaultJsonProtocol {
  implicit val stockTickerFormat = jsonFormat2(StockTicker)
}

class StockTrader(context: ActorContext[_]) extends SprayFormat {

  implicit val system = context.system.classicSystem

  val httpPool = Http().superPool[Seq[ClientStockPortfolio]]()

  def collectPrices() = {
    val src = Source(Seq(
      ClientStockPortfolio(1, "GOOG"),
      ClientStockPortfolio(2, "AMZN"),
      ClientStockPortfolio(3, "MSFT")
    ))

    val graph = src
      .groupBy(8, _.id % 8)
      .via(createPost)
      .via(httpPool)
      .via(decodeTicker)
      .mergeSubstreamsWithParallelism(8)
      .to(
        Sink.fold(0.0) { (totalPrice, ticker) =>
          insertIntoDatabase(ticker)
          totalPrice + ticker.price
        }
      )

    graph.run()
  }

  def createPost = Flow[ClientStockPortfolio]
    .grouped(10)
    .map { port =>
      (
        Get(uri = s"http://wherever/?symbols=${port.map(_.symbol).mkString(",")}"),
        port
      )
    }

  def decodeTicker = Flow[(Try[HttpResponse], Seq[ClientStockPortfolio])]
    .flatMapConcat { x =>
      x._1.get.entity.dataBytes
        .via(JsonFraming.objectScanner(Int.MaxValue))
        .mapAsync(4)(bytes => Unmarshal(bytes).to[StockTicker])
        .mapConcat { ticker =>
          lookupPreviousPrices(ticker)
        }
    }

  def lookupPreviousPrices(ticker: StockTicker): List[StockTicker] = ???

  def insertIntoDatabase(ticker: StockTicker) = ???
}
I have two questions. First, will the groupBy call that splits the stream into substreams actually run them in parallel, as I intend? Second, when I run this code I hit the max-open-requests error (I haven't raised the setting from its default). But even if the substreams do run in parallel, there are only 8 of them, so how does Http().superPool() end up backed up with 32 requests?
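For reference, the limit the error refers to comes from Akka HTTP's host connection pool settings: max-open-requests defaults to 32 and must be a power of two. A minimal application.conf sketch for raising it (the numbers are illustrative, not a recommendation):
akka.http.host-connection-pool {
  # must be a power of two; the default is 32
  max-open-requests = 64
  # maximum concurrent connections per host; the default is 4
  max-connections = 8
}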

Related

scala akka http type mismatch

I created a project with OpenAPI, then added an endpoint GET /product which is supposed to return a product (I am testing it, so I just want it to return any product).
When I run the project I get the following error:
[error] found : org.openapitools.server.model.Product.type
[error] required: (?, ?, ?) => ?
[error] def toEntityMarshallerProduct: ToEntityMarshaller[Product] = jsonFormat3(Product)
The logs point at Product inside jsonFormat3.
I noticed it's caused by the number of properties of the Product model: if I reduce them to 3, it works! This is weird. Does anyone know how to resolve this?
This is the product model file:
package org.openapitools.server.model
final case class Product (
id: Int,
name: String,
isAvailable: Boolean,
description: String,
category: String
)
This is the ProductApi file:
package org.openapitools.server.api
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import akka.http.scaladsl.model.StatusCodes
import akka.http.scaladsl.marshalling.ToEntityMarshaller
import akka.http.scaladsl.unmarshalling.FromEntityUnmarshaller
import akka.http.scaladsl.unmarshalling.FromStringUnmarshaller
import org.openapitools.server.AkkaHttpHelper._
import org.openapitools.server.model.Product
class ProductApi(
productService: ProductApiService,
productMarshaller: ProductApiMarshaller
) {
import productMarshaller._
lazy val route: Route =
path("product" / "all") {
get {
productService.productAllGet()
}
}
}
trait ProductApiService {
def productAllGet200(responseProduct: Product)(implicit toEntityMarshallerProduct: ToEntityMarshaller[Product]): Route =
complete((200, responseProduct))
def productAllGet()(implicit toEntityMarshallerProduct: ToEntityMarshaller[Product]): Route
}
trait ProductApiMarshaller {
implicit def toEntityMarshallerProduct: ToEntityMarshaller[Product]
}
And this is the main file:
import akka.actor.typed.{ActorSystem, ActorRef}
import akka.actor.typed.scaladsl.Behaviors
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Route
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.model.StatusCodes
// for JSON serialization/deserialization following dependency is required:
// "com.typesafe.akka" %% "akka-http-spray-json" % AkkaHttpVersion
import akka.http.scaladsl.marshalling.ToEntityMarshaller
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
import spray.json.DefaultJsonProtocol._
import scala.io.StdIn
import akka.util.Timeout
import scala.concurrent.duration._
import scala.concurrent.{ ExecutionContext, Future }
import org.openapitools.server.api._
import org.openapitools.server.model._
object Main extends App {
// needed to run the route
implicit val system = ActorSystem(Behaviors.empty, "product")
// implicit val materializer = ActorMaterializer()
// needed for the future map/flatmap in the end and future in fetchItem and saveOrder
implicit val executionContext = system.executionContext
object DefaultMarshaller extends ProductApiMarshaller {
def toEntityMarshallerProduct: ToEntityMarshaller[Product] = jsonFormat3(Product)
}
object DefaultService extends ProductApiService {
def productAllGet() (implicit toEntityMarshallerProduct: ToEntityMarshaller[Product]) : Route = {
val response = Future {
Product(1,"product",false,"desc","pizza")
}
requestcontext => {
response.flatMap {
(product: Product) =>
productAllGet200(product)(toEntityMarshallerProduct)(requestcontext)
}
}
}
}
val api = new ProductApi(DefaultService, DefaultMarshaller)
val host = "localhost"
val port = 3005
val bindingFuture = Http().newServerAt(host, port).bind(pathPrefix("api"){api.route})
println(s"Server online at http://${host}:${port}/\nPress RETURN to stop...")
bindingFuture.failed.foreach { ex =>
println(s"${ex} Failed to bind to ${host}:${port}!")
}
StdIn.readLine() // let it run until user presses return
bindingFuture
.flatMap(_.unbind()) // trigger unbinding from the port
.onComplete(_ => system.terminate()) // and shutdown when done
}
Try using a Data Transfer Object (DTO) that is exposed by your API and a domain model object that is handled internally.
That way you use jsonFormat3 for the DTO and have no need for jsonFormat5.
A simple mapper or helper method inside the case classes provides a smooth integration.
For example:
final case class Product (
  id: Int,
  name: String,
  isAvailable: Boolean,
  description: String,
  category: String
) {
  // expose only three properties through the API (which three is just an example)
  def toDto: ProductDTO = ProductDTO(id, name, isAvailable)
}
final case class ProductDTO (
  id: Int,
  name: String,
  isAvailable: Boolean
) {
  // the remaining fields must be supplied when converting back to the domain model
  def toDomain(description: String, category: String): Product =
    Product(id, name, isAvailable, description, category)
}
object ProductSupport extends SprayJsonSupport with DefaultJsonProtocol {
  implicit val productFormat = jsonFormat3(ProductDTO)
}
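A sketch of how the DTO format could then be plugged into the marshaller the generated code expects (assuming the example DTO fields above, and that ProductDTO lives next to Product in org.openapitools.server.model; compose on Akka HTTP's Marshaller converts the domain Product to the DTO before serializing):
import akka.http.scaladsl.marshalling.ToEntityMarshaller
import org.openapitools.server.model.{Product, ProductDTO}
import ProductSupport._
object DefaultMarshaller extends ProductApiMarshaller {
  // marshal Product by first mapping it onto the 3-field DTO
  def toEntityMarshallerProduct: ToEntityMarshaller[Product] =
    implicitly[ToEntityMarshaller[ProductDTO]].compose[Product](_.toDto)
}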

scala (spark) zio convert future to zio

My objective is to run a number of Spark ML regression models (thousands of runs) on one dataset, and I want to do this using ZIO instead of Future because the Future version runs too slowly. Below is the working example using Future.
A distinct list of keys is used to filter the partitioned dataset on key and run the model per key. I've set up a thread pool with 8 executors to manage it, but it quickly degrades in performance.
import scala.concurrent.{Await, ExecutionContext, ExecutionContextExecutorService, Future}
import java.util.concurrent.{Executors, TimeUnit}
import scala.concurrent.duration._
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions.col
val pool = Executors.newFixedThreadPool(8)
implicit val xc: ExecutionContextExecutorService = ExecutionContext.fromExecutorService(pool)
case class Result(key: String, coeffs: String)
try {
import spark.implicits._
val tasks = {
for (x <- keys)
yield Future {
Seq(
Result(
x.group,
runModel(input.filter(col("group")===x)).mkString(",")
)
).toDS()
.write.mode(SaveMode.Overwrite).option("header", false).csv(
s"hdfs://namenode:8020/results/$x.csv"
)
}
}.toSeq
Await.result(Future.sequence(tasks), Duration.Inf)
}
finally {
pool.shutdown()
pool.awaitTermination(Long.MaxValue, TimeUnit.NANOSECONDS)
}
I've tried to implement this in ZIO, but I don't know how to implement queues and cap the number of executors the way I did with Futures.
Below is my failed attempt so far...
import zio._
import zio.console._
import zio.stm._
import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}
import org.apache.spark.sql.functions.col
//example data/signatures
case class ModelResult(key: String, coeffs: String)
case class Data(key: String, sales: Double)
val keys: Array[String] = Array("100_1", "100_2")
def runModel[T](ds: Dataset[T]): Vector[Double]
object MyApp1 extends App {
val spark = SparkSession
.builder()
.getOrCreate()
import spark.implicits._
val input: Dataset[Data] = Seq(Data("100_1", 1d), Data("100_2", 2d)).toDS
def run(args: List[String]): ZIO[ZEnv, Nothing, Int] = {
for {
queue <- Queue.bounded[Int](8)
_ <- ZIO.foreach(1 to 8) (i => queue.offer(i)).fork
_ <- ZIO.foreach(keys) { k => queue.take.flatMap(_ => readWrite(k, input, queue)) }
} yield 0
}
def writecsv(k: String, v: String) = {
Seq(ModelResult(k, v))
.toDS
.write
.mode(SaveMode.Overwrite).option("header", value = false)
.csv(s"hdfs://namenode:8020/results/$k.csv")
}
def readWrite[T](key: String, ds: Dataset[T], queue: Queue[Int]): ZIO[ZEnv, Nothing, Int] = {
(for {
result <- runModel(ds.filter(col("key")===key)).mkString(",")
_ <- writecsv(key, result)
_ <- queue.offer(1)
_ <- putStrLn(s"successfully wrote output for $key")
} yield 0)
}
}
//to run
MyApp1.run(List[String]())
What is the best way to compute this in ZIO?
To parallelize some workload across, say, 8 threads, all you need is:
ZIO.foreachParN(8)(1 to 100)(id => zio.blocking.blocking(Task{yourClusterJob(id)}))
But don't expect much of a boost from switching from Future to ZIO here:
1) The actual workload dominates the coordination overhead, so the difference between ZIO and Future should be marginal.
2) You may not get any boost at all, because the 8 tasks will be competing for the same resource pool in the Spark cluster.
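Applied to the code in the question, a minimal sketch (assuming the ZIO 1.x version the question appears to use, where run returns ZIO[ZEnv, Nothing, Int], and reusing the asker's keys, input, runModel and writecsv; effectBlocking shifts each Spark call onto ZIO's blocking thread pool, and foreachParN caps the parallelism at 8):
import zio._
import zio.blocking._
// drop-in replacement for MyApp1.run
def run(args: List[String]): ZIO[ZEnv, Nothing, Int] =
  ZIO
    .foreachParN(8)(keys.toList) { key =>
      effectBlocking {
        val coeffs = runModel(input.filter(col("key") === key)).mkString(",")
        writecsv(key, coeffs)
      }
    }
    .fold(_ => 1, _ => 0)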

items fail to be processed in Akka streams app that uses Source.queues and Sink.queues in a flow

I am trying to create an (Akka HTTP) stream processing flow using the akka.stream.scaladsl Source and Sink queue classes.
I am using a queue because I have a processing step in my flow that issues HTTP requests, and I want this step to take as many items off the queue as there are max-open-requests, and to stop taking items off the queue once that many requests are in flight.
The result is that backpressure is applied when my connection pool is overloaded.
Below is a very simplified test that reflects the main logic of my app. In the test (StressSpec, below) I simulate a number of simultaneous connections, over each of which I send a Source of Requesto objects to the getResponses method of the class ServiceImpl.
In the processing step pullOffSinkQueue you will note that I increment a counter to see how many items I have pulled off the queue.
The test sends ServiceImpl a set of requests whose cardinality equals streamedRequestsPerConnection * numSimultaneousConnections.
When I send 20 requests my test passes fine; in particular, the count of requests pulled off the Sink.queue equals the number of requests I send out. However, if I increase the number of requests to above 50 or so, I see consistent failures in the test.
I get a message such as the one below:
180 was not equal to 200
ScalaTestFailureLocation: com.foo.StressSpec at (StressSpec.scala:116)
Expected :200
Actual :180
This indicates that the number of items pulled off the queue does not equal the number of items put on the queue.
I have a feeling this might be because my test is not properly waiting for all items put into the stream to be processed.
If anyone has any suggestions, I'd be all ears! Code is below.
package com.foo
import java.util.concurrent.atomic.AtomicInteger
import akka.stream.ActorAttributes.supervisionStrategy
import akka.stream.{Attributes, Materializer, QueueOfferResult}
import akka.stream.Supervision.resumingDecider
import akka.stream.scaladsl.{Flow, Keep, Sink, Source}
import scala.concurrent.{ExecutionContext, Future}
import akka.NotUsed
import akka.actor.ActorSystem
import akka.event.{Logging, LoggingAdapter}
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{FunSuite, Matchers}
import scala.collection.immutable
import scala.concurrent.duration._
import scala.concurrent.{Await, Future, _}
final case class Responso()
final case class Requesto()
object Handler {
val dbRequestCounter = new AtomicInteger(0)
}
class Handler(implicit ec: ExecutionContext, mat: Materializer) {
import Handler._
private val source =
Source.queue[(Requesto, String)](8, akka.stream.OverflowStrategy.backpressure)
private val sink =
Sink.queue[(Requesto, String)]().withAttributes(Attributes.inputBuffer(8, 8))
private val (sourceQueue, sinkQueue) = source.toMat(sink)(Keep.both).run()
def placeOnSourceQueue(ar: Requesto): Future[QueueOfferResult] = {
sourceQueue.offer((ar, "foo"))
}
def pullOffSinkQueue(qofr: QueueOfferResult): Future[Responso] = {
dbRequestCounter.incrementAndGet()
qofr match {
case QueueOfferResult.Enqueued =>
sinkQueue.pull().flatMap { maybeRequestPair: Option[(Requesto, String)] =>
Future.successful(Responso())
}
case error =>
println("enqueuing error: " + error)
Future.failed(new RuntimeException("enqueuing error: " + error))
}
}
}
class ServiceImpl(readHandler: Handler, writeHandler: Handler)
(implicit log: LoggingAdapter, mat: Materializer) {
private val readAttributeFlow: Flow[Requesto, Responso, NotUsed] = {
Flow[Requesto]
.mapAsyncUnordered(1)(readHandler.placeOnSourceQueue)
.mapAsyncUnordered(1)(readHandler.pullOffSinkQueue)
}
def getResponses(request: Source[Requesto, NotUsed]): Source[Responso, NotUsed] =
request
.via(readAttributeFlow)
.withAttributes(supervisionStrategy(resumingDecider))
}
class StressSpec
extends FunSuite
with MockitoSugar
with Matchers {
val streamedRequestsPerConnection = 10
val numSimultaneousConnections = 20
implicit val actorSystem: ActorSystem = ActorSystem()
implicit val materializer: ActorMaterializer = ActorMaterializer()
implicit val log: LoggingAdapter = Logging(actorSystem.eventStream, "test")
implicit val ec: ExecutionContext = actorSystem.dispatcher
import Handler._
lazy val requestHandler = new Handler()
lazy val svc: ServiceImpl =
new ServiceImpl(requestHandler, requestHandler)
test("can handle lots of simultaneous read requests") {
val totalExpected = streamedRequestsPerConnection * numSimultaneousConnections
def sendRequestAndAwaitResponse(): Unit = {
def getResponses(i: Integer) = {
val requestStream: Source[Requesto, NotUsed] =
Source(1 to streamedRequestsPerConnection)
.map { i =>
Requesto()
}
svc.getResponses(requestStream).runWith(Sink.seq)
}
val responses: immutable.Seq[Future[immutable.Seq[Responso]]] =
(1 to numSimultaneousConnections).map { getResponses(_) }
val flattenedResponses: Future[immutable.Seq[Responso]] =
Future.sequence(responses).map(_.flatten)
Await.ready(flattenedResponses, 1000.seconds).value.get
}
sendRequestAndAwaitResponse()
dbRequestCounter.get shouldBe(totalExpected)
}
}
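One way to rule out the timing explanation suggested in the question is to poll the counter instead of asserting on it immediately after the Await; a minimal sketch using ScalaTest's Eventually (this assumes StressSpec also mixes in Eventually, and it only addresses test synchronization, not the queue logic itself):
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
// at the end of the test, after sendRequestAndAwaitResponse():
eventually(timeout(Span(30, Seconds))) {
  dbRequestCounter.get shouldBe totalExpected
}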

Scala compiler can't find the unmarshalling implicits in route declaration

I'm trying to build a REST server using this tutorial:
https://spindance.com/reactive-rest-services-akka-http/
However, having reached the "Responding with JSON" section, I've noticed that my code doesn't compile, and I can't make POST requests. This is the error that I'm getting:
Error:(59, 18) could not find implicit value for parameter um: akka.http.scaladsl.unmarshalling.FromRequestUnmarshaller[Health]
entity(as[Health]) { statusReport =>
^
Having looked at other tutorials, I've found out that you need to include an object containing an implicit value for the class that I'm trying to unmarshal. I did that, and I even imported the httpx library, but I'm still getting this error. My code is given below.
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.StatusCodes
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import akka.stream.ActorMaterializer
import akka.pattern.ask
import akka.util.Timeout
import spray.json._
import DefaultJsonProtocol._
import spray.httpx.SprayJsonSupport.sprayJsonUnmarshaller
import scala.concurrent.duration._
import scala.io.StdIn
object JsonImplicits extends DefaultJsonProtocol {
implicit val healthFormat = jsonFormat2(Health)
}
object MyApplication {
val host = "localhost"
val port = 8080
def main(args: Array[String]): Unit = {
implicit val system = ActorSystem("simple-rest-system")
// Something to do with flows
implicit val materializer = ActorMaterializer()
// A reference to a specific thread pool
// You can configure thread pool options through it
// It is the engine that executes the actors
implicit val executionContext = system.dispatcher
val requestHandler = system.actorOf(RequestHandler.props(), "requestHandler")
//Define the route
val route : Route = {
implicit val timeout = Timeout(20 seconds)
import JsonImplicits._
import spray.httpx.SprayJsonSupport._
path("health") {
get {
onSuccess(requestHandler ? GetHealthRequest) {
case response: HealthResponse =>
complete(StatusCodes.OK, s"Everything is ${response.health.status}!")
case _ =>
complete(StatusCodes.InternalServerError)
}
}
} ~ post {
// Entity extracts the body of the POST request and then converts it into a
// Health object
entity(as[Health]) { statusReport =>
onSuccess(requestHandler ? SetStatusRequest(statusReport)) {
case response: HealthResponse =>
complete(StatusCodes.OK,s"Posted health as ${response.health.status}!")
case _ =>
complete(StatusCodes.InternalServerError)
}
}
}
}
//Start up and listen for requests
val bindingFuture = Http().bindAndHandle(route, host, port)
println(s"Waiting for requests at http://$host:$port/...\nHit RETURN to terminate")
StdIn.readLine()
//Shutdown
bindingFuture.flatMap(_.unbind())
system.terminate()
}
}
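For what it's worth, spray.httpx belongs to the old Spray framework rather than Akka HTTP; the spray-json integration for Akka HTTP lives in the akka-http-spray-json module. A sketch of the format object and imports that entity(as[Health]) would typically need (Health itself is not shown in the question, so this assumes it is the two-field case class that jsonFormat2 implies):
// build.sbt: "com.typesafe.akka" %% "akka-http-spray-json" % akkaHttpVersion
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import spray.json.DefaultJsonProtocol
object JsonImplicits extends SprayJsonSupport with DefaultJsonProtocol {
  implicit val healthFormat = jsonFormat2(Health)
}
// in the route, keep import JsonImplicits._ and drop the spray.httpx imports;
// entity(as[Health]) then resolves its unmarshaller from these implicits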

Stubbing SOAP requests in Scala

I use scalaxb to generate the models and the client part of a SOAP interface. For testing I use Betamax, which can also be used from Scala. However, scalaxb uses Netty as a transport, which ignores the proxy settings set up by Betamax. How would you cope with this situation?
scalaxb uses the cake pattern, so the service is built from 3 parts, as in the following example:
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent._
import scala.concurrent.duration._
val service = (new stockquote.StockQuoteSoap12Bindings with
scalaxb.SoapClientsAsync with
scalaxb.DispatchHttpClientsAsync {}).service
val fresponse = service.getQuote(Some("GOOG"))
val response = Await.result(fresponse, 5 seconds)
println(response)
And tests:
import co.freeside.betamax.{TapeMode, Recorder}
import co.freeside.betamax.proxy.jetty.ProxyServer
import dispatch._
import org.scalatest.{Tag, FunSuite}
import scala.concurrent.duration._
import scala.concurrent.{Await, Future}
class StockquoteSpec extends FunSuite with Betamax {
testWithBetamax("stockquote", Some(TapeMode.READ_WRITE))("stockquote") {
val fresponse = service.getQuote(Some("GOOG"))
val response = Await.result(fresponse, 5 seconds)
println(response)
}
}
trait Betamax {
protected def test(testName: String, testTags: Tag*)(testFun: => Unit)
def testWithBetamax(tape: String, mode: Option[TapeMode] = None)(testName: String, testTags: Tag*)(testFun: => Unit) = {
test(testName, testTags: _*) {
val recorder = new Recorder
val proxyServer = new ProxyServer(recorder)
recorder.insertTape(tape)
recorder.getTape.setMode(mode.getOrElse(recorder.getDefaultMode()))
proxyServer.start()
try {
testFun
} finally {
recorder.ejectTape()
proxyServer.stop()
}
}
}
}
Versions:
net.databinder.dispatch 0.11.2
co.freeside.betamax 1.1.2
com.ning.async-http-client 1.8.10
io.netty.netty 3.9.2.Final
It is indeed possible to use a proxy with Netty. Although Netty does not read the system properties for proxy settings, the settings can be injected using a ProxyServerSelector, which is created in the build method of AsyncHttpClientConfig:
if (proxyServerSelector == null && useProxySelector) {
proxyServerSelector = ProxyUtils.getJdkDefaultProxyServerSelector();
}
if (proxyServerSelector == null && useProxyProperties) {
proxyServerSelector = ProxyUtils.createProxyServerSelector(System.getProperties());
}
if (proxyServerSelector == null) {
proxyServerSelector = ProxyServerSelector.NO_PROXY_SELECTOR;
}
The only obstacle is that scalaxb uses the default config, with useProxyProperties=false. You can override it with a custom MyDispatchHttpClientsAsync trait that you use when creating the service:
val service = (new stockquote.StockQuoteSoap12Bindings with
scalaxb.SoapClientsAsync with
MyDispatchHttpClientsAsync {}).service
And the source code of MyDispatchHttpClientsAsync (the key point is calling setUseProxyProperties(true)):
import com.ning.http.client.providers.netty.NettyAsyncHttpProvider
import com.ning.http.client.{AsyncHttpClientConfig, AsyncHttpClient}
import scalaxb.HttpClientsAsync
/**
* @author miso
*/
trait MyDispatchHttpClientsAsync extends HttpClientsAsync {
lazy val httpClient = new DispatchHttpClient {}
trait DispatchHttpClient extends HttpClient {
import dispatch._, Defaults._
// Keep it lazy. See https://github.com/eed3si9n/scalaxb/pull/279
lazy val http = new Http(new AsyncHttpClient(new NettyAsyncHttpProvider(new AsyncHttpClientConfig.Builder().setUseProxyProperties(true).build())))
// lazy val http = Http.configure(_.setUseProxyProperties(true)) // Maybe later. See https://github.com/eed3si9n/scalaxb/issues/312
def request(in: String, address: java.net.URI, headers: Map[String, String]): concurrent.Future[String] = {
val req = url(address.toString).setBodyEncoding("UTF-8") <:< headers << in
http(req > as.String)
}
}
}
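Since the http client above is kept lazy, it is only constructed on first use, so the proxy settings just need to be in place before the first request goes out. A sketch of pointing the standard JVM proxy properties (the ones that setUseProxyProperties(true) makes the client read) at the Betamax proxy; the port 5555 is an assumption, adjust it to your betamax.proxyPort setting:
// e.g. at the start of the test, before the first call through the service
sys.props("http.proxyHost") = "localhost"
sys.props("http.proxyPort") = "5555"
val service = (new stockquote.StockQuoteSoap12Bindings with
  scalaxb.SoapClientsAsync with
  MyDispatchHttpClientsAsync {}).service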