Get whole HttpResponse body as a String with Akka-Streams HTTP - scala

I'm trying to understand how to use the new akka.http library. I would like to send an HTTP request to a server and read the whole response body as a single String, in order to produce a Source[String, ?].
Here is the best solution I was able to produce so far:
def get(
    modelID: String,
    pool: Flow[(HttpRequest, Int), (Try[HttpResponse], Int), Http.HostConnectionPool]
): Source[String, Unit] = {
  val uri = reactionsURL(modelID)
  val req = HttpRequest(uri = uri)
  Source.single((req, 0))
    .via(pool)
    .map {
      case (Success(resp), _) =>
        resp.entity.dataBytes.map(_.decodeString("utf-8"))
    }
    .flatten(FlattenStrategy.concat)
    .grouped(1024)
    .map(_.mkString)
}
It seems to work well (except for the missing error path), but it is a bit clunky for such a simple task. Is there a smarter solution? Can I avoid the grouped/mkString?

You can use the toStrict method of HttpResponse with a timeout. It gathers the whole response body into a Future.
def toStrict(timeout: FiniteDuration)(implicit ec: ExecutionContext, fm: Materializer): Future[Strict]
Returns a sharable and serializable copy of this message with a strict entity.
Example:
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, HttpResponse}
import akka.stream.{ActorMaterializer, Materializer}
import akka.stream.scaladsl.{Flow, Sink, Source}
import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration._
import scala.util.{Success, Try}

object Main extends App {
  implicit val system = ActorSystem()
  import system.dispatcher
  implicit val materializer = ActorMaterializer()

  val host = "127.0.0.1"
  lazy val pool = Http().newHostConnectionPool[Int](host, 9000)

  FlowBuilder.get("/path", pool).to(Sink.foreach(_.foreach(println))).run()
}

object FlowBuilder {
  def get(modelID: String,
          pool: Flow[(HttpRequest, Int), (Try[HttpResponse], Int), Http.HostConnectionPool])
         (implicit ec: ExecutionContext, mat: Materializer): Source[Future[String], Unit] = {
    val req = HttpRequest(uri = modelID)
    Source.single((req, 0)).via(pool)
      .map {
        // NOTE: the Failure case is not handled here
        case (Success(resp), _) => resp.entity.toStrict(5.seconds).map(_.data.decodeString("UTF-8"))
      }
  }
}
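Since get returns a Source[Future[String], Unit], you may want to flatten the inner Future to get a plain Source[String]. A minimal sketch using mapAsync, reusing the names from the example above:

def getBody(modelID: String,
            pool: Flow[(HttpRequest, Int), (Try[HttpResponse], Int), Http.HostConnectionPool])
           (implicit ec: ExecutionContext, mat: Materializer): Source[String, Unit] =
  FlowBuilder.get(modelID, pool).mapAsync(parallelism = 1)(identity) // resolve each Future[String] in order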

You can use Unmarshal, which also works for other types, e.g. JSON via spray-json. Like toStrict, it returns a Future[_].
Example:
authedReq.via(authServerReqResFlow).mapAsync(1) { case (tryRes, _) =>
  tryRes match {
    case Failure(exception) => Future.failed[Principal](exception)
    case Success(response @ HttpResponse(StatusCodes.OK, _, _, _)) =>
      val userContext = Unmarshal(response).to[UserContextData]
      userContext.map {
        case UserContextData(UserInfo(_, userName, fullName, email, title), _, _) =>
          Principal(userName, fullName, email, title)
      }
    case Success(response @ HttpResponse(responseCode, _, entity, _)) =>
      Unmarshal(entity).to[String].flatMap(msg => Future.failed(new AuthenticationFailure(s"$responseCode\n$msg")))
  }
}
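For the original question (whole body as a String), the same approach boils down to a one-liner. A minimal sketch, assuming an implicit Materializer and ExecutionContext are in scope:

import akka.http.scaladsl.model.HttpResponse
import akka.http.scaladsl.unmarshalling.Unmarshal
import akka.stream.Materializer
import scala.concurrent.{ExecutionContext, Future}

def bodyAsString(resp: HttpResponse)
                (implicit mat: Materializer, ec: ExecutionContext): Future[String] =
  Unmarshal(resp.entity).to[String] // the built-in string unmarshaller drains the entity fully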

Related

Slick Futures converted into Promises

I am starting to develop in Scala, so I started with a really simple RESTful API using Akka HTTP and actors, and then wanted to add a PostgreSQL database to "close up" the project. The thing is that somewhere in the project, a Future that is returned by a db.run method is being converted into a Promise and returning errors. When I run the Main object, start the API, and hit an endpoint, I get this error:
Cannot cast scala.concurrent.impl.Promise$DefaultPromise to scala.collection.immutable.Seq or Cannot cast scala.concurrent.impl.Promise$DefaultPromise to api.Item, depending on which route I am hitting.
Here is the main api.scala file:
package api

import akka.actor.{Actor, ActorSystem, Props}
import akka.http.scaladsl.Http
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
import akka.http.scaladsl.model.StatusCodes
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import akka.pattern.ask
import akka.util.Timeout
import api.Main.ItemActor._
import slick.jdbc.JdbcBackend.Database
import spray.json.DefaultJsonProtocol._
import scala.concurrent.Future
import scala.concurrent.duration.DurationInt
import scala.util.{Failure, Success}

object Main extends App {
  val db = Database.forConfig("scaladb")
  val itemDao = new handler(db)
  val system = ActorSystem("mySystem")
  val itemActor = system.actorOf(Props(new ItemActor(db)))
  implicit val actorSystem = system
  implicit val itemFormat = jsonFormat3(Item)
  implicit val timeout: Timeout = Timeout(5.seconds)

  class ItemActor(db: Database) extends Actor {
    import api.Main.ItemActor._
    def receive = {
      case CreateItem(item) =>
        sender() ! itemDao.create(item)
      case ReadItem(id) =>
        sender() ! itemDao.read(id)
      case ReadAllItems =>
        sender() ! itemDao.readAll
      case UpdateItem(item) =>
        sender() ! itemDao.update(item)
      case DeleteItem(id) =>
        sender() ! itemDao.delete(id)
    }
  }

  object ItemActor {
    case class CreateItem(item: Item)
    case class ReadItem(id: Int)
    case object ReadAllItems
    case class UpdateItem(item: Item)
    case class DeleteItem(id: Int)
  }

  def handleResponse(futureResponse: Future[Item]): Route = {
    onComplete(futureResponse) {
      case Success(response) => complete(response)
      case Failure(ex) => complete(StatusCodes.InternalServerError, s"An error occurred: ${ex.getMessage}")
    }
  }

  def handleResponseSeq(futureResponse: Future[Seq[Item]]): Route = {
    onComplete(futureResponse) {
      case Success(response) => complete(response)
      case Failure(ex) => complete(StatusCodes.InternalServerError, s"An error occurred: ${ex.getMessage}")
    }
  }

  val routes = pathPrefix("items") {
    pathEnd {
      post {
        entity(as[Item]) { item =>
          handleResponse((itemActor ? CreateItem(item)).mapTo[Item])
        }
      } ~
      get {
        handleResponseSeq((itemActor ? ReadAllItems).mapTo[Seq[Item]])
      }
    } ~
    path(IntNumber) { id =>
      get {
        handleResponse((itemActor ? ReadItem(id)).mapTo[Item])
      } ~
      put {
        entity(as[Item]) { item =>
          handleResponse((itemActor ? UpdateItem(item)).mapTo[Item])
        }
      } ~
      delete {
        handleResponse((itemActor ? DeleteItem(id)).mapTo[Item])
      }
    }
  }

  val bindRoutes = Http().bindAndHandle(routes, "localhost", 8888)
  println("Server online at http://localhost:8888/")
}
Then the handler (where I define the methods that access the PostgreSQL database):
package api

import slick.jdbc.PostgresProfile.api._
import scala.concurrent.Future

class handler(db: Database) {
  val items = TableQuery[Items]

  def create(item: Item): Future[Item] = {
    db.run((items returning items.map(_.id.?) into ((item, id) => item.copy(id = id))) += item)
  }

  def read(id: Int): Future[Option[Item]] = {
    db.run(items.filter(_.id === id).result.headOption)
  }

  def readAll: Future[Seq[Item]] = {
    println(db.run(items.result).getClass)
    db.run(items.result)
  }

  def update(item: Item): Future[Int] = {
    db.run(items.filter(_.id === item.id).update(item))
  }

  def delete(id: Int): Future[Int] = {
    db.run(items.filter(_.id === id).delete)
  }
}
And the items file:
package api

import slick.jdbc.PostgresProfile.api._

case class Item(id: Option[Int] = None, name: String, description: String)

class Items(tag: Tag) extends Table[Item](tag, "items") {
  def id = column[Int]("id", O.PrimaryKey, O.AutoInc)
  def name = column[String]("name")
  def description = column[String]("description")
  def * = (id.?, name, description) <> (Item.tupled, Item.unapply)
}
I've tried adding a getClass next to the db.run(items.result) in the handler file, and it prints class scala.concurrent.impl.Promise$DefaultPromise, so there must be some implicit conversion involved. Thanks.
You're mixing Futures and actors, which is generally not a great idea.
In your ItemActor, instead of sending the future as a reply, it's a better idea to pipe the future as a reply (the reply won't actually happen until the future is complete, that is to say, until the DAO has a result).
import akka.pattern.pipe

class ItemActor(db: Database) extends Actor {
  import ItemActor._
  import context.dispatcher

  def receive = {
    case CreateItem(item) =>
      itemDao.create(item).pipeTo(sender())
    case ReadItem(id) =>
      itemDao.read(id).pipeTo(sender())
  }
}
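With pipeTo in place, the existing route code (itemActor ? CreateItem(item)).mapTo[Item] works as intended, because the eventual reply is an Item rather than a Future[Item]. pipeTo also avoids the classic pitfall of reading sender() from inside an asynchronous callback.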
That said, at least in this code, there doesn't really seem to be a good reason for ItemActor to exist, given that it's just forwarding operations to the DAO. Making the itemDao visible in the routes, you could just as well do:
handleResponse(itemDao.create(item))
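For instance, a sketch of the routes talking to the DAO directly, reusing the handleResponse helpers from the question (no actor, no ask, no mapTo):

val routes = pathPrefix("items") {
  pathEnd {
    post {
      entity(as[Item]) { item =>
        handleResponse(itemDao.create(item)) // Future[Item] straight from Slick
      }
    } ~
    get {
      handleResponseSeq(itemDao.readAll)
    }
  }
}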
Here: handleResponse((itemActor ? CreateItem(item)).mapTo[Item])
the actor replies with a Future[Item]; mapTo[Item] tries to cast that Future to Item and fails.
You want your actor to reply with the actual item, not the Future returned by db.run.
I haven't used akka in a while, but I think something like this should work:
// capture the sender eagerly: sender() must not be read inside an asynchronous callback
val replyTo = sender()
...
case CreateItem(item) => itemDao.create(item).onComplete {
  case Success(i) => replyTo ! i
  case Failure(e) => throw e
}
...

Adding retry to future sequence for running Databricks notebooks in parallel in Scala

I use the below code from Databricks itself on how to run its notebooks in parallel in Scala, https://docs.databricks.com/notebooks/notebook-workflows.html#run-multiple-notebooks-concurrently . I am trying to add a retry feature where, if one of the notebooks in the sequence fails, it will retry that notebook based on the retry value I pass to it.
Here is the parallel notebook code from Databricks:
//parallel notebook code
import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import scala.util.control.NonFatal

case class NotebookData(path: String, timeout: Int, parameters: Map[String, String] = Map.empty[String, String])

def parallelNotebooks(notebooks: Seq[NotebookData]): Future[Seq[String]] = {
  import scala.concurrent.{Future, blocking, Await}
  import java.util.concurrent.Executors
  import scala.concurrent.ExecutionContext
  import com.databricks.WorkflowException

  val numNotebooksInParallel = 5
  // If you create too many notebooks in parallel the driver may crash when you submit all of the jobs at once.
  // This code limits the number of parallel notebooks.
  implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(numNotebooksInParallel))
  val ctx = dbutils.notebook.getContext()

  Future.sequence(
    notebooks.map { notebook =>
      Future {
        dbutils.notebook.setContext(ctx)
        if (notebook.parameters.nonEmpty)
          dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
        else
          dbutils.notebook.run(notebook.path, notebook.timeout)
      }
      .recover {
        case NonFatal(e) => s"ERROR: ${e.getMessage}"
      }
    }
  )
}
This is an example of how I am calling the above code to run multiple example notebooks:
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.language.postfixOps

val notebooks = Seq(
  NotebookData("Notebook1", 0, Map("client" -> client)),
  NotebookData("Notebook2", 0, Map("client" -> client))
)
val res = parallelNotebooks(notebooks)
Await.result(res, 3000000 seconds) // this is a blocking call.
res.value
Here is one attempt. Since your code does not compile, I inserted a few dummy classes.
Also, you did not fully specify the desired behavior, so I made some assumptions. Only five retries will be made for each connection. If any of the Futures are still failing after five retries, then the entire Future is failed. Both of these behaviors can be changed, but since you did not specify, I am not sure what it is you want.
If you have questions or would like me to make an alteration to the program, let me know in the comments section.
object TestNotebookData extends App {
  //parallel notebook code
  import scala.concurrent.{Await, Future}
  import scala.concurrent.duration._
  import scala.util.control.NonFatal

  case class NotebookData(path: String, timeout: Int, parameters: Map[String, String] = Map.empty[String, String])

  // Dummy stand-ins for the Databricks API so the example compiles outside Databricks
  case class Context()
  case class Notebook() {
    def getContext(): Context = Context()
    def setContext(ctx: Context): Unit = ()
    def run(path: String, timeout: Int, parameters: Map[String, String] = Map()): Seq[String] = Seq()
  }
  case class Dbutils(notebook: Notebook)
  val dbutils = Dbutils(Notebook())

  def parallelNotebooks(notebooks: Seq[NotebookData]): Future[Seq[Seq[String]]] = {
    import scala.concurrent.{Future, blocking, Await}
    import java.util.concurrent.Executors
    import scala.concurrent.ExecutionContext

    // This code limits the number of parallel notebooks.
    val numNotebooksInParallel = 5
    implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(numNotebooksInParallel))
    val ctx = dbutils.notebook.getContext()
    val isRetryable = true
    val retries = 5

    def runNotebook(notebook: NotebookData): Future[Seq[String]] = {
      def retryWrapper(retry: Boolean, current: Int, max: Int): Future[Seq[String]] = {
        val fut = Future { runNotebookInner() }
        if (retry && current < max) fut.recoverWith { case _ => retryWrapper(retry, current + 1, max) }
        else fut
      }

      def runNotebookInner() = {
        dbutils.notebook.setContext(ctx)
        if (notebook.parameters.nonEmpty)
          dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
        else
          dbutils.notebook.run(notebook.path, notebook.timeout)
      }

      retryWrapper(isRetryable, 0, retries)
    }

    Future.sequence(
      notebooks.map { notebook =>
        runNotebook(notebook)
      }
    )
  }

  val notebooks = Seq(
    NotebookData("Notebook1", 0, Map("client" -> "client")),
    NotebookData("Notebook2", 0, Map("client" -> "client"))
  )
  val res = parallelNotebooks(notebooks)
  Await.result(res, 3000000.seconds) // this is a blocking call.
  res.value
}
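The retryWrapper above retries immediately on failure. If you want a pause between attempts, here is a minimal sketch (my addition, not part of the answer above; Thread.sleep blocks a pool thread during the pause, which is tolerable here because the executor is dedicated to notebook runs):

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration._
import scala.util.control.NonFatal

def retryWithDelay[T](max: Int, delay: FiniteDuration)(op: => T)(implicit ec: ExecutionContext): Future[T] =
  Future(op).recoverWith {
    case NonFatal(_) if max > 0 =>
      Thread.sleep(delay.toMillis) // crude fixed backoff
      retryWithDelay(max - 1, delay)(op)
  }

You could call it as retryWithDelay(5, 30.seconds)(runNotebookInner()) in place of retryWrapper.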
I found this to work:
import scala.util.{Failure, Success, Try}

def tryNotebookRun(path: String, timeout: Int, parameters: Map[String, String] = Map.empty[String, String]): Try[Any] = {
  Try(
    if (parameters.nonEmpty) {
      dbutils.notebook.run(path, timeout, parameters)
    }
    else {
      dbutils.notebook.run(path, timeout)
    }
  )
}

//parallel notebook code
import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import scala.util.control.NonFatal

def runWithRetry(path: String, timeout: Int, parameters: Map[String, String] = Map.empty[String, String], maxRetries: Int = 2) = {
  var numRetries = 0
  while (numRetries < maxRetries) {
    tryNotebookRun(path, timeout, parameters) match {
      case Success(_) => numRetries = maxRetries
      case Failure(_) => numRetries = numRetries + 1
    }
  }
}
case class NotebookData(path: String, timeout: Int, parameters: Map[String, String] = Map.empty[String, String])

def parallelNotebooks(notebooks: Seq[NotebookData]): Future[Seq[Any]] = {
  import scala.concurrent.{Future, blocking, Await}
  import java.util.concurrent.Executors
  import scala.concurrent.ExecutionContext
  import com.databricks.WorkflowException

  val numNotebooksInParallel = 5
  // If you create too many notebooks in parallel the driver may crash when you submit all of the jobs at once.
  // This code limits the number of parallel notebooks.
  implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(numNotebooksInParallel))
  val ctx = dbutils.notebook.getContext()

  Future.sequence(
    notebooks.map { notebook =>
      Future {
        dbutils.notebook.setContext(ctx)
        runWithRetry(notebook.path, notebook.timeout, notebook.parameters)
      }
      .recover {
        case NonFatal(e) => s"ERROR: ${e.getMessage}"
      }
    }
  )
}
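One caveat: as written, runWithRetry returns Unit and never rethrows, so a notebook that still fails after maxRetries attempts is silently dropped and the .recover block above never fires. If you need the final result or error, have runWithRetry return the last Try instead.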

How to create akka-http cachedHostConnectionPool for https requests in scala?

I can't figure out how to create a cachedHostConnectionPool in akka-http using Scala for sending HTTPS requests. queueRequest(HttpRequest(uri = "https://example.com")) sends the request over plain HTTP, and cachedHostConnectionPool[Promise[HttpResponse]]("https://example.com") throws an error that the ':' character isn't expected.
import scala.util.{Failure, Success}
import scala.concurrent.{Future, Promise}
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model._
import akka.stream.ActorMaterializer
import akka.stream.scaladsl._
import akka.stream.{OverflowStrategy, QueueOfferResult}

implicit val system = ActorSystem()
import system.dispatcher // to get an implicit ExecutionContext into scope
implicit val materializer = ActorMaterializer()

val QueueSize = 10

// This idea came initially from this blog post:
// http://kazuhiro.github.io/scala/akka/akka-http/akka-streams/2016/01/31/connection-pooling-with-akka-http-and-source-queue.html
val poolClientFlow = Http().cachedHostConnectionPool[Promise[HttpResponse]]("example.com")

val queue =
  Source.queue[(HttpRequest, Promise[HttpResponse])](QueueSize, OverflowStrategy.dropNew)
    .via(poolClientFlow)
    .toMat(Sink.foreach({
      case (Success(resp), p) => p.success(resp)
      case (Failure(e), p)    => p.failure(e)
    }))(Keep.left)
    .run()

def queueRequest(request: HttpRequest): Future[HttpResponse] = {
  val responsePromise = Promise[HttpResponse]()
  queue.offer(request -> responsePromise).flatMap {
    case QueueOfferResult.Enqueued    => responsePromise.future
    case QueueOfferResult.Dropped     => Future.failed(new RuntimeException("Queue overflowed. Try again later."))
    case QueueOfferResult.Failure(ex) => Future.failed(ex)
    case QueueOfferResult.QueueClosed => Future.failed(new RuntimeException("Queue was closed (pool shut down) while running the request. Try again later."))
  }
}

val responseFuture: Future[HttpResponse] = queueRequest(HttpRequest(uri = "/"))
It seems like the Scala version supports only plain host names, while in Java you can provide a protocol too (from their tests):
http.cachedHostConnectionPool("akka.io", materializer());
http.cachedHostConnectionPool("https://akka.io", materializer());
http.cachedHostConnectionPool("https://akka.io:8080", materializer());
Any known workarounds?
You have to use cachedHostConnectionPoolHttps instead:
val poolClientFlow = Http().cachedHostConnectionPoolHttps[Promise[HttpResponse]]("example.com")
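The HTTPS pool defaults to port 443; a non-default port can be passed explicitly (a sketch, assuming akka-http's standard signature):

val poolClientFlow =
  Http().cachedHostConnectionPoolHttps[Promise[HttpResponse]]("example.com", port = 8443)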

Scala, Sangria and Scalatra

We have a Scala application using Scalatra (http://scalatra.org/) as our web framework. I'm wondering if there are any good (or just any) resources out there on how to implement a GraphQL endpoint using Sangria (http://sangria-graphql.org/) and Scalatra?
I'm new to Scala and would appreciate any help to get started on this.
There aren't any that I know of, but since Scalatra uses json4s you would use sangria's json4s marshaller.
Otherwise, to make sangria clearer, here's a Scala worksheet with a very simplistic example based on play + sangria; in this case you would just need to swap the JSON library.
The db is mocked (perhaps you use Slick?) and so is the HTTP server, but it's a simple case of swapping in the function definitions.
import sangria.ast.Document
import sangria.execution.{ErrorWithResolver, Executor, QueryAnalysisError}
import sangria.macros.derive.{ObjectTypeDescription, ObjectTypeName, deriveObjectType}
import sangria.parser.{QueryParser, SyntaxError}
import sangria.renderer.SchemaRenderer
import sangria.schema.{Argument, Field, IntType, ListType, ObjectType, OptionInputType, Schema, fields}
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.util.{Failure, Success}
// replace with another json lib
// eg https://github.com/sangria-graphql/sangria-json4s-jackson
import play.api.libs.json._
import sangria.marshalling.playJson._

case class User(name: String, age: Int, phone: Option[String])

class FakeDb {
  class UsersTable {
    def getUsers(limit: Int): List[User] = {
      // this would come from the db
      List(
        User("john smith", 23, None),
        User("Anne Schwazenbach", 45, Some("2134556"))
      )
    }
  }
  val usersRepo = new UsersTable
}

object MySchema {
  val limitArg: Argument[Int] = Argument("first", OptionInputType(IntType),
    description = "Returns the first n elements from the list.",
    defaultValue = 10)

  implicit val UsersType: ObjectType[FakeDb, User] = {
    deriveObjectType[FakeDb, User](
      ObjectTypeName("Users"),
      ObjectTypeDescription("Users in the system")
    )
  }

  private val Query: ObjectType[FakeDb, Unit] = ObjectType[FakeDb, Unit](
    "Query", fields[FakeDb, Unit](
      Field("users", ListType(UsersType),
        arguments = limitArg :: Nil,
        resolve = c => c.ctx.usersRepo.getUsers(c.arg(limitArg))
      )
    ))

  val theSchema: Schema[FakeDb, Unit] = Schema(Query)
}

object HttpServer {
  def get(): String = {
    // Http GET
    SchemaRenderer.renderSchema(MySchema.theSchema)
  }

  def post(query: String): Future[JsValue] = {
    // Http POST
    val variables = None
    val operation = None
    QueryParser.parse(query) match {
      case Success(q) => executeQuery(q, variables, operation)
      case Failure(error: SyntaxError) => Future.successful(Json.obj("error" -> error.getMessage))
      case Failure(error: Throwable) => Future.successful(Json.obj("error" -> error.getMessage))
    }
  }

  private def executeQuery(queryAst: Document, vars: Option[JsValue], operation: Option[String]): Future[JsValue] = {
    val schema: Schema[FakeDb, Unit] = MySchema.theSchema
    Executor.execute[FakeDb, Unit, JsValue](schema, queryAst, new FakeDb,
      operationName = operation,
      variables = vars.getOrElse(Json.obj()))
      .map((d: JsValue) => d)
      .recover {
        case error: QueryAnalysisError => Json.obj("error" -> error.getMessage)
        case error: ErrorWithResolver  => Json.obj("error" -> error.getMessage)
      }
  }
}

HttpServer.get()

val myquery = """
  {
    users {
      name
    }
  }
"""

val res: JsValue = Await.result(HttpServer.post(myquery), 10.seconds)
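To port the worksheet to Scalatra's json4s stack, only the marshalling imports and JSON types should need to change. A sketch, assuming the sangria-json4s-jackson module mentioned in the comment above (the package name may vary by version):

// swap these in for play.api.libs.json._ / sangria.marshalling.playJson._
import org.json4s.JsonAST.{JObject, JValue}
import sangria.marshalling.json4s.jackson._

// then use JValue where the worksheet uses JsValue, e.g.
// def post(query: String): Future[JValue] = ...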

How to check if Scala Spray Bind actually binds to port

From the Scala Spray documentation it is not clear how to check whether it failed to bind to a particular port:
implicit val system = ActorSystem("mediaiton")
implicit val timeout = Timeout(5, TimeUnit.SECONDS)

val service = system.actorOf(Props[IotRestNB], "mediaiton")

println(s"Going to start the REST NB at $host $port")
IO(Http) ! Http.Bind(service, interface = host, port = port)
I spent a day trying to figure this out from different posts:
import java.util.concurrent.TimeUnit
import akka.actor.{ActorSystem, Props}
import akka.io.IO
import akka.util.Timeout
import nb.rest.IotRestNB
import spray.can.Http

implicit val system = ActorSystem("lwm2m-mediaiton")
implicit val timeout = Timeout(5, TimeUnit.SECONDS)

val service = system.actorOf(Props[IotClassNB], "lwm2m-mediaiton")

println(s"Going to start the REST NB at $host $port")
IO(Http).tell(Http.Bind(service, interface = host, port = port), sender = service)
Now the actor class - IotClassNB:
import java.util.concurrent.Executors
import akka.actor.Actor
import lwm2m.server.BootUp
import org.eclipse.leshan.core.request.ContentFormat
import spray.can.Http._
import spray.http.MediaTypes
import spray.routing.HttpService
import scala.concurrent.{ExecutionContext, Future}

class IotClassNB extends Actor with MediationRoute {
  //mixin class
  def actorRefFactory = context

  def receive = handleConnection orElse runRoute(route)

  def handleConnection: Receive = {
    case b: Bound =>
      println("***REST Server Started***")
      Future.successful(b)
    case failed: CommandFailed =>
      println("***REST Server Could not be Started***") // this is what we want
      Future.failed(new RuntimeException("Binding failed"))
  }
}
trait MediationRoute extends HttpService {
  // Execution Context for blocking ops
  val blockingExecutionContext = {
    ExecutionContext.fromExecutor(Executors.newFixedThreadPool(10))
  }

  val route = {
    pathPrefix("v1") {
      pathPrefix("mediation") {
        respondWithMediaType(MediaTypes.`application/json`) {
          pathPrefix("get_connected_clients") {
            pathEnd {
              get {
                complete(
                  // Future.apply {
                  get_registered_clients())
                  // }(blockingExecutionContext))
              }
            }.....
And here is how to test your Spray server via spray-client:
@Test def test_RestNB(): Unit = {
  implicit val system = ActorSystem("test")
  import system.dispatcher

  val pipeline: HttpRequest => Future[HttpResponse] = sendReceive
  implicit val timeout = Timeout(25, TimeUnit.SECONDS)

  val server_url = s"http://${host}:${port}/xxx/"
  val response: Future[HttpResponse] = pipeline(Get(server_url))
  val result = Await.result(response, timeout.duration) // wait for timeout
  // println(s"Await Result is ${result.entity.asString}")

  response.onComplete {
    case Success(result: HttpResponse) =>
      logger.info("Result: " + result.entity.asString)
      assert(result.entity.asString === "xxxxx")
    case Failure(error) =>
      logger.error(error + " Couldn't get list of items")
    case _ =>
      assert(false)
  }
}