I have a publisher with a stream of messages, and I should send consumers only the messages that match their subscriptions.
Locally this works for 2 threads without load, but under load testing it does not: many messages are routed the wrong way.
I have two hypotheses about the problem:
1. I'm new to Akka Streams and don't see a closely similar case (joining two streams) in the docs, so I may be composing them incorrectly.
2. I'm not using Scala's thread-safe options in the right way.
What can it be?
import akka._
import akka.stream._
import akka.stream.scaladsl._

import scala.collection.mutable
import scala.concurrent.Future
import scala.util.matching.Regex

class RtkBrokerImpl(materializer: Materializer) extends MessageBroker {

  private implicit val mat: Materializer = materializer

  val mutableArray = mutable.Map[String, Source[ConsumeRequest, NotUsed]]()

  val (inboundHub: Sink[ProduceRequest, NotUsed], outboundHub: Source[ConsumeResponse, NotUsed]) =
    MergeHub.source[ProduceRequest]
      .async
      .mapAsyncUnordered(100)(x => Future.successful(ConsumeResponse(x.key, x.payload)))
      .toMat(BroadcastHub.sink)(Keep.both)
      .run()

  val all: RunnableGraph[Source[ProduceRequest, NotUsed]] =
    MergeHub.source[ProduceRequest]
      .toMat(BroadcastHub.sink)(Keep.right)

  val queue = Sink.queue[ProduceRequest](100)

  override def produce(in: Source[ProduceRequest, NotUsed]): Future[ProduceResponse] = {
    in.to(inboundHub).run()
    Future.never
  }

  override def consume(in: Source[ConsumeRequest, NotUsed]): Source[ConsumeResponse, NotUsed] = {
    val patternRepo = new PatternsRepository
    in.runForeach { req =>
      req.action match {
        case ConsumeRequest.Action.SUBSCRIBE   => patternRepo.putPatterns(req.keys)
        case ConsumeRequest.Action.UNSUBSCRIBE => patternRepo.dropPatterns(req.keys)
      }
    }
    outboundHub.async.filter(res => patternRepo.checkKey(res.key))
  }
}
class PatternsRepository {
  import RtkBrokerImpl._
  import scala.collection.JavaConverters._

  val concurrentSet: mutable.Set[String] =
    java.util.concurrent.ConcurrentHashMap.newKeySet[String]().asScala

  def getPatterns(): mutable.Set[String] = {
    concurrentSet
  }

  def checkKey(key: String): Boolean = {
    concurrentSet.contains(key) ||
      concurrentSet.exists(x => isInPattern(key, x))
  }

  def dropPatterns(string: Seq[String]) = {
    string.foreach(concurrentSet.remove)
  }

  def putPatterns(string: Seq[String]) = {
    concurrentSet.addAll(string)
  }
}
object RtkBrokerImpl {
  def isInPattern(key: String, pattern: String): Boolean = {
    if (pattern.exists(x => x == '#' || x == '*')) {
      val splittedPatten = pattern.split(".")
      val splittedKey = key.split(".")
      if (!splittedPatten.contains("#")) {
        if (splittedKey.size != splittedKey.size)
          return false
        splittedPatten.zip(splittedKey)
          .forall { case (key, pat) => if (pat == "*") true else key == pat }
      } else {
        val regExp = pattern.replaceAll(".", "\\.")
          .replaceAll("\\*", "[A-Za-z0-9]+")
          .replaceAll("#", "\\S+")
        new Regex(regExp).matches(key)
      }
    } else {
      key == pattern
    }
  }
}
I'm trying to rewrite an HTTP client using the Java 11 HttpClient in Scala.
Here is my code:
import cats.effect._
import java.net.http._
import java.net.http.HttpResponse._
import java.net.http.HttpClient._

trait HttpClients[F[_]] {
  def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]]
}

object HttpClients {
  val client: HttpClient = HttpClient.newBuilder().followRedirects(Redirect.ALWAYS).build()

  def newClient[F[_] : Async](): HttpClients[F] = new HttpClients[F] {
    override def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]] = F.async { cb =>
      val resp = client.sendAsync(req, BodyHandlers.ofString())
      val s = resp.handle((res: HttpResponse[String], err: Throwable) => {
        if (err == null)
          cb(Right(res))
        else
          cb(Left(err))
      })
      s // TODO ?
      // Type mismatch
      // Required: F[Option[F[Unit]]]
      // Found: Unit
    }
  }
}
The handle callback comes from CompletableFuture.handle.
I guess the error comes from here, but I don't know what to write next.
Then I made some changes:
def newClient[F[_] : Async](): HttpClients[F] = new HttpClients[F] {
  override def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]] = F.async[HttpResponse[_]] { cb =>
    val s = Sync[F](F: Async[F]).delay {
      val resp = client.sendAsync(req, BodyHandlers.ofString())
      resp.handle((res: HttpResponse[String], err: Throwable) => {
        if (err == null)
          cb(Right(res))
        else
          cb(Left(err))
      }).join()
    }
    F.delay(s.some)
  }
}
This time, there is no error, but I don't know how to get the response's body
Thanks for your reply!
@OlegPyzhcov already provided insight in case you are using CE3; this answer uses CE2 in case that is what you wanted.
The first version of your code was correct. Here is a full running example using Ammonite, with some style improvements and ensuring a new client is created on each call to and evaluation of newClient:
// scala 2.13.5

import $ivy.`org.typelevel::cats-effect:2.5.0`

import cats.effect.{Async, IO}
import cats.syntax.all._

import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

trait HttpClients[F[_]] {
  def send(req: HttpRequest): F[HttpResponse[String]]
}

object HttpClients {
  def newClient[F[_]](implicit F: Async[F]): F[HttpClients[F]] =
    F.delay {
      HttpClient
        .newBuilder
        .followRedirects(HttpClient.Redirect.ALWAYS)
        .build()
    } map { client =>
      new HttpClients[F] {
        override def send(req: HttpRequest): F[HttpResponse[String]] =
          F.async { cb =>
            client.sendAsync(req, HttpResponse.BodyHandlers.ofString).handle {
              (res: HttpResponse[String], err: Throwable) =>
                if (err == null) cb(Right(res))
                else cb(Left(err))
            }
          }
      }
    }
}

object Main {
  private val request =
    HttpRequest
      .newBuilder
      .GET
      .uri(URI.create("https://stackoverflow.com/questions/tagged/scala?tab=Newest"))
      .build()

  private val program = for {
    _ <- IO.delay(println("Hello, World!"))
    client <- HttpClients.newClient[IO]
    response <- client.send(request)
    _ <- IO.delay(println(response))
    _ <- IO.delay(println(response.body))
  } yield ()

  def run(): Unit = {
    program.unsafeRunSync()
  }
}

@main
def main(): Unit = {
  Main.run()
}
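For completeness, since the question's error (Required: F[Option[F[Unit]]]) comes from cats-effect 3: in CE3, Async[F].async expects the registration function to return an optional canceler wrapped in F. Below is a minimal sketch of a CE3 version (my own illustration, assuming cats-effect 3.x on the classpath; the standalone send helper is hypothetical):

import cats.effect.Async
import cats.syntax.functor._
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

def send[F[_]](client: HttpClient, req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[String]] =
  F.async[HttpResponse[String]] { cb =>
    F.delay {
      val cf = client.sendAsync(req, HttpResponse.BodyHandlers.ofString)
      cf.handle { (res: HttpResponse[String], err: Throwable) =>
        if (err == null) cb(Right(res)) else cb(Left(err))
      }
      // returning Some(canceler) lets CE3 cancel the underlying request
      Some(F.delay(cf.cancel(true)).void)
    }
  }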
I have a CSV file that I need to parse and do some action on every record. How do I use Free Monads with it? Currently, I'm loading the entire file into memory and would like to know if there is any better solution. Below is my program:
for {
  reader <- F.getReader("my_file.csv")
  csvRecords <- C.readCSV(reader)
  _ <- I.processCSV(csvRecords)
  _ <- F.close(reader)
} yield ()
This code works for smaller files, but if I have very large files (over 1 GB), this wouldn't work very well. I'm using Commons CSV for reading the CSVRecords.
Looking at the code in your gist, I think the line with the comment is exactly the line you don't want at all:
object CSVIOInterpreter extends (CSVIO ~> Future) {
  import scala.collection.JavaConverters._

  override def apply[A](fa: CSVIO[A]): Future[A] = fa match {
    case ReadCSV(reader) => Future.fromTry(Try {
      CSVFormat.RFC4180
        .withFirstRecordAsHeader()
        .parse(reader)
        .getRecords // Loads the complete file
        .iterator().asScala.toStream
    })
  }
}
Just remove the whole getRecords line. CSVFormat.parse returns an instance of CSVParser, which already implements Iterable<CSVRecord>, and the getRecords call is the only thing that forces it to read the whole file.
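Once getRecords is gone, the parser can be consumed lazily. A minimal sketch of direct lazy consumption (my own illustration, assuming Commons CSV on the classpath and the my_file.csv from the question):

import java.io.FileReader
import org.apache.commons.csv.CSVFormat
import scala.collection.JavaConverters._

val reader = new FileReader("my_file.csv")
try {
  // CSVParser implements Iterable<CSVRecord>, so rows are pulled one at a time
  val parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(reader)
  parser.iterator().asScala.foreach(println) // only one record in memory at a time
} finally reader.close()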
You can actually look at the CSVParser.getRecords implementation:
public List<CSVRecord> getRecords() throws IOException {
    CSVRecord rec;
    final List<CSVRecord> records = new ArrayList<>();
    while ((rec = this.nextRecord()) != null) {
        records.add(rec);
    }
    return records;
}
So it just materializes the whole file using the this.nextRecord call, which is obviously a more "core" part of the API.
So when I run a simplified version of your code without the getRecords call:
import cats._
import cats.free.Free
import java.io._
import org.apache.commons.csv._
import scala.collection.JavaConverters._

trait Action[A] {
  def run(): A
}

object F {
  import Free.liftF

  case class GetReader(fileName: String) extends Action[Reader] {
    override def run(): Reader = new FileReader(fileName)
  }

  case class CloseReader(reader: Reader) extends Action[Unit] {
    override def run(): Unit = reader.close()
  }

  def getReader(fileName: String): Free[Action, Reader] = liftF(GetReader(fileName))

  def close(reader: Reader): Free[Action, Unit] = liftF(CloseReader(reader))
}

object C {
  import Free.liftF

  case class ReadCSV(reader: Reader) extends Action[CSVParser] {
    override def run(): CSVParser = CSVFormat.DEFAULT.parse(reader)
  }

  def readCSV(reader: Reader): Free[Action, CSVParser] = liftF(ReadCSV(reader))
}

object I {
  import Free.liftF

  case class ProcessCSV(parser: CSVParser) extends Action[Unit] {
    override def run(): Unit = {
      for (r <- parser.asScala)
        println(r)
    }
  }

  def processCSV(parser: CSVParser): Free[Action, Unit] = liftF(ProcessCSV(parser))
}

object Runner {
  import cats.arrow.FunctionK
  import cats.{Id, ~>}

  val runner = new (Action ~> Id) {
    def apply[A](fa: Action[A]): Id[A] = fa.run()
  }

  def run[A](free: Free[Action, A]): A = {
    free.foldMap(runner)
  }
}

def test() = {
  val free = for {
    // reader <- F.getReader("my_file.csv")
    reader <- F.getReader("AssetsImportCompleteSample.csv")
    csvRecords <- C.readCSV(reader)
    _ <- I.processCSV(csvRecords)
    _ <- F.close(reader)
  } yield ()

  Runner.run(free)
}
it seems to work OK in line-by-line mode.
Here is how I read the CSV file and do some operations on it:
I use scala.io.Source.fromFile().
I create a case class matching the CSV file's header to make the data more accessible and easier to work with.
PS: I don't have any knowledge of monads, and I am a beginner in Scala. I posted this as it may be helpful.
case class AirportData(id: Int, ident: String, name: String, typeAirport: String, latitude_deg: Double,
                       longitude_deg: Double, elevation_ft: Double, continent: String, iso_country: String,
                       iso_region: String, municipality: String)

object AirportData extends App {

  def toDoubleOrNeg(s: String): Double = {
    try {
      s.toDouble
    } catch {
      case _: NumberFormatException => -1
    }
  }

  val source = scala.io.Source.fromFile("resources/airportData/airports.csv")
  val lines = source.getLines().drop(1)

  val data = lines.flatMap { line =>
    val p = line.split(",")
    Seq(AirportData(p(0).toInt, p(1), p(2), p(3), toDoubleOrNeg(p(4)), toDoubleOrNeg(p(5)),
      toDoubleOrNeg(p(6)), p(7), p(8), p(9), p(10)))
  }.toArray

  source.close()

  println(data.length)
  data.take(10) foreach println
}
Is it possible to create a stream of discrete events in fs2? If so, how do I do it?
I just started playing with the library and I know I have a lot to study, but I am not seeing any related examples. E.g. I would like to create a stream for "mousemove" or "click" events in Scala.js or Swing.
I am looking for something like RxJS, where I can use Rx.Observable.create to create discrete events, something like:
// note: pseudo code
var mouse = Rx.Observable.create(subscriber => {
  document.body.addEventListener("mousemove", event => {
    subscriber.onNext(event)
  })
})
The equivalent in fs2 might not be so trivial, but maybe someone can suggest how. I guess it would be done using the Handler and Pull/Push data types, but I am far from understanding how.
Cheers.
Here's an example I came up with which demonstrates how to use fs2 with JavaFX:
import cats.implicits._
import cats.effect._
import cats.effect.implicits._
import javafx.application.{Application, Platform}
import javafx.scene.{Node, Scene}
import javafx.scene.layout._
import javafx.stage.Stage
import fs2._
import fs2.concurrent._
import javafx.beans.value.WritableValue
import javafx.scene.control.{Label, TextField}
import javafx.scene.input.KeyEvent

import scala.concurrent.ExecutionContext
import scala.util.Try

class Fs2Ui extends Application {
  override def start(primaryStage: Stage): Unit = {
    implicit val cs: ContextShift[IO] = IO.contextShift(ExecutionContext.global)
    implicit val timer: Timer[IO] = IO.timer(ExecutionContext.global)

    new Logic[IO]().run(primaryStage).start.unsafeRunSync()
  }

  class Logic[F[_]: ConcurrentEffect: ContextShift: Timer] {
    import Fs2Ui._
    import java.time.{Duration, Instant}
    import java.util.concurrent.TimeUnit.MILLISECONDS

    def run(primaryStage: Stage): F[Unit] = for {
      v <- initializeUi(primaryStage)
      View(input, feedback) = v
      _ <- Stream(input).covary[F]
        .through(typedChars)
        .through(processInput)
        .through(displayFeedback(feedback.textProperty))
        .compile.drain
    } yield ()

    private def initializeUi(primaryStage: Stage): F[View] = updateUi {
      val input = new TextField()
      input.setPrefWidth(300)
      val feedback = new Label("...")

      val vbox = new VBox(input, feedback)
      val root = new StackPane(vbox)
      val scene = new Scene(root)

      primaryStage.setScene(scene)
      primaryStage.show()

      View(input, feedback)
    }

    private def processInput: Pipe[F, TypedChar, Feedback] = for {
      typed <- _
      _ <- Stream.eval(ContextShift[F].shift)
      res <- Stream.eval { time(processSingle(typed)) }
      (d, Feedback(str)) = res
    } yield Feedback(s"$str in [$d]")

    private def displayFeedback(value: WritableValue[String]): Pipe[F, Feedback, Unit] =
      _.map { case Feedback(str) => str } through updateValue(value)

    private def time[A](f: F[A]): F[(Duration, A)] = {
      val now = Timer[F].clock.monotonic(MILLISECONDS).map(Instant.ofEpochMilli)
      for {
        start <- now
        a <- f
        stop <- now
        d = Duration.between(start, stop)
      } yield (d, a)
    }

    private val processSingle: TypedChar => F[Feedback] = {
      import scala.util.Random
      import scala.concurrent.duration._
      val prng = new Random()

      def randomDelay: F[Unit] = Timer[F].sleep { (250 + prng.nextInt(750)).millis }

      c => randomDelay *> Sync[F].delay(Feedback(s"processed $c"))
    }
  }
}

object Fs2Ui {
  case class View(input: TextField, feedback: Label)

  case class TypedChar(value: String)
  case class Feedback(value: String)

  private def typedChars[F[_]: ConcurrentEffect]: Pipe[F, Node, TypedChar] = for {
    node <- _
    q <- Stream.eval(Queue.unbounded[F, KeyEvent])
    _ <- Stream.eval(Sync[F].delay {
      node.setOnKeyTyped { evt => (q enqueue1 evt).toIO.unsafeRunSync() }
    })
    keyEvent <- q.dequeue
  } yield TypedChar(keyEvent.getCharacter)

  private def updateValue[F[_]: Async, A](value: WritableValue[A]): Pipe[F, A, Unit] = for {
    a <- _
    _ <- Stream.eval(updateUi(value setValue a))
  } yield ()

  private def updateUi[F[_]: Async, A](action: => A): F[A] =
    Async[F].async[A] { cb =>
      Platform.runLater { () =>
        cb(Try(action).toEither)
      }
    }
}
The specific parts that demonstrate the bindings between fs2 and JavaFX are the two Pipes: typedChars and updateValue. Personally, I think the most challenging part was adapting a KeyEvent listener to look like an fs2 Stream of events:
node.setOnKeyTyped { evt => (q enqueue1 evt).toIO.unsafeRunSync() }
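Distilled, that callback-to-queue trick works for any listener-based API. A minimal generalized sketch (my own, assuming fs2 2.x with cats-effect 2; register here is a hypothetical function that installs a listener, e.g. setOnKeyTyped or addEventListener):

import cats.effect.ConcurrentEffect
import cats.effect.implicits._
import fs2.Stream
import fs2.concurrent.Queue

def eventStream[F[_]: ConcurrentEffect, A](register: (A => Unit) => Unit): Stream[F, A] =
  for {
    q <- Stream.eval(Queue.unbounded[F, A])
    _ <- Stream.eval(ConcurrentEffect[F].delay {
      // listeners typically fire on a UI thread, so run the enqueue synchronously
      register(a => q.enqueue1(a).toIO.unsafeRunSync())
    })
    a <- q.dequeue
  } yield a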
When using Neo4j unmanaged extensions, one can stream results to the client while traversing the graph like this (in Scala):
import javax.ws.rs.core.{MediaType, Response, StreamingOutput}
val stream: StreamingOutput = ???
Response.ok().entity(stream).`type`(MediaType.APPLICATION_JSON).build()
I can't find a similar possibility when using Neo4j 3 user-defined stored procedures. They return Java 8 Streams, but I can't see how I could add elements to such streams while they are already being consumed, in parallel.
Is it possible?
I have an example of that in one of the APOC procedures.
https://github.com/neo4j-contrib/neo4j-apoc-procedures/blob/master/src/main/java/apoc/cypher/Cypher.java#L77
I want to add more / a more general example of that in the future.
Here is what I came up with, based on Michael Hunger's code (in Scala).
QueueBasedSpliterator:
import java.util.Spliterator
import java.util.concurrent.{BlockingQueue, TimeUnit}
import java.util.function.Consumer
import org.neo4j.kernel.api.KernelTransaction

private class QueueBasedSpliterator[T](queue: BlockingQueue[T],
                                       tombstone: T,
                                       tx: KernelTransaction) extends Spliterator[T] {

  override def tryAdvance(action: Consumer[_ >: T]): Boolean =
    try {
      if (tx.shouldBeTerminated()) false
      else {
        val entry = queue.poll(100, TimeUnit.MILLISECONDS)
        if (entry == null || entry == tombstone) false
        else {
          action.accept(entry)
          true
        }
      }
    } catch {
      case e: InterruptedException => false
    }

  override def trySplit(): Spliterator[T] = null

  override def estimateSize(): Long = Long.MaxValue

  override def characteristics(): Int = Spliterator.ORDERED | Spliterator.NONNULL
}
Notice the 100 ms timeout value. Might require tuning.
ResultsStream (a wrapper around the blocking queue):
import java.util.concurrent.BlockingQueue

class ResultsStream[T](tombstone: T, queue: BlockingQueue[T]) extends AutoCloseable {

  def put(t: T): Unit = {
    queue.put(t)
  }

  override def close(): Unit = {
    put(tombstone)
  }
}
CommonUtil helper methods:
import java.util.concurrent.ArrayBlockingQueue
import java.util.stream.{Stream, StreamSupport}
import org.neo4j.kernel.api.KernelTransaction
import org.neo4j.kernel.internal.GraphDatabaseAPI
import scala.concurrent.{ExecutionContext, Future}

object CommonUtil {

  def inTx(db: GraphDatabaseAPI)(f: => Unit): Unit =
    Managed(db.beginTx()) { tx => f; tx.success() }

  def inTxFuture(db: GraphDatabaseAPI)(f: => Unit)(implicit ec: ExecutionContext): Future[Unit] =
    Future(inTx(db)(f))

  def streamResults[T](tombstone: T, tx: KernelTransaction)
                      (f: ResultsStream[T] => Any): Stream[T] = {
    val queue = new ArrayBlockingQueue[T](100)
    f(new ResultsStream(tombstone, queue))
    StreamSupport.stream(new QueueBasedSpliterator[T](queue, tombstone, tx), false)
  }
}
Some more helpers:
object Managed {

  type AutoCloseableView[T] = T => AutoCloseable

  def apply[T: AutoCloseableView, V](resource: T)(op: T => V): V =
    try {
      op(resource)
    } finally {
      resource.close()
    }
}
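Managed is a small loan-pattern helper: the AutoCloseableView[T] context bound is just an implicit view T => AutoCloseable, so it works for any closeable resource. A tiny usage sketch (the file name is hypothetical):

val firstLine: String =
  Managed(scala.io.Source.fromFile("example.txt")) { src =>
    src.getLines().next() // the source is closed afterwards, even if this block throws
  }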
Pool:
import java.util.concurrent.{ArrayBlockingQueue, ThreadPoolExecutor, TimeUnit}
import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}

object Pool {

  lazy val DefaultExecutionContext: ExecutionContextExecutor =
    ExecutionContext.fromExecutor(createDefaultExecutor())

  // values might be tuned in production
  def createDefaultExecutor(corePoolSize: Int = Runtime.getRuntime.availableProcessors() * 2,
                            keepAliveSeconds: Int = 30) = {
    val queueSize = corePoolSize * 25
    new ThreadPoolExecutor(
      corePoolSize / 2,
      corePoolSize,
      keepAliveSeconds.toLong,
      TimeUnit.SECONDS,
      new ArrayBlockingQueue[Runnable](queueSize),
      new ThreadPoolExecutor.CallerRunsPolicy()
    )
  }
}
Usage in a procedure:
@Procedure("example.readStream")
def readStream(@Name("nodeId") nodeId: NodeId): Stream[StreamingItem] =
  CommonUtil.streamResults(StreamingItem.Tombstone, kernelTx) { results =>
    CommonUtil.inTxFuture(db) { // uses Pool.DefaultExecutionContext
      Managed(results) { _ =>
        graphUtil.findTreeNode(nodeId).foreach { node =>
          // add elements to the stream here
          results.put(???)
        }
      }
    }
  }
StreamingItem.Tombstone is just a static StreamingItem instance whose special meaning is to close the stream. db and kernelTx are just context variables set by Neo4j:

@Context
public GraphDatabaseAPI db;

@Context
public KernelTransaction kernelTx;
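For illustration only, a hypothetical shape for StreamingItem (the real class is not shown in the post); Neo4j maps the public fields of each streamed record to the procedure's output columns:

class StreamingItem(val value: String) // hypothetical payload

object StreamingItem {
  // sentinel with no payload: enqueueing it makes QueueBasedSpliterator stop
  // advancing, which ends the Java 8 Stream returned by the procedure
  val Tombstone: StreamingItem = new StreamingItem(null)
}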
I have a function get: T => scala.concurrent.Future[T]
I want to iterate it like:
val futs: Iterator[Future[T]] = Iterator.iterate(get(init)) {
  _.flatMap(prev => get(prev))
}
But the element type of the Iterator is Future[T], so it is not easy to process this iterator.
How could I convert that to a Process[?, T] (maybe with T => Future[T] as the context type F)?
Not a super nice solution, but it works:
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{Future => SFuture}
import scala.language.implicitConversions
import scalaz.concurrent.Task
import scalaz.stream._

implicit class Transformer[+T](fut: => SFuture[T]) {
  def toTask(implicit ec: scala.concurrent.ExecutionContext): Task[T] = {
    import scala.util.{Success, Failure}
    import scalaz.syntax.either._
    Task.async {
      register =>
        fut.onComplete {
          case Success(v) => register(v.right)
          case Failure(ex) => register(ex.left)
        }
    }
  }
}

val init: Int = 0

def f(i: Int): SFuture[Int] = SFuture(i + 1)

val p = Process.repeatEval[Task, Int] {
  var prev = init
  f(prev).toTask.map(next => { prev = next; next })
}

println(p.take(10).runLog.run)
Assuming you know how to convert a Future to a Task (either via an implicit or via Process.transform), this shall work:
def get(t: T): Task[T] = ???

val initial: T = ???

val signal = scalaz.stream.async.signal[T]

// emit the initial value, and follow with any change of `T` within the signal
val source: Process[Task, T] = eval_(signal.set(initial)) fby signal.discrete

// sink to update `T` within the signal
val signalSink: Sink[Task, T] = constant((t: T) => signal.set(t))

// result, that essentially converts T => Task[T] into Process[Task, T]
val result: Process[Task, T] = source.observe(signalSink)
Finally I got what Pavel Chlupacek wanted to say. Signal looks cool, but it is a little bit cryptic for a beginner.
import scala.concurrent.{Future => SFuture}
import scala.language.implicitConversions
import scalaz.concurrent.Task
import scalaz.stream._
import scala.concurrent.ExecutionContext.Implicits.global

implicit class Transformer[+T](fut: => SFuture[T]) {
  def toTask(implicit ec: scala.concurrent.ExecutionContext): Task[T] = {
    import scala.util.{Failure, Success}
    import scalaz.syntax.either._
    Task.async {
      register =>
        fut.onComplete {
          case Success(v) => register(v.right)
          case Failure(ex) => register(ex.left)
        }
    }
  }
}

val init: Int = 0

def f(i: Int): SFuture[Int] = SFuture(i + 1)

val signal = scalaz.stream.async.signal[Int]

// Observe values and push them to the signal
val signalSink: Process[Task, Int => Task[Unit]] = // =:= Sink[Task, Int]
  Process.constant((input: Int) => signal.set(input))

// Start from init and then consume from the signal
val result = (Process.eval(f(init).toTask) ++ signal.discrete.evalMap(i => f(i).toTask)) observe signalSink

println(result.take(10).runLog.run)
I made another solution:
def iterate[F[_], A](init: A)(f: A => F[A]): Process[F, A] = {
  Process.emit(init) ++ Process.await(f(init)) { next => iterate(next)(f) }
}
This is already a feature of scalaz-stream 0.6; see this PR for details.
In order to use scala.concurrent.Future as the context type F, we need to import scalaz.std.scalaFuture._ and provide a Catchable instance:
implicit def futureCatchable(implicit ctx: ExecCtx): Catchable[Future] = {
  new Catchable[Future] {
    def attempt[A](f: Future[A]) = f.map(\/-(_)).recover { case e => -\/(e) }
    def fail[A](err: Throwable) = Future.failed(err)
  }
}
Finally I got this:
package stream

import scala.concurrent._
import scalaz._
import scalaz.stream._

package object future {
  type ExecCtx = ExecutionContext

  def iterate[F[_], A](init: A)(f: A => F[A]): Process[F, A] = {
    Process.emit(init) ++ Process.await(f(init)) { next => iterate(next)(f) }
  }

  implicit def futureCatchable(implicit ctx: ExecCtx): Catchable[Future] = {
    new Catchable[Future] {
      def attempt[A](f: Future[A]) = f.map(\/-(_)).recover { case e => -\/(e) }
      def fail[A](err: Throwable) = Future.failed(err)
    }
  }
}

object futureApp extends App {
  import scalaz.Scalaz._
  import future._
  import scala.concurrent.ExecutionContext.Implicits.global

  def get(i: Int) = Future {
    println(i + 1)
    i + 1
  }

  iterate(0)(get).takeWhile(_ < 100000).run
}