I want to group a large Stream[F, A] into a Stream[Stream[F, A]] with at most n elements in each inner stream.
This is what I did: basically, pipe chunks into a Queue[F, Queue[F, Chunk[A]]], and then yield the queue elements as the result stream.
implicit class StreamSyntax[F[_], A](s: Stream[F, A])(
    implicit F: Concurrent[F]) {

  def groupedPipe(
      lastQRef: Ref[F, Queue[F, Option[Chunk[A]]]],
      n: Int): Pipe[F, A, Stream[F, A]] = { in =>
    val initQs =
      Queue.unbounded[F, Option[Queue[F, Option[Chunk[A]]]]].flatMap { qq =>
        Queue.bounded[F, Option[Chunk[A]]](1).flatMap { q =>
          lastQRef.set(q) *> qq.enqueue1(Some(q)).as(qq -> q)
        }
      }

    Stream.eval(initQs).flatMap {
      case (qq, initQ) =>
        def newQueue = Queue.bounded[F, Option[Chunk[A]]](1).flatMap { q =>
          qq.enqueue1(Some(q)) *> lastQRef.set(q).as(q)
        }

        val evalStream = {
          in.chunks
            .evalMapAccumulate((0, initQ)) {
              case ((i, q), c) if i + c.size >= n =>
                val (l, r) = c.splitAt(n - i)
                q.enqueue1(Some(l)) >> q.enqueue1(None) >>
                  q.enqueue1(None) >> newQueue.flatMap { nq =>
                    nq.enqueue1(Some(r)).as(((r.size, nq), c))
                  }
              case ((i, q), c) if (i + c.size) < n =>
                q.enqueue1(Some(c)).as(((i + c.size, q), c))
            }
            .attempt ++ Stream.eval {
            lastQRef.get.flatMap { last =>
              last.enqueue1(None) *> last.enqueue1(None)
            } *> qq.enqueue1(None)
          }
        }

        qq.dequeue.unNoneTerminate
          .map(q =>
            q.dequeue.unNoneTerminate
              .flatMap(Stream.chunk)
              .onFinalize(
                q.dequeueChunk(Int.MaxValue).unNoneTerminate.compile.drain))
          .concurrently(evalStream)
    }
  }

  def grouped(n: Int) = {
    Stream.eval {
      Queue.unbounded[F, Option[Chunk[A]]].flatMap { empty =>
        Ref.of[F, Queue[F, Option[Chunk[A]]]](empty)
      }
    }.flatMap { ref =>
      val p = groupedPipe(ref, n)
      s.through(p)
    }
  }
}
But this is very complicated; is there a simpler way?
fs2 has the chunkN and chunkLimit methods that can help with grouping:
stream.chunkN(n).map(Stream.chunk)
stream.chunkLimit(n).map(Stream.chunk)
chunkN produces chunks of size n until the end of the stream.
chunkLimit splits existing chunks and can produce chunks of variable size.
scala> Stream(1,2,3).repeat.chunkN(2).take(5).toList
res0: List[Chunk[Int]] = List(Chunk(1, 2), Chunk(3, 1), Chunk(2, 3), Chunk(1, 2), Chunk(3, 1))
scala> (Stream(1) ++ Stream(2, 3) ++ Stream(4, 5, 6)).chunkLimit(2).toList
res0: List[Chunk[Int]] = List(Chunk(1), Chunk(2, 3), Chunk(4, 5), Chunk(6))
In addition to the already mentioned chunkN, also consider using groupWithin (fs2 1.0.1):
def groupWithin[F2[x] >: F[x]](n: Int, d: FiniteDuration)(implicit timer: Timer[F2], F: Concurrent[F2]): Stream[F2, Chunk[O]]
Divides this stream into groups of elements received within a time window, or limited by the number of elements, whichever happens first. Empty groups, which can occur if no elements can be pulled from upstream in a given time window, will not be emitted.
Note: a time window starts each time downstream pulls.
I'm not sure why you'd want this to be nested streams, since the requirement is to have "at most n elements" in one batch - which implies that you're keeping track of a finite number of elements (which is exactly what a Chunk is for). Either way, a Chunk can always be represented as a Stream with Stream.chunk:
val chunks: Stream[F, Chunk[O]] = ???
val streamOfStreams: Stream[F, Stream[F, O]] = chunks.map(Stream.chunk)
Here's a complete example of how to use groupWithin:
import cats.implicits._
import cats.effect.{ExitCode, IO, IOApp}
import fs2._
import scala.concurrent.duration._
object GroupingDemo extends IOApp {
  override def run(args: List[String]): IO[ExitCode] = {
    Stream('a, 'b, 'c).covary[IO]
      .groupWithin(2, 1.second)
      .map(_.toList)
      .showLinesStdOut
      .compile.drain
      .as(ExitCode.Success)
  }
}
Outputs:
List('a, 'b)
List('c)
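To get back the asker's Stream[F, Stream[F, A]] shape, groupWithin can be combined with Stream.chunk. A minimal sketch against fs2 1.x (the one-hour window is an arbitrary stand-in I chose so that grouping is effectively size-driven; groupWithin may still emit a smaller group if the window elapses first):

import scala.concurrent.duration._
import cats.effect.{Concurrent, Timer}
import fs2.Stream

// groups of at most n elements, each exposed as an inner stream
def grouped[F[_]: Concurrent: Timer, A](s: Stream[F, A], n: Int): Stream[F, Stream[F, A]] =
  s.groupWithin(n, 1.hour).map(Stream.chunk)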
Finally, I settled on a more reliable version (using Hotswap to ensure queue termination), like this:
def grouped(
    innerSize: Int
)(implicit F: Async[F]): Stream[F, Stream[F, A]] = {

  type InnerQueue = Queue[F, Option[Chunk[A]]]
  type OuterQueue = Queue[F, Option[InnerQueue]]

  def swapperInner(swapper: Hotswap[F, InnerQueue], outer: OuterQueue) = {
    val innerRes =
      Resource.make(Queue.unbounded[F, Option[Chunk[A]]])(_.offer(None))
    swapper.swap(innerRes).flatTap(q => outer.offer(q.some))
  }

  def loopChunk(
      gathered: Int,
      curr: Queue[F, Option[Chunk[A]]],
      chunk: Chunk[A],
      newInnerQueue: F[InnerQueue]
  ): F[(Int, Queue[F, Option[Chunk[A]]])] = {
    if (gathered + chunk.size > innerSize) {
      val (left, right) = chunk.splitAt(innerSize - gathered)
      curr.offer(left.some) >> newInnerQueue.flatMap { nq =>
        loopChunk(0, nq, right, newInnerQueue)
      }
    } else if (gathered + chunk.size == innerSize) {
      curr.offer(chunk.some) >> newInnerQueue.tupleLeft(0)
    } else {
      curr.offer(chunk.some).as(gathered + chunk.size -> curr)
    }
  }

  val prepare = for {
    outer <- Resource.eval(Queue.unbounded[F, Option[InnerQueue]])
    swapper <- Hotswap.create[F, InnerQueue]
  } yield outer -> swapper

  Stream.resource(prepare).flatMap {
    case (outer, swapper) =>
      val newInner = swapperInner(swapper, outer)

      val background = Stream.eval(newInner).flatMap { initQueue =>
        s.chunks
          .filter(_.nonEmpty)
          .evalMapAccumulate(0 -> initQueue) { (state, chunk) =>
            val (gathered, curr) = state
            loopChunk(gathered, curr, chunk, newInner).tupleRight({})
          }
          .onFinalize(swapper.clear *> outer.offer(None))
      }

      val foreground = Stream
        .fromQueueNoneTerminated(outer)
        .map(i => Stream.fromQueueNoneTerminatedChunk(i))

      foreground.concurrently(background)
  }
}
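For reference, a usage sketch of the above (my own illustration, assuming grouped is exposed through the same implicit syntax class as in the question, on fs2 3.x / cats-effect 3):

import cats.effect.{IO, IOApp}
import fs2.Stream

object GroupedDemo extends IOApp.Simple {
  def run: IO[Unit] =
    Stream.range(0, 10).covary[IO]
      .grouped(3)                                    // Stream[IO, Stream[IO, Int]]
      .evalMap(_.compile.toList.flatMap(IO.println)) // expect List(0, 1, 2), List(3, 4, 5), ...
      .compile
      .drain
}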
I have the following issue. Given this input list, I want to sum the integers for each group of lines having the same title:
val listIn = List(
  "TitleB,Int,11,0",
  "TitleB,Int,1,0",
  "TitleB,Int,1,0",
  "TitleB,Int,3,0",
  "TitleA,STR,3,0",
  "TitleC,STR,4,5")
I wrote the following function:
def sumB(list: List[String]): List[String] = {
  val itemPattern = raw"(.*)(\d+),(\d+)\s*".r
  list.foldLeft(ListMap.empty[String, (Int, Int)].withDefaultValue((0, 0))) {
    case (line, stri) =>
      val itemPattern(k, i, j) = stri
      val (a, b) = line(k)
      line.updated(k, (i.toInt + a, j.toInt + b))
  }.toList.map { case (k, (i, j)) => s"$k$i,$j" }
}
Expected output would be:
List("TitleB,Int,16,0",
"TitleA,STR,3,0",
"TitleC,STR,4,5")
Since you are looking to preserve the order of the titles as they appear in the input data, I would suggest using a mutable LinkedHashMap with foldLeft, as below:
import scala.collection.mutable
import scala.util.Try

val finalResult = listIn.foldLeft(new mutable.LinkedHashMap[String, (String, String, Int, Int)]) { (x, y) =>
  val splitted = y.split(",")
  if (x.keySet.contains(Try(splitted(0)).getOrElse(""))) {
    val oldTuple = x(Try(splitted(0)).getOrElse(""))
    x.update(Try(splitted(0)).getOrElse(""), (Try(splitted(0)).getOrElse(""), Try(splitted(1)).getOrElse(""), oldTuple._3 + Try(splitted(2).toInt).getOrElse(0), oldTuple._4 + Try(splitted(3).toInt).getOrElse(0)))
    x
  } else {
    x.put(Try(splitted(0)).getOrElse(""), (Try(splitted(0)).getOrElse(""), Try(splitted(1)).getOrElse(""), Try(splitted(2).toInt).getOrElse(0), Try(splitted(3).toInt).getOrElse(0)))
    x
  }
}.mapValues(iter => iter._1 + "," + iter._2 + "," + iter._3 + "," + iter._4).values.toList
finalResult should be your desired output:
List("TitleB,Int,16,0", "TitleA,STR,3,0", "TitleC,STR,4,5")
What is wrong with the following code snippet?
val loginInfoFuture: Future[LoginInfo] = credentialsProvider.authenticate(credentials)
for{loginInfo <- loginInfoFuture}{
println("in loginInfo future")
} yield Future{Ok(Json.toJson(JsonResultError("Invalid Body Type. Need Json")))}
I am seeing error in IDE - Error:(239, 17) ';' expected but 'yield' found.
I tried a similar piece of code in the REPL, and it seems to work fine:
scala> import scala.concurrent.Future
import scala.concurrent.Future
scala> import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.ExecutionContext.Implicits.global
scala> val f:Future[Int] = Future{1}
f: scala.concurrent.Future[Int] = Future(Success(1))
scala> for(f1 <- f) yield f1
res0: scala.concurrent.Future[Int] = Future(<not completed>)
For reference, below is the full function
def signInUser = silhouette.UserAwareAction.async { implicit request => {
    val body: AnyContent = request.body
    val jsonBody: Option[JsValue] = body.asJson

    jsonBody match {
      case Some(json) => {
        val userSignin: Option[UserSignin] = json.asOpt[UserSignin] //check if json conforms with UserProfile structure
        userSignin match {
          case Some(signinInfo) => { //format of JSON is correct
            //Get signin info from JSON (email and password)
            val credentials: Credentials = Credentials(signinInfo.signinInfo.email, signinInfo.signinInfo.password)
            val authInfoRepository = new DelegableAuthInfoRepository(userRepo.passwordRepo)
            val passwordHasherRegistory = new PasswordHasherRegistry(userRepo.passwordHasher)
            val credentialsProvider = new CredentialsProvider(authInfoRepository, passwordHasherRegistory)

            for { loginInfo <- loginInfoFuture } { //for returns unit. Should use yield
              println("in loginInfo future")
            } yield Future { Ok(Json.toJson(JsonResultError("Invalid Body Type. Need Json"))) }
          }
          case None => { //No signin info found
            Future {
              Ok(Json.toJson(JsonResultError("Invalid user. No Login info found")))
            }
          }
        }
      }
      case None => { //NO Body
        Future {
          Ok(Json.toJson(JsonResultError("Invalid Body Type. Need Json")))
        }
      }
    } //jsonBody match
  } //async
} //def signin
for{loginInfo <- loginInfoFuture}{ //for returns unit. Should use yield
println("in loginInfo future")
} yield Future{Ok(Json.toJson(JsonResultError("Invalid Body Type. Need Json")))}
This is invalid. Your for/yield needs to be in the format:
for {
  y <- z
  x <- y
  //etc
} yield {
  //whatever
}
The println after the for but before the yield is throwing you off. To get the println inside the for/yield, you can assign it to a value:
for {
  y <- z
  a = println(y) // will print out every y
  x <- y
  //etc
} yield {
  //whatever
}
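Applied to the snippet from the question, that workaround would look like this (a sketch using the question's names; note it yields the Result directly rather than wrapping it in another Future):

for {
  loginInfo <- loginInfoFuture
  _ = println("in loginInfo future") // runs when the future completes successfully
} yield Ok(Json.toJson(JsonResultError("Invalid Body Type. Need Json")))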
for/yield blocks are stupid like that. At least there are work-arounds though!
The following section is from Scala's Future documentation:
def foo(): Unit = {
  val f = Future { 5 }
  val g = Future { 3 }
  val h = for {
    x: Int <- f // returns Future(5)
    y: Int <- g // returns Future(3)
  } yield x + y
}
You, on the other hand, try to do this:
def foo(): Unit = {
  val f = Future { 5 }
  val g = Future { 3 }
  val h = for {
    x: Int <- f // returns Future(5)
    y: Int <- g // returns Future(3)
  } {
    println("whatever") // <<<<<<<<<
  } yield x + y
}
The extra block of code that I point out is what's causing the compilation error, and it is what you did not include in your Scala REPL example.
This is how you can print within a Future:
def foo(): Unit = {
  val f = Future {
    println("5")
    5
  }
  val g = Future {
    println("3")
    3
  }
  val h = for {
    x: Int <- f // returns Future(5)
    y: Int <- g // returns Future(3)
  } yield x + y
}
Error:(239, 17) ';' expected but 'yield' found.
simply means that the for comprehension is malformed.
So either with yield:
for {
  loginInfo <- loginInfoFuture
  //other conditions and statements
} yield //value to be returned
or without yield:
for (loginInfo <- loginInfoFuture) {
  //value updated
}
are valid for comprehension definitions.
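For reference, the two shapes desugar to map and foreach, which is why only the first one produces a value (result and sideEffect below are hypothetical placeholders):

// with yield: desugars to map, so the comprehension has a value
for (loginInfo <- loginInfoFuture) yield result(loginInfo)
loginInfoFuture.map(loginInfo => result(loginInfo))         // equivalent

// without yield: desugars to foreach, so the comprehension is Unit
for (loginInfo <- loginInfoFuture) { sideEffect(loginInfo) }
loginInfoFuture.foreach(loginInfo => sideEffect(loginInfo)) // equivalent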
I had a simple task: find which combination occurs most often when we roll 4 six-sided dice and remove the one with the fewest points.
So, the question is: are there any Scala core classes to generate streams of Cartesian products in Scala? If not, how do I implement it in the simplest and most efficient way?
Here is the code, and a comparison with a naive implementation in Scala:
object D extends App {

  def dropLowest(a: List[Int]) = {
    a diff List(a.min)
  }

  def cartesian(to: Int, times: Int): Stream[List[Int]] = {
    def stream(x: List[Int]): Stream[List[Int]] = {
      if (hasNext(x)) x #:: stream(next(x)) else Stream(x)
    }

    def hasNext(x: List[Int]) = x.exists(n => n < to)

    def next(x: List[Int]) = {
      def add(current: List[Int]): List[Int] = {
        if (current.head == to) 1 :: add(current.tail) else current.head + 1 :: current.tail // here is a possible bug when we get maximal value, don't reuse this method
      }
      add(x.reverse).reverse
    }

    stream(Range(0, times).map(t => 1).toList)
  }

  def getResult(list: Stream[List[Int]]) = {
    list.map(t => dropLowest(t).sum).groupBy(t => t).map(t => (t._1, t._2.size)).toMap
  }

  val list1 = cartesian(6, 4)
  val list = for (i <- Range(1, 7); j <- Range(1, 7); k <- Range(1, 7); l <- Range(1, 7)) yield List(i, j, k, l)

  println(getResult(list1))
  println(getResult(list.toStream) equals getResult(list1))
}
Thanks in advance
I think you can simplify your code by using flatMap:
val stream = (1 to 6).toStream

def cartesian(times: Int): Stream[Seq[Int]] = {
  if (times == 0) {
    Stream(Seq())
  } else {
    stream.flatMap { i => cartesian(times - 1).map(i +: _) }
  }
}
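A quick REPL check of the Stream version (the output follows from the definitions above):

scala> cartesian(2).take(3).toList
res0: List[Seq[Int]] = List(List(1, 1), List(1, 2), List(1, 3))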
Maybe a little bit more efficient (memory-wise) would be using Iterators instead:
val pool = (1 to 6)

def cartesian(times: Int): Iterator[Seq[Int]] = {
  if (times == 0) {
    Iterator(Seq())
  } else {
    pool.iterator.flatMap { i => cartesian(times - 1).map(i +: _) }
  }
}
or even more concisely, by replacing the recursive calls with a fold:
def cartesian[A](list: Seq[Seq[A]]): Iterator[Seq[A]] =
  list.foldLeft(Iterator(Seq[A]())) {
    case (acc, l) => acc.flatMap(i => l.map(_ +: i))
  }
and then:
cartesian(Seq.fill(4)(1 to 6)).map(dropLowest).toSeq.groupBy(i => i.sorted).mapValues(_.size).toSeq.sortBy(_._2).foreach(println)
(Note that you cannot use groupBy on Iterators, so Streams or even Lists are the way to go; the above code is still valid since toSeq on an Iterator actually returns a lazy Stream.)
If you are considering stats on the sums of the dice instead of the combinations, you can update the dropLowest function:
def dropLowest(l: Seq[Int]) = l.sum - l.min
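For instance, combining that with the fold-based cartesian above gives the full distribution of sums (a sketch; most frequent sums print first):

cartesian(Seq.fill(4)(1 to 6))
  .map(l => l.sum - l.min)                    // drop the lowest die, keep the sum
  .toSeq
  .groupBy(identity)
  .map { case (sum, occ) => sum -> occ.size } // sum -> number of occurrences
  .toSeq
  .sortBy(-_._2)                              // most frequent first
  .foreach(println)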
I implemented a simple method to generate the Cartesian product of several Seqs, like this:
object RichSeq {
  implicit def toRichSeq[T](s: Seq[T]) = new RichSeq[T](s)
}

class RichSeq[T](s: Seq[T]) {
  import RichSeq._

  def cartesian(ss: Seq[Seq[T]]): Seq[Seq[T]] = {
    ss.toList match {
      case Nil => Seq(s)
      case s2 :: Nil => {
        for (e <- s) yield s2.map(e2 => Seq(e, e2))
      }.flatten
      case s2 :: tail => {
        for (e <- s) yield s2.cartesian(tail).map(seq => e +: seq)
      }.flatten
    }
  }
}
Obviously, this one is really slow, as it calculates the whole product at once. Has anyone implemented a lazy solution for this problem in Scala?
UPDATE
OK, so I implemented a reeeeally stupid but working version of an iterator over a Cartesian product. Posting it here for future enthusiasts:
import scala.collection.mutable.ArrayBuffer

object RichSeq {
  implicit def toRichSeq[T](s: Seq[T]) = new RichSeq(s)
}

class RichSeq[T](s: Seq[T]) {
  def lazyCartesian(ss: Seq[Seq[T]]): Iterator[Seq[T]] = new Iterator[Seq[T]] {
    private[this] val seqs = s +: ss
    private[this] var indexes = Array.fill(seqs.length)(0)
    private[this] val counts = Vector(seqs.map(_.length - 1): _*)
    private[this] var current = 0

    def next(): Seq[T] = {
      val buffer = ArrayBuffer.empty[T]
      if (current != 0) {
        throw new NoSuchElementException("no more elements to traverse")
      }
      val newIndexes = ArrayBuffer.empty[Int]
      var inside = 0
      for ((index, i) <- indexes.zipWithIndex) {
        buffer.append(seqs(i)(index))
        newIndexes.append(index)
        if ((0 to i).forall(ind => newIndexes(ind) == counts(ind))) {
          inside = inside + 1
        }
      }
      current = inside
      if (current < seqs.length) {
        for (i <- (0 to current).reverse) {
          if ((0 to i).forall(ind => newIndexes(ind) == counts(ind))) {
            newIndexes(i) = 0
          } else if (newIndexes(i) < counts(i)) {
            newIndexes(i) = newIndexes(i) + 1
          }
        }
        current = 0
        indexes = newIndexes.toArray
      }
      buffer.result()
    }

    def hasNext: Boolean = current != seqs.length
  }
}
Here's my solution to the given problem. Note that the laziness comes simply from using .view on the "root collection" of the for comprehension.
scala> def combine[A](xs: Traversable[Traversable[A]]): Seq[Seq[A]] =
| xs.foldLeft(Seq(Seq.empty[A])){
| (x, y) => for (a <- x.view; b <- y) yield a :+ b }
combine: [A](xs: Traversable[Traversable[A]])Seq[Seq[A]]
scala> combine(Set(Set("a","b","c"), Set("1","2"), Set("S","T"))) foreach (println(_))
List(a, 1, S)
List(a, 1, T)
List(a, 2, S)
List(a, 2, T)
List(b, 1, S)
List(b, 1, T)
List(b, 2, S)
List(b, 2, T)
List(c, 1, S)
List(c, 1, T)
List(c, 2, S)
List(c, 2, T)
To obtain this, I started from the function combine defined in https://stackoverflow.com/a/4515071/53974, passing it the function (a, b) => (a, b). However, that didn't quite work directly, since that code expects a function of type (A, A) => A. So I just adapted the code a bit.
These might be a starting point:
Cartesian product of two lists
Expand a Set[Set[String]] into Cartesian Product in Scala
https://stackoverflow.com/questions/6182126/im-learning-scala-would-it-be-possible-to-get-a-little-code-review-and-mentori
What about:
def cartesian[A](list: List[Seq[A]]): Iterator[Seq[A]] = {
  if (list.isEmpty) {
    Iterator(Seq())
  } else {
    list.head.iterator.flatMap { i => cartesian(list.tail).map(i +: _) }
  }
}
Simple and lazy ;)
def cartesian[A](list: List[List[A]]): List[List[A]] = {
  list match {
    case Nil => List(List())
    case h :: t => h.flatMap(i => cartesian(t).map(i :: _))
  }
}
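For example, a quick check (output computed from the definition above):

scala> cartesian(List(List(1, 2), List(3, 4)))
res0: List[List[Int]] = List(List(1, 3), List(1, 4), List(2, 3), List(2, 4))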
You can look here: https://stackoverflow.com/a/8318364/312172 to see how to translate a number into an index over all possible values, without generating every element.
This technique can be used to implement a stream.
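A minimal sketch of that idea (my own illustration; nthProduct and lazyProduct are hypothetical names, not from the linked answer): treat the index as a mixed-radix number whose digits select one element from each sequence.

// Decode index n as a mixed-radix number: each "digit" selects one
// element from the corresponding sequence; the rightmost varies fastest.
def nthProduct[A](seqs: Seq[Seq[A]], n: Long): Seq[A] =
  seqs.foldRight((n, List.empty[A])) {
    case (s, (rem, acc)) =>
      (rem / s.length, s((rem % s.length).toInt) :: acc)
  }._2

// A lazy traversal of the whole product, built on top of nthProduct.
def lazyProduct[A](seqs: Seq[Seq[A]]): Iterator[Seq[A]] = {
  val total = seqs.map(_.length.toLong).product
  (0L until total).iterator.map(nthProduct(seqs, _))
}

For example, nthProduct(Seq(Seq(1, 2), Seq(3, 4)), 2) yields List(2, 3).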