Akka Streams only processing one Collection - scala

I have the following situation: I have a bunch of directories, each containing a bunch of files. I'm processing them using Akka Streams, but for some reason only the last sequence is processed. Here is the code of the method I have; let me know if you see something wrong.
/** Builds the S3 loading stream, runs it, and blocks until the record count is available.
  *
  * The stream is: dated URLs -> file contents -> parsed messages -> folding sink
  * (`persistDataSeq`). The total is printed only when the stream succeeds.
  */
def read(): Unit = {
  implicit val system = ActorSystem("LiveS3Parser")
  implicit val materializer = ActorMaterializer()

  val s3Reader = new LiveSequenceFileReader(conf.getString("s3url"))
  val days = generateDates(conf.getString("startDate"), conf.getString("endDate"))
  // The result of this call is not used here.
  s3Reader.readAllFilesFromPath(conf.getString("s3url"))

  val parsedBatches = generateURLS(days, conf)
    .via(readDataFromS3(s3Reader))
    .via(parseJsonSeq())
  val countingSink = Sink.fold(0)(persistDataSeq)
  val sum: Future[Int] = parsedBatches.toMat(countingSink)(Keep.right).run()

  sum.andThen {
    case _ => sum.foreach(c => println(s"Total records Loaded: $c"))
  }
  // Blocks the calling thread with no timeout.
  Await.result(sum, Duration.Inf)
}
/** Emits one URL per timestamp: the configured `s3url`, then `dt=`, then the day as `yyyy-MM-dd`.
  *
  * @param data epoch-millisecond timestamps, one per day to load
  * @param conf configuration providing the `s3url` prefix
  */
def generateURLS(data: Seq[Long], conf: Config): Source[String, NotUsed] = {
  val s3URL = conf.getString("s3url")
  val urls = for (millis <- data) yield {
    val partitionPrefix = s3URL.concat("dt=")
    partitionPrefix.concat(DateUtils.formatDate(new Date(millis), "yyyy-MM-dd"))
  }
  Source(urls.to[scala.collection.immutable.Seq])
}
/** Flow that reads the files behind each incoming URL via `readFiles`.
  *
  * Up to one read per available processor runs at a time, and results are emitted
  * in completion order rather than input order (`mapAsyncUnordered`).
  */
def readDataFromS3(lv: LiveSequenceFileReader)(implicit ec: ExecutionContext): Flow[String, Seq[KeyValue], NotUsed] = {
  val parallelism = Runtime.getRuntime().availableProcessors()
  Flow[String].mapAsyncUnordered(parallelism) { url =>
    Future(readFiles(url, lv))
  }
}
/** Flow that parses each incoming batch of key/value records via `parseAllItems`.
  *
  * Up to one batch per available processor is parsed at a time, and batches are
  * emitted in completion order (`mapAsyncUnordered`).
  */
def parseJsonSeq()(implicit ec: ExecutionContext): Flow[Seq[KeyValue], Seq[Try[OptimizedSearchQueryEventMessage]], NotUsed] = {
  val parallelism = Runtime.getRuntime().availableProcessors()
  Flow[Seq[KeyValue]].mapAsyncUnordered(parallelism) { batch =>
    Future(parseAllItems(batch))
  }
}
/** Reads every record under `url` through the given reader, logging the URL and the record count. */
def readFiles(url: String, lv: LiveSequenceFileReader): Seq[KeyValue] = {
  println(s"Reading Files from $url")
  val records = lv.readAllFilesFromPath(url)
  println(s"Records to process${records.size()}")
  records
}
/** Parses the value of every record in `seq`, keeping one `Try` per record. */
def parseAllItems(seq: Seq[KeyValue]) = {
  for (record <- seq) yield parseItem(record.getValue)
}
/** Deserializes one JSON string with `mapper`; a failure is captured in the returned `Try`. */
def parseItem(data: String): Try[OptimizedSearchQueryEventMessage] =
  Try {
    mapper.readValue(data, classOf[OptimizedSearchQueryEventMessage])
  }
/** Returns the start-of-day epoch milliseconds of every calendar day from `startingDate`
  * to `endDate`, both inclusive.
  *
  * Both arguments are parsed with `SimpleDateFormat("yyyy-MM-dd")` in the formatter's
  * time zone. The range is walked one calendar day at a time rather than by adding 24
  * hours of milliseconds: days are 23 or 25 hours long around daylight-saving changes,
  * so a fixed step repeats one day and misses the last one.
  *
  * @param startingDate first day, `yyyy-MM-dd`
  * @param endDate last day, `yyyy-MM-dd`
  * @return one timestamp per day in ascending order; empty when the start is after the end
  * @throws java.text.ParseException if either argument cannot be parsed
  */
def generateDates(startingDate: String, endDate: String): Seq[Long] = {
  val fmt = new SimpleDateFormat("yyyy-MM-dd")
  // Use the zone the formatter parses with, so parsing and day arithmetic agree.
  val zone = fmt.getTimeZone.toZoneId

  def toLocalDate(text: String): java.time.LocalDate =
    java.time.Instant.ofEpochMilli(fmt.parse(text).getTime).atZone(zone).toLocalDate

  val firstDay = toLocalDate(startingDate)
  val lastDay = toLocalDate(endDate)

  Iterator
    .iterate(firstDay)(_.plusDays(1))
    .takeWhile(day => !day.isAfter(lastDay))
    .map(day => day.atStartOfDay(zone).toInstant.toEpochMilli)
    .toVector
}

Related

how to delete a java file created from akka streams

I am using Akka Streams and following a tutorial. The code tries to delete the file it created, but the file is not deleted, and I cannot tell which resource is still open. file.delete() returns false, even though the file has read, write and execute permissions.
Here is the example code:
/** Pairs a multipart form field name with a function that chooses the destination file for an upload in that field. */
final case class FileField(fieldName: String, fileNameF: FileInfo ⇒ File)
/** Immutable accumulator for a multipart upload.
  *
  * @param form  text values collected per form field name
  * @param files uploaded files with their metadata, in the order they were added
  */
final case class PartsAndFiles(form: immutable.Map[String, List[String]], files: immutable.Seq[(FileInfo, File)]) {

  /** Returns a copy in which `content` is prepended to the values stored for `fieldName`. */
  final def addForm(fieldName: String, content: String): PartsAndFiles = {
    val previous: List[String] = form.getOrElse(fieldName, Nil)
    copy(form = form.updated(fieldName, content :: previous))
  }

  /** Returns a copy with `(info, file)` appended to `files`. */
  final def addFile(info: FileInfo, file: File): PartsAndFiles = {
    val entry = (info, file)
    copy(files = files :+ entry)
  }
}

object PartsAndFiles {
  /** Accumulator with no form values and no files. */
  val Empty = PartsAndFiles(form = immutable.Map.empty, files = immutable.Seq.empty)
}
/** Directive that consumes a multipart form request and provides the collected
  * text fields and uploaded files as a [[PartsAndFiles]].
  *
  * Parts are folded one at a time into the accumulator:
  *  - a part with a filename whose field name matches one of `fileFields` is written to
  *    the file chosen by that field's function and recorded with `addFile`;
  *  - a part with a filename but no matching field is discarded;
  *  - a part without a filename is recorded with `addForm` when its entity is strict and
  *    non-binary, and discarded otherwise.
  *
  * @param fileFields the file fields to accept and where to store each of them
  */
def formAndFiles(
fileFields: immutable.Seq[FileField]
): Directive1[PartsAndFiles] = entity(as[Multipart.FormData]).flatMap { formData ⇒
extractRequestContext.flatMap { ctx ⇒
// Materializer and execution context come from the current request.
implicit val mat = ctx.materializer
implicit val ec = ctx.executionContext
val uploadingSink =
Sink.foldAsync[PartsAndFiles, Multipart.FormData.BodyPart](PartsAndFiles.Empty) {
(acc, part) ⇒
// Drains the part's bytes and leaves the accumulator unchanged.
def discard(p: Multipart.FormData.BodyPart): Future[PartsAndFiles] = {
p.entity.discardBytes()
Future.successful(acc)
}
// A filename marks the part as a file upload.
part.filename.map { fileName ⇒
fileFields.find(_.fieldName == part.name)
.map {
case FileField(fieldName, destFn) ⇒
val fileInfo = FileInfo(part.name, fileName, part.entity.contentType)
val dest = destFn(fileInfo)
// The file is added to the accumulator once the write's future completes.
part.entity.dataBytes.runWith(FileIO.toPath(dest.toPath)).map { _ ⇒
acc.addFile(fileInfo, dest)
}
}.getOrElse(discard(part))
} getOrElse {
// No filename: treat the part as a plain form field.
part.entity match {
case HttpEntity.Strict(ct, data) if ct.isInstanceOf[ContentType.NonBinary] ⇒
// Decode the bytes with the charset declared by the part's content type.
val charsetName = ct.asInstanceOf[ContentType.NonBinary].charset.nioCharset.name
val partContent = data.decodeString(charsetName)
Future.successful(acc.addForm(part.name, partContent))
case _ ⇒
discard(part)
}
}
}
// The inner route runs only after every part has been folded.
val uploadedF = formData.parts.runWith(uploadingSink)
onSuccess(uploadedF)
}
}
// Accept uploads in the "picture" field; every such upload is written to /tmp/uploaded.
val fileFields = scala.collection.immutable.Seq(
  FileField(fieldName = "picture", fileNameF = _ => new File("/tmp/uploaded"))
)
/** Route that stores the upload, deletes the stored files again, and replies with the
  * file paths and the collected form fields.
  *
  * All uploaded paths are listed, comma separated. The previous `files.head` threw
  * when the request carried no file part; a single upload produces the same body as before.
  */
val routes: Route = formAndFiles(fileFields) {
  case PartsAndFiles(fields, files) =>
    // The Boolean returned by delete() is ignored, so a failed delete is not reported here.
    files.foreach { case (_, uploaded) => uploaded.delete() }
    val paths = files.map { case (_, uploaded) => uploaded.getAbsolutePath }.mkString(", ")
    val body = s"\nFile: $paths\nForm: ${fields}\n"
    complete(OK, body)
}
Here,
files.foreach(_._2.delete())
returns false, while
files.foreach(_._2.exists)
returns true.
I want to delete this file. How can I do it?

Scala3 macro summon typeclass instance of a TypeTree (no type arg)

/** Type class for rendering a value of type `T` as text. */
trait Show[T] {

  /** Returns the textual form of `t`. */
  def show(t: T): String
}
Given such a Show type class, I want to generate a Show instance for a case class, like this:
/** Macro implementation that derives a `Show[A]` for a case class `A`.
  *
  * The generated `show` renders every case field as `name:value`, joined with commas,
  * where `value` comes from the `Show` instance found for the field's type.
  *
  * `Expr.summon` needs a statically known type argument, which is not available for a
  * field type discovered through reflection. The instance is therefore looked up with
  * `Implicits.search` on a `TypeRepr`. A field with no instance aborts compilation with
  * an error naming that field.
  */
def caseClassShow[A](using Type[A], Quotes): Expr[Show[A]] = {
  import quotes.reflect._

  // Builds the expression that renders all case fields of the given instance.
  def shows(caseClassExpr: Expr[A]): Expr[String] = {
    val caseClassTerm = caseClassExpr.asTerm
    val caseClassType = TypeRepr.of[A]

    val parts: List[Expr[String]] = caseClassType.typeSymbol.caseFields.collect {
      case cf if cf.isValDef =>
        // Field type as seen from A, so type parameters of A are substituted.
        val fieldType = caseClassType.memberType(cf).widen
        val showType = TypeRepr.of[Show].appliedTo(fieldType)

        Implicits.search(showType) match {
          case found: ImplicitSearchSuccess =>
            val fieldValue = Select(caseClassTerm, cf)
            val shown = Apply(Select.unique(found.tree, "show"), List(fieldValue)).asExprOf[String]
            // The field name is known at compile time, so lift it as a constant.
            val label = Expr(cf.name + ":")
            '{ $label + $shown }
          case missing: ImplicitSearchFailure =>
            report.errorAndAbort(
              s"No Show instance for field '${cf.name}' of type ${fieldType.show}: ${missing.explanation}"
            )
        }
    }

    val strParts = Expr.ofList(parts)
    '{ $strParts.mkString(",") }
  }

  '{
    new Show[A] {
      def show(a: A): String = ${ shows('a) }
    }
  }
}
But the showInstance line won't compile. How can I summon an implicit Show[X] here?
Implicits.search
can be used to summon an implicit instance when no type argument is available for Expr.summon:
// Fragment meant to replace the Expr.summon line inside the collect over case fields.
val valDefTree = cf.tree.asInstanceOf[ValDef]
val valType = valDefTree.tpt
// Build Show[FieldType] as a TypeRepr, since the field type is not available as a type argument.
val showCtor = TypeRepr.typeConstructorOf(classOf[Show[_]])
val valShowType = showCtor.appliedTo(valType.tpe)
// NOTE(review): only the success case is matched in this fragment; a field without a Show
// instance has no branch here.
Implicits.search(valShowType) match {
case si: ImplicitSearchSuccess =>
// The found instance is cast to Show[Any] so that show can be called inside the quote.
val siExpr: Expr[Show[Any]] = si.tree.asExpr.asInstanceOf[Expr[Show[Any]]]
val valueExpr = Select(caseClassTerm, cf).asExpr
'{$siExpr.show($valueExpr)}
}

Function never reached in Akka Streams Source mapping

Given the following code:
/** Handler that migrates all content of a fixed set of type ids.
  *
  * For every type id the content is loaded from `database`, passed through `migrate`,
  * and handed to `doUpdate`. One future per type id is returned.
  */
class MigrationHandler @Inject()(database: Database, doUpdate: UpdateHandler)
                                (implicit @PropagateContext executor: ExecutionContext, actorSystem: ActorSystem)
  extends Handler with Logging {

  implicit val materializer: ActorMaterializer = ActorMaterializer()

  val buttonTypeId = "someId1"
  val promotionButtonTypeId = "someId2"
  val typeId3 = "someId3"
  val typeId4 = "someId4"
  val typeIds = Seq[String](buttonTypeId, promotionButtonTypeId, typeId3, typeId4)

  /** Starts one migration stream per type id.
    *
    * @param requestHeaders headers forwarded to every `doUpdate` call
    * @return one future per type id, completed when that type's stream has finished;
    *         callers must wait for these futures to observe the migration
    */
  def apply(requestHeaders: Headers): Seq[Future[Done]] =
    typeIds.map { typeId =>
      database.allByType(typeId).flatMap { contents =>
        contents
          .map(content => migrate(content, typeId))
          // NOTE(review): if doUpdate returns a Future, its completion is not awaited by
          // this stage; mapAsync would be needed for that — confirm against UpdateHandler.
          .map { migratedContent =>
            doUpdate(
              ContentId(migratedContent.id),
              NewContent(migratedContent.raw, isDefaultContent = false),
              requestHeaders
            )
          }
          .runForeach(_ => ())
      }
    }

  /** Logs the content with its type id and returns the content unchanged. */
  def migrate(content: SomeContent, typeId: String): SomeContent = {
    logger.info(s"$content with type $typeId")
    content
  }
}
database.allByType(typeId) returns a Future[Source[SomeContent, _]].
In a unit test where I mock database.allByType(typeId) to return Source.single(SomeContent()), I saw that the migrate function was never reached. Any idea what the problem could be?

akka streams stops after parallelism

I tried to build a small PDF parser with akka-streams (and limited understandings of it yet) and Apache's pdfbox.
One thing which I don't really get, the stream stops exactly after the given number of parallelism which is given into the mapAsync.
so if a PDF-doc has 20 pages and the parallelism is set to 5, the first 5 pages get processed and the rest is ignored, if set to 20, everything is done fine. Anybody an idea what I'm doing wrong?
/** Renders every page of a PDF to a JPEG and uploads the images to S3.
  *
  * @param ws          HTTP client used to build the S3 client
  * @param conf        configuration used to build the S3 client
  * @param parallelism maximum number of uploads in flight at once
  */
class PdfParser(ws: WSClient, conf: Configuration, parallelism: Int) {
  implicit val system = ActorSystem("image-parser")
  implicit val materializer = ActorMaterializer()

  /** Iterates over the pages of `doc`, naming page `i` as `"$key-$i.jpg"`. */
  def documentPages(doc: PDDocument, key: String): Iterator[Page] = {
    val pages: util.List[_] = doc.getDocumentCatalog.getAllPages
    (0 until pages.size()).iterator.map(i => Page(pages.get(i), s"$key-$i.jpg"))
  }

  /** Converts a page to an image, keeping the page's name. */
  val pageToImage: Flow[Page, Image, NotUsed] = Flow[Page].map { p =>
    val img = p.content.asInstanceOf[PDPage].convertToImage()
    Image(img, p.name)
  }

  /** Uploads an image and emits "uploaded", or the error message of an `S3Exception`. */
  val imageToS3: Flow[Image, String, NotUsed] = Flow[Image].mapAsync(parallelism) { i =>
    val s3 = S3.fromConfiguration(ws, conf)
    val bucket = s3.getBucket("elsa-essays")
    val baos = new ByteArrayOutputStream()
    ImageIO.write(i.content, "jpg", baos)
    val res = bucket add BucketFile(i.name, "image/jpeg", baos.toByteArray)
    res.map { _ =>
      "uploaded"
    }.recover {
      case e: S3Exception => e.message
    }
  }

  // Sink.head completes and cancels upstream as soon as the first element arrives, so only
  // the uploads already in flight (up to `parallelism`) would run. Sink.last consumes the
  // whole stream and still materializes a single String.
  val sink: Sink[String, Future[String]] = Sink.last[String]

  /** Uploads every page of the PDF at `path`.
    *
    * @return the status emitted for the last page; the future fails if the stream fails
    *         or the document has no pages
    */
  def parse(path: Path, key: String): Future[String] = {
    val stream: InputStream = new FileInputStream(path.toString)
    val doc =
      try PDDocument.load(stream)
      catch {
        // Do not leak the file handle when the document cannot be loaded.
        case scala.util.control.NonFatal(e) =>
          stream.close()
          throw e
      }
    val source = Source.fromIterator(() => documentPages(doc, key))
    val runnable: RunnableGraph[Future[String]] = source.via(pageToImage).via(imageToS3).toMat(sink)(Keep.right)
    val res = runnable.run()
    // Release the document and the stream whether the run succeeded or failed.
    res.andThen { case _ =>
      try doc.close()
      finally stream.close()
    }
  }
}
The problem is in your Sink. Sink.head materializes a Future of the first element and then cancels the stream, so it never asks for more than one value. With mapAsync(parallelism), up to `parallelism` elements are already in flight when that cancellation happens, which is why exactly that many pages get processed and the rest are ignored.
In any case, change your sink to something that consumes every element, like:
val sink: Sink[String, Future[String]] = Sink.fold("")((a, b) => b ++ a)
and it will work.

How to get a name of a class member?

I want to be able to do something like this:
prepare form:
// Desired usage: each form field is identified by a member selector such as _.firstName.
// NOTE(review): formBuilder, textField and showForm are not defined in this snippet.
val formDescription = formBuilder(_.textField[User](_.firstName)
.textField[User](_.lastName)
).build
showForm(formDescription)
extract data from user filled form, using User:
// Contains the data of a form submitted by a user.
val formData: Map[String, String] = getFormData
// Desired usage: the value is looked up through the same member selector.
val newUser = User(id = randomUuid, firstName = formData.extract[User](_.firstName))
One solution I see is to use a dynamic proxy that extends the provided class and remembers which method was invoked on it:
/** Returns the name of the member that `foo` invokes on a proxy of `T`.
  *
  * The proxy is created from the erased class of `T`, `foo` is applied to it, and the
  * proxy's last recorded method name is returned.
  */
def getFieldName[T: Manifest](foo: T => Any) = {
  val recorder = createDynamicProxy(manifest[T].erasure)
  foo(recorder)
  recorder.lastInvokedMethodName
}
Is there a better way to do it? Is there any lib that implements it already?
This reflective approach takes a case class and invokes its companion apply, calling getField and fetching default args if the field is not in the data.
import scala.reflect.runtime.{currentMirror => cm, universe => uni}
import uni._
/** Builds a `PluginDescription` from a `<plugin>` element.
  *
  * Returns `None` when the top-level tag is not `<plugin>`, or when a constructor
  * parameter has neither a non-empty element in the XML nor a default argument.
  */
def fromXML(xml: Node): Option[PluginDescription] = {
// Instantiates A through its companion's apply method, using runtime reflection.
def extract[A]()(implicit tt: TypeTag[A]): Option[A] = {
// Extracts one field: the trimmed text of all descendants named `field`, or None if empty.
def getField(field: String): Option[String] = {
val text = (xml \\ field).text.trim
if (text == "") None else Some(text)
}
// Locate the companion object's apply method and obtain a reflective handle on it.
val apply = uni.newTermName("apply")
val module = uni.typeOf[A].typeSymbol.companionSymbol.asModule
val ts = module.moduleClass.typeSignature
val m = (ts member apply).asMethod
val im = cm reflect (cm reflectModule module).instance
val mm = im reflectMethod m
// Default for the i-th parameter (0-based), looked up as the companion member apply$default$<i+1>.
def getDefault(i: Int): Option[Any] = {
val n = uni.newTermName("apply$default$" + (i+1))
val m = ts member n
if (m == NoSymbol) None
else Some((im reflectMethod m.asMethod)())
}
// One optional value per parameter: the XML field if present, otherwise the default.
def extractArgs(pss: List[List[Symbol]]): List[Option[Any]] =
pss.flatten.zipWithIndex map (p => getField(p._1.name.encoded) orElse getDefault(p._2))
val args = extractArgs(m.paramss)
// Give up when any parameter could not be resolved.
if (args exists (!_.isDefined)) None
else Some(mm(args.flatten: _*).asInstanceOf[A])
}
// Check the top-level tag.
xml match {
case <plugin>{_*}</plugin> => extract[PluginDescription]()
case _ => None
}
}
The idea was to do something like:
// Intended usage: `id` has a default argument, so only the name fields need to be in the data.
// NOTE(review): randomUuid is not defined in this snippet.
case class User(id: Int = randomUuid, firstName: String, lastName: String)
val user = extract[User]()
That's my own solution:
package utils
import javassist.util.proxy.{MethodHandler, MethodFilter, ProxyFactory}
import org.specs2.mutable._
import javassist.util.proxy.Proxy
import java.lang.reflect.{Constructor, Method}
/** Spec for [[MemberNameGetter]]: `memberName` must return the name of the selected member. */
class DynamicProxyTest extends Specification with MemberNameGetter {
"Dynamic proxy" should {
"extract field name" in {
// Constructor parameters of TestClass (String, Int, Boolean).
memberName[TestClass](_.a) must ===("a")
memberName[TestClass](_.i) must ===("i")
memberName[TestClass](_.b) must ===("b")
// Members declared in the class body: a var, a val and a def.
memberName[TestClass](_.variable) must ===("variable")
memberName[TestClass](_.value) must ===("value")
memberName[TestClass](_.method) must ===("method")
}
}
}
/** Mixin that resolves the name of a class member from a selector function such as `_.firstName`. */
trait MemberNameGetter {

  /** Returns the name of the last method that `foo` invokes on an instance of `T`.
    *
    * A javassist subclass of `T` is instantiated through the first public constructor
    * with placeholder arguments, every call on it is recorded, and `foo` is applied.
    *
    * @param foo selector applied to the proxy, e.g. `_.firstName`
    * @return the recorded method name, or `null` if `foo` invoked nothing on the proxy
    */
  def memberName[T: Manifest](foo: T => Any): String = {
    val clazz = manifest[T].erasure

    val proxyFactory = new ProxyFactory
    proxyFactory.setSuperclass(clazz)
    // Intercept every method of the class.
    proxyFactory.setFilter(new MethodFilter {
      def isHandled(p1: Method) = true
    })
    val newClass = proxyFactory.createClass()

    // Written by the handler on each intercepted call.
    var lastInvokedMethod: String = null
    val mh = new MethodHandler {
      def invoke(p1: Any, p2: Method, p3: Method, p4: Array[AnyRef]) = {
        lastInvokedMethod = p2.getName
        // Delegate to the original implementation so the call still returns a value.
        p3.invoke(p1, p4: _*)
      }
    }

    val constructor = defaultConstructor(newClass)
    val parameters = defaultConstructorParameters(constructor)
    val proxy = constructor.newInstance(parameters: _*)
    proxy.asInstanceOf[Proxy].setHandler(mh)

    foo(proxy.asInstanceOf[T])
    lastInvokedMethod
  }

  // First public constructor of the class.
  private def defaultConstructor(c: Class[_]) = c.getConstructors.head

  // One placeholder argument per constructor parameter. Every primitive type needs a boxed
  // zero value, because reflection rejects null for a primitive parameter; reference types
  // receive null.
  private def defaultConstructorParameters(constructor: Constructor[_]): Array[AnyRef] = {
    constructor.getParameterTypes.map { parameterType =>
      val placeholder: AnyRef = parameterType match {
        case java.lang.Boolean.TYPE   => java.lang.Boolean.FALSE
        case java.lang.Byte.TYPE      => java.lang.Byte.valueOf(0.toByte)
        case java.lang.Short.TYPE     => java.lang.Short.valueOf(0.toShort)
        case java.lang.Character.TYPE => java.lang.Character.valueOf(0.toChar)
        case java.lang.Integer.TYPE   => java.lang.Integer.valueOf(0)
        case java.lang.Long.TYPE      => java.lang.Long.valueOf(0L)
        case java.lang.Float.TYPE     => java.lang.Float.valueOf(0f)
        case java.lang.Double.TYPE    => java.lang.Double.valueOf(0d)
        case _                        => null
      }
      placeholder
    }
  }
}
/** Fixture with constructor fields plus a var, a val and a def declared in the body. */
case class TestClass(a: String, i: Int, b: Boolean) {
  var variable: String = "asdf"
  val value: String = "asdfasdfasd"
  def method: String = "method"
}
val mh = new MethodHandler {
def invoke(p1: Any, p2: Method, p3: Method, p4: Array[AnyRef]) = {
lastInvokedMethod = p2.getName
p3.invoke(p1, p4: _*)
}
}
val constructor = defaultConstructor(newClass)
val parameters = defaultConstructorParameters(constructor)
// val proxy = constructor.newInstance("dsf", new Integer(0))
val proxy2 = constructor.newInstance(parameters: _*)
proxy2.asInstanceOf[Proxy].setHandler(mh)
foo(proxy2.asInstanceOf[T])
lastInvokedMethod
}
// Picks the first public constructor reported for the class.
private def defaultConstructor(c: Class[_]) = {
  val publicConstructors = c.getConstructors
  publicConstructors.head
}
// Builds one placeholder argument per constructor parameter. Every primitive type gets a
// boxed zero value, because reflection rejects null for a primitive parameter; reference
// types receive null. Byte, short, char and float were previously missing.
private def defaultConstructorParameters(constructor: Constructor[_]): Array[AnyRef] = {
  constructor.getParameterTypes.map { parameterType =>
    val placeholder: AnyRef = parameterType match {
      case java.lang.Boolean.TYPE   => java.lang.Boolean.FALSE
      case java.lang.Byte.TYPE      => java.lang.Byte.valueOf(0.toByte)
      case java.lang.Short.TYPE     => java.lang.Short.valueOf(0.toShort)
      case java.lang.Character.TYPE => java.lang.Character.valueOf(0.toChar)
      case java.lang.Integer.TYPE   => java.lang.Integer.valueOf(0)
      case java.lang.Long.TYPE      => java.lang.Long.valueOf(0L)
      case java.lang.Float.TYPE     => java.lang.Float.valueOf(0f)
      case java.lang.Double.TYPE    => java.lang.Double.valueOf(0d)
      case _                        => null
    }
    placeholder
  }
}
}
/** Fixture with constructor fields plus a var, a val and a def declared in the body. */
case class TestClass(a: String, i: Int, b: Boolean) {
  var variable: String = "asdf"
  val value: String = "asdfasdfasd"
  def method: String = "method"
}