I am trying to read a text file to compare 2 files. I have written the code to read the first file and I am expecting readFileStream function to give me Collections of String but I am getting only String.
Could you see where I have made wrong?
import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import scala.util.{Failure, Success, Try}
object TestCompareHDFSFiles {
def main(args: Array[String]): Unit = {
val hdfs = FileSystem.get(new Configuration())
val path1 = new Path(args(0))
val path2 = new Path(args(1))
readHDFSFile(hdfs, path1, path2)
}
// Accept a parameter which implements a close method
def using[A <: { def close(): Unit }, B](resource: A)(f: A => B): B =
try {
f(resource)
} finally {
resource.close()
}
def readHDFSFile(hdfs: FileSystem, path1: Path, path2: Path): Option[Stream[(String,String)]] = {
Try(using(new BufferedReader(new InputStreamReader(hdfs.open(path1))))(readFileStream))
} match {
case Success(result) => {
}
case Failure(ex) => {
println(s"Could not read file $path1, detail ${ex.getClass.getName}:${ex.getMessage}")
None
}
}
def readFileStream(br: BufferedReader)= {
for {
line <- Try(br.readLine())
if (line != null )
} yield line
}
}
I am struck here. Any help please.
Thanks,
Related
Im trying to rewrite a httpclient through the java11 HttpClient in Scala
Here is my code:
import cats.effect._
import java.net.http._
import java.net.http.HttpResponse._
import java.net.http.HttpClient._
trait HttpClients[F[_]] {
def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]]
}
object HttpClients {
val client: HttpClient = HttpClient.newBuilder().followRedirects(Redirect.ALWAYS).build()
def newClient[F[_] : Async](): HttpClients[F] = new HttpClients[F] {
override def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]] = F.async { cb =>
val resp = client.sendAsync(req, BodyHandlers.ofString())
val s = resp.handle((res: HttpResponse[String], err: Throwable) => {
if (err == null)
cb(Right(res))
else
cb(Left(err))
})
s // TODO ?
// Type missmatch
// Required: F[Option[F[Unit]]]
// Found: Unit
}
}
}
the handle callback from this
I guess the error comes from here, but I don't know how to write next.
Then I make some change:
def newClient[F[_] : Async](): HttpClients[F] = new HttpClients[F] {
override def send(req: HttpRequest)(implicit F: Async[F]): F[HttpResponse[_]] = F.async[HttpResponse[_]] { cb =>
val s = Sync[F](F: Async[F]).delay {
val resp = client.sendAsync(req, BodyHandlers.ofString())
resp.handle((res: HttpResponse[String], err: Throwable) => {
if (err == null)
cb(Right(res))
else
cb(Left(err))
}).join()
}
F.delay(s.some)
}
}
This time, there is no error, but I don't know how to get the response's body
Thanks for your reply!
#OlegPyzhcov already provided insight in case you are using CE3, this answer is using CE2 in case that is what you wanted.
The first version of the code was correct, here is a full running example using Ammonite with some style improvements and ensuring a new client is created for each call and evaluation of newClient
// scala 2.13.5
import $ivy.`org.typelevel::cats-effect:2.5.0`
import cats.effect.{Async, IO}
import cats.syntax.all._
import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}
trait HttpClients[F[_]] {
def send(req: HttpRequest): F[HttpResponse[String]]
}
object HttpClients {
def newClient[F[_]](implicit F: Async[F]): F[HttpClients[F]] =
F.delay {
HttpClient
.newBuilder
.followRedirects(HttpClient.Redirect.ALWAYS)
.build()
} map { client =>
new HttpClients[F] {
override def send(req: HttpRequest): F[HttpResponse[String]] =
F.async { cb =>
client.sendAsync(req, HttpResponse.BodyHandlers.ofString).handle {
(res: HttpResponse[String], err: Throwable) =>
if (err == null) cb(Right(res))
else cb(Left(err))
}
}
}
}
}
object Main {
private val request =
HttpRequest
.newBuilder
.GET
.uri(URI.create("https://stackoverflow.com/questions/tagged/scala?tab=Newest"))
.build()
private val program = for {
_ <- IO.delay(println("Hello, World!"))
client <- HttpClients.newClient[IO]
response <- client.send(request)
_ <- IO.delay(println(response))
_ <- IO.delay(println(response.body))
} yield ()
def run(): Unit = {
program.unsafeRunSync()
}
}
#main
def main(): Unit = {
Main.run()
}
I am trying to execute this code on databricks in scala. Everything is in an object, then I have a case class and def main and other def functions.
Trying to work with "package cells" but I got Warning: classes defined within packages cannot be redefined without a cluster restart.
Compilation successful.
removing the object didn't work either
package x.y.z
import java.util.Date
import java.io.File
import java.io.PrintWriter
import org.apache.hadoop.fs.{FileSystem, Path}
object Meter {
val dateFormat = new SimpleDateFormat("yyyyMMdd")
case class Forc (cust: String, Num: String, date: String, results: Double)
def main(args: Array[String]): Unit = {
val inputFile = "sv" //
val outputFile = "ssv" //
val fileSystem = getFileSystem(inputFile)
val inputData = readLines(fileSystem, inputFile, skipHeader = true).toSeq
val filtinp = inputData.filter(x => x.nonEmpty)
.map(x => Results(x(6), x(5), x(0), x(8).toDouble))
def getTimestamp(date: String): Long = dateFormat.parse(date).getTime
def getDate(timeStampInMills: Long): String = {
val time = new Date(timeStampInMills)
dateFormat.format(time)
}
def getFileSystem(path: String): FileSystem = {
val hconf = new Configuration()
new Path(path).getFileSystem(hconf)
}
override def next(): String = {
val result = line
line = inputData.readLine()
if (line == null) {
inputData.close()
}
result
}
}
}
}
I am trying to setup tests for a scala compiler-plugin I am developping.
According to this similar question, it is possible to invoke the plugin programmatically. However, when I tried to do this the program could not find my plugin.
Error:(34, 25) not found: type GetFileFromAnnotation
for (phase <- new GetFileFromAnnotation(this).components)
Both files are in the same package.
Here is the code which is trying to test the plugin (pulled from the question above), the commented out code is an alternative to the code above it and throws the same error:
package compilerPlugin
import scala.reflect.internal.util.BatchSourceFile
import scala.tools.nsc.io.VirtualDirectory
import scala.tools.nsc.reporters.ConsoleReporter
import scala.tools.nsc.{Global, Settings}
object AnnotationFinderTest extends App {
// prepare the code you want to compile
val code =
"""
|class Typestate(filename:String) extends scala.annotation.StaticAnnotation
|
|#Typestate(filename = "MyProtocol.txt")
|class Cat{
| def comeAlive(): Unit = println("The cat is alive")
|}
|
|object Main extends App {
| val cat = new Cat()
| cat.comeAlive()
|}""".stripMargin
val sources = List(new BatchSourceFile("<test>", code))
println("sources "+sources)
val settings = new Settings
settings.usejavacp.value = true
settings.outputDirs.setSingleOutput(new VirtualDirectory("(memory)", None))
val compiler = new Global(settings, new ConsoleReporter(settings)) {
override protected def computeInternalPhases () {
super.computeInternalPhases
for (phase <- new GetFileFromAnnotation(this).components)
phasesSet += phase
}
}
new compiler.Run() compileSources (sources)
}
The code for the GetFileFromAnnotation plugin is:
package compilerPlugin
import java.io.{FileNotFoundException, IOException}
import scala.tools.nsc
import nsc.Global
import nsc.Phase
import nsc.plugins.Plugin
import nsc.plugins.PluginComponent
import scala.io.BufferedSource
import scala.io.Source._
class GetFileFromAnnotation(val global: Global) extends Plugin {
import global._
val name = "GetFileFromAnnotation"
val description = "gets file from typestate annotation"
val components: List[PluginComponent] = List[PluginComponent](Component)
private object Component extends PluginComponent {
val global: GetFileFromAnnotation.this.global.type = GetFileFromAnnotation.this.global
val runsAfter: List[String] = List[String]("parser")
val phaseName: String = GetFileFromAnnotation.this.name
def newPhase(_prev: Phase) = new GetFileFromAnnotationPhase(_prev)
class GetFileFromAnnotationPhase(prev: Phase) extends StdPhase(prev) {
override def name: String = GetFileFromAnnotation.this.name
def printFile(filename: String): Unit ={
val source = fromFile(filename)
try {
val it = source.getLines()
while (it.hasNext)
println(it.next())
}
catch{
case e: IOException => println(s"Had an IOException trying to use file $filename")
} finally {
source.close
}
}
def getFilenameFromAnnotation(annotation: Apply): Option[String] ={
annotation match{
case Apply(Select(New(Ident(TypeName("Typestate"))), con),List(NamedArg(Ident(TermName("filename")), Literal(Constant(filename))))) => Some(filename.toString)
case Apply(Select(New(Ident(TypeName("Typestate"))), con),List(Literal(Constant(filename)))) => Some(filename.toString)
case _ => None
}
}
def apply(unit: CompilationUnit): Unit = {
for (tree#q"$mods class $tpname[..$tparams] $ctorMods(...$paramss) extends { ..$earlydefns } with ..$parents { $self => ..$stats }" <- unit.body) {
val annotations = mods.annotations
for(annotation#Apply(arg1,arg2) <- annotations){
getFilenameFromAnnotation(annotation) match{
case Some(filename) => printFile(filename)
case None => println("Not a Typestate annotation")
}
}
}
}
}
}
}
Is it even feasible to use this method to test a plugin or is the only way to do it to use mocking? Is there another way of doing this entirely?
UPDATE:
Thanks for the comment and the github example.
It ended up working fine with the edited test code above and prints out the protocol file as expected. I am not sure why it wasn't working before.
I have publisher with stream of messages. And i should send for consumers only messages which are based on consumer subsribers.
In my local is working for 2 tread without load, but on load testing is not working, many messages goes in not right way.
I have 2 versions of problem:
I new in akka stream and don't see in doc a little bit same case, joining 2 streams
I use not in right way scala tread safe options
What it can be?
import akka._
import akka.http.scaladsl.model.http2.PeerClosedStreamException
import akka.stream._
import akka.stream.scaladsl._
import scala.collection.mutable
import scala.concurrent.Future
import scala.util.matching.Regex
class RtkBrokerImpl(materializer: Materializer) extends MessageBroker {
private implicit val mat: Materializer = materializer
val mutableArray = mutable.Map[String, Source[ConsumeRequest, NotUsed]]()
val (inboundHub: Sink[ProduceRequest, NotUsed], outboundHub: Source[ConsumeResponse, NotUsed]) =
MergeHub.source[ProduceRequest]
.async
.mapAsyncUnordered(100)(x => Future.successful(ConsumeResponse(x.key, x.payload)))
.toMat(BroadcastHub.sink)(Keep.both)
.run()
val all: RunnableGraph[Source[ProduceRequest, NotUsed]] =
MergeHub.source[ProduceRequest]
.toMat(BroadcastHub.sink)(Keep.right)
val queue = Sink.queue[ProduceRequest](100)
override def produce(in: Source[ProduceRequest, NotUsed]): Future[ProduceResponse] = {
in.to(inboundHub).run()
Future.never
}
override def consume(in: Source[ConsumeRequest, NotUsed]): Source[ConsumeResponse, NotUsed] = {
val patternRepo = new PatternsRepository
in.runForeach { req =>
req.action match {
case ConsumeRequest.Action.SUBSCRIBE => patternRepo.putPatterns(req.keys)
case ConsumeRequest.Action.UNSUBSCRIBE => patternRepo.dropPatterns(req.keys)
}
}
outboundHub.async.filter(res => patternRepo.checkKey(res.key))
}
}
class PatternsRepository {
import RtkBrokerImpl._
import scala.collection.JavaConverters._
val concurrentSet: mutable.Set[String] = java.util.concurrent.ConcurrentHashMap.newKeySet[String]().asScala
def getPatterns(): mutable.Set[String] = {
concurrentSet
}
def checkKey(key: String): Boolean = {
concurrentSet.contains(key) ||
concurrentSet.exists(x => isInPattern(key, x))
}
def dropPatterns(string: Seq[String]) = {
string.foreach(concurrentSet.remove)
}
def putPatterns(string: Seq[String]) = {
concurrentSet.addAll(string)
}
}
object RtkBrokerImpl {
def isInPattern(key: String, pattern: String): Boolean = {
if (pattern.exists(x => x == '#' || x == '*')) {
val splittedPatten = pattern.split(".")
val splittedKey = key.split(".")
if (!splittedPatten.contains("#")) {
if (splittedKey.size != splittedKey.size)
return false
splittedPatten.zip(splittedKey)
.forall { case (key, pat) => if (pat == "*") true else key == pat }
} else {
val regExp = pattern.replaceAll(".", "\\.")
.replaceAll("\\*", "[A-Za-z0-9]+")
.replaceAll("#", "\\S+")
new Regex(regExp).matches(key)
}
} else {
key == pattern
}
}
}
I have a CSV file that I need to parse and do some action on every record. How do I use Free Monads with it? Currently, I'm loading the entire file into memory and would like to know if there is any better solution. Below is my program:
for {
reader <- F.getReader("my_file.csv")
csvRecords <- C.readCSV(reader)
_ <- I.processCSV(csvRecords)
_ <- F.close(reader)
} yield()
This code works for smaller files, but if I have very large files (over 1 GB), this wouldn't work very well. I'm using Commons CSV for reading the CSVRecords.
Looking into the code at your gist I think that the line with the comment is exactly the line you don't want at all:
object CSVIOInterpreter extends (CSVIO ~> Future) {
import scala.collection.JavaConverters._
override def apply[A](fa: CSVIO[A]): Future[A] = fa match {
case ReadCSV(reader) => Future.fromTry(Try {
CSVFormat.RFC4180
.withFirstRecordAsHeader()
.parse(reader)
.getRecords // Loads the complete file
.iterator().asScala.toStream
})
}
}
Just remove the whole getRecords line. CSVFormat.parse returns an instance of CSVParser which already implements Iterable<CSVRecord>. And the getRecords call is the only thing that force it to read the whole file.
Actually you can see CSVParser.getRecords implementation and it is
public List<CSVRecord> getRecords() throws IOException {
CSVRecord rec;
final List<CSVRecord> records = new ArrayList<>();
while ((rec = this.nextRecord()) != null) {
records.add(rec);
}
return records;
}
So it just materializes the whole file using this.nextRecord call which is obviously a more "core" part of the API.
So when I do a simplified version of your code without the getRecords call:
import cats._
import cats.free.Free
import java.io._
import org.apache.commons.csv._
import scala.collection.JavaConverters._
trait Action[A] {
def run(): A
}
object F {
import Free.liftF
case class GetReader(fileName: String) extends Action[Reader] {
override def run(): Reader = new FileReader(fileName)
}
case class CloseReader(reader: Reader) extends Action[Unit] {
override def run(): Unit = reader.close()
}
def getReader(fileName: String): Free[Action, Reader] = liftF(GetReader(fileName))
def close(reader: Reader): Free[Action, Unit] = liftF(CloseReader(reader))
}
object C {
import Free.liftF
case class ReadCSV(reader: Reader) extends Action[CSVParser] {
override def run(): CSVParser = CSVFormat.DEFAULT.parse(reader)
}
def readCSV(reader: Reader): Free[Action, CSVParser] = liftF(ReadCSV(reader))
}
object I {
import Free.liftF
case class ProcessCSV(parser: CSVParser) extends Action[Unit] {
override def run(): Unit = {
for (r <- parser.asScala)
println(r)
}
}
def processCSV(parser: CSVParser): Free[Action, Unit] = liftF(ProcessCSV(parser))
}
object Runner {
import cats.arrow.FunctionK
import cats.{Id, ~>}
val runner = new (Action ~> Id) {
def apply[A](fa: Action[A]): Id[A] = fa.run()
}
def run[A](free: Free[Action, A]): A = {
free.foldMap(runner)
}
}
def test() = {
val free = for {
// reader <- F.getReader("my_file.csv")
reader <- F.getReader("AssetsImportCompleteSample.csv")
csvRecords <- C.readCSV(reader)
_ <- I.processCSV(csvRecords)
_ <- F.close(reader)
} yield ()
Runner.run(free)
}
it seems to work OK in line-by-line mode.
Here how I use the CSV file to read and do some operation on that -
I use scala.io.Source.fromFile()
I create one case class of the type of header of CSV file to make the data more accessible and operational.
PS: I don't have knowledge of monads, as well as I am in beginner in Scala. I posted this as it may be helpful.
case class AirportData(id:Int, ident:String, name:String, typeAirport:String, latitude_deg:Double,
longitude_deg:Double, elevation_ft:Double, continent:String, iso_country:String, iso_region:String,
municipality:String)
object AirportData extends App {
def toDoubleOrNeg(s: String): Double = {
try {
s.toDouble
} catch {
case _: NumberFormatException => -1
}
}
val source = scala.io.Source.fromFile("resources/airportData/airports.csv")
val lines = source.getLines().drop(1)
val data = lines.flatMap { line =>
val p = line.split(",")
Seq(AirportData(p(0).toInt, p(1).toString, p(2).toString, p(3).toString, toDoubleOrNeg(p(4)), toDoubleOrNeg(p(5)),
toDoubleOrNeg(p(6)), p(7).toString, p(8).toString, p(9).toString, p(10).toString))
}.toArray
source.close()
println(data.length)
data.take(10) foreach println
}