JFreeChart contour plot rendering incorrectly - Scala

Code:
package vu.co.kaiyin.sfreechart.plots
import java.awt.{Shape, Stroke, RenderingHints}
import javax.swing.JFrame
import org.jfree.chart.plot.{PlotOrientation, XYPlot}
import org.jfree.chart.{ChartFactory => cf}
import org.jfree.chart.renderer.GrayPaintScale
import org.jfree.chart.renderer.xy.XYBlockRenderer
import org.jfree.chart.title.PaintScaleLegend
import org.jfree.chart._
import org.jfree.chart.axis.{AxisLocation, NumberAxis}
import org.jfree.data.Range
import org.jfree.data.general.DatasetUtilities
import org.jfree.data.statistics.HistogramDataset
import org.jfree.data.xy.{IntervalXYDataset, XYZDataset}
import org.jfree.ui.{RectangleEdge, RectangleInsets}
import vu.co.kaiyin.sfreechart.{ColorPaintScale, ExtendedFastScatterPlot}
import vu.co.kaiyin.sfreechart.implicits._
import scala.util.Random.nextGaussian
/**
* Created by kaiyin on 2/10/16.
*/
object Plots {
def histogram(
dataset: IntervalXYDataset,
title: String = "Histogram",
xAxisLabel: String = "Intervals",
yAxisLabel: String = "Count",
orientation: PlotOrientation = PlotOrientation.VERTICAL,
legend: Boolean = true,
tooltips: Boolean = true,
urls: Boolean = true,
alpha: Float = 0.5F,
pannable: Boolean = false
): JFreeChart = {
val hist = cf.createHistogram(
title, xAxisLabel, yAxisLabel, dataset, orientation, legend, tooltips, urls
)
val xyPlot = hist.getPlot.asInstanceOf[XYPlot]
if (pannable) {
xyPlot.setDomainPannable(true)
xyPlot.setRangePannable(true)
}
xyPlot.setForegroundAlpha(alpha)
hist
}
def controuPlot(dataset: XYZDataset, title: String = "Contour plot", scaleTitle: String = "Scale"): JFreeChart = {
val xAxis = new NumberAxis("x")
val yAxis = new NumberAxis("y")
val blockRenderer = new XYBlockRenderer
val zBounds: Range = DatasetUtilities.findZBounds(dataset)
println(zBounds.getLowerBound, zBounds.getUpperBound)
val paintScale = new ColorPaintScale(zBounds.getLowerBound, zBounds.getUpperBound)
blockRenderer.setPaintScale(paintScale)
val xyPlot = new XYPlot(dataset, xAxis, yAxis, blockRenderer)
xyPlot.setAxisOffset(new RectangleInsets(1D, 1D, 1D, 1D))
xyPlot.setDomainPannable(true)
xyPlot.setRangePannable(true)
val chart = new JFreeChart(title, xyPlot)
chart.removeLegend()
val scaleAxis = new NumberAxis(scaleTitle)
val paintScaleLegend = new PaintScaleLegend(paintScale, scaleAxis)
paintScaleLegend.setAxisLocation(AxisLocation.BOTTOM_OR_LEFT)
paintScaleLegend.setPosition(RectangleEdge.BOTTOM)
chart.addSubtitle(paintScaleLegend)
chart
}
def fastScatter(data: Array[Array[Float]], title: String = "Scatter plot", pointSize: Int = 5, pointAlpha: Float = 0.3F): JFreeChart = {
val xAxis = new NumberAxis("x")
val yAxis = new NumberAxis("y")
xAxis.setAutoRangeIncludesZero(false)
yAxis.setAutoRangeIncludesZero(false)
val fsPlot = new ExtendedFastScatterPlot(data, xAxis, yAxis, pointSize, pointAlpha)
fsPlot.setDomainPannable(true)
fsPlot.setRangePannable(true)
val chart = new JFreeChart(title, fsPlot)
chart.getRenderingHints.put(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON)
chart
}
def main(args: Array[String]) {
// fastScatter(Array(Array(1.0F, 2.0F, 3.0F), Array(1.0F, 2.0F, 3.0F))).vis()
val x = (1 to 10000).map(_.toFloat).toArray
val y = x.map(i => i * nextGaussian().toFloat * 3F).toArray
fastScatter(Array(x, y)).vis()
val x1 = (-13.0 to 13.0 by 0.2).toArray
val y1 = (-13.0 to 13.0 by 0.2).toArray
val xyzData = (for {
i <- x1
j <- y1
if i > j
} yield Array(i, j, math.sin(i) + math.cos(j))).transpose
controuPlot(xyzData.toXYZDataset()).vis()
histogram((1 to 10000).map(_ => nextGaussian()).toArray.toHistogramDataset()).vis()
}
}
Full project can be found here: https://github.com/kindlychung/sfreechart
Running the above code will give you this:
If you look carefully, you will find a narrow band of pixels along the diagonal edge that doesn't quite fit (this is a contour plot of sin(x) + cos(y)), as if there were a tear and shift. But if I comment out the if i > j line, then the plot looks normal:
What went wrong and how can this be solved?
Update
Actually, if you look carefully at the right edge of the second figure above, there is also a strip that shouldn't be there.

I managed to fake a contour plot with a scatter plot:
val x = (-12.0 to 12.0 by 0.1).toArray
val y = (-12.0 to 12.0 by 0.1).toArray
val xyzData = (for {
i <- x
j <- y
} yield {
val s = math.sin(i)
val c = math.cos(j)
Array(i, j, s + c)
}).transpose
fastScatter(xyzData.toFloats, grid = (false, false), pointSize = 4, pointAlpha = 1F).vis()
Implementation of fastScatter can be found here: https://github.com/kindlychung/sfreechart (disclosure: I am the author.)
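One hedged thing worth checking in controuPlot above: XYBlockRenderer draws every (x, y, z) item as a block whose width and height default to 1.0, while the grid in main is sampled every 0.2, so neighbouring blocks overlap and the drawing order near the triangular edge decides which block wins. Matching the block size to the sampling step is only a sketch, not a confirmed fix:
// Assumption: the grid built in main() uses a step of 0.2 in both x and y.
blockRenderer.setBlockWidth(0.2)
blockRenderer.setBlockHeight(0.2)
// The block anchor also affects where each block is drawn relative to its (x, y) point:
// blockRenderer.setBlockAnchor(org.jfree.ui.RectangleAnchor.CENTER)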

Related

Android iText Pdf SignatureField

Using the iText kernel: when I create a signature field with a specific rectangle, the location of the signature field changes.
val pdfReader: com.itextpdf.kernel.pdf.PdfReader =
com.itextpdf.kernel.pdf.PdfReader(FileInputStream(file2))
val pdfWriter = PdfWriter(dest)
val document = PdfDocument(pdfReader, pdfWriter)
val page: PdfPage = document.getPage(page)
val extractionStrategy = TextPlusXYExtractionStrategy()
val parser = PdfCanvasProcessor(extractionStrategy)
parser.processPageContent(page);
for (i in signatures) {
val rWidth = 200f
val rHeight = 200f
val acroForm = PdfAcroForm.getAcroForm(document, true)
val signature: PdfFormField = PdfSignatureFormField.createSignature(
document,
com.itextpdf.kernel.geom.Rectangle(i.rect.left.toFloat(), i.rect.top.toFloat(), rWidth, rHeight)
)
signature.setVisibility(0)
signature.setFieldName(System.currentTimeMillis().toString() + UUID.randomUUID())
acroForm.addField(signature, page)
}
document.close()
///////////////////////////////////////////////////////////////////
width = 200
height = 200
Rect1: x = 383.0, y = 209.0
Rect2: x = 440.0, y = 530.0
Rect3: x = 464.0, y = 879.0
Rect4: x = 242.0, y = 872.0
Rect5: x = 255.0, y = 493.0
Result:
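One detail worth double-checking with the numbers above: iText's Rectangle constructor takes the lower-left corner plus width and height in PDF user space, whose origin is the bottom-left of the page, while extraction code often measures y from the top. A hedged sketch of that conversion (written in Scala, but the iText calls are the same from Kotlin; the i.rect fields are the question's own values):
// Hedged sketch: flip a top-origin y coordinate into PDF's bottom-left origin
// before building the signature rectangle. Assumes i.rect.top was measured from the top of the page.
val pageHeight = page.getPageSize.getHeight
val llx = i.rect.left.toFloat
val lly = pageHeight - i.rect.top.toFloat - rHeight
val signatureRect = new com.itextpdf.kernel.geom.Rectangle(llx, lly, rWidth, rHeight)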

How to implement in Scala a loop function already implemented in Python

I wrote some code in Scala but I am stuck on the loop function.
import math._
object Haversine {
val R = 6372.8 //radius in km
def haversine(lat1:Double, lon1:Double, lat2:Double, lon2:Double)={
val dLat=(lat2 - lat1).toRadians
val dLon=(lon2 - lon1).toRadians
val a = pow(sin(dLat/2),2) + pow(sin(dLon/2),2) * cos(lat1.toRadians) * cos(lat2.toRadians)
val c = 2 * asin(sqrt(a))
R * c
}
def main(args: Array[String]): Unit = {
println(haversine(36.12, -86.67, 33.94, -118.40))
}
}
import org.apache.spark.sql.SparkSession
import Haversine.haversine
object Position {
def main(args: Array[String]): Unit = {
// create Spark DataFrame with Spark configuration
val spark= SparkSession.builder().getOrCreate()
// Read csv with DataFrame
val file1 = spark.read.csv("file:///home/aaron/Downloads/taxi_gps.txt")
val file2 = spark.read.csv("file:///home/aaron/Downloads/district.txt")
//change the name
val new_file1= file1.withColumnRenamed("_c0","id")
.withColumnRenamed("_c4","lat")
.withColumnRenamed("_c5","lon")
val new_file2= file2.withColumnRenamed("_c0","dis")
.withColumnRenamed("_1","lat")
.withColumnRenamed("_2","lon")
.withColumnRenamed("_c3","r")
//count
}
}
I am not familiar with Scala, so this is quite a tough question for me.
I hope you can help me. Thanks!
Before implementing the loop, you need to define and implement a cal_distance method outside the main method but in the same object:
def cal_distance(lon: Float, lat: Float, taxiLon: Float, taxiLat: Float) : Float = {
val distance = 0.0f
// write your geopy scala code here
distance
}
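A minimal sketch of cal_distance, assuming the haversine method from the Haversine object earlier in the question is reused (it returns kilometres, so the radius column r is assumed to be in kilometres as well):
def cal_distance(lon: Float, lat: Float, taxiLon: Float, taxiLat: Float): Float = {
  // Haversine.haversine takes (lat1, lon1, lat2, lon2) in degrees and returns kilometres.
  Haversine.haversine(lat.toDouble, lon.toDouble, taxiLat.toDouble, taxiLon.toDouble).toFloat
}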
Your Scala code should then be similar to the following:
new_file2.foreach(row => {
val district = row.getAs[Float]("dis")
val lon = row.getAs[Float]("lon")
val lat = row.getAs[Float]("lat")
val distance = row.getAs[Float]("r")
var temp = 0
new_file1.foreach(taxi => {
val taxiLon = taxi.getAs[Float]("lon")
val taxiLat = taxi.getAs[Float]("lat")
if(cal_distance(lon,lat,taxiLon,taxiLat) <= distance) {
temp+=1
}
})
println(s"district:${district} temp=${temp}")
})
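Note that on a real cluster the inner new_file1.foreach will fail, because Dataset operations can only be invoked from the driver, not inside another Dataset's closure. A hedged alternative is a cross join plus a distance UDF; the column names follow the renaming above and the casts are assumptions, since spark.read.csv produces string columns:
import org.apache.spark.sql.functions.{col, udf}

// Distance between the district centre and each taxi position, reusing the Haversine object.
val distanceUdf = udf((lat: Double, lon: Double, taxiLat: Double, taxiLon: Double) =>
  Haversine.haversine(lat, lon, taxiLat, taxiLon))

val counts = new_file2.alias("d")
  .crossJoin(new_file1.alias("t"))
  .where(
    distanceUdf(
      col("d.lat").cast("double"), col("d.lon").cast("double"),
      col("t.lat").cast("double"), col("t.lon").cast("double")
    ) <= col("d.r").cast("double")
  )
  .groupBy(col("d.dis"))
  .count()

counts.show()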

Sorting a DStream and taking topN

I have a DStream in Spark Scala and I want to sort it and then take the top N.
The problem is that whenever I try to run it, I get a NotSerializableException, and the exception message says:
This is because the DStream object is being referred to from within the closure.
I don't know how to solve it. Here is my try:
package com.badrit.realtime
import java.util.Date
import com.badrit.drivers.UnlimitedSpaceTimeDriver
import com.badrit.model.{CellBuilder, DataReader, Trip}
import com.badrit.utility.Printer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.{Duration, Milliseconds, StreamingContext}
import scala.collection.mutable
object StreamingDriver {
val appName: String = "HotSpotRealTime"
val hostName = "localhost"
val port = 5050
val constrains = UnlimitedSpaceTimeDriver.constrains;
var streamingRate = 1;
var windowSize = 8;
var slidingInterval = 2;
val cellBuilder = new CellBuilder(constrains)
val inputFilePath = "/home/ahmedelgamal/Downloads/green_tripdata_2015-02.csv"
def prepareTestData(sparkStreamCtx: StreamingContext): InputDStream[Trip] = {
val sparkCtx = sparkStreamCtx.sparkContext
val textFile: RDD[String] = sparkCtx.textFile(inputFilePath)
val data: RDD[Trip] = new DataReader().getTrips(textFile)
val groupedData = data.filter(_.pickup.date.before(new Date(2015, 1, 2, 0, 0, 0)))
.groupBy(trip => trip.pickup.date.getMinutes).sortBy(_._1).map(_._2).collect()
printf("Grouped Data Count is " + groupedData.length)
var dataQueue: mutable.Queue[RDD[Trip]] = mutable.Queue.empty;
groupedData.foreach(trips => dataQueue += sparkCtx.makeRDD(trips.toArray))
printf("\n\nTest Queue size is " + dataQueue.size)
groupedData.zipWithIndex.foreach { case (trips: Iterable[Trip], index: Int) => {
println("Items List " + index)
val passengers: Array[Int] = trips.map(_.passengers).toArray
val cnt = passengers.length
println("Sum is " + passengers.sum)
println("Cnt is " + cnt)
val passengersRdd = sparkCtx.parallelize(passengers)
println("Mean " + passengersRdd.mean())
println("Stdv" + passengersRdd.stdev())
}
}
sparkStreamCtx.queueStream(dataQueue, true)
}
def cellCreator(trip: Trip) = cellBuilder.cellForCarStop(trip.pickup)
def main(args: Array[String]) {
if (args.length < 1) {
streamingRate = 1;
windowSize = 3 //2 hours 60 * 60 * 1000L
slidingInterval = 2 //0.5 hour 60 * 60 * 1000L
}
else {
streamingRate = args(0).toInt;
windowSize = args(1).toInt
slidingInterval = args(2).toInt
}
val sparkConf = new SparkConf().setAppName(appName).setMaster("local[*]")
val sparkStreamCtx = new StreamingContext(sparkConf, Milliseconds(streamingRate))
sparkStreamCtx.sparkContext.setLogLevel("ERROR")
sparkStreamCtx.checkpoint("/tmp")
val data: InputDStream[Trip] = prepareTestData(sparkStreamCtx)
val dataWindow = data.window(new Duration(windowSize), new Duration(slidingInterval))
//my main problem lies in the following line
val newDataWindow = dataWindow.transform(rdd => sparkStreamCtx.sparkContext.parallelize(rdd.take(10)))
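// Hedged note, not part of the original code: with checkpointing enabled, Spark has to serialize
// this transform function together with the DStream graph, and the closure above captures
// sparkStreamCtx (a StreamingContext, which is not serializable). That capture is the likely
// source of the NotSerializableException; the answer below builds the new RDD from the rdd
// argument alone and so avoids it.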
newDataWindow.print
sparkStreamCtx.start()
sparkStreamCtx.awaitTerminationOrTimeout(1000)
}
}
I don't mind other ways of sorting a DStream and taking its top N instead of my approach.
You can use the transform method of the DStream: sort the input RDD, take its first n elements into a list, and then filter the original RDD down to the elements contained in that list.
val n = 10
val topN = result.transform(rdd =>{
val list = rdd.sortBy(_._1).take(n)
rdd.filter(list.contains)
})
topN.print
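If "top N" means the N largest keys, a hedged variant of the same idea sorts in descending order (RDD.sortBy takes an ascending flag); result and n are the same names as above:
val topNDesc = result.transform { rdd =>
  val list = rdd.sortBy(_._1, ascending = false).take(n)
  rdd.filter(list.contains)
}
topNDesc.print()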

Spark job returns a different result on each run

I am working on Scala code that performs linear regression on certain datasets. Right now I am using 20 cores and 25 executors, and every time I run a Spark job I get a different result.
The input files are 2 GB and 400 MB. However, when I run the job with 20 cores and 1 executor, I get consistent results.
Has anyone experienced such a thing so far?
Please find the code below:
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SchemaRDD
import org.apache.spark.Partitioner
import org.apache.spark.storage.StorageLevel
object TextProcess{
def main(args: Array[String]){
val conf = new SparkConf().set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val numExecutors=(conf.get("spark.executor.instances").toInt)
// Read the 2 input files
// First file is either cases / controls
val input1 = sc.textFile(args(0))
// Second file is Gene Expression
val input2 = sc.textFile(args(1))
//collecting header information
val header1=sc.parallelize(input1.take(1))
val header2=sc.parallelize(input2.take(1))
//mapping data without the header information
val map1 = input1.subtract(header1).map(x => (x.split(" ")(0)+x.split(" ")(1), x))
val map2 = input2.subtract(header2).map(x => (x.split(" ")(0)+x.split(" ")(1), x))
//joining data. here is where the order was getting affected.
val joinedMap = map1.join(map2)
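// Hedged note, not part of the original code: the ordering of the join output is not deterministic
// across runs, so the zipWithIndex calls below can assign different row indices on every run once
// several executors are involved. Sorting the joined pairs first, e.g. map1.join(map2).sortByKey(),
// would pin that ordering (assuming the concatenated first two fields form a unique key).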
//adding the header back to the top of RDD
val x = header1.union(joinedMap.map{case(x,(y,z))=>y})
val y = header2.union(joinedMap.map{case(x,(y,z))=>z})
//removing irrelevant columns
val rddX = x.map(x=>x.split(" ").drop(3)).zipWithIndex.map{case(a,b)=> a.map(x=>b.toString+" "+x.toString)}
val rddY = y.map(x=>x.split(" ").drop(2)).zipWithIndex.map{case(a,b)=> a.map(x=>b.toString+" "+x.toString)}
//transposing and cross joining data. This keeps the identifier at the start
val transposedX = rddX.flatMap(x => x.zipWithIndex.map(x=>x.swap)).reduceByKey((a,b)=> a+":"+b).map{case(a,b)=>b.split(":").sorted}
val transposedY = rddY.flatMap(x => x.zipWithIndex.map(x=>x.swap)).reduceByKey((a,b)=> a+":"+b).map{case(a,b)=>b.split(":").sorted}.persist(StorageLevel.apply(false, true, false, false, numExecutors))
val cleanedX = transposedX.map(x=>x.map(x=>x.slice(x.indexOfSlice(" ")+1,x.length)))
val cleanedY = transposedY.map(x=>x.map(x=>x.slice(x.indexOfSlice(" ")+1,x.length))).persist(StorageLevel.apply(false, true, false, false, numExecutors))
val cartXY = cleanedX.cartesian(cleanedY)
val finalDataSet= cartXY.map{case(a,b)=>a zip b}
//convert to key value pair
val regressiondataset = finalDataSet.map(x=>(x(0),x.drop(1).filter{case(a,b)=> a!="NA" && b!="NA" && a!="null" && b!="null"}.map{case(a,b)=> (a.toDouble, b.toDouble)}))
val linearOutput = regressiondataset.map(s => new LinearRegression(s._1 ,s._2).outputVal)
linearOutput.saveAsTextFile(args(2))
cleanedY.unpersist()
transposedY.unpersist()
}
}
class LinearRegression (val keys: (String, String),val pairs: Array[(Double,Double)]) {
val size = pairs.size
// first pass: read in data, compute xbar and ybar
val sums = pairs.aggregate(new X_X2_Y(0D,0D,0D))(_ + new X_X2_Y(_),_+_)
val bars = (sums.x / size, sums.y / size)
// second pass: compute summary statistics
val sumstats = pairs.foldLeft(new X2_Y2_XY(0D,0D,0D))(_ + new X2_Y2_XY(_, bars))
val beta1 = sumstats.xy / sumstats.x2
val beta0 = bars._2 - (beta1 * bars._1)
val betas = (beta0, beta1)
//println("y = " + ("%4.3f" format beta1) + " * x + " + ("%4.3f" format beta0))
// analyze results
val correlation = pairs.aggregate(new RSS_SSR(0D,0D))(_ + RSS_SSR.build(_, bars, betas),_+_)
val R2 = correlation.ssr / sumstats.y2
val svar = correlation.rss / (size - 2)
val svar1 = svar / sumstats.x2
val svar0 = ( svar / size ) + ( bars._1 * bars._1 * svar1)
val svar0bis = svar * sums.x2 / (size * sumstats.x2)
/* println("R^2 = " + R2)
println("std error of beta_1 = " + Math.sqrt(svar1))
println("std error of beta_0 = " + Math.sqrt(svar0))
println("std error of beta_0 = " + Math.sqrt(svar0bis))
println("SSTO = " + sumstats.y2)
println("SSE = " + correlation.rss)
println("SSR = " + correlation.ssr)*/
def outputVal() = keys._1 +
  "\t" + keys._2 +
  "\t" + beta1 +
  "\t" + beta0 +
  "\t" + R2 +
  "\t" + Math.sqrt(svar1) +
  "\t" + Math.sqrt(svar0) +
  "\t" + sumstats.y2 +
  "\t" + correlation.rss +
  "\t" + correlation.ssr + "\t"
}
object RSS_SSR {
def build(p: (Double,Double), bars: (Double,Double), betas: (Double,Double)): RSS_SSR = {
val fit = (betas._2 * p._1) + betas._1
val rss = (fit-p._2) * (fit-p._2)
val ssr = (fit-bars._2) * (fit-bars._2)
new RSS_SSR(rss, ssr)
}
}
class RSS_SSR(val rss: Double, val ssr: Double) {
def +(p: RSS_SSR): RSS_SSR = new RSS_SSR(rss+p.rss, ssr+p.ssr)
}
class X_X2_Y(val x: Double, val x2: Double, val y: Double) {
def this(p: (Double,Double)) = this(p._1, p._1*p._1, p._2)
def +(p: X_X2_Y): X_X2_Y = new X_X2_Y(x+p.x,x2+p.x2,y+p.y)
}
class X2_Y2_XY(val x2: Double, val y2: Double, val xy: Double) {
def this(p: (Double,Double), bars: (Double,Double)) = this((p._1-bars._1)*(p._1-bars._1), (p._2-bars._2)*(p._2-bars._2),(p._1-bars._1)*(p._2-bars._2))
def +(p: X2_Y2_XY): X2_Y2_XY = new X2_Y2_XY(x2+p.x2,y2+p.y2,xy+p.xy)
}

Which Java API is best for converting PDF to image

I tried 3 Java APIs for PDF, but none of the 3 worked properly:
1. PDFFile
2. PDDocument
3. PDFDocumentReader
I have a PDF with 2 layers, of which the upper one is slightly transparent. When the above 3 APIs convert it into an image, only the upper layer appears in the image, with no transparency, but both layers must appear.
So please suggest another API that fulfills this requirement.
Code for PDFFile :
val raf = new RandomAccessFile(file, "r")
val channel = raf.getChannel()
val buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size())
raf.close()
val pdffile = new PDFFile(buf)
val numPgs = pdffile.getNumPages() + 1
for (i <- 1 until numPgs) {
val page = pdffile.getPage(i)
val pwdt = page.getBBox().getWidth().toDouble
val phgt = page.getBBox().getHeight().toDouble
val rect = new Rectangle(0, 0, pwdt.toInt, phgt.toInt)
val rsiz = resize(method, size, pwdt, phgt)
val img = page.getImage(rsiz("width"), rsiz("height"),
rect, null, true, true)
result ::= buffer(img)
}
Code for PDDocument :
val doc = PDDocument.load(new FileInputStream(file));
val pages = doc.getDocumentCatalog().getAllPages()
for (i <- 0 until pages.size()) {
val page = pages.get(i)
val before = page.asInstanceOf[PDPage].convertToImage()
}
Code for PDFDocumentReader :
val inputStream = new FileInputStream(file)
val document = new PDFDocumentReader(inputStream)
val numPgs = document.getNumberOfPages
for (i <- 0 until numPgs) {
val pageDetail = new PageDetail("", "", i, "")
val resourceDetails = document.getPageAsImage(pageDetail)
val image = ImageIO.read(new ByteArrayInputStream(resourceDetails.getBytes()))
result ::= image
}
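Since the question asks for another API: here is a hedged sketch using PDFBox 2.x's PDFRenderer, which rasterizes the whole page content in one pass and may handle the layered, partly transparent content better (that is an assumption, not something verified against the PDF in question):
import java.io.File
import javax.imageio.ImageIO
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.rendering.PDFRenderer

// Render every page of the PDF to a PNG at 150 dpi.
// `file` is assumed to be the same java.io.File used in the snippets above.
val doc = PDDocument.load(file)
val renderer = new PDFRenderer(doc)
for (i <- 0 until doc.getNumberOfPages) {
  val image = renderer.renderImageWithDPI(i, 150)
  ImageIO.write(image, "png", new File(s"page-$i.png"))
}
doc.close()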