Change a method to private and use another overloaded version instead - scala

Code:
/** Numerical integration with the midpoint rule. */
object Integral {

  /** Approximates the integral of `f` from `start` to `end`.
    *
    * The range is split into `nIntervals` equal slices and `f` is sampled at the
    * middle of each slice. When `start > end` the slice width is negative, so the
    * result carries the opposite sign.
    *
    * @param start      lower bound of the integration range
    * @param end        upper bound of the integration range
    * @param nIntervals number of slices; must be positive
    * @param f          function to integrate
    * @return the midpoint-rule approximation of the integral
    * @throws IllegalArgumentException if `nIntervals` is not positive
    */
  def approx(start: Double, end: Double, nIntervals: Int)(f: Double => Double): Double = {
    require(nIntervals > 0, "nIntervals must be positive")
    val delta = (end - start) / nIntervals
    val halfDelta = delta / 2
    // Midpoints are derived from the slice index instead of a Double range:
    // `start until end by delta` is gone in Scala 2.13 and the index form always
    // produces exactly nIntervals samples.
    (0 until nIntervals).iterator.map(i => f(start + i * delta + halfDelta) * delta).sum
  }

  /** Same approximation, with both bounds passed as a two-element array.
    *
    * The bounds are sorted before integrating; when the caller supplied them in
    * descending order the result is negated.
    *
    * @param startEnd  exactly two values: the start and the end of the range
    * @param nIntervas number of slices; must be positive
    * @param f         function to integrate
    * @throws IllegalArgumentException if `startEnd` does not hold exactly two values
    */
  def approx(startEnd: Array[Double], nIntervas: Int)(f: Double => Double): Double = {
    require(startEnd.length == 2)
    val startEndSorted = startEnd.sorted
    val res = approx(startEndSorted(0), startEndSorted(1), nIntervas)(f)
    // Arrays compare by reference with ==, and `sorted` always returns a new
    // array, so the contents have to be compared explicitly.
    if (startEndSorted.sameElements(startEnd)) res else -res
  }
}
/** Prints a handful of midpoint-rule approximations next to reference values. */
object IntegralTest {

  /** Cubic integrand `x^3 - 6x`. */
  def f1(x: Double): Double = math.pow(x, 3) - 6 * x

  /** Integrand `sqrt(1 - x^2)`; its integral over [0, 1] is compared with pi / 4 below. */
  def f2(x: Double): Double = math.sqrt(1 - x * x)

  println(Integral.approx(0, 3, 6)(f1))
  println(Integral.approx(0, 1, 60000)(f1))

  println(Integral.approx(0, 1, 60000)(f2))
  println(math.Pi / 4)

  println(Integral.approx(0, 3, 60000)((x: Double) => x - 1))
  println(Integral.approx(1, 2, 5)((x: Double) => 1 / x))

  // test the exponential function
  println(Integral.approx(1, 3, 60000)(math.exp))
  println(math.exp(3) - math.exp(1))
}
I want to make approx(start: Double, end: Double, nIntervals: Int)(f: Double => Double): Double private and change all references to it so that they call approx(startEnd: Array[Double], nIntervas: Int)(f: Double => Double): Double instead. Is there a way to do this safely and hassle-free in IntelliJ?
Update
I soon realized that for this specific case I could have handled it in a better way (by recursion):
/** Approximates the integral of `f` between `start` and `end` with the midpoint rule.
  *
  * When `start > end` the bounds are swapped and the result is negated.
  *
  * @param start      first bound of the integration range
  * @param end        second bound of the integration range
  * @param nIntervals number of equal slices; must be positive
  * @param f          function to integrate
  * @return the midpoint-rule approximation of the integral
  * @throws IllegalArgumentException if `nIntervals` is not positive
  */
def approx(start: Double, end: Double, nIntervals: Int)(f: Double => Double): Double = {
  require(nIntervals > 0, "nIntervals must be positive")
  if (start > end) {
    -approx(end, start, nIntervals)(f)
  } else {
    val delta = (end - start) / nIntervals
    val halfDelta = delta / 2
    // Index-based midpoints: a Double range (`start until end by delta`) no longer
    // exists in Scala 2.13, and the index form yields exactly nIntervals samples.
    (0 until nIntervals).iterator.map(i => f(start + i * delta + halfDelta) * delta).sum
  }
}
But I am still interested in finding out how to refactor the old problematic code because it might be useful in the future.

I don't know if the fact you use it for Scala changes anything, but you should be able to right-click on a function, and Change method signature to the one you want (so you wouldn't replace function1 by function2 per se, but rather modify f1 to match f2).
Edit: This won't work, as per Ben's comment. Leaving it here anyway for the sake of mentioning the functionality.

Related

Scala: overloaded method value converged with alternatives:

I am still new to Scala (still learning it). I have the following assignment, but am having problems running it due to the error "overloaded method value converged with alternatives:"
Full Error:
\KMeans.scala:101:8: overloaded method value converged with alternatives:
(eta: Double,oldMeans: scala.collection.parallel.ParSeq[kmeans.Point],newMeans: scala.collection.parallel.ParSeq[kmeans.Point])Boolean <and>
(eta: Double,oldMeans: scala.collection.Seq[kmeans.Point],newMeans: scala.collection.Seq[kmeans.Point])Boolean
cannot be applied to (Double)
if (!converged(eta)(means, newMeans))
Can you please help?
The full class is:
package kmeans
import scala.annotation.tailrec
import scala.collection.{Map, Seq, mutable}
import scala.collection.parallel.CollectionConverters._
import scala.collection.parallel.{ParMap, ParSeq}
import scala.util.Random
import org.scalameter._
/** K-means clustering over [[Point]]s, with sequential (`Seq`) and parallel
  * (`ParSeq`) overloads of every step.
  */
class KMeans extends KMeansInterface {

  /** Generates `num` points from three fixed-seed random generators, so the
    * output is the same on every call with the same arguments.
    */
  def generatePoints(k: Int, num: Int): Seq[Point] = {
    val randx = new Random(1)
    val randy = new Random(3)
    val randz = new Random(5)
    (0 until num)
      .map({ i =>
        val x = ((i + 1) % k) * 1.0 / k + randx.nextDouble() * 0.5
        val y = ((i + 5) % k) * 1.0 / k + randy.nextDouble() * 0.5
        val z = ((i + 7) % k) * 1.0 / k + randz.nextDouble() * 0.5
        new Point(x, y, z)
      }).to(mutable.ArrayBuffer)
  }

  /** Picks `k` initial means from `points` using a fixed-seed random generator. */
  def initializeMeans(k: Int, points: Seq[Point]): Seq[Point] = {
    val rand = new Random(7)
    (0 until k).map(_ => points(rand.nextInt(points.length))).to(mutable.ArrayBuffer)
  }

  /** Returns the element of `means` with the smallest squared distance to `p`.
    * `means` must not be empty (checked with an assertion).
    */
  def findClosest(p: Point, means: IterableOnce[Point]): Point = {
    val it = means.iterator
    assert(it.nonEmpty)
    var closest = it.next()
    var minDistance = p.squareDistance(closest)
    while (it.hasNext) {
      val point = it.next()
      val distance = p.squareDistance(point)
      if (distance < minDistance) {
        minDistance = distance
        closest = point
      }
    }
    closest
  }

  /** Groups `points` by their closest mean; every mean gets an entry, possibly empty. */
  def classify(points: Seq[Point], means: Seq[Point]): Map[Point, Seq[Point]] = {
    means.map{(_, Seq())}.toMap ++ points.groupBy(findClosest(_, means))
  }

  /** Parallel-collection variant of `classify`. */
  def classify(points: ParSeq[Point], means: ParSeq[Point]): ParMap[Point, ParSeq[Point]] = {
    means.map{(_, ParSeq())}.toMap ++ points.groupBy(findClosest(_, means))
  }

  /** Component-wise average of `points`, or `oldMean` when `points` is empty. */
  def findAverage(oldMean: Point, points: Seq[Point]): Point = if (points.isEmpty) oldMean else {
    var x = 0.0
    var y = 0.0
    var z = 0.0
    points.foreach { p =>
      x += p.x
      y += p.y
      z += p.z
    }
    new Point(x / points.length, y / points.length, z / points.length)
  }

  /** Parallel-collection variant of `findAverage`; the summation itself runs
    * sequentially over `points.seq` because it mutates local accumulators.
    */
  def findAverage(oldMean: Point, points: ParSeq[Point]): Point = if (points.isEmpty) oldMean else {
    var x = 0.0
    var y = 0.0
    var z = 0.0
    points.seq.foreach { p =>
      x += p.x
      y += p.y
      z += p.z
    }
    new Point(x / points.length, y / points.length, z / points.length)
  }

  /** Replaces every old mean with the average of the points classified under it. */
  def update(classified: Map[Point, Seq[Point]], oldMeans: Seq[Point]): Seq[Point] = {
    // No `.par` here: that would produce a ParSeq, which is not a Seq.
    oldMeans.map(oldMean => findAverage(oldMean, classified(oldMean)))
  }

  /** Parallel-collection variant of `update`. */
  def update(classified: ParMap[Point, ParSeq[Point]], oldMeans: ParSeq[Point]): ParSeq[Point] = {
    oldMeans.map(oldMean => findAverage(oldMean, classified(oldMean)))
  }

  /** True when every mean moved by at most `eta` (squared distance) between
    * `oldMeans` and `newMeans`, compared pairwise by position.
    */
  def converged(eta: Double, oldMeans: Seq[Point], newMeans: Seq[Point]): Boolean = {
    (oldMeans zip newMeans)
      .forall(entry => entry._1.squareDistance(entry._2) <= eta)
  }

  /** Parallel-collection variant of `converged`. */
  def converged(eta: Double, oldMeans: ParSeq[Point], newMeans: ParSeq[Point]): Boolean = {
    (oldMeans zip newMeans)
      .forall(entry => entry._1.squareDistance(entry._2) <= eta)
  }

  /** Repeats classify/update until the means have converged within `eta`. */
  @tailrec
  final def kMeans(points: Seq[Point], means: Seq[Point], eta: Double): Seq[Point] = {
    val meansClassification = classify(points, means)
    val newMeans = update(meansClassification, means)
    // `converged` has a single three-parameter list; it is not curried.
    if (!converged(eta, means, newMeans))
      kMeans(points, newMeans, eta)
    else
      newMeans
  }

  /** Parallel-collection variant of `kMeans`. */
  @tailrec
  final def kMeans(points: ParSeq[Point], means: ParSeq[Point], eta: Double): ParSeq[Point] = {
    val meansClassification = classify(points, means)
    val newMeans = update(meansClassification, means)
    if (!converged(eta, means, newMeans))
      kMeans(points, newMeans, eta)
    else
      newMeans
  }
}
/** A point in three-dimensional space.
  *
  * Note: deliberately uses reference equality.
  */
class Point(val x: Double, val y: Double, val z: Double) {

  /** Squared Euclidean distance from this point to `that`. */
  def squareDistance(that: Point): Double = {
    val dx = that.x - x
    val dy = that.y - y
    val dz = that.z - z
    dx * dx + dy * dy + dz * dz
  }

  /** Cuts `v` down to two decimal places (the conversion to Int truncates). */
  private def twoDecimals(v: Double): Double = (v * 100).toInt / 100.0

  override def toString =
    Seq(x, y, z).map(twoDecimals).mkString("(", ", ", ")")
}
/** Measures the sequential and the parallel k-means run and prints both timings. */
object KMeansRunner {

  /** Measurement settings shared by both runs in `main`. */
  val standardConfig = config(
    Key.exec.minWarmupRuns -> 20,
    Key.exec.maxWarmupRuns -> 40,
    Key.exec.benchRuns -> 25,
    Key.verbose -> true
  ).withWarmer(new Warmer.Default)

  def main(args: Array[String]): Unit = {
    // Additional `println` to avoid bad interaction with JLine output
    def blankLines(count: Int): Unit = (1 to count).foreach(_ => println())

    val engine = new KMeans()
    val pointCount = 500000
    val threshold = 0.01
    val clusterCount = 32

    val points = engine.generatePoints(clusterCount, pointCount)
    val means = engine.initializeMeans(clusterCount, points)
    val seqtime = standardConfig.measure {
      engine.kMeans(points, means, threshold)
    }

    val parPoints = points.par
    val parMeans = means.par
    val partime = standardConfig.measure {
      engine.kMeans(parPoints, parMeans, threshold)
    }

    blankLines(4)
    println(s"sequential time: $seqtime")
    println(s"parallel time: $partime")
    println(s"speedup: ${seqtime.value / partime.value}")
    blankLines(3)
  }

  // Workaround Dotty's handling of the existential type KeyValue
  implicit def keyValueCoerce[T](kv: (Key[T], T)): KeyValue = {
    kv.asInstanceOf[KeyValue]
  }
}
The interface:
package kmeans
import scala.collection.{Map, Seq}
import scala.collection.parallel.{ParMap, ParSeq}
/**
* The interface used by the grading infrastructure. Do not change signatures
* or your submission will fail with a NoSuchMethodError.
*
* Every operation is declared twice: once for sequential collections (`Seq`/`Map`)
* and once for parallel collections (`ParSeq`/`ParMap`).
*/
trait KMeansInterface {
// Groups points under means (sequential and parallel overloads).
def classify(points: Seq[Point], means: Seq[Point]): Map[Point, Seq[Point]]
def classify(points: ParSeq[Point], means: ParSeq[Point]): ParMap[Point, ParSeq[Point]]
// Produces new means from a classification and the previous means.
def update(classified: Map[Point, Seq[Point]], oldMeans: Seq[Point]): Seq[Point]
def update(classified: ParMap[Point, ParSeq[Point]], oldMeans: ParSeq[Point]): ParSeq[Point]
// Convergence check. Note the single parameter list with three parameters:
// call it as converged(eta, oldMeans, newMeans), not converged(eta)(oldMeans, newMeans).
def converged(eta: Double, oldMeans: Seq[Point], newMeans: Seq[Point]): Boolean
def converged(eta: Double, oldMeans: ParSeq[Point], newMeans: ParSeq[Point]): Boolean
// Entry points of the algorithm; `eta` is the value handed to `converged`.
def kMeans(points: Seq[Point], means: Seq[Point], eta: Double): Seq[Point]
def kMeans(points: ParSeq[Point], means: ParSeq[Point], eta: Double): ParSeq[Point]
}
The method should be called as converged(eta, means, newMeans) not converged(eta)(means, newMeans). If you look, both def converged are defined with a single parameter list (with 3 parameters), not with two.
The most relevant part of this error is not the part you quoted but
cannot be applied to (Double)

Monadic approach to estimating PI in scala

I'm trying to understand how to leverage monads in scala to solve simple problems as way of building up my familiarity. One simple problem is estimating PI using a functional random number generator. I'm including the code below for a simple stream based approach.
I'm looking for help in translating this to a monadic approach. For example, is there an idiomatic way to convert this code to use the State monad (and other monads) in a stack-safe way?
// A purely functional random number generator: instead of mutating internal
// state, every draw returns the generated value together with the generator
// to use for the next draw.
trait RNG {
// Next Int plus the follow-up generator.
def nextInt: (Int, RNG)
// Next Double plus the follow-up generator.
def nextDouble: (Double, RNG)
}
/** A 2-D sample; `isInCircle` is true when it lies strictly inside the unit circle. */
case class Point(x: Double, y: Double) {
  val isInCircle: Boolean = {
    val squaredRadius = x * x + y * y
    squaredRadius < 1.0
  }
}
/** Helpers and a linear-congruential implementation for the `RNG` trait. */
object RNG {

  /** Draws an Int from `rng` and maps it into the non-negative range. */
  def nonNegativeInt(rng: RNG): (Int, RNG) = {
    val (drawn, next) = rng.nextInt
    val mapped =
      if (drawn > 0) drawn
      else if (drawn == Int.MinValue) 0
      else drawn + Int.MaxValue
    (mapped, next)
  }

  /** Draws a non-negative Int and scales it by `Int.MaxValue`. */
  def double(rng: RNG): (Double, RNG) = {
    val (natural, next) = nonNegativeInt(rng)
    (natural.toDouble / Int.MaxValue, next)
  }

  /** Linear congruential generator whose whole state is `seed`. */
  case class Simple(seed: Long) extends RNG {

    def nextInt: (Int, RNG) = {
      val advancedSeed = (seed * 0x5DEECE66DL + 0xBL) & 0xFFFFFFFFFFFFL
      ((advancedSeed >>> 16).toInt, Simple(advancedSeed))
    }

    // Advances the generator once, discards that Int, and draws the Double
    // from the follow-up generator.
    def nextDouble: (Double, RNG) = double(nextInt._2)
  }
}
/** Monte-Carlo estimate of pi driven by the purely functional `RNG`. */
object PI {
  import RNG._

  /** Infinite lazy stream of doubles produced by repeatedly advancing `rng`. */
  def doubleStream(rng: Simple): Stream[Double] = rng.nextDouble match {
    case (d: Double, next: Simple) => d #:: doubleStream(next)
  }

  /** Estimates pi from `iter` random points.
    *
    * Each point consumes two consecutive doubles from the stream, so exactly
    * `iter` points are sampled and the hit count is divided by the number of
    * points actually drawn. Pairing the stream with itself shifted by three,
    * as before, yielded only `iter - 3` points that also shared samples.
    *
    * @param rng  generator that seeds the stream
    * @param iter number of points to sample
    * @return four times the fraction of points inside the unit circle
    */
  def estimate(rng: Simple, iter: Int): Double = {
    // Iterating (rather than keeping the stream in a val) lets consumed cells
    // be garbage-collected.
    val inside = doubleStream(rng).iterator
      .grouped(2)
      .take(iter)
      .count(pair => Point(pair(0), pair(1)).isInCircle)
      .toDouble
    (inside / iter) * 4.0
  }
}
// > PI.estimate(RNG.Simple(10), 100000)
// res1: Double = 3.14944
I suspect I'm looking for something like replicateM from the Applicative monad in cats but I'm not sure how to line up the types or how to do it in a way that doesn't accumulate intermediate results in memory. Or, is there a way to do it with a for comprehension that can iteratively build up Points?
If you want to iterate using a monad in a stack-safe way, there is a tailRecM method implemented in the Monad type class:
// assuming random generated [-1.0,1.0]
/** Estimates pi by sampling `iterations` points inside the effect `F`.
  *
  * `random` is by-name, so it is re-evaluated for every coordinate drawn.
  * The loop runs through `tailRecM`, which iterates until a `Right` is returned.
  */
def calculatePi[F[_]](iterations: Int)
                     (random: => F[Double])
                     (implicit F: Monad[F]): F[Double] = {
  case class Iterations(total: Int, inCircle: Int)

  // Folds one sampled point into the running counters and decides whether to stop.
  def advance(data: Iterations, x: Double, y: Double): Either[Iterations, Double] = {
    val hit = if ((x * x + y * y) < 1.0) 1 else 0
    val newTotal = data.total + 1
    val newInCircle = data.inCircle + hit
    if (newTotal < iterations) Left(Iterations(newTotal, newInCircle))
    else Right(newInCircle.toDouble / newTotal.toDouble * 4.0)
  }

  def step(data: Iterations): F[Either[Iterations, Double]] =
    F.flatMap(random)(x => F.map(random)(y => advance(data, x, y)))

  F.tailRecM(Iterations(0, 0))(step)
}
calculatePi(10000)(Future { Random.nextDouble }).onComplete(println)
It uses by-name param because you could try to pass there something like Future (even though the Future is not lawful), which are eager, so you would end up with evaluating the same thing time and time again. With by name param at least you have the chance of passing there a recipe for side-effecting random. Of course, if we use Option, List as a monad holding our "random" number, we should also expect funny results.
The correct solution would be using something that ensures that this F[A] is lazily evaluated, and any side effect inside is evaluated each time you need a value from inside. For that you basically have to use some of Effects type classes, like e.g. Sync from Cats Effects.
def calculatePi[F[_]](iterations: Int)
(random: F[Double])
(implicit F: Sync[F]): F[Double] = {
...
}
calculatePi(10000)(Coeval( Random.nextDouble )).value
calculatePi(10000)(Task( Random.nextDouble )).runAsync
Alternatively, if you don't care about purity that much, you could pass a side-effecting function or object instead of F[Double] for generating random numbers.
// simplified, hardcoded F=Coeval
/** Estimates pi from `iterations` points whose coordinates come from the
  * side-effecting `random` function; `x` is drawn before `y` for each point.
  */
def calculatePi(iterations: Int)
               (random: () => Double): Double = {
  case class Iterations(total: Int, inCircle: Int)

  def step(data: Iterations) = Coeval {
    val x = random()
    val y = random()
    val hit = if ((x * x + y * y) < 1.0) 1 else 0
    val seen = data.total + 1
    val hits = data.inCircle + hit
    val outcome: Either[Iterations, Double] =
      if (seen < iterations) Left(Iterations(seen, hits))
      else Right(hits.toDouble / seen.toDouble * 4.0)
    outcome
  }

  Monad[Coeval].tailRecM(Iterations(0, 0))(step).value
}
Here is another approach that my friend Charles Miller came up with. It's a bit more direct since it uses RNG directly, but it follows the same approach provided by @Mateusz Kubuszok above that leverages Monad.
The key difference is that it leverages the State monad so we can thread the RNG state through the computation and generate the random numbers using the "pure" random number generator.
import cats._
import cats.data._
import cats.implicits._
/** Pi estimation where the random seed is threaded through a `State` monad. */
object PICharles {

  /** A computation that reads and updates a `Long` seed and yields an `A`. */
  type RNG[A] = State[Long, A]

  object RNG {

    /** Advances the seed with a linear congruential step and returns the new seed. */
    def nextLong: RNG[Long] =
      State.modify[Long](
        seed => (seed * 0x5DEECE66DL + 0xBL) & 0xFFFFFFFFFFFFL
      ) >> State.get

    def nextInt: RNG[Int] = nextLong.map(l => (l >>> 16).toInt)

    /** Maps the next Int into the non-negative range. */
    def nextNatural: RNG[Int] = nextInt.map { i =>
      if (i > 0) i
      else if (i == Int.MinValue) 0
      else i + Int.MaxValue
    }

    /** Next natural number scaled by `Int.MaxValue`. */
    def nextDouble: RNG[Double] = nextNatural.map(_.toDouble / Int.MaxValue)

    /** Runs `rng` from `seed` and returns only the produced value. */
    def runRng[A](seed: Long)(rng: RNG[A]): A = rng.runA(seed).value

    /** Runs a computation seeded with the current wall-clock time (not reproducible). */
    def unsafeRunRng[A]: RNG[A] => A = runRng(System.currentTimeMillis())
  }

  object PI {
    case class Step(count: Int, inCircle: Int)

    /** Builds the computation that estimates pi from `iterations` sampled points. */
    def calculatePi(iterations: Int): RNG[Double] = {
      // One loop turn: draw a point, then either finish with the estimate built
      // from the counters in `s`, or continue with the counters updated.
      def step(s: Step): RNG[Either[Step, Double]] =
        for {
          x <- RNG.nextDouble
          y <- RNG.nextDouble
          isInCircle = (x * x + y * y) < 1.0
          newInCircle = s.inCircle + (if (isInCircle) 1 else 0)
        } yield {
          if (s.count >= iterations)
            Right(s.inCircle.toDouble / s.count.toDouble * 4.0)
          else
            Left(Step(s.count + 1, newInCircle))
        }
      Monad[RNG].tailRecM(Step(0, 0))(step(_))
    }

    /** Runs `calculatePi` with a time-based seed. */
    def unsafeCalculatePi(iterations: Int): Double =
      RNG.unsafeRunRng[Double](calculatePi(iterations))
  }
}
Thanks Charles & Mateusz for your help!

How to calculate the std in scala with high order functions

I am new to scala.
Lately I have tried to write some math functions, but I don't understand what I am doing wrong.
import scala.annotation.tailrec
import scala.math.{pow, sqrt}
/** Basic descriptive statistics over lists of doubles. */
object HikeStatProcessor {

  /** Arithmetic mean of `list`; NaN for an empty list (0.0 / 0). */
  def mean(list: List[Double]): Double = {
    sum(list, value => value) / list.size
  }

  /** Population standard deviation of `list` (divides by the number of elements).
    *
    * Each deviation from the mean is squared before summing. Summing the raw
    * deviations first and squaring afterwards always gives (about) zero, because
    * deviations from the mean cancel out.
    */
  def std(list: List[Double]): Double = {
    val means: Double = mean(list)
    sqrt(sum(list, (value: Double) => pow(value - means, 2)) / list.size)
  }

  /** Sums `operation(x)` over every element of `list`, left to right. */
  private def sum(list: List[Double],
                  operation: Double => Double): Double =
    list.foldLeft(0.0)((accu, value) => accu + operation(value))
}
val a = HikeStatProcessor.std(List(1, 2, 3))
Could you please provide a fix for the above code and a brief explanation of what I was doing wrong?
Consider this statement: sum(list, (head: Double) => head - means). Let's break it down into smaller steps.
sum(list , (head: Double) => head - means)
sum(List(1,2,3), x => x - 2)
sum(List(1-2, 2-2, 3-2))
-1 + 0 + 1 = 0
If you subtract the average from each element, the sum is always going to be zero (or close to it). What was your intention?
Thanks all for the answers.
I solved the issue by changing the def std to:
/** Sample standard deviation: squared deviations from the mean are summed and
  * divided by `list.size - 1` before taking the square root.
  */
def std(list: List[Double]): Double = {
  val center = mean(list)
  val squaredDeviations = sum(list, (value: Double) => pow(value - center, 2))
  val degreesOfFreedom = list.size - 1
  sqrt(squaredDeviations / degreesOfFreedom)
}
The issue was caused by the fact that I did not evaluate the pow 2 within my anonymous function.

Patterns for same method & same parameters but different parameter names in scala

I'd like to be able to do:
// Illustrates the problem and intentionally does NOT compile: both overloads
// take (Int, Int), and parameter names are not part of a method's signature,
// so the compiler reports "method apply is defined twice".
object AddOrSubtract {
def apply(x: Int, adding: Int) = x + adding
// Same name and parameter types as the definition above; only the parameter name differs.
def apply(x: Int, subtracting: Int) = x - subtracting
}
AddOrSubtract(1, adding = 5) // should be 6
AddOrSubtract(1, subtracting = 5) // should be -4
But I get the error:
Error:(1331, 7) method apply is defined twice;
the conflicting method apply was defined at line 1330:7
def apply(x: Int, subtracting: Int) = x - subtracting
I understand that this is because the two methods have the same signatures. Is there some pattern for getting around this? The only thing I can think of is using an implicit to change the type of value, like:
/** Workaround that tells the two overloads apart by wrapping the second
  * argument in distinct implicit classes.
  */
object AddOrSubtract {
  implicit class AddInt(val x: Int)
  implicit class SubInt(val x: Int)

  /** Adds the wrapped amount to `x`. */
  def apply(x: Int, adding: AddInt): Int = {
    val amount = adding.x
    x + amount
  }

  /** Subtracts the wrapped amount from `x`. */
  def apply(x: Int, subtracting: SubInt): Int = {
    val amount = subtracting.x
    x - amount
  }

  /** Calls both overloads once; the results are discarded. */
  def run(): Unit = {
    apply(1, adding = 5)
    apply(1, subtracting = 5)
  }
}
But I'm curious if there is any other less inelegant way of accomplishing this?
Your example code may be over-simplified for your real-world use case. If so then this solution won't be applicable.
/** Single `apply` with defaulted named parameters: callers pass either
  * `adding = n` or `subtracting = n`, and the omitted one defaults to 0.
  */
object AddOrSubtract {
  def apply(x: Int, adding: Int = 0, subtracting: Int = 0): Int = {
    val netChange = adding - subtracting
    x + netChange
  }
}
AddOrSubtract(1, adding = 5) // res0: Int = 6
AddOrSubtract(1, subtracting = 5) // res1: Int = -4
AFAIK there is no good solution.
The only workaround I can imagine is
/** Variant that insists on exactly one of `adding` / `subtracting`.
  *
  * A value of 0 means "not supplied", so an explicit `adding = 0` cannot be
  * told apart from omitting the argument.
  */
object AddOrSubtract {

  /** Returns `x + adding` or `x - subtracting`, depending on which one was given.
    *
    * @throws IllegalArgumentException if neither or both of `adding` and
    *                                  `subtracting` are non-zero
    */
  def apply(x: Int, adding: Int = 0, subtracting: Int = 0): Int =
    (adding, subtracting) match {
      case (0, 0) => throw new IllegalArgumentException("either adding or subtracting is required")
      // Wildcards on purpose: binding a pattern variable named `x` here would
      // shadow the parameter `x` and compute with the wrong operand.
      case (_, 0) => x + adding
      case (0, _) => x - subtracting
      case (_, _) => throw new IllegalArgumentException("for now both adding and subtracting is not allowed")
    }
}
AddOrSubtract(1, adding = 5) // should be 6
AddOrSubtract(1, subtracting = 5) // should be -4
But it is far from perfect

Roughly (or partially) sort a list in Scala

Considering a list of several million objects like:
// Case-class parameters are public vals already, so no `val` modifier is needed.
case class Point(name: String, x: Double, y: Double)
I need, for a given Point target, to pick the 10 other points which are closest to the target.
val target = Point("myPoint", 34, 42)
val points = List(...) // list of several million points
def distance(p1: Point, p2: Point) = ??? // return the distance between two points
val closest10 = points.sortWith((a, b) => {
distance(a, target) < distance(b, target)
}).take(10)
This method works but is very slow. Indeed, the whole list is exhaustively sorted for each target request, whereas past the first 10 closest points, I really don't care about any kind of sorting. I don't even need that the first 10 closest are returned in the correct order.
Ideally, I'd be looking for a "return 10 first and don't pay attention to the rest" kind of method..
Naive solution that I can think of would sound like this: sort by buckets of 1000, take first bucket, sort it by buckets of 100, take first bucket, sort it by buckets of 10, take first bucket, done.
Question is, I guess this must be a very common problem in CS, so before rolling out my own solution based on this naive approach, I'd like to know of any state-of-the-art way of doing that, or even if some standard method already exists.
TL;DR how to get the first 10 items of an unsorted list, without having to sort the whole list?
Below is a barebone method adapted from this SO answer for picking n smallest integers from a list (which can be enhanced to handle more complex data structure):
/** Returns the `n` smallest elements of `list`, in descending order.
  *
  * A window of at most `n` elements is kept sorted in descending order, so its
  * head is the largest value kept so far. Every remaining element that is
  * smaller than that head replaces it.
  *
  * @param n    how many elements to keep; values `<= 0` yield an empty list
  * @param list input values, in any order
  * @return up to `n` smallest values, largest first
  */
def nSmallest(n: Int, list: List[Int]): List[Int] = {
  def update(l: List[Int], e: Int): List[Int] =
    if (e < l.head) (e :: l.tail).sortWith(_ > _) else l

  // With an empty window `update` would call `head` on Nil and throw.
  if (n <= 0) Nil
  else list.drop(n).foldLeft(list.take(n).sortWith(_ > _))(update)
}
nSmallest( 5, List(3, 2, 8, 2, 9, 1, 5, 5, 9, 1, 7, 3, 4) )
// res1: List[Int] = List(3, 2, 2, 1, 1)
Please note that the output is in reverse order.
I was looking at this and wondered if a PriorityQueue might be useful.
import scala.collection.mutable.PriorityQueue
// Case-class parameters are public vals already, so no `val` modifier is needed.
case class Point(name: String, x: Double, y: Double)
val target = Point("myPoint", 34, 42)
val points = List(...) //list of points
def distance(p1: Point, p2: Point) = ??? //distance between two points
// Load the priority queue with the first 10 points. The ordering is by distance
// to the target, so the head of the queue is the farthest point currently kept.
val ppq = PriorityQueue(points.take(10): _*)(
  Ordering.by((p: Point) => distance(p, target))
)
// Step through everything after the first 10, carrying the current largest
// kept distance along as the fold accumulator.
points.drop(10).foldLeft(distance(ppq.head, target)) { (mxDst, nextPnt) =>
  if (mxDst > distance(nextPnt, target)) {
    ppq.dequeue()              // drop current far point
    ppq.enqueue(nextPnt)       // load replacement point
    distance(ppq.head, target) // return new max distance
  } else mxDst
}
// The queue holds points, not distances: the (up to) 10 closest points.
val result: Seq[Point] = ppq.dequeueAll
Here is how it can be done with QuickSelect. I used an in-place QuickSelect. Basically, for every target point we calculate the distance between each point and the target, and use QuickSelect to get the k-th smallest distance (the k-th order statistic). Whether this algorithm is faster than sorting depends on factors like the number of points, the number of nearest points requested, and the number of targets. On my machine, for 3 million randomly generated points, 10 target points, and 10 nearest points requested, it is 2 times faster than the sort-based approach:
Number of points: 3000000
Number of targets: 10
Number of nearest: 10
QuickSelect: 10737 ms.
Sort: 20763 ms.
Results from QuickSelect are valid
Code:
import scala.annotation.tailrec
import scala.concurrent.duration.Deadline
import scala.util.Random
// Case-class parameters are public vals already, so no `val` modifier is needed.
case class Point(name: String, x: Double, y: Double)
/** Finds the points nearest to a target with an in-place QuickSelect.
  *
  * @param points candidate points searched by `get`
  */
class NearestPoints(val points: Seq[Point]) {

  /** A point paired with its distance to the current target; ordered by that distance. */
  private case class PointWithDistance(p: Point, d: Double) extends Ordered[PointWithDistance] {
    def compare(that: PointWithDistance): Int = d.compareTo(that.d)
  }

  /** Euclidean distance between `p1` and `p2`. */
  def distance(p1: Point, p2: Point): Double = {
    Math.sqrt(Math.pow(p2.x - p1.x, 2) + Math.pow(p2.y - p1.y, 2))
  }

  /** Returns the `n` points closest to `target`, nearest first.
    *
    * `n` is clamped to the number of available points; a non-positive `n`
    * yields an empty result.
    */
  def get(target: Point, n: Int): Seq[Point] = {
    val pd = points.map(p => PointWithDistance(p, distance(p, target))).toArray
    val wanted = math.min(n, pd.length)
    if (wanted <= 0) Seq.empty
    else {
      // One selection pass is enough: once the wanted-th smallest element is in
      // place, everything left of it in the array is no larger than it. Only
      // that short prefix then needs sorting.
      quickselect(wanted, pd)
      pd.take(wanted).sorted.toVector.map(_.p)
    }
  }

  // In-place QuickSelect from https://gist.github.com/mooreniemi/9e45d55c0410cad0a9eb6d62a5b9b7ae
  /** Selects the `k`-th smallest element (1-based) of `xs`, reordering `xs` in place.
    *
    * @return `None` when `xs` is empty or `k` is outside `1 to xs.length`
    */
  def quickselect[T](k: Int, xs: Array[T])(implicit ev: T => Ordered[T]): Option[T] = {
    // Random index in the closed range [lo, hi].
    def randint(lo: Int, hi: Int): Int =
      lo + scala.util.Random.nextInt((hi - lo) + 1)

    @inline
    def swap(i: Int, j: Int): Unit = {
      val t = xs(i)
      xs(i) = xs(j)
      xs(j) = t
    }

    // Partitions xs(l..r) around a randomly chosen pivot and returns the
    // pivot's final index.
    def partition(l: Int, r: Int): Int = {
      swap(r, randint(l, r)) // park the pivot at the right edge
      val pivotValue = xs(r)
      var store = l
      var i = l
      while (i < r) {
        if (xs(i) < pivotValue) {
          swap(i, store)
          store += 1
        }
        i += 1
      }
      swap(r, store)
      store
    }

    @tailrec
    def quickselect0(l: Int, r: Int, target: Int): T =
      if (l == r) xs(l)
      else {
        val pivotIndex = partition(l, r)
        // Plain comparisons instead of matching on compare's result, which is
        // only specified by its sign.
        if (target == pivotIndex) xs(target)
        else if (target < pivotIndex) quickselect0(l, pivotIndex - 1, target)
        else quickselect0(pivotIndex + 1, r, target)
      }

    if (xs.isEmpty || k < 1 || k > xs.length) None
    else Some(quickselect0(0, xs.length - 1, k - 1))
  }
}
/** Times the QuickSelect lookup against a full sort on random points and
  * checks that both produce the same nearest points.
  */
object QuickSelectVsSort {
  def main(args: Array[String]): Unit = {
    val rnd = new Random(42L)
    val pointCount: Int = 3000000
    val nearestCount: Int = 10
    val targetCount: Int = 10
    println(s"Number of points: $pointCount")
    println(s"Number of targets: $targetCount")
    println(s"Number of nearest: $nearestCount")

    // Generate random points
    val points = (1 to pointCount)
      .map(x => Point(x.toString, rnd.nextDouble, rnd.nextDouble))
    // Generate target points
    val targets = (1 to targetCount).map(x => Point(s"Target$x", rnd.nextDouble, rnd.nextDouble))

    // QuickSelect run (the timing includes constructing NearestPoints)
    val quickSelectStarted = Deadline.now
    val np = new NearestPoints(points)
    val viaQuickSelect = targets.map(target => np.get(target, nearestCount))
    val quickSelectFinished = Deadline.now
    println(s"QuickSelect: ${(quickSelectFinished - quickSelectStarted).toMillis} ms.")

    // Full-sort run
    val sortStarted = Deadline.now
    val viaSort = targets.map { target =>
      points
        .sortWith((a, b) => np.distance(a, target) < np.distance(b, target))
        .take(nearestCount)
    }
    val sortFinished = Deadline.now
    println(s"Sort: ${(sortFinished - sortStarted).toMillis} ms.")

    // Validate
    assert(viaQuickSelect.length == viaSort.length)
    viaSort.zip(viaQuickSelect).foreach { case (sorted, selected) =>
      assert(sorted == selected)
    }
    println("Results from QuickSelect are valid")
  }
}
For finding the top n elements in a list you can Quicksort it and terminate early. That is, terminate at the point where you know there are n elements that are bigger than the pivot. See my implementation in the Rank class of Apache Jackrabbit (in Java though), which does just that.