Scala multipolygon to h3 - scala

I'm following this tutorial https://www.databricks.com/notebooks/geomesa-h3-notebook.html and have encountered a problem. Namely the multiPolygonToH3 function fills only the first polygon.
This is my multiPolygon
MULTIPOLYGON(((37.60574672029919 55.76360304012307,37.62162539766735 55.76360304012307,37.62162539766735 55.755972617558065,37.60574672029919 55.755972617558065,37.60574672029919 55.76360304012307)), ((37.65310224248083 55.76408915667978,37.69576009465368 55.76408915667978,37.69576009465368 55.74327126367862,37.65310224248083 55.74327126367862,37.65310224248083 55.76408915667978)))
This is what it looks like on a map.
These are the resulting hexagons.
MULTIPOLYGON(((37.616284080162494 55.75749587534617,37.61691435094995 55.75693498087154,37.618044116645436 55.757001523709455,37.61854364272989 55.75762896752626,37.6179133750184 55.75818987125727,37.61678357814598 55.758123321914994)),((37.61250216381124 55.76086109517582,37.6131325179221 55.76030024270741,37.61426233450115 55.760366834316756,37.6147618281481 55.76099428489984,37.614131477109275 55.76155514662441,37.613001629350855 55.76148854850967)),((37.614131477109275 55.76155514662441,37.6147618281481 55.76099428489984,37.61589168164416 55.761060865132876,37.61639121528237 55.76168831359522,37.61576086731764 55.76224918457605,37.61463098263997 55.762182597838155)),((37.61915060665912 55.762448886267414,37.619780929173686 55.76188799089943,37.62091086223679 55.76195453049221,37.62141050397119 55.76258197195621,37.62078018453669 55.763142876580865,37.619650220287156 55.763076330484765)),((37.61452402704689 55.75799019397347,37.6151543202009 55.75742931462781,37.616284080162494 55.75749587534617,37.61678357814598 55.758123321914994,37.61615328806596 55.758684210516904,37.61502349692789 55.75861764329365)),((37.609505733638024 55.75709636476695,37.61013605529576 55.756535519062474,37.6112657356971 55.75660212040765,37.61176512561169 55.75722957396369,37.61113480702192 55.757790428924004,37.610005095449026 55.75772382107239)),((37.6112657356971 55.75660212040765,37.611896034993045 55.7560412595741,37.61302572113419 55.756107843040716,37.61352513915084 55.75673529384669,37.61289484292486 55.7572961639362,37.61176512561169 55.75722957396369)),((37.61980413649265 55.7565071390634,37.62043436254456 55.75594621433197,37.621564139701 55.75601272140809,37.62206372198287 55.7566401597189,37.62143349901089 55.757201093707,37.62030369067654 55.757134580127534)),((37.60885076372684 55.76303779302333,37.60948118187133 55.76247697669237,37.61061101814752 55.76254361056972,37.611110467458744 55.76317106728433,37.61048005238236 55.76373189287117,37.60935018492607 
55.76366525248742)),((37.60787673714715 55.756402287122526,37.60850706187088 55.75584145067377,37.60963670536077 55.7559080633903,37.61013605529576 55.756535519062474,37.609505733638024 55.75709636476695,37.60837605897875 55.75702974554347)),((37.6127639573017 55.75848447959124,37.613394272821466 55.75792361537491,37.61452402704689 55.75799019397347,37.61502349692789 55.75861764329365,37.61439318448013 55.75917851676611,37.61326339907877 55.759111931662176)),((37.610741976493436 55.761355323023274,37.611372352971664 55.760794485685565,37.61250216381124 55.76086109517582,37.613001629350855 55.76148854850967,37.61237125594268 55.762049395103396,37.611241413924255 55.76198277910719)),((37.61426233450115 55.760366834316756,37.614892666243854 55.75980596671802,37.61602248856005 55.759872540446125,37.61652201031282 55.760499988277836,37.61589168164416 55.761060865132876,37.6147618281481 55.76099428489984)),((37.617651869544275 55.76056655062884,37.618282175842005 55.76000565864372,37.619412040806516 55.76007220311244,37.61991163065526 55.76069964607004,37.619281327435566 55.76126054731169,37.6181514312885 55.76119399633914)),((37.618044116645436 55.757001523709455,37.61867436506552 55.756440614106246,37.61980413649265 55.7565071390634,37.62030369067654 55.757134580127534,37.61967344533443 55.757695498987296,37.61854364272989 55.75762896752626)),((37.6179133750184 55.75818987125727,37.61854364272989 55.75762896752626,37.61967344533443 55.757695498987296,37.620173011406074 55.75832294068309,37.61954274677258 55.758883853670625,37.618412912988916 55.75881731570574)),((37.61263306426401 55.75967279000478,37.61326339907877 55.759111931662176,37.61439318448013 55.75917851676611,37.614892666243854 55.75980596671802,37.61426233450115 55.760366834316756,37.6131325179221 55.76030024270741)),((37.61061101814752 55.76254361056972,37.611241413924255 55.76198277910719,37.61237125594268 55.762049395103396,37.61287073336441 55.762676849067915,37.61224034065779 
55.763237689786436,37.611110467458744 55.76317106728433)),((37.61400061230388 55.76274345368803,37.61463098263997 55.762182597838155,37.61576086731764 55.76224918457605,37.61626041284185 55.76287663366857,37.61563004557982 55.76343749877469,37.61450012971891 55.76337090553198)),((37.61615328806596 55.758684210516904,37.61678357814598 55.758123321914994,37.6179133750184 55.75818987125727,37.618412912988916 55.75881731570574,37.61778262598488 55.759378213564084,37.616652797933824 55.75931165771744)),((37.61589168164416 55.761060865132876,37.61652201031282 55.760499988277836,37.617651869544275 55.76056655062884,37.6181514312885 55.76119399633914,37.61752110569587 55.761754882450575,37.61639121528237 55.76168831359522)),((37.606116752318414 55.756896477847995,37.606747099401126 55.75633565652849,37.60787673714715 55.756402287122526,37.60837605897875 55.75702974554347,37.607745714960025 55.75759057611853,37.606616046045104 55.757523939016984)),((37.61739033443911 55.762943209028265,37.61802067932779 55.76238232879101,37.61915060665912 55.762448886267414,37.619650220287156 55.763076330484765,37.619019878476536 55.76363721997857,37.61788991995925 55.76357065599836)),((37.60585459958918 55.75927302583512,37.60648498526554 55.75871221626454,37.607614685350995 55.758778859873566,37.60811403093181 55.7594063195605,37.6074836483195 55.759967138386635,37.60635391706175 55.75990048827018)),((37.61563004557982 55.76343749877469,37.61626041284185 55.76287663366857,37.61739033443911 55.762943209028265,37.61788991995925 55.76357065599836,37.61725955577332 55.76413153036091,37.616129602990576 55.76406494849687)),((37.60709049268375 55.76353194246413,37.60772093319518 55.7629711412651,37.60885076372684 55.76303779302333,37.60935018492607 55.76366525248742,37.60871974748072 55.764226062942164,37.60758988576947 55.76415940467702)),((37.6092437679349 55.759472951797925,37.60987412818384 55.75891211784134,37.61100387093002 55.75897873219941,37.611503284601625 
55.75960618702042,37.610872927420715 55.760167030232836,37.609743153499586 55.76010040936834)),((37.6074836483195 55.759967138386635,37.60811403093181 55.7594063195605,37.6092437679349 55.759472951797925,37.609743153499586 55.76010040936834,37.60911277395331 55.76066123745019,37.60798300577573 55.76059459870584)),((37.607614685350995 55.758778859873566,37.60824504866594 55.75821803517271,37.60937475449627 55.75828466090315,37.60987412818384 55.75891211784134,37.6092437679349 55.759472951797925,37.60811403093181 55.7594063195605)),((37.607745714960025 55.75759057611853,37.60837605897875 55.75702974554347,37.609505733638024 55.75709636476695,37.610005095449026 55.75772382107239,37.60937475449627 55.75828466090315,37.60824504866594 55.75821803517271)),((37.61224034065779 55.763237689786436,37.61287073336441 55.762676849067915,37.61400061230388 55.76274345368803,37.61450012971891 55.76337090553198,37.61386974008436 55.76393175550664,37.612739829962116 55.763865144381136)),((37.619412040806516 55.76007220311244,37.62004232473254 55.759511295997555,37.62117219542784 55.75957782258372,37.6216718133796 55.76020526278796,37.62104153253363 55.76076617915949,37.61991163065526 55.76069964607004)),((37.61778262598488 55.759378213564084,37.618412912988916 55.75881731570574,37.61954274677258 55.758883853670625,37.62004232473254 55.759511295997555,37.619412040806516 55.76007220311244,37.618282175842005 55.76000565864372)),((37.61302572113419 55.756107843040716,37.613655998067536 55.75554696707846,37.61478568994617 55.75561353266617,37.61528513606343 55.756240980721465,37.61465486220202 55.75680186593983,37.61352513915084 55.75673529384669)),((37.6055924171585 55.761649552852475,37.60622284143312 55.76108875503284,37.60735260386488 55.76115541165676,37.60785197319708 55.76178287260769,37.607221551986505 55.76234367968293,37.60609175837908 55.76227701655155)),((37.60572351208684 55.76046129196552,37.60635391706175 55.75990048827018,37.6074836483195 
55.759967138386635,37.60798300577573 55.76059459870584,37.60735260386488 55.76115541165676,37.60622284143312 55.76108875503284)),((37.616414864850285 55.75630753493492,37.61704511634632 55.755746634588114,37.61817485086661 55.75581317092161,37.61867436506552 55.756440614106246,37.618044116645436 55.757001523709455,37.61691435094995 55.75693498087154)),((37.60546131480352 55.76283780849497,37.60609175837908 55.76227701655155,37.607221551986505 55.76234367968293,37.60772093319518 55.7629711412651,37.60709049268375 55.76353194246413,37.60596066789893 55.763465272825286)),((37.61465486220202 55.75680186593983,37.61528513606343 55.756240980721465,37.616414864850285 55.75630753493492,37.61691435094995 55.75693498087154,37.616284080162494 55.75749587534617,37.6151543202009 55.75742931462781)),((37.61100387093002 55.75897873219941,37.61163420881477 55.75841788311279,37.6127639573017 55.75848447959124,37.61326339907877 55.759111931662176,37.61263306426401 55.75967279000478,37.611503284601625 55.75960618702042)),((37.60898177255083 55.76184951785901,37.60961217139562 55.76128869565234,37.610741976493436 55.761355323023274,37.611241413924255 55.76198277910719,37.61061101814752 55.76254361056972,37.60948118187133 55.76247697669237)),((37.61237125594268 55.762049395103396,37.613001629350855 55.76148854850967,37.614131477109275 55.76155514662441,37.61463098263997 55.762182597838155,37.61400061230388 55.76274345368803,37.61287073336441 55.762676849067915)),((37.607221551986505 55.76234367968293,37.60785197319708 55.76178287260769,37.60898177255083 55.76184951785901,37.60948118187133 55.76247697669237,37.60885076372684 55.76303779302333,37.60772093319518 55.7629711412651)),((37.61439318448013 55.75917851676611,37.61502349692789 55.75861764329365,37.61615328806596 55.758684210516904,37.616652797933824 55.75931165771744,37.61602248856005 55.759872540446125,37.614892666243854 55.75980596671802)),((37.61967344533443 55.757695498987296,37.62030369067654 
55.757134580127534,37.62143349901089 55.757201093707,37.62193309318218 55.757828532649484,37.62130285092005 55.7583894607659,37.620173011406074 55.75832294068309)),((37.61113480702192 55.757790428924004,37.61176512561169 55.75722957396369,37.61289484292486 55.7572961639362,37.613394272821466 55.75792361537491,37.6127639573017 55.75848447959124,37.61163420881477 55.75841788311279)),((37.61576086731764 55.76224918457605,37.61639121528237 55.76168831359522,37.61752110569587 55.761754882450575,37.61802067932779 55.76238232879101,37.61739033443911 55.762943209028265,37.61626041284185 55.76287663366857)),((37.60735260386488 55.76115541165676,37.60798300577573 55.76059459870584,37.60911277395331 55.76066123745019,37.60961217139562 55.76128869565234,37.60898177255083 55.76184951785901,37.60785197319708 55.76178287260769)),((37.61752110569587 55.761754882450575,37.6181514312885 55.76119399633914,37.619281327435566 55.76126054731169,37.619780929173686 55.76188799089943,37.61915060665912 55.762448886267414,37.61802067932779 55.76238232879101)),((37.61602248856005 55.759872540446125,37.616652797933824 55.75931165771744,37.61778262598488 55.759378213564084,37.618282175842005 55.76000565864372,37.617651869544275 55.76056655062884,37.61652201031282 55.760499988277836)),((37.60911277395331 55.76066123745019,37.609743153499586 55.76010040936834,37.610872927420715 55.760167030232836,37.611372352971664 55.760794485685565,37.610741976493436 55.761355323023274,37.60961217139562 55.76128869565234)),((37.60937475449627 55.75828466090315,37.610005095449026 55.75772382107239,37.61113480702192 55.757790428924004,37.61163420881477 55.75841788311279,37.61100387093002 55.75897873219941,37.60987412818384 55.75891211784134)),((37.61289484292486 55.7572961639362,37.61352513915084 55.75673529384669,37.61465486220202 55.75680186593983,37.6151543202009 55.75742931462781,37.61452402704689 55.75799019397347,37.613394272821466 55.75792361537491)),((37.60963670536077 55.7559080633903,37.610267007724666 
55.75534721181258,37.61139665695617 55.755413806651326,37.611896034993045 55.7560412595741,37.6112657356971 55.75660212040765,37.61013605529576 55.756535519062474)),((37.610872927420715 55.760167030232836,37.611503284601625 55.75960618702042,37.61263306426401 55.75967279000478,37.6131325179221 55.76030024270741,37.61250216381124 55.76086109517582,37.611372352971664 55.760794485685565)),((37.605985679666155 55.75808475446225,37.606616046045104 55.757523939016984,37.607745714960025 55.75759057611853,37.60824504866594 55.75821803517271,37.607614685350995 55.758778859873566,37.60648498526554 55.75871221626454)),((37.619281327435566 55.76126054731169,37.61991163065526 55.76069964607004,37.62104153253363 55.76076617915949,37.62154116237643 55.761393619993854,37.62091086223679 55.76195453049221,37.619780929173686 55.76188799089943)),((37.61954274677258 55.758883853670625,37.620173011406074 55.75832294068309,37.62130285092005 55.7583894607659,37.6218024569813 55.7590169003395,37.62117219542784 55.75957782258372,37.62004232473254 55.759511295997555)))
And what they look like on a map.
In the script itself geometry.getNumGeometries returns 2 which means it identified all polygons.
I thought it might be because of the getGeometryN(0) in the
points = List(
geometry
.getGeometryN(0)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*)
I also tried to manually concatenate getGeometryN(0) and getGeometryN(1), but got the same hexagons:
points = List(
geometry
.getGeometryN(0)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*) ++
List(
geometry
.getGeometryN(1)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*)
What am I doing wrong?
EDIT
If I replace points with this:
points = List(
geometry
.getGeometryN(0)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*) ++
List(
geometry
.getGeometryN(1)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*)
And holes with
holes = (2 until numGeometries).toList.map(n => {
List(
geometry
.getGeometryN(n)
.getCoordinates
.toList
.map(coord => new GeoCoord(coord.y, coord.x)): _*).asJava
})
I get 2 sets of hexagons. But what if I don't know what my multipolygon looks like? I can't manually set every property.

Related

How to convert SparseVector to MatrixEntry

this is my SparseVector:
mx.foreach(println)
SparseVector((0,1.0), (1,0.0), (2,0.0), (3,0.0), (4,0.0), (5,0.0), (6,0.0), (7,0.0), (8,0.0), (9,0.0), (10,0.0), (11,0.0), (12,0.0))
SparseVector((0,0.0), (1,1.0), (2,0.0), (3,0.0), (4,0.0), (5,0.0), (6,0.0), (7,0.0), (8,0.0), (9,0.0), (10,0.0), (11,0.0), (12,0.0))
SparseVector((0,0.0), (1,0.0), (2,1.0), (3,0.0), (4,0.0), (5,0.0), (6,0.0), (7,0.0), (8,0.0), (9,0.0), (10,0.0), (11,0.0), (12,0.0))
SparseVector((0,0.0), (1,0.0), (2,0.0), (3,1.0), (4,0.0), (5,0.0), (6,0.0), (7,0.0), (8,0.0), (9,0.0), (10,0.0), (11,0.0), (12,0.0))
I want to convert these to MatrixEntry. How can I do that?
EDIT: given that mx's type is RDD[SparseVector[Double]]:
// Pair every vector with its row number, then emit one MatrixEntry per (row, column, value) cell.
val matrixEntries: RDD[MatrixEntry] = mx.zipWithIndex.flatMap { case (row, rowIdx) =>
  val cells = row.toArray.zipWithIndex
  cells.map { case (value, colIdx) => MatrixEntry(rowIdx, colIdx, value) }
}

How to create data frames from rdd of word's list

I have gone through all the answers on Stack Overflow and elsewhere on the internet, but nothing works. I have this RDD of a list of words:
tweet_words=['tweet_text',
'RT',
'#ochocinco:',
'I',
'beat',
'them',
'all',
'for',
'10',
'straight',
'hours']
**What i have done till now:**
Df =sqlContext.createDataFrame(tweet_words,["tweet_text"])
and
tweet_words.toDF(['tweet_words'])
**ERROR**:
TypeError: Can not infer schema for type: <class 'str'>
Looking at the above code, you are trying to convert a list to a DataFrame. A good StackOverflow link on this is: https://stackoverflow.com/a/35009289/1100699.
Saying this, here's a working version of your code:
from pyspark.sql import Row
# Create RDD
tweet_wordsList = ['tweet_text', 'RT', '#ochocinco:', 'I', 'beat', 'them', 'all', 'for', '10', 'straight', 'hours']
tweet_wordsRDD = sc.parallelize(tweet_wordsList)
# Load each word and create row object
wordRDD = tweet_wordsRDD.map(lambda l: l.split(","))
tweetsRDD = wordRDD.map(lambda t: Row(tweets=t[0]))
# Infer schema (using reflection)
tweetsDF = tweetsRDD.toDF()
# show data
tweetsDF.show()
HTH!

OutOfMemoryError: Java heap space and memory variables in Spark

I have been trying to execute a scala program and the output somehow always seems to be something like this:
15/08/17 14:13:14 ERROR util.Utils: uncaught error in thread SparkListenerBus, stopping SparkContext
java.lang.OutOfMemoryError: Java heap space
at java.lang.AbstractStringBuilder.<init>(AbstractStringBuilder.java:64)
at java.lang.StringBuilder.<init>(StringBuilder.java:97)
at com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:339)
at com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2344)
at org.json4s.jackson.JsonMethods$class.compact(JsonMethods.scala:32)
at org.json4s.jackson.JsonMethods$.compact(JsonMethods.scala:44)
at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$1.apply(EventLoggingListener.scala:143)
at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$1.apply(EventLoggingListener.scala:143)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.EventLoggingListener.logEvent(EventLoggingListener.scala:143)
at org.apache.spark.scheduler.EventLoggingListener.onJobStart(EventLoggingListener.scala:169)
at org.apache.spark.scheduler.SparkListenerBus$class.onPostEvent(SparkListenerBus.scala:34)
at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:56)
at org.apache.spark.util.AsynchronousListenerBus.postToAll(AsynchronousListenerBus.scala:37)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(AsynchronousListenerBus.scala:79)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1215)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1.run(AsynchronousListenerBus.scala:63)
or like this
15/08/19 11:45:11 ERROR util.Utils: uncaught error in thread SparkListenerBus, stopping SparkContext
java.lang.OutOfMemoryError: GC overhead limit exceeded
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider$Impl.createInstance(DefaultSerializerProvider.java:526)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider$Impl.createInstance(DefaultSerializerProvider.java:505)
at com.fasterxml.jackson.databind.ObjectMapper._serializerProvider(ObjectMapper.java:2846)
at com.fasterxml.jackson.databind.ObjectMapper.writeValue(ObjectMapper.java:1902)
at com.fasterxml.jackson.core.base.GeneratorBase.writeObject(GeneratorBase.java:280)
at com.fasterxml.jackson.core.JsonGenerator.writeObjectField(JsonGenerator.java:1255)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:22)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:7)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:128)
at com.fasterxml.jackson.databind.ObjectMapper.writeValue(ObjectMapper.java:1902)
at com.fasterxml.jackson.core.base.GeneratorBase.writeObject(GeneratorBase.java:280)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:17)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:7)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:128)
at com.fasterxml.jackson.databind.ObjectMapper.writeValue(ObjectMapper.java:1902)
at com.fasterxml.jackson.core.base.GeneratorBase.writeObject(GeneratorBase.java:280)
at com.fasterxml.jackson.core.JsonGenerator.writeObjectField(JsonGenerator.java:1255)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:22)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:7)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:128)
at com.fasterxml.jackson.databind.ObjectMapper.writeValue(ObjectMapper.java:1902)
at com.fasterxml.jackson.core.base.GeneratorBase.writeObject(GeneratorBase.java:280)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:17)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:7)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:128)
at com.fasterxml.jackson.databind.ObjectMapper.writeValue(ObjectMapper.java:1902)
at com.fasterxml.jackson.core.base.GeneratorBase.writeObject(GeneratorBase.java:280)
at com.fasterxml.jackson.core.JsonGenerator.writeObjectField(JsonGenerator.java:1255)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:22)
at org.json4s.jackson.JValueSerializer.serialize(JValueSerializer.scala:7)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:128)
at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:2881)
Are these errors on the driver or executor side?
I am a bit confused with the memory variables that Spark uses. My current settings are
spark-env.sh
export SPARK_WORKER_MEMORY=6G
export SPARK_DRIVER_MEMORY=6G
export SPARK_EXECUTOR_MEMORY=4G
spark-defaults.conf
# spark.driver.memory 6G
# spark.executor.memory 4G
# spark.executor.extraJavaOptions ' -Xms5G -Xmx5G '
# spark.driver.extraJavaOptions ' -Xms5G -Xmx5G '
Do I need to uncomment any of the variables contained in spark-defaults.conf, or are they redundant?
Is for example setting SPARK_WORKER_MEMORY equivalent to setting the spark.executor.memory?
Part of my scala code where it stops after a few iterations:
val filteredNodesGroups = connCompGraph.vertices.map{ case(_, array) => array(pagerankIndex) }.distinct.collect
for (id <- filteredNodesGroups){
val clusterGraph = connCompGraph.subgraph(vpred = (_, attr) => attr(pagerankIndex) == id)
val pagerankGraph = clusterGraph.pageRank(0.15)
val completeClusterPagerankGraph = clusterGraph.outerJoinVertices(pagerankGraph.vertices) {
case (uid, attrList, Some(pr)) =>
attrList :+ ("inClusterPagerank:" + pr)
case (uid, attrList, None) =>
attrList :+ ""
}
val sortedClusterNodes = completeClusterPagerankGraph.vertices.toArray.sortBy(_._2(pagerankIndex + 1))
println(sortedClusterNodes(0)._2(1) + " with rank: " + sortedClusterNodes(0)._2(pagerankIndex + 1))
}
Many questions disguised as one. Thank you in advance!
I'm not a Spark expert, but there is a line that seems suspicious to me:
val filteredNodesGroups = connCompGraph.vertices.map{ case(_, array) => array(pagerankIndex) }.distinct.collect
Basically, by using the collect method, you are getting back all the data from your executors (before even processing it) to the driver. Do you have any idea about the size of this data ?
In order to fix this, you should proceed in a more functional way. To extract the distinct values, you could for example use a groupBy and map :
val pairs = connCompGraph.vertices.map{ case(_, array) => array(pagerankIndex) }
pairs.groupBy(_./* the property to group on */)
.map { case (_, arrays) => /* map function */ }
Regarding the collect, there should be a way to sort each partition and then to return the (processed) result to the driver. I would like to help you more but I need more information about what you are trying to do.
UPDATE
After digging a little bit, you could sort your data using shuffling as described here
UPDATE
So far, I've tried to avoid the collect, and to avoid bringing the data back to the driver as much as possible, but I have no idea how to solve this:
val filteredNodesGroups = connCompGraph.vertices.map{ case(_, array) => array(pagerankIndex) }.distinct()
val clusterGraphs = filteredNodesGroups.map { id => connCompGraph.subgraph(vpred = (_, attr) => attr(pagerankIndex) == id) }
val pageRankGraphs = clusterGraphs.map(_.pageRank(0.15))
Basically, you need to join two RDD[Graph[Array[String], String]], but I don't know what key to use, and secondly this would necessarily return an RDD of RDD (I don't know if you can even do that). I'll try to find something later this day.

scala sortWith: negative numbers are not getting sorted

val reslist = List(200.0,-100.00,50.80,-400.83, 800.003,-6.513214114672146E85, -1.2425461624057028E86, -4.7624471630469706E86, -3.6046499228286203E86, 0.0, -8.833653923554989E85, 0.000, -4.795843631190487E85, -5.34142100270833E86, -3.48087737474366E85, -2.811146396971388E86, -6.923235225460886E86, -6.513214114672146E85, 0.00000, -1.2425461624057028E86, -7.073704018243951E85, -9.633244016491059E86, -1.1418901590222212E86, -2.115257701350766E86, -1.1418901590222212E86, -3.48087737474366E85,-1.0676381955303372E86,500.56, 2.900556,400.56,-48956.00,4509.0005);
val weightlistzi = reslist.zipWithIndex
// List((200.0,0), (-100.0,1), (50.8,2), (-400.83,3), (800.003,4), (-6.513214114672146E85,5), (-1.2425461624057028E86,6), (-4.7624471630469706E86,7), (-3.6046499228286203E86,8), (0.0,9), (-8.833653923554989E85,10), (0.0,11), (-4.795843631190487E85,12), (-5.34142100270833E86,13), (-3.48087737474366E85,14), (-2.811146396971388E86,15), (-6.923235225460886E86,16), (-6.513214114672146E85,17), (0.0,18), (-1.2425461624057028E86,19), (-7.073704018243951E85,20), (-9.633244016491059E86,21), (-1.1418901590222212E86,22), (-2.115257701350766E86,23), (-1.1418901590222212E86,24), (-3.48087737474366E85,25), (-1.0676381955303372E86,26), (500.56,27), (2.900556,28), (400.56,29), (-48956.0,30), (4509.0005,31))
// I am sorting it here.
val resultlist = weightlistzi.sortWith { (x: (Double,Int), y: (Double,Int)) => x._1 > y._1 }
Here is the resulting list. As you can see, -100.0, -400.83, -48956.0 are occurring before -3 and the rest....
// List[(Double, Int)] = List((4509.0005,31), (800.003,4), (500.56,27), (400.56,29), (200.0,0), (50.8,2), (2.900556,28), (0.0,9), (0.0,11), (0.0,18), (-100.0,1), (-400.83,3), (-48956.0,30), (-3.48087737474366E85,14), (-3.48087737474366E85,25), (-4.795843631190487E85,12), (-6.513214114672146E85,5), (-6.513214114672146E85,17), (-7.073704018243951E85,20), (-8.833653923554989E85,10), (-1.0676381955303372E86,26), (-1.1418901590222212E86,22), (-1.1418901590222212E86,24), (-1.2425461624057028E86,6), (-1.2425461624057028E86,19), (-2.115257701350766E86,23), (-2.811146396971388E86,15), (-3.6046499228286203E86,8), (-4.7624471630469706E86,7), (-5.34142100270833E86,13), (-6.923235225460886E86,16), (-9.633244016491059E86,21))
There is an E character in your numbers. This is scientific notation.
-1.1E86 means -1.1 * (10^86), and -1.1 * (10^86) is less than -400.83 (-4.0083 * 10^2), so your list is in fact sorted correctly in descending order.

JFreeChart histogram with varying bin sizes

How would I go about creating a histogram plot with JFreeChart, where the bins are exponentially growing, e.g. having intervals [0, 0.1), [0.1, 0.2), [0.2, 0.4), [0.4, 0.8) etc.
There is a HistogramBin class but it only seems to be used by HistogramDataset, which in turn doesn't seem to support varying bin sizes.
I can use a regular XY bar chart with pre-binning. Here is an example using scala-chart and a few extra numeric operations:
import de.sciss.numbers.Implicits._
import scalax.chart.{ChartFactories, Charting}
import Charting._
/** Builds a bar chart of `durations` after pre-binning them into signed integer bins.
  *
  * Each value is mapped to a bin index by `bin`, the occurrences of every index are
  * counted, and the resulting (index, count) pairs are plotted as an XY bar chart.
  *
  * @param durations the raw values to bin
  * @return the chart configured with axis labels "bin" / "frequency" and no legend
  */
def mkHisto(durations: Seq[Double]): Chart[_] = {
  // Bin boundaries: ..., -0.54, -0.18, -0.06, +0.06, +0.18, +0.54, ....
  // The index magnitude is clipped to 0..10 and carries the sign of the input.
  def bin(dur: Double) =
    ((dur.abs / 0.06).log / 3.log + 1).toInt.clip(0, 10) * dur.signum
  val binned = durations.map(bin).counted.toSeq.sortBy(_._1)
  val ds = binned.toXYSeriesCollection()
  val ch = ChartFactories.XYBarChart(ds,
    domainAxisLabel = "bin", rangeAxisLabel = "frequency", legend = false)
  // Return the configured chart itself; creating a second chart from `ds` here
  // would silently drop the axis labels and the legend setting.
  ch
}
/** Enriches any iterable collection with a `counted` operation. */
implicit class RichIterable[A, CC[~] <: Iterable[~]](it: CC[A]) {
  /** Tallies how many times each distinct element occurs in the collection.
    *
    * The tally starts from a map whose default value is 0, so the first
    * occurrence of an element increments from zero.
    */
  def counted: Map[A, Int] = {
    val emptyTally = Map.empty[A, Int].withDefaultValue(0)
    it.foldLeft(emptyTally) { (tally, elem) =>
      tally.updated(elem, tally(elem) + 1)
    }
  }
}
Without further plot/renderer customisation, this looks like this, given particular input data:
Test data:
val x = Vector(22.23312925170068, 15.460136054421769, 10.453968253968254, 0.7362131519274376, 1.9141043083900227, -80.39263038548752, 5.153378684807256, 44.46625850340136, -0.2944897959183673, -192.94580498866213, 7.566507936507937, -295.0935827664399, -11.909251700680272, -16.433492063492064, -15.60591836734694, 23.64532879818594, -22.69950113378685, 108.23530612244897, 2.352925170068027, 27.058820861678004, -22.352925170068026, -4.705873015873016, -4.5397278911564625, -1.7646938775510204, 81.76471655328798, -6.470589569160998, 30.967732426303854, 13.76344671201814, 2.867392290249433, 26.379931972789116, -17.51825396825397, 23.746961451247167, -30.410430839002267, -13.722630385487529, -146.28009070294786, -130.3674603174603, 1.4976643990929706, 0.936031746031746, 2.3244444444444445, -61.95258503401361, -50.051859410430836, -47.48528344671202, -69.55430839002267, -119.96390022675737, -3.2929931972789115, -3.2929931972789115, -5.447936507936508, -18.498798185941045, -18.78934240362812, 18.498798185941045, 58.11138321995465, 33.510884353741496, 44.55206349206349, 30.02421768707483, -113.5108843537415, 27.40918367346939, -1.1934920634920634, -120.73696145124717, 5.617437641723356, 5.617437641723356, 3.8740816326530614, 3.8740816326530614, -126.38659863945578, 17.820839002267572, 18.5956462585034, -26.731224489795917, -16.920226757369615, -52.91142857142857, -24.26716553287982, 7.346938775510204, -11.354353741496599, -11.13172335600907, -1.7810657596371882, -0.7305215419501134, -117.56074829931973, -2.240249433106576, 0.12176870748299319, -113.44839002267574, -13.12498866213152, -86.2028798185941, -5.47891156462585, -2.240249433106576, -0.09741496598639456, -1.168843537414966, -10.227256235827664, -4.577936507936508, -51.70156462585034, -0.4870068027210884, -0.7548752834467121, -5.405827664399093, 6.185079365079365, -29.959365079365078, -1.6558503401360545, -7.183424036281179, -1.0714285714285714, -16.747414965986394, -2.897732426303855, -0.9740362811791383, 
-63.77263038548753, -8.098163265306123, -18.388820861678006, -6.699387755102041, 17.079750566893424, 33.644172335600906, -37.55328798185941, -6.772993197278912, 28.564421768707483, -3.7546031746031745, -14.47281179138322, -5.963197278911565, 26.282222222222224, 16.35233560090703, 11.564761904761905, 12.0, 9.015555555555556, -13.994920634920636, -4.041451247165533, -4.103628117913832, -9.994149659863945, -3.35750566893424, -0.7461224489795918, -0.6839455782312925, -21.39922902494331, -1.7409297052154196, -4.414512471655329, -2.4248526077097505, -5.4714965986394555, -32.42954648526077, -2.549206349206349, 11.626938775510204, -18.03108843537415, -7.150272108843537, 2.7357596371882087, -0.06217687074829932, 0.18653061224489795, -2.3005215419501135, -39.17097505668934, -0.3730612244897959, 0.6217687074829932, 1.181360544217687, 1.1191836734693879, 0.13990929705215419, 0.09326530612244897, 0.09326530612244897, 0.21759637188208616, 0.1554421768707483, 0.1554421768707483, 46.169160997732426, 1.1940136054421768, 60.49750566893424, 6.965192743764172, -5.572154195011338, 19.041315192743763, 109.88430839002268, 54.30657596371882, -5.252517006802721, 202.51149659863947, 4.96172335600907, -50.526326530612245, -1.1271655328798187, -10.404625850340135, -31.21387755102041, -1.7774603174603174, -3.8150340136054424, -4.638730158730159, -86.35839002267574, -14.132947845804988, 6.936417233560091, -107.85619047619048, 0.8880498866213152, -9.766190476190475, 3.8756009070294786, 65.26315192743765, -283.5050793650794, -0.5741723356009071, -0.1953061224489796, 0.5524943310657596, -37.47823129251701, -21.6, -6.4, -19.771428571428572, -100.0, -22.514285714285716, 18.742857142857144, -16.9443537414966, -2.0285714285714285, 13.057142857142857, -5.457142857142857, -15.342857142857143, 11.257142857142858, -15.480544217687076, -8.657142857142857, 1.7428571428571429, 3.914285714285714, 4.857142857142857, -8.485714285714286, 0.8857142857142857, -0.7714285714285715, -0.6857142857142857, 
92.43299319727892, -5.503673469387755, -12.972993197278912, -8.255510204081633, -181.06972789115648, -180.77814058956915, -25.25798185941043, -25.25798185941043, -30.27027210884354, -31.449637188208616, -13.759229024943311, -11.007392290249433, -2.358730158730159, -1.2226757369614512, 9.106235827664399, -307.3968934240363, 2.0236054421768706, -10.522766439909297, -9.409773242630385, -8.600340136054422, -8.39798185941043, -7.48734693877551, 44.11467120181406, 66.37437641723356, 58.988185941043085, 92.58009070294784, 62.7318820861678, -82.63780045351474, -76.79596371882086, -65.26138321995465, -103.40639455782313, -139.21777777777777, 77.70657596371882, 93.18718820861677, 93.18716553287982, -31.044104308390022, 33.69308390022676, -32.37773242630386, 50.084331065759635, 48.971337868480724, -5.261383219954649, -19.189886621315193, 1.9224263038548752, 0.8094557823129251, -1.5177097505668935, -1.6188888888888888, -0.6078911564625851, 0.089297052154195, -21.281791383219954, -2.2843990929705216, -1.9088662131519274, 1.3155555555555556, -2.734308390022676, -26.930340136054422, 135.16766439909298, -112.22884353741496, 21.358548752834466, -31.16079365079365, -16.921768707482993, 30.02580498866213, -33.637142857142855, -21.874467120181407, -0.5675056689342404, 90.3869387755102, -35.49442176870748, -3.0954421768707485, 3.0954421768707485, -15.033968253968254, -0.6792290249433106, 0.9962131519274376, 14.671700680272108, -6.520748299319728, 0.8603854875283447, 4.709433106575964, 7.290566893424036, -25.17736961451247, -28.8, -3.079251700680272, -0.9962358276643991, 145.2679365079365, -113.0264172335601, 41.660385487528345, -1.0868027210884355, -119.54716553287982, 22.0981179138322, -279.7477551020408, -47.021519274376416, 3.227641723356009, 3.006802721088435, -0.9173242630385487, 1.2910430839002267, -153.88185941043085, 1.0074603174603174, 2.0149433106575962, -1.5671655328798186, 0.5596825396825397, -154.90068027210884, -34.02984126984127, -34.25371882086168, -41.082086167800455, 
-37.16417233560091, -0.11192743764172336, -37.879886621315194, -24.738798185941043, -2.4626757369614514, -19.25374149659864, -24.40299319727891, -6.380589569160998, -0.27984126984126984, -0.3078231292517007, -0.13990929705215419, -0.13990929705215419, -1.063424036281179, -1.7910430839002267, -0.12594104308390022, 0.30784580498866215, -0.09938775510204081, -0.10480725623582766, -0.10480725623582766, -39.196507936507935, 18.213061224489795, 13.785895691609978, -6.266326530612245, -0.8355102040816327, 14.830272108843538, -2.786077097505669, -9.43718820861678, 0.23512471655328798, 0.2821315192743764, -26.80249433106576, 0.721655328798186, -84.25963718820861, 27.050362811791384, 27.050362811791384)
Test:
mkHisto(x).show()
What remains to be done is customising x-axis labelling. The following addition to mkHisto changes colours and puts the proper x-axis labels:
import java.awt.Color
import org.jfree.chart.axis.{NumberTickUnit, NumberAxis}
import org.jfree.chart.plot.ValueMarker
import org.jfree.chart.renderer.xy.{StandardXYBarPainter, XYBarRenderer}
// Maps a bin index back to an axis value: 0.0 for index 0, otherwise
// 0.06 * 3^(|idx| - 1) carrying the sign of idx (e.g. 1 -> 0.06, 2 -> 0.18, -1 -> -0.06).
// NOTE(review): assumes `.log` / `.exp` (de.sciss.numbers implicits) are natural log and exp — confirm.
def lim(idx: Int) =
if (idx == 0) 0.0 else ((idx.abs - 1) * 3.log).exp * 0.06 * idx.signum
val plot = ch.plot
plot.getRenderer.asInstanceOf[XYBarRenderer].setBarPainter(new StandardXYBarPainter())
plot.addDomainMarker(new ValueMarker(0))
plot.setBackgroundPaint (Color.white )
plot.setDomainGridlinePaint (Color.lightGray)
plot.setRangeGridlinePaint (Color.lightGray)
plot.getRenderer.setSeriesPaint(0, Color.darkGray )
val xAxis = plot.getDomainAxis.asInstanceOf[NumberAxis]
xAxis.setTickUnit(new NumberTickUnit(1) {
override def valueToString(bin: Double) = {
val sig = if (bin < 0) "\u2212" else if (bin > 0) "+" else ""
f"""$sig${lim(bin.toInt).abs}%1.2f""""
}
})
xAxis.setVerticalTickLabels(true)
val yAxis = plot.getRangeAxis
yAxis.setStandardTickUnits(NumberAxis.createIntegerTickUnits())
Final result: