plotly mapbox - create clusters in mapview - mapbox

I am building a Dash app that uses the Plotly `scattermapbox` graph object. In the current map view, each point is represented as a circle. As the user zooms in and out, I'd like to cluster the points into groups. Here's my code for reference:
import dash
from dash import dcc
import pandas as pd
df = pd.DataFrame({
'x': [1, 2, 3],
'Lat': [37.774322, 37.777035, 37.773033],
'Long': [-122.489761, -122.485555, -122.491220]
})
layout = html.Div(
dcc.Graph(id="map"),
dcc.Input(id="inp")
)
@app.callback(
    Output('map', 'figure'),
    Input('inp', 'value')
)
def fin(val):
    """Return the figure dict for the ``map`` graph.

    Builds one ``scattermapbox`` marker trace from the module-level ``df``
    plus a mapbox layout, and returns them as ``{'data': ..., 'layout': ...}``.

    Args:
        val: Current value of the ``inp`` input. It is not read by the body
            shown here.

    NOTE(review): ``cd_cols``, ``MAPBOX_KEY``, ``xxx`` and ``zoom`` are not
    defined in this snippet; presumably placeholders supplied elsewhere.
    """
    # do something
    data = []
    data.append({
        "type": "scattermapbox",
        "lat": df["Lat"],
        "lon": df["Long"],
        "name": "Location",
        "showlegend": False,
        "hoverinfo": "text",
        "mode": "markers",
        "clickmode": "event+select",
        "customdata": df.loc[:,cd_cols].values,
        "marker": {
            "symbol": "circle",
            "size": 8,
            "opacity": 0.7,
            "color": "black"
        }
    }
    )
    layout = {
        "autosize": True,
        "hovermode": "closest",
        "mapbox": {
            "accesstoken": MAPBOX_KEY,
            "bearing": 0,
            "center": {
                "lat": xxx,
                "lon": xxx
            },
            "pitch": 0,
            "zoom": zoom,
            "style": "satellite-streets",
        },
    }
    return ({'data': data, 'layout': layout})

Try using the `cluster` option of the scattermapbox trace (`plotly.graph_objects.scattermapbox.Cluster`). Hope this helps:
from dash import dcc, html, Dash, Output, Input
import pandas as pd
import plotly.graph_objects as go

app = Dash(__name__)

# Sample points to plot on the map.
df = pd.DataFrame({
    'x': [1, 2, 3],
    'Lat': [37.774322, 37.777035, 37.773033],
    'Long': [-122.489761, -122.485555, -122.491220]
})


@app.callback(
    Output('map', 'figure'),
    Input('inp', 'value')
)
def fin(val):
    """Return the figure dict for the ``map`` graph with clustering enabled.

    Builds one ``scattermapbox`` marker trace from ``df`` (with a ``cluster``
    entry) plus an OpenStreetMap-styled mapbox layout.

    Args:
        val: Current value of the ``inp`` input. It is not read by the body;
            the callback only uses the input as a trigger.

    Returns:
        A dict with ``data`` (list holding the single trace) and ``layout``.
    """
    data = []
    data.append({
        "type": "scattermapbox",
        "lat": df["Lat"],
        "lon": df["Long"],
        "name": "Location",
        "showlegend": False,
        "hoverinfo": "text",
        "mode": "markers",
        "clickmode": "event+select",
        "customdata": df.loc[:, ['Lat', 'Long']].values,
        "marker": {
            "symbol": "circle",
            "size": 8,
            "opacity": 0.7,
            "color": "black"
        },
        # Cluster nearby markers. Per the Plotly reference, points are never
        # clustered at zoom levels greater than or equal to ``maxzoom``.
        "cluster": {'maxzoom': 14}
    }
    )
    layout = {
        "autosize": True,
        "hovermode": "closest",
        "mapbox": {
            "bearing": 0,
            "center": {
                "lat": 37.774322,
                "lon": -122.489761
            },
            "pitch": 0,
            "zoom": 7,
            "style": "open-street-map",
        },
    }
    return ({'data': data, 'layout': layout})


app.layout = html.Div(
    [dcc.Graph(id="map"),
     dcc.Input(id="inp")]
)

if __name__ == '__main__':
    app.run_server(debug=True)
Notice the `cluster` parameter I added to the trace in `data`.
P.S. Make sure you are using a recent version of Dash for this to work. I used the latest version at the time, dash 2.7.1.

Related

AWS Glue pySpark Filter & Manual Mapping of Several Columns

I'm using AWS Glue Studio with DynamicFrameCollections. I created a custom transformation in which I want to filter by multiple columns and modify two columns in each row based on a static mapping list. I'm struggling to figure out the most efficient way to do this: pandas, UDFs, or something completely different?
Consider the sample dataframe:
data = [{"Category": 'A', "Subcategory": 2, "Value": 121.44, "Properties": {}},
{"Category": 'B', "Subcategory": 2, "Value": 300.01, "Properties": None},
{"Category": 'C', "Subcategory": 3, "Value": 10.99, "Properties": { "Active":True } },
{"Category": 'E', "Subcategory": 4, "Value": 33.87, "Properties": { "Active":True, "ReadOnly": False }},
{"Category": 'E', "Subcategory": 1, "Value": 11.37, "Properties": { "Active":True }}
]
df = spark.createDataFrame(data)
I need to filter and transform by Category and Subcategory. Below is the sample mapping. Each key is the category and subcategory concatenated; the first value in the array must become a new column, ActivityName, and the second value must be merged into the Properties column:
mapping= {"A2": ["EatingFood", { "Visible": True }],
"A3": ["DrinkingWater", { "Visible": False }],
"B2": ["Sleep", { "Visible": False }],
"C3": ["Exercise", { "Visible": False }],
"E4": ["Running", { "Visible": False }],
}
The output data I am expecting is:
resultingData = [{"Category": 'A', "Subcategory": 2, "ActivityName":"EatingFood", "Value": 121.44, "Properties": { "Visible": True }},
{"Category": 'B', "Subcategory": 2, "ActivityName":"Sleep", "Value": 300.01, "Properties": {"Visible": False}},
{"Category": 'C', "Subcategory": 3, "ActivityName":"Exercise", "Value": 10.99, "Properties": { "Active":True, "Visible": False } },
{"Category": 'E', "Subcategory": 4, "ActivityName":"Running", "Value": 33.87, "Properties": { "Active":True, "ReadOnly": False, "Visible": False }}
]
Note that the last data entry, E1 is missing because it was not in my mapping filter.
Is there any way to achieve this? I have a large list of items that I need to manually filter/map/transform like this. Thank you.
I got this working by transforming the dynamicframe into a dataframe and processing it using glue functions. Here's what I did:
def FilterAndMap(glueContext, dfc) -> DynamicFrameCollection:
    """Filter rows by Category+Subcategory and enrich them from a static mapping.

    Takes the first frame of ``dfc``, keeps only rows whose concatenated
    ``Category`` + ``Subcategory`` is a key of the mapping, adds an
    ``ActivityName`` column from the mapping, and replaces ``Properties`` with
    a JSON string that merges the mapping's properties with the row's own.

    Args:
        glueContext: Glue context handed to the resulting frame/collection.
        dfc: Input DynamicFrameCollection; only its first key is read.

    Returns:
        A DynamicFrameCollection with a single frame named ``"filtered"``.

    NOTE(review): the row's ``Properties`` value is passed to ``json.loads``,
    so it is presumably a JSON string at this point - confirm against the
    actual input schema.
    """
    from pyspark.sql.types import StringType, ArrayType
    from awsglue.dynamicframe import DynamicFrame
    import pyspark.sql.functions as f
    import json

    # key = Category + Subcategory; value = [ActivityName, JSON properties]
    mapping = {
        "A2": ["EatingFood", json.dumps({"Visible": True})],
        "A3": ["DrinkingWater", json.dumps({"Visible": False})],
        "B2": ["Sleep", json.dumps({"Visible": False})],
        "C3": ["Exercise", json.dumps({"Visible": False})],
        "E4": ["Running", json.dumps({"Visible": False})],
    }

    def lookup_entry(key):
        """Return the ``[name, properties-json]`` pair for a mapping key."""
        return mapping[key]

    def merge_properties(pair):
        """Merge mapping properties (``pair[0]``) with row properties (``pair[1]``)."""
        if not pair[1]:
            # Nothing on the row to merge: the mapping JSON is used as-is.
            return pair[0]
        base = json.loads(pair[0])
        # Row values come last, so they win on key collisions.
        return json.dumps({**base, **json.loads(pair[1])})

    lookup_udf = f.udf(lookup_entry, ArrayType(StringType()))
    merge_udf = f.udf(merge_properties, StringType())

    first_key = list(dfc.keys())[0]
    rows = dfc.select(first_key).toDF()

    # Drop rows whose key is not in the mapping before the lookup UDF runs.
    rows = rows.filter(f.concat("Category", "Subcategory").isin(list(mapping)))
    rows = rows.withColumn(
        "concat_col", lookup_udf(f.concat("Category", "Subcategory"))
    )
    enriched = rows.withColumn("ActivityName", rows.concat_col[0]).withColumn(
        "Properties",
        merge_udf(f.struct(rows.concat_col[1], rows.Properties)),
    )
    enriched = enriched.drop("concat_col")

    dyf_processed = DynamicFrame.fromDF(enriched, glueContext, "filtered")
    return DynamicFrameCollection({"filtered": dyf_processed}, glueContext)

Remove empty space from Streamlit Echarts

I am rendering a gauge component in the following way, within my Streamlit app:
# ECharts option for a single gauge series, rendered through st_echarts.
# min_range_val, max_range_val, value and caption come from the surrounding code.
option = {
    "series": [
        {
            "type": "gauge",
            # Dial sweeps from 180 degrees to 0 degrees.
            "startAngle": 180,
            "endAngle": 0,
            "min": min_range_val,
            "max": max_range_val,
            "center": ["40%", "40%"],
            "splitNumber": 5,
            "axisLine": {
                "lineStyle": {
                    "width": 6,
                    # [upper fraction of the range, colour] bands.
                    "color": [
                        [0.25, "#FF403F"],
                        [0.5, "#ffa500"],
                        [0.75, "#FDDD60"],
                        [1, "#64C88A"],
                    ],
                }
            },
            "pointer": {
                "icon": "path://M12.8,0.7l12,40.1H0.7L12.8,0.7z",
                "length": "12%",
                "width": 30,
                "offsetCenter": [0, "-60%"],
                "itemStyle": {"color": "auto"},
            },
            "axisTick": {
                "length": 10,
                "lineStyle": {"color": "auto", "width": 2},
            },
            "splitLine": {
                "length": 15,
                "lineStyle": {"color": "auto", "width": 5},
            },
            "axisLabel": {
                "color": "#464646",
                "fontSize": 12,
                "distance": -60,
            },
            "title": {"offsetCenter": [0, "-20%"], "fontSize": 20},
            "detail": {
                "fontSize": 30,
                "offsetCenter": [0, "0%"],
                "valueAnimation": True,
                "color": "auto",
                "formatter": "{value}%",
            },
            "data": [{"value": value, "name": caption}],
        }
    ]
}
st_echarts(option, width="450px", height="350px", key="gauge")
However, it seems like an additional empty extra white space is added at the bottom of the component (as from the following image).
How can I effectively remove that and keep only a tiny margin all around the gauge?
The following parameters must be added to the gauge series entry:
radius: '120%',
center: ['50%', '80%']
The latter one should be adjusted according to specific use cases.

How to get the GitHub data from various URLs and stored in the single mongoDB

I'm trying to collect GitHub data using Talend Big Data. I have multiple URLs, because each URL provides some of the values, and everything should be stored in a single MongoDB document. I'm going to call the URLs in the order below to get the information:
https://api.github.com/users/sample/repos
https://api.github.com/repos/sample/awesome-ciandcd/commits
https://api.github.com/repos/sample/awesome-ciandcd/contributors
Each URL returns a single JSON array, and each array has a different data format. Please suggest how to do this. I've already tried with sub-job components, but I could not get a clear job design.
My Output Should be like,
{
"gitname": "sample",
"gitType" : "User",
"password": "password",
"repoCount": 3,
"repositoryDetails": [
{
"repoName": "MergeCheckRepository",
"fileCount": 10,
"branchCount": 6,
"releaseCount": 2,
"commitsCount": 10,
"contributorsCount": 3,
"totalPulls": 1,
"mergeCount": 1,
"totalIssues": 12,
"closedIssueCount": 3,
"watchersCount": 1,
"stargazersCount": 4,
"contributorsDetails": [
{
"login": "sample",
"avatarURL": "https://avatars2.githubusercontent.com/u/30261572?v=4",
"contributions": 3
}
],
"commitDetails": [
{
"name": "sample",
"email": "sampletest@test.com",
"date": "2017-07-20T09:09:09Z"
}
]
},
{
"repoName": "Dashboard",
"filecount": 19,
"branchCount": 4,
"releasecount": 2,
"commitsCount": 5,
"contributorsCount": 3,
"totalPulls": 1,
"totalIssues": 2,
"closedIssueCount": 3,
"watchersCount": 1,
"stargazersCount": 4,
"contributorsDetails": [
{
"login": "sample",
"avatarURL": "https://avatars2.githubusercontent.com/u/30261572?v=4",
"contributions": 3
},
{
"login": "Dashboard",
"avatarURL": "https://avatars2.githubusercontent.com/u/30261572?v=4",
"contributions": 3
}
],
"commitDetails": [
{
"name": "sample",
"email": "sampletest@test.com",
"date": "2017-07-14T09:09:09Z"
},
{
"name": "Dashboard",
"email": "prakash.thangasamy@test.com",
"date": "2017-07-19T09:09:09Z"
},
{
"name": "testrepo",
"email": "test.dashboard@test.com",
"date": "2017-07-20T09:09:09Z"
}
]
}
]
}
How to achieve this one with sub-job? Is there any other way to do this?

Color only the edge of a circle mapbox gl js

I want to show the outline of a circle on an interactive map (no fill) however, the paint options in mapbox-gl-js seem limited to fill only.
https://www.mapbox.com/mapbox-gl-style-spec/#layers-circle
// Layer styles for the "geojson" source: points are drawn as filled circles,
// LineStrings as rounded black lines.
var styles = [{
    "id": 'points',
    "interactive": true,
    "type": "circle",
    "source": "geojson",
    "paint": {
        "circle-radius": 5,
        "circle-color": "#000"
    },
    // Only Point geometries are rendered by this layer.
    "filter": ["in", "$type", "Point"]
}, {
    "type": "line",
    "source": "geojson",
    "layout": {
        "line-cap": "round",
        "line-join": "round"
    },
    "paint": {
        "line-color": "#000",
        "line-width": 2.5
    },
    // Only LineString geometries are rendered by this layer.
    "filter": ["in", "$type", "LineString"]
}];
Am i missing something or is this just not possible?
This is now possible: set `circle-opacity` to 0 and draw the outline with the `circle-stroke-*` properties.
E.g.:
"paint": {
"circle-opacity": 0,
"circle-stroke-width": 1,
"circle-stroke-color": "#000"
}
Not currently possible. Current top workaround appears to be layering two circles of slightly different sizes.
https://github.com/mapbox/mapbox-gl-js/issues/1713
https://github.com/mapbox/mapbox-gl-style-spec/issues/379
I'm having trouble running custom color 'match' and having opacity controls running simultaneously.
I can get both working, but not at the same time. See code below.
// Sample rows; elements 0 and 1 are used as the Point coordinates below and
// element 2 as the category that drives both size and colour.
var coorAddresses = [ [ -75.7040473, 45.418067,"Medium" ], [-75.7040473, 45.418067, "Medium"], [-79.32930440000001, 43.7730495, "Unknown"]]
// NOTE(review): `coodAddresses` is not defined in this snippet - likely a typo
// for `coorAddresses`; confirm what $.getJSON is really meant to load.
// NOTE(review): `featureCollection` is not declared in this snippet;
// presumably an array defined elsewhere.
$.getJSON(coodAddresses, function(data) {
for(var itemIndex in data) {
// push new feature to the collection
featureCollection.push({
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [data[itemIndex][0], data[itemIndex][1]]
},
"properties": {
"size_by": data[itemIndex][2],
"color_by": data[itemIndex][2]
},
});
}
});
// Once the map has loaded, add a circle layer whose colour and radius are
// both chosen by a 'match' expression on each feature's `size_by` property.
map.on('load', function () {
    map.addLayer({
        "id": "points",
        "type": "circle",
        "source": {
            "type": "geojson",
            "data": {
                "type": "FeatureCollection",
                "features": featureCollection
            }
        },
        "paint": {
            "circle-color": [
                'match',
                ['get', 'size_by'],
                'Easy',
                '#e4f400',
                'Medium',
                '#f48a00',
                'Unknown',
                '#6af400',
                /* other */ '#00e4f4'
            ],
            "circle-radius": [
                'match',
                ['get', 'size_by'],
                'Easy',
                4,
                'Medium',
                7,
                'Unknown',
                2,
                /* other */ 1000
            ],
            // "circle-opacity": 0, // color does not show if i uncomment these lines
            // "circle-stroke-width": 1, // do not get desired 'hollow' circle unless these lines run
        }
    });
}); // closes the map.on('load', ...) callback, which was left open
Trying to troubleshoot.

Fields are empty when doing GET in elastic4s

I'm trying to implement a service in my play2 app that uses elastic4s to get a document by Id.
My document in elasticsearch:
curl -XGET 'http://localhost:9200/test/venues/3659653'
{
"_index": "test",
"_type": "venues",
"_id": "3659653",
"_version": 1,
"found": true,
"_source": {
"id": 3659653,
"name": "Salong Anna och Jag",
"description": "",
"telephoneNumber": "0811111",
"postalCode": "16440",
"streetAddress": "Kistagången 12",
"city": "Kista",
"lastReview": null,
"location": {
"lat": 59.4045675,
"lon": 17.9502138
},
"pictures": [],
"employees": [],
"reviews": [],
"strongTags": [
"skönhet ",
"skönhet ",
"skönhetssalong"
],
"weakTags": [
"Frisörsalong",
"Frisörer"
],
"reviewCount": 0,
"averageGrade": 0,
"roundedGrade": 0,
"recoScore": 0
}
}
My Service:
/** Search-side service that fetches a venue document by id from "test/venues". */
@Singleton
class VenueSearchService extends ElasticSearchService[IndexableVenue] {
  /**
   * Elastic search conf
   */
  override def path = "test/venues"

  /** Runs a get-by-id for `companyId` against `path` and returns the mapped result. */
  def getVenue(companyId: String) = {
    val resp = client.execute(
      get id companyId from path
    ).map { response =>
      // transform response to IndexableVenue
      response
    }
    resp
  }
}
If I use getFields() on the response object I get an empty object. But if I call response.getSourceAsString I get the document as json:
{
"id": 3659653,
"name": "Salong Anna och Jag ",
"description": "",
"telephoneNumber": "0811111",
"postalCode": "16440",
"streetAddress": "Kistagången 12",
"city": "Kista",
"lastReview": null,
"location": {
"lat": 59.4045675,
"lon": 17.9502138
},
"pictures": [],
"employees": [],
"reviews": [],
"strongTags": [
"skönhet ",
"skönhet ",
"skönhetssalong"
],
"weakTags": [
"Frisörsalong",
"Frisörer"
],
"reviewCount": 0,
"averageGrade": 0,
"roundedGrade": 0,
"recoScore": 0
}
As you can see, the get request omits this info:
"_index": "test",
"_type": "venues",
"_id": "3659653",
"_version": 1,
"found": true,
"_source": {}
If I try to do a regular search:
/** Looks the venue up with a query-string search on "test"->"venues" and logs the response. */
def getVenue(companyId: String) = {
  // Earlier get-by-id variant, kept for comparison:
  //get id companyId from path
  client.execute(
    search in "test"->"venues" query s"id:${companyId}"
  ).map { searchResponse =>
    Logger.info("response: "+searchResponse.toString)
  }
}
I get:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "venues",
"_id": "3659653",
"_score": 1,
"_source": {
"id": 3659653,
"name": "Salong Anna och Jag ",
"description": "",
"telephoneNumber": "0811111",
"postalCode": "16440",
"streetAddress": "Kistagången 12",
"city": "Kista",
"lastReview": null,
"location": {
"lat": 59.4045675,
"lon": 17.9502138
},
"pictures": [],
"employees": [],
"reviews": [],
"strongTags": [
"skönhet ",
"skönhet ",
"skönhetssalong"
],
"weakTags": [
"Frisörsalong",
"Frisörer"
],
"reviewCount": 0,
"averageGrade": 0,
"roundedGrade": 0,
"recoScore": 0
}
}
]
}
}
My Index Service:
/** Mixin for services that index documents into the index/type given by `path`. */
trait ElasticIndexService [T <: ElasticDocument] {
// Supplies the client used to execute the index request.
val clientProvider: ElasticClientProvider
// Target "index/type" path; provided by the implementing service.
def path: String
/**
 * Serialises `document` to JSON via the implicit `Writes` and indexes it under `id`.
 * NOTE(review): the method's own type parameter `T` shadows the trait's
 * `T <: ElasticDocument`, so the bound does not apply here - confirm this is intended.
 */
def indexInto[T](document: T, id: String)(implicit writes: Writes[T]) : Future[IndexResponse] = {
Logger.debug(s"indexing into $path document: $document")
clientProvider.getClient.execute {
index into path doc JsonSource(document) id id
}
}
}
/** DocumentSource that renders `document` as a JSON string using its implicit `Writes`. */
case class JsonSource[T](document: T)(implicit writes: Writes[T]) extends DocumentSource {
  /** JSON text of the wrapped document. */
  def json: String = Json.stringify(Json.toJson(document))
}
and indexing:
/** Index-side service for venues; the constructor parameters are elided in the original post. */
@Singleton
class VenueIndexService @Inject()(
  stuff...) extends ElasticIndexService[IndexableVenue] {
  /** Indexes the venue, using its `id` (unwrapped with `.get`) as the document id. */
  def indexVenue(indexableVenue: IndexableVenue) = {
    indexInto(indexableVenue, s"${indexableVenue.id.get}")
  }
}
Why is getFields empty when doing get?
Why is query info left out when doing getSourceAsString in a get request?
Thank you!
What you're hitting in question 1 is that you're not specifying which fields to return. By default ES will return the source and not fields (other than type and _id). See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-fields.html
I've added a test to elastic4s to show how to retrieve fields, see:
https://github.com/sksamuel/elastic4s/blob/master/src%2Ftest%2Fscala%2Fcom%2Fsksamuel%2Felastic4s%2FSearchTest.scala
I am not sure on question 2.
The fields are empty because Elasticsearch doesn't return them.
If you need fields, you must indicate in the query which fields you need.
This is your search query without fields:
search in "test"->"venues" query s"id:${companyId}"
And in this query we indicate which fields we want, in this case 'name' and 'description':
search in "test"->"venues" fields ("name","description") query s"id:${companyId}"
now you can retrieve the fields:
// Print the requested "name" field of every hit in the search response.
for (x <- response.getHits.hits()) {
  println(x.getFields.get("name").getValue)
}
You get a result from getSourceAsString in a get request because `_source` is on by default, while fields are off by default.
I hope this will help you