I am trying to show both a scatter plot and a bar plot alongside a leaflet map. The data table, the leaflet map and the scatter plot work fine. The problem is that
the bar plot does not update when we select some points on the leaflet map, as shown in the following figure. Why does the scatter plot work fine while the bar plot does not?
How can I solve this problem? Here is the R code:
#R code
library(leaflet)
library(crosstalk)
library(DT)
library(dplyr)
library(htmltools)
library(summarywidget)
library(plotly)
#devtools::install_github("jcheng5/d3scatter")
library(d3scatter)
# Example data: one row per map point with two categorical labels, two
# numeric values, coordinates and a marker colour.
data_2 <- data.frame(
  ID = 1:8,
  Name1 = c("A", "A", "A", "C", "B", "B", "A", "B"),
  Name2 = c("a", "b", "b", "a", "b", "a", "b", "c"),
  Value1 = c(12, 43, 54, 34, 23, 77, 44, 22),
  Value2 = c(0, 1, 1, 0, 0, 0, 0, 2),
  Lat = c(51.1, 51.6, 57.3, 52.4, 56.3, 54.3, 60.4, 49.2),
  Lon = c(5, -3, -2, -1, 4, 3, -5, 0),
  lab_DB = c("blue", "blue", "blue", "green", "red", "red", "blue", "red"),
  stringsAsFactors = FALSE
)

# Wrap the data in a crosstalk SharedData object keyed by ID; every widget
# below is built from this same object.
sdf <- SharedData$new(data_2, key = ~ID)

# Leaflet map: one circle marker per row, coloured by lab_DB.
lmap <- leaflet(data = sdf) %>%
  addTiles() %>%
  addCircleMarkers(
    data = sdf,
    lng = ~Lon,
    lat = ~Lat,
    group = ~Name1,
    color = ~lab_DB,
    radius = 3
  )

# Scatter plot of Value1 against Value2.
d3 <- d3scatter(sdf, x = ~Value1, y = ~Value2, width = "100%", height = 300)

# Editable data table.
dtable <- datatable(sdf, width = "100%", editable = TRUE)

# Bar chart counting the rows per Value2 level.
ggplt <- ggplot(sdf, aes(x = factor(Value2))) +
  geom_bar(stat = "count", width = 0.7, fill = "steelblue")

# Two columns: map + scatter plot on the left, table + bar chart on the right.
# NOTE(review): three widths are supplied for two columns -- confirm the
# trailing 0 is intended.
bscols(
  widths = c(6, 6, 0),
  list(lmap, d3),
  list(dtable, ggplotly(ggplt))
)
The code below shows that the counts of #0, #1 and #2 for "Value2" are calculated correctly (they are shown in the caption of the datatable), but something is wrong with the bar plot!
#R code
library(leaflet)
library(crosstalk)
library(DT)
library(dplyr)
library(htmltools)
library(summarywidget)
library(plotly)
#devtools::install_github("jcheng5/d3scatter")
library(d3scatter)
# Example data: one row per map point with two categorical labels, two
# numeric values, coordinates and a marker colour.
data_2 <- structure(
  list(
    ID = 1:8,
    Name1 = c("A", "A", "A", "C", "B", "B", "A", "B"),
    Name2 = c("a", "b", "b", "a", "b", "a", "b", "c"),
    Value1 = c(12, 43, 54, 34, 23, 77, 44, 22),
    Value2 = c(0, 1, 1, 0, 0, 0, 0, 2),
    Lat = c(51.1, 51.6, 57.3, 52.4, 56.3, 54.3, 60.4, 49.2),
    Lon = c(5, -3, -2, -1, 4, 3, -5, 0),
    lab_DB = c("blue", "blue", "blue", "green", "red", "red", "blue", "red")
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)

# Wrap the data in a crosstalk SharedData object keyed by ID; every widget
# below is built from this same object.
sdf <- SharedData$new(data_2, key = ~ID)

# Leaflet map: one circle marker per row, coloured by lab_DB.
lmap <- leaflet(data = sdf) %>%
  addTiles() %>%
  addCircleMarkers(
    data = sdf,
    lng = ~Lon,
    lat = ~Lat,
    group = ~Name1,
    color = ~lab_DB,
    radius = 3
  )

# Bar chart counting the rows per Value2 level, converted to plotly.
ggplt <- ggplotly(
  sdf %>%
    ggplot(aes(x = factor(Value2))) +
    geom_bar(stat = "count", width = 0.7, fill = "steelblue")
)

# Scatter plot of Value1 against Value2.
d3 <- d3scatter(sdf, x = ~Value1, y = ~Value2, width = "100%", height = 300)

# Editable table whose caption shows one summary widget per Value2 level.
# The third label previously read "#1" although its widget counts
# Value2 == 2; it now reads "#2".
dtable <- datatable(
  sdf,
  width = "100%",
  editable = TRUE,
  caption = tags$caption(
    "Value2: #0: ", summarywidget(sdf, selection = ~Value2 == 0),
    " Value2: #1: ", summarywidget(sdf, selection = ~Value2 == 1),
    " Value2: #2: ", summarywidget(sdf, selection = ~Value2 == 2)
  )
)

# Layout: map + table, scatter plot + bar chart, and a hidden paragraph of
# summary widgets.
# NOTE(review): the hidden widgets are presumably there so the summarywidget
# dependencies are attached to the page -- confirm.
bscols(
  list(lmap, dtable),
  list(d3, ggplt),
  htmltools::p(
    summarywidget(sdf, selection = ~Value2 == 0, column = "Value2"),
    summarywidget(sdf, selection = ~Value2 == 1, column = "Value2"),
    summarywidget(sdf, selection = ~Value2 == 2, column = "Value2"),
    style = "display:none;"
  )
)
Here is a solution with Shiny. Again, I use a callback function on your datatable to subset the shared data sdf, so you can click on the column you are interested in and display a bar chart of it:
library(shiny)
library(leaflet)
library(crosstalk)
library(DT)
library(dplyr)
library(htmltools)
library(summarywidget)
library(plotly)
library(d3scatter)
# Example data: one row per map point with two categorical labels, two
# numeric values, coordinates and a marker colour.
data_2 <- data.frame(
  ID = 1:8,
  Name1 = c("A", "A", "A", "C", "B", "B", "A", "B"),
  Name2 = c("a", "b", "b", "a", "b", "a", "b", "c"),
  Value1 = c(12, 43, 54, 34, 23, 77, 44, 22),
  Value2 = c(0, 1, 1, 0, 0, 0, 0, 2),
  Lat = c(51.1, 51.6, 57.3, 52.4, 56.3, 54.3, 60.4, 49.2),
  Lon = c(5, -3, -2, -1, 4, 3, -5, 0),
  lab_DB = c("blue", "blue", "blue", "green", "red", "red", "blue", "red"),
  stringsAsFactors = FALSE
)

# Page layout: a 2 x 2 grid of half-width columns.
ui <- fluidPage(
  # Top row: leaflet map and d3 scatter plot.
  fluidRow(
    column(width = 6, leafletOutput("lmap")),
    column(width = 6, d3scatterOutput("scatter"))
  ),
  # Bottom row: data table and plotly bar chart; the chart column gets
  # 105px of top padding.
  fluidRow(
    column(width = 6, DTOutput("table")),
    column(
      width = 6,
      plotlyOutput("plot"),
      style = "padding-top: 105px;"
    )
  )
)
# Server logic: renders the map, scatter plot and table from one crosstalk
# SharedData object, plus a bar chart of whichever table column was clicked.
#
# input  - Shiny input list; reads `input$col`, which the table's JavaScript
#          callback sets to the index of the clicked column.
# output - Shiny output list; fills `lmap`, `scatter`, `table` and `plot`.
server <- function(input, output) {
  # A single SharedData object, keyed by ID, backs every widget.
  sdf <- SharedData$new(data_2, key = ~ID)

  output$lmap <- renderLeaflet({
    leaflet(data = sdf) %>%
      addTiles() %>%
      addCircleMarkers(
        data = sdf,
        lng = ~Lon,
        lat = ~Lat,
        group = ~Name1,
        color = ~lab_DB,
        radius = 3
      )
  })

  output$scatter <- renderD3scatter({
    d3scatter(
      sdf,
      x = ~Value1,
      y = ~Value2,
      width = "100%",
      height = 300
    )
  })

  output$table <- renderDT(
    {
      datatable(
        sdf,
        filter = "top",
        editable = TRUE,
        extensions = c("Select", "Buttons"),
        selection = "none",
        options = list(
          select = list(style = "os", items = "row"),
          dom = "Bfrtip",
          autoWidth = TRUE,
          buttons = list(
            "copy",
            list(
              extend = "collection",
              buttons = c("csv", "excel", "pdf", "print"),
              text = "Download"
            )
          )
        ),
        caption = tags$caption(
          "Value2: #0: ", summarywidget(sdf, selection = ~Value2 == 0),
          " Value2: #1: ", summarywidget(sdf, selection = ~Value2 == 1),
          " Value2: #2: ", summarywidget(sdf, selection = ~Value2 == 2)
        ),
        # Callback reporting the clicked cell's column index as `input$col`.
        # The JavaScript text is kept flush-left so the string is unchanged.
        callback = JS("table.on('click.dt', 'td', function() {
var col=table.cell(this).index().column;
var data = [col];
Shiny.onInputChange('col',data );
});")
      )
    },
    server = FALSE
  )

  # Plotly bar chart: value counts of the clicked column over the selection.
  output$plot <- renderPlotly({
    req(input$col)
    # Index 0 is the row-name column that DT displays by default; it has no
    # counterpart in the data, so pull(0) would fail. Wait for a data column.
    req(input$col >= 1)

    # `selected_` is NA for every row while no selection is active. Treat
    # that as "everything selected" so the chart shows all rows instead of
    # being empty until the user brushes the map or scatter plot.
    dat <- sdf$data(withSelection = TRUE) %>%
      filter(is.na(selected_) | selected_) %>%
      pull(input$col) %>%
      table()

    plot_ly(
      x = names(dat),
      # Plain count vector rather than a table object.
      y = as.vector(dat),
      name = "Count",
      type = "bar"
    )
  })
}
# Build the Shiny app from the `ui` object and `server` function defined above.
shinyApp(ui, server)
If you are only interested in the column Value2, then the approach below works as well:
library(shiny)
library(leaflet)
library(crosstalk)
library(DT)
library(dplyr)
library(htmltools)
library(summarywidget)
library(plotly)
library(d3scatter)
# Example data: one row per map point with two categorical labels, two
# numeric values, coordinates and a marker colour.
data_2 <- data.frame(
  ID = 1:8,
  Name1 = c("A", "A", "A", "C", "B", "B", "A", "B"),
  Name2 = c("a", "b", "b", "a", "b", "a", "b", "c"),
  Value1 = c(12, 43, 54, 34, 23, 77, 44, 22),
  Value2 = c(0, 1, 1, 0, 0, 0, 0, 2),
  Lat = c(51.1, 51.6, 57.3, 52.4, 56.3, 54.3, 60.4, 49.2),
  Lon = c(5, -3, -2, -1, 4, 3, -5, 0),
  lab_DB = c("blue", "blue", "blue", "green", "red", "red", "blue", "red"),
  stringsAsFactors = FALSE
)

# Page layout: a 2 x 2 grid of half-width columns.
ui <- fluidPage(
  # Top row: leaflet map and d3 scatter plot.
  fluidRow(
    column(width = 6, leafletOutput("lmap")),
    column(width = 6, d3scatterOutput("scatter"))
  ),
  # Bottom row: data table and plotly bar chart; the chart column gets
  # 105px of top padding.
  fluidRow(
    column(width = 6, DTOutput("table")),
    column(
      width = 6,
      plotlyOutput("plot"),
      style = "padding-top: 105px;"
    )
  )
)
# Server logic: renders the map, scatter plot and table from one crosstalk
# SharedData object, plus a bar chart of Value2 counts for the selection.
#
# input  - Shiny input list (not read directly by this function).
# output - Shiny output list; fills `lmap`, `scatter`, `table` and `plot`.
server <- function(input, output) {
  # A single SharedData object, keyed by ID, backs every widget.
  sdf <- SharedData$new(data_2, key = ~ID)

  output$lmap <- renderLeaflet({
    leaflet(data = sdf) %>%
      addTiles() %>%
      addCircleMarkers(
        data = sdf,
        lng = ~Lon,
        lat = ~Lat,
        group = ~Name1,
        color = ~lab_DB,
        radius = 3
      )
  })

  output$scatter <- renderD3scatter({
    d3scatter(
      sdf,
      x = ~Value1,
      y = ~Value2,
      width = "100%",
      height = 300
    )
  })

  output$table <- renderDT(
    {
      datatable(
        sdf,
        filter = "top",
        editable = TRUE,
        extensions = c("Select", "Buttons"),
        selection = "none",
        options = list(
          select = list(style = "os", items = "row"),
          dom = "Bfrtip",
          autoWidth = TRUE,
          buttons = list(
            "copy",
            list(
              extend = "collection",
              buttons = c("csv", "excel", "pdf", "print"),
              text = "Download"
            )
          )
        ),
        caption = tags$caption(
          "Value2: #0: ", summarywidget(sdf, selection = ~Value2 == 0),
          " Value2: #1: ", summarywidget(sdf, selection = ~Value2 == 1),
          " Value2: #2: ", summarywidget(sdf, selection = ~Value2 == 2)
        )
      )
    },
    server = FALSE
  )

  # Plotly bar chart of Value2 counts.
  output$plot <- renderPlotly({
    # `selected_` is NA for every row while no selection is active. Treat
    # that as "everything selected" so the chart starts with all rows rather
    # than being built from an empty data frame.
    dat <- sdf$data(withSelection = TRUE) %>%
      filter(is.na(selected_) | selected_)

    p <- ggplot(data = dat, aes(x = factor(Value2))) +
      geom_bar(stat = "count", width = 0.7, fill = "steelblue")
    ggplotly(p)
  })
}
# Build the Shiny app from the `ui` object and `server` function defined above.
shinyApp(ui, server)
I'm trying to leverage expss to automate, in R, some reporting that is currently done in Excel. I generally need to summarise a lot of values across some grouping (rows) relative to some fields (columns). I'm finding it difficult to get rid of the cell description.
Here's an example:
# Sample data: four animal codes, each measured in two standing states.
animals <- data.table(
  animal = rep(c(1, 2, 3, 4), each = 2),
  standing = rep(c(1, 2), times = 4),
  height = c(50, 70, 75, 105, 25, 55, 10, 20)
)

# Attach labels: a plain string labels the variable, a named vector labels
# its values.
animals <- expss::apply_labels(
  animals,
  animal = "animal",
  animal = c(cat = 1, dog = 2, turtle = 3, rat = 4),
  standing = "standing",
  standing = c(no = 1, yes = 2),
  height = "height"
)

# Show expss tables in the viewer.
expss::expss_output_viewer()

# Sum of height, with animal in columns and standing in rows.
animals %>%
  expss::tab_cells(height) %>%
  expss::tab_cols(animal) %>%
  expss::tab_rows(standing) %>%
  expss::tab_stat_sum(label = "") %>%
  expss::tab_pivot()
You will see that "height" is printed as a label. How do I get rid of it, please?
Thanks!
Assigning "|" as the label suppresses both the label and the variable name:
library(expss)
# Sample data: four animal codes, each measured in two standing states.
animals <- data.table(
  animal = rep(c(1, 2, 3, 4), each = 2),
  standing = rep(c(1, 2), times = 4),
  height = c(50, 70, 75, 105, 25, 55, 10, 20)
)

# Attach labels: a plain string labels the variable, a named vector labels
# its values.
animals <- expss::apply_labels(
  animals,
  animal = "animal",
  animal = c(cat = 1, dog = 2, turtle = 3, rat = 4),
  standing = "standing",
  standing = c(no = 1, yes = 2),
  # "|" as the variable label suppresses the label in the table output.
  height = "|"
)

# Show expss tables in the viewer.
expss::expss_output_viewer()

# Sum of height, with animal in columns and standing in rows.
animals %>%
  expss::tab_cells(height) %>%
  expss::tab_cols(animal) %>%
  expss::tab_rows(standing) %>%
  expss::tab_stat_sum(label = "") %>%
  expss::tab_pivot()
I'm new to Scala and Spark and I don't know how to do this.
I have preprocessed a CSV file, resulting in an RDD that contains lists with this format:
List("2014-01-01T23:56:06.0", NaN, 1, NaN)
List("2014-01-01T23:56:06.0", NaN, NaN, 2)
All lists have the same number of elements.
What I want to do is combine the lists that have the same first element (the timestamp). For example, I want these two example lists to produce only one List, with the following values:
List("2014-01-01T23:56:06.0", NaN, 1, 2)
Thanks for your help :)
# The code below can help you achieve your goal
// Column names shared by all three single-row DataFrames.
val columnNames = Seq("col1", "col2", "col3", "col4")

// Each RDD holds one row: the timestamp followed by three string values.
val input_rdd1 = spark.sparkContext.parallelize(
  List(("2014-01-01T23:56:06.0", "NaN", "1", "NaN")))
val input_rdd2 = spark.sparkContext.parallelize(
  List(("2014-01-01T23:56:06.0", "NaN", "NaN", "2")))
// added one more row for your data
val input_rdd3 = spark.sparkContext.parallelize(
  List(("2014-01-01T23:56:06.0", "2", "NaN", "NaN")))

val input_df1 = input_rdd1.toDF(columnNames: _*)
val input_df2 = input_rdd2.toDF(columnNames: _*)
val input_df3 = input_rdd3.toDF(columnNames: _*)

// Stack the rows, then collapse each timestamp group to one row.
// The columns are strings, so min() compares them as text; in the sample
// output below the digit strings win over "NaN".
val output_df = Seq(input_df1, input_df2, input_df3)
  .reduce((left, right) => left.union(right))
  .groupBy($"col1")
  .agg(
    min($"col2").as("col2"),
    min($"col3").as("col3"),
    min($"col4").as("col4")
  )

output_df.show
output:
+--------------------+----+----+----+
| col1|col2|col3|col4|
+--------------------+----+----+----+
|2014-01-01T23:56:...| 2| 1| 2|
+--------------------+----+----+----+
If the values after the timestamp are doubles, this can be implemented in the following way (as sachav suggests):
// Sample input: each record is a timestamp followed by Double values,
// with NaN marking a missing value.
val original = sparkContext.parallelize(
  Seq(
    List("2014-01-01T23:56:06.0", NaN, 1.0, NaN),
    List("2014-01-01T23:56:06.0", NaN, NaN, 2.0)
  )
)

val result = original
  // Key each record by its first element (the timestamp).
  .map(row => (row.head, row.tail))
  // Merge two records position by position: keep the left value unless it
  // is NaN, in which case take the right one.
  .reduceByKey { (merged, next) =>
    merged.zip(next).map { pair =>
      if (pair._1.asInstanceOf[Double].isNaN) pair._2 else pair._1
    }
  }
  // Put the timestamp back at the front of the merged values.
  .map { case (timestamp, values) => timestamp :: values }

result.foreach(println)
Output is:
List(2014-01-01T23:56:06.0, NaN, 1.0, 2.0)