mutlievel data simulation using simglm: how to i simulate random effects at level 1 - simulation

I want to add random effects at level 1. Below is the working code with level 2 simulated.
I have two questions i hope folks can help with
How do i get a reasonable estimate of level 2 variance (assuming i have sample data). Can i just square the between person SD on the dv?
how do simulate level 1 variance and how do i determine a reasonable value at level 1.
I've tried: randomeffect = list(int_neighborhood = list(variance = 8, var_level = 2),
weight= list(variance = 8, var_level = 1 ))
but that kicks an error
This code works without level 1
ctrl <- lmeControl(opt='optim');
sim_arguments <- list(
formula = y ~ 1 + weight + age + sex + (1 | neighborhood),
reg_weights = c(4, -0.03, 0.2, 0.33),
fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
age = list(var_type = 'ordinal', levels = 30:60),
sex = list(var_type = 'factor', levels = c('male', 'female'))),
randomeffect = list(int_neighborhood = list(variance = 8, var_level = 2)),
sample_size = list(level1 = 62, level2 = 60)
)
nested_data <- sim_arguments %>%
simulate_fixed(data = NULL, .) %>%
simulate_randomeffect(sim_arguments) %>%
simulate_error(sim_arguments) %>%
generate_response(sim_arguments)
RandomIntercept <- lme(fixed= y ~1 + weight + age + sex ,
random= ~ 1 | neighborhood,
correlation = corAR1(),
data=nested_data,
control=ctrl,
na.action=na.exclude)
summary(RandomIntercept)
RandomSlope <-lme(fixed= y ~1 + weight + age + sex ,
random= ~ 1 +weight| neighborhood,
correlation = corAR1(),
data=nested_data,
control=ctrl,
na.action=na.exclude)
summary(RandomSlope)
anova(RandomIntercept,RandomSlope)

Related

summary row with gtsummary

I am trying to create a table of events with gtsummary and I would like to obtain a final row counting the events of the previous rows. add_overall() and add_n() do add the total but in a column, counting the same event across groups but not the overall events.
I created this example.
x1 <- sample(c("No", "Yes"), 30, replace = TRUE, prob = c(0.85, 0.15))
x2 <- sample(c("No", "Yes"), 30, replace = TRUE, prob = c(0.9, 0.1))
x3 <- sample(c("No", "Yes"), 30, replace = TRUE, prob = c(0.75, 0.25))
y <- sample(c("A", "B"), 30, replace = TRUE, prob = c(0.5, 0.5))
df <- data.frame(as_factor(x1), as_factor(x2), as_factor(x3), as_factor(y))
colnames(df) <-c("event_1", "event_2", "event_3", "group")
tbl_summary(df, by=group, statistic = all_categorical() ~ "{n}")
example
I tried using summary_rows() function from gt package after converting the table to a gt object but there is an error when summarising because these variables are factors.
Any other ideas?
You can do this by adding a new variable to your data frame that is the row sum of each of the events. Then you can display that variable's sum in the summary table. Example below!
library(gtsummary)
#> #Uighur
library(tidyverse)
df <-
data.frame(
event_1 = sample(c(FALSE, TRUE), 30, replace = TRUE, prob = c(0.85, 0.15)),
event_2 = sample(c(FALSE, TRUE), 30, replace = TRUE, prob = c(0.9, 0.1)),
event_3 = sample(c(FALSE, TRUE), 30, replace = TRUE, prob = c(0.75, 0.25)),
group = sample(c("A", "B"), 30, replace = TRUE, prob = c(0.5, 0.5))
) |>
rowwise() |>
mutate(Total = sum(event_1, event_2, event_3))
tbl_summary(
df,
by = group,
type = Total ~ "continuous",
statistic =
list(all_categorical() ~ "{n}",
all_continuous() ~ "{sum}")
) |>
as_kable() # convert to kable to display on stack overflow
Characteristic
A, N = 16
B, N = 14
event_1
4
4
event_2
1
2
event_3
7
6
Total
12
12
Created on 2023-01-12 with reprex v2.0.2
Thank you so much (great package gtsummary). That works! I had some trouble summing over factors. If variables are factors the code
mutate(Total = sum(event_1=="Yes", event_2=="Yes", event_3=="Yes"))
does it.

Correcting satellite image overlays for Rayshader

I'm trying to improve the look of Rayshader by overlaying more recent (higher detail) satellite imagery (that I'm getting from the {leaflet} packages) but the overlay doesn't match with the 3D rendering.
Ideally I'm looking for a open-source solution that can get global satellite imagery. Bonus points if you find finer detail data for my area of interest - Hawaii.
One method using {geoviz} and {rayshader} uses the slippy_overlay() function to create a number of overlay images from either Mapbox (satellite, mapbox-streets-v8, mapbox-terrain-v2, mapbox-traffic-v1, terrain-rgb, mapbox-incidents-v1) or Stamen. Although I found mapbox-terrain-v2 the best it still lacks the detail I would like. Since it requires setting up an API for mapbox I just use stamen/watercolor below:
library(geoviz)
library(rayshader)
### Maui
lat = 20.785700
lon = -156.259204
square_km = 22
max_tiles = 10
dem <- mapzen_dem(lat, lon, square_km, max_tiles)
elev_matrix = matrix(
raster::extract(dem, raster::extent(dem), buffer=1000),
nrow = ncol(dem),
ncol = nrow(dem)
)
ambmat <- ambient_shade(elev_matrix, zscale = 30)
raymat <- ray_shade(elev_matrix, zscale = 30, lambert = TRUE)
watermap <- detect_water(elev_matrix)
overlay_img <-
slippy_overlay(dem,
image_source = "stamen",
image_type = "watercolor",
png_opacity = 0.3,
max_tiles = max_tiles)
elev_matrix %>%
sphere_shade(sunangle = 270, texture = "imhof4") %>%
add_water(detect_water(elev_matrix), color="imhof4") %>%
add_shadow(ray_shade(elev_matrix,zscale=3,maxsearch = 300),0.5) %>%
add_shadow(ambmat,0.5) %>%
add_overlay(overlay_img) %>%
plot_3d(elev_matrix,
solid = T,
water = T,
waterdepth = 0,
wateralpha = 0.5,
watercolor = "lightblue",
waterlinecolor = "white",
waterlinealpha = 0.5,
zscale= raster_zscale(dem) / 3,
fov=0,theta=135,zoom=0.75,phi=45, windowsize = c(1000,800))
I'm trying to adapt Will Bishop's workflow for getting overlays with the leaflet package but the result is very odd. Will's approach is a bit different as it fetches elevation data from USGS, which doesn't have baythmetric elevation which is must for me - so I used geoviz
library(leaflet)
# define bounding box with longitude/latitude coordinates
bbox <- list(
p1 = list(long = -156.8037, lat = 20.29737),
p2 = list(long = -155.7351, lat = 21.29577)
)
leaflet() %>%
addTiles() %>%
addRectangles(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
fillColor = "transparent"
) %>%
fitBounds(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
)
What's the area of my hillshade from geoviz?
dim(dem)
780 780 1
Okay so the overlay image needs to be 780 x 780 so I modify the helper functions to download the overlay with the World_Imagery base map:
define_image_size <- function(bbox, major_dim = 780) {
# calculate aspect ration (width/height) from lat/long bounding box
aspect_ratio <- abs((bbox$p1$long - bbox$p2$long) / (bbox$p1$lat - bbox$p2$lat))
# define dimensions
img_width <- ifelse(aspect_ratio > 1, major_dim, major_dim*aspect_ratio) %>% round()
img_height <- ifelse(aspect_ratio < 1, major_dim, major_dim/aspect_ratio) %>% round()
size_str <- paste(img_width, img_height, sep = ",")
list(height = img_height, width = img_width, size = size_str)
}
get_arcgis_map_image <- function(bbox, map_type = "World_Imagery", file = NULL,
width = 780, height = 780, sr_bbox = 4326) {
require(httr)
require(glue)
require(jsonlite)
url <- parse_url("https://utility.arcgisonline.com/arcgis/rest/services/Utilities/PrintingTools/GPServer/Export%20Web%20Map%20Task/execute")
# define JSON query parameter
web_map_param <- list(
baseMap = list(
baseMapLayers = list(
list(url = jsonlite::unbox(glue("https://services.arcgisonline.com/ArcGIS/rest/services/{map_type}/MapServer",
map_type = map_type)))
)
),
exportOptions = list(
outputSize = c(width, height)
),
mapOptions = list(
extent = list(
spatialReference = list(wkid = jsonlite::unbox(sr_bbox)),
xmax = jsonlite::unbox(max(bbox$p1$long, bbox$p2$long)),
xmin = jsonlite::unbox(min(bbox$p1$long, bbox$p2$long)),
ymax = jsonlite::unbox(max(bbox$p1$lat, bbox$p2$lat)),
ymin = jsonlite::unbox(min(bbox$p1$lat, bbox$p2$lat))
)
)
)
res <- GET(
url,
query = list(
f = "json",
Format = "PNG32",
Layout_Template = "MAP_ONLY",
Web_Map_as_JSON = jsonlite::toJSON(web_map_param))
)
if (status_code(res) == 200) {
body <- content(res, type = "application/json")
message(jsonlite::toJSON(body, auto_unbox = TRUE, pretty = TRUE))
if (is.null(file))
file <- tempfile("overlay_img", fileext = ".png")
img_res <- GET(body$results[[1]]$value$url)
img_bin <- content(img_res, "raw")
writeBin(img_bin, file)
message(paste("image saved to file:", file))
} else {
message(res)
}
invisible(file)
}
Now download the file, then load it
image_size <- define_image_size(bbox, major_dim = 780)
# fetch overlay image
overlay_file <- "maui_overlay.png"
get_arcgis_map_image(bbox, map_type = "World_Imagery", file = overlay_file,
# width = image_size$width, height = image_size$height,
sr_bbox = 4326)
overlay_img <- png::readPNG("maui_overlay.png")
Okay let's make the plot
elev_matrix %>%
sphere_shade(sunangle = 270, texture = "imhof4") %>%
add_water(detect_water(elev_matrix), color="imhof4") %>%
add_shadow(ray_shade(elev_matrix,zscale=3,maxsearch = 300),0.5) %>%
add_shadow(ambmat,0.5) %>%
add_overlay(overlay_img, alphacolor = 1) %>%
plot_3d(elev_matrix,
solid = T,
water = T,
waterdepth = 0,
wateralpha = 0.5,
watercolor = "lightblue",
waterlinecolor = "white",
waterlinealpha = 0.5,
zscale= raster_zscale(dem) / 3,
fov=0,theta=135,zoom=0.75,phi=45, windowsize = c(1000,800))
As you can see the overlay image is rotated to the hillshade.
Now I'm also realizing that fetching satellite with a bounding box method isn't ideal when you're trying to show bathymatrix data. It would be ideal to subset this overlay somehow programmatically but I'll probably just end up using inkscape once I've figured out how to rotate the overlay.
I tried to use the {magick}'s image_rotate() function to no avail:
library(magick)
maui <- magick::image_read("maui_overlay.png")
image_rotate(maui, 30) # -> maui_30
# image_write(maui_30, path = "maui_overlay_30.png", format = "png")
But magick has changed the dimensions:
# A tibble: 1 x 7
format width height colorspace matte filesize density
<chr> <int> <int> <chr> <lgl> <int> <chr>
1 PNG 1068 1068 sRGB TRUE 0 38x38
And will give an error with rayshader:
overlay_img <- png::readPNG("maui_overlay_30.png")
elev_matrix %>%
sphere_shade(sunangle = 270, texture = "imhof4") %>%
add_water(detect_water(elev_matrix), color="imhof4") %>%
add_shadow(ray_shade(elev_matrix,zscale=3,maxsearch = 300),0.5) %>%
add_shadow(ambmat,0.5) %>%
add_overlay(overlay_img, alphacolor = 1) %>%
plot_3d(elev_matrix,
solid = T,
water = T,
waterdepth = 0,
wateralpha = 0.5,
watercolor = "lightblue",
waterlinecolor = "white",
waterlinealpha = 0.5,
zscale= raster_zscale(dem) / 3,
fov=0,theta=135,zoom=0.75,phi=45, windowsize = c(1000,800))
Error in add_overlay(., overlay_img, alpha = 0.8) : argument 3 matches multiple formal arguments
The answer couldn't have been simpler... it needed to be transposed overlay_img = aperm(overlay_img, c(2,1,3)).

Optimal dispatch of power generators

I want to write a matlab code that can optimally dispatch power generators to satisfy electric demand by considering profit too. I used below code that is in matlab website. https://ww2.mathworks.cn/help/optim/examples/optimal-dispatch-of-power-generators.html?requestedDomain=en
But in this code they are not considering the demand.
My target is , there are 20 power plants and I want to schedule this plants to satisfy demand. And the running power level can be any value in between the max and minimum power value of each power plant. High cost power plants should dispatch too.
name = 'generationcost.xlsx';
poolPrice = xlsread(name,'AD3:AD50'); % Revenue in dollars per MWh in interval
demand = xlsread(name,'AC3:AC52');
month = numel(demand);
e = xlsread(name,'W2:W4')'; %Cost for a unit of fuel
fuelPrice = repmat(e,1,1,2);
nPeriods = length(poolPrice);
nGens = 3;
genhigh = xlsread(name,'F2:F4');
genlow = xlsread(name,'Q2:Q4');
gen = [genlow(:),genhigh(:),]; %MW
fuel = [50,330;45,325;55,750]; % Fuel consumption for generator
startCost = [10000;10000;10000]; %Cost in dollars to start a generator after it has been off
y = optimvar('y',nPeriods,nGens,{'Low','High'},'Type','integer','LowerBound',0,'UpperBound',1);
z = optimvar('z',nPeriods,nGens,'Type','integer','LowerBound',0,'UpperBound',1);
powercons = y(:,:,'Low') + y(:,:,'High') <= 1;
yFuel = zeros(nPeriods,nGens,2);
yFuel(:,1,1) = fuel(1,1);
yFuel(:,1,2) = fuel(1,2);
yFuel(:,2,1) = fuel(2,1);
yFuel(:,2,2) = fuel(2,2);
yFuel(:,3,1) = fuel(3,1);
yFuel(:,3,2) = fuel(3,2);
fuelUsed = sum(reshape((sum(y.*yFuel)),[3 2])');
%fuelcons = fuelUsed <= totalFuel;
w = optimexpr(nPeriods,nGens);
idx = 1:(nPeriods-1);
w(idx,:) = y(idx+1,:,'Low') - y(idx,:,'Low') + y(idx+1,:,'High') -
y(idx,:,'High');
w(nPeriods,:) = y(1,:,'Low') - y(nPeriods,:,'Low') + y(1,:,'High') -
y(nPeriods,:,'High');
switchcons = w - z <= 0;
generatorlevel = zeros(size(yFuel));
generatorlevel(:,1,1) = gen(1,1);
generatorlevel(:,1,2) = gen(1,2);
generatorlevel(:,2,1) = gen(2,1);
generatorlevel(:,2,2) = gen(2,2);
generatorlevel(:,3,1) = gen(3,1);
generatorlevel(:,3,2) = gen(3,2);
revenue = optimexpr(size(y));
for ii = 1:nPeriods
revenue(ii,:,:) = poolPrice(ii)*y(ii,:,:).*generatorlevel(ii,:,:);
end
val = optimexpr(size(y));
demands = optimexpr(nPeriods,1);
id = 1:nPeriods;
val(id,:,:)= y(id,:,:).*generatorlevel(id,:,:);
demands(id,:)= sum(sum(val,3),2);
demandcons = demands-demand ;
fuelCost = sum(sum(fuelPrice.*sum(y.*yFuel)));
startingCost = z*startCost;
profit = sum(sum(sum(revenue))) - fuelCost - sum(startingCost);
dispatch = optimproblem('ObjectiveSense','maximize');
dispatch.Objective = profit;
dispatch.Constraints.powercons = powercons;
dispatch.Constraints.demandcons = demands - demand <= 0 ;
options = optimoptions('intlinprog','Display','final');
[dispatchsol,fval,exitflag,output] = solve(dispatch,'options',options);
subplot(5,1,1)
bar(dispatchsol.y(:,1,1)*gen(1,1)+dispatchsol.y(:,1,2)*gen(1,2),.5,'g')
xlim([.5,48.5])
ylabel('MW')
title('Generator 1 Optimal Schedule','FontWeight','bold')
subplot(5,1,2)
bar(dispatchsol.y(:,2,1)*gen(2,1)+dispatchsol.y(:,2,2)*gen(2,2),.5,'c')
title('Generator 2 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,3)
bar(dispatchsol.y(:,3,1)*gen(3,1)+dispatchsol.y(:,3,2)*gen(3,2),.5,'c')
title('Generator 3 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,4)
bar(demand,.5)
xlim([.5,48.5])
title('Daily Demand','FontWeight','bold')
xlabel('Period')
ylabel('MW')
starttimes = find(round(dispatchsol.z) == 1);
[theperiod,thegenerator] = ind2sub(size(dispatchsol.z),starttimes)
This code is not satisfying Demand. The output demand value is more lesser than the actual demand value. And also here only use 'High' or 'Low' power levels only. In between power values not using.

How to implement a Slope graph in R for two variables

I'm analyzing how many users have used a particular hashtag and how they have contributed to the total number of tweets. My results are:
Data:
20.68% of tweets related to #HashtagX are created by 20 users. Now, these 20 users only represent 0.001% of the total of 14,432 users who have ever used the hashtag #HashtagX.
What happens if we take the top 100 users by number of tweets? 44% of tweets are created by the top 100 users.
If we extend to the top 500 users by number of users we see that 72% of tweets is created by the top 500.
I am wondering how to implement a slope graph because I think that is a good way to show the relationship between both variables, but it is not a default graph provides for any library.
One of the ways to show the relationship between both variables ("Users" vs "Tweets") is a Slope Chart.
Visualization obtained (solved graph for the question):
Slope Chart
1) Libraries
library(ggplot2)
library(scales)
library(ggrepel)
theme_set(theme_classic())
2) Data example
Country = c('20 accounts', '50 accounts', '100 accounts','200 accounts','300 accounts',
'500 accounts','1000 accounts','14.443 accounts')
January = c(0.14, 0.34, 0.69,1.38,2.07,3.46,6.92,100)
April = c(20.68, 33.61, 44.94, 57.49,64.11,72,80,100)
Tweets_N = c(26797, 43547, 58211, 74472,83052,93259,103898,129529)
a = data.frame(Country, January, April)
left_label <- paste(a$Country, paste0(a$January,"%"),sep=" | ")
right_label <- paste(paste0(round(a$April),"%"),paste0(Tweets_N," tweets"),sep=" | ")
a$color_class <- "green"
3) Plot
p <- ggplot(a) + geom_segment(aes(x=1, xend=2, y=January, yend=April, col=color_class), size=.25, show.legend=F) +
geom_vline(xintercept=1, linetype="dashed", size=.1) +
geom_vline(xintercept=2, linetype="dashed", size=.1) +
scale_color_manual(labels = c("Up", "Down"),
values = c("blue", "red")) +
labs(
x="", y = "Percentage") +
xlim(.5, 2.5) + ylim(0,(1.1*(max(a$January, a$April)))) # X and Y axis limits
# Add texts
p <- p + geom_text_repel(label=left_label, y=a$January, x=rep(1, NROW(a)), hjust=1.1, size=3.5,direction = "y")
p <- p + geom_text(label=right_label, y=a$April, x=rep(2, NROW(a)), hjust=-0.1, size=3.5)
p <- p + geom_text(label="Accounts", x=1, y=1.1*(max(a$January, a$April)), hjust=1.2, size=4, check_overlap = TRUE) # title
p <- p + geom_text(label="Tweeets (% of Total)", x=2, y=1.1*(max(a$January, a$April)), hjust=-0.1, size=4, check_overlap = TRUE)
# title
# Minify theme
p + theme(panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
panel.border = element_blank(),
plot.margin = unit(c(1,2,1,2), "cm"))

different clusters with same method

I am stuck in a problem with hierarchical clustering. I want to make a dendrogram and a heatmap, with a distance method of correlation (d_mydata=dist(1-cor(t(mydata))) and ward.D2 as clustering method.
As a gadget in the package pheatmap you can plot the dendrogram on the left side to visualize the clusters.
The pipeline of my analysis would be this:
create the dendrogram
test how many cluster would be the optimal (k)
extract the subjects in each cluster
create a heatmap
My surprise comes up when the dendrogram plotted in the heatmap is not the same as the one plotted before even when methods are the same.
So I decided to create a pheatmap colouring by the clusters classified before by cutree and test if the colours correspond to the clusters in the dendrogram.
This is my code:
# Create test matrix
test = matrix(rnorm(200), 20, 10)
test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2
test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4
colnames(test) = paste("Test", 1:10, sep = "")
rownames(test) = paste("Gene", 1:20, sep = "")
test<-as.data.frame(test)
# Create a dendrogram with this test matrix
dist_test<-dist(test)
hc=hclust(dist_test, method="ward.D2")
plot(hc)
dend<-as.dendrogram(hc, check=F, nodePar=list(cex = .000007),leaflab="none", cex.main=3, axes=F, adjust=F)
clus2 <- as.factor(cutree(hc, k=2)) # cut tree into 2 clusters
groups<-data.frame(clus2)
groups$id<-rownames(groups)
#-----------DATAFRAME WITH mydata AND THE CLASSIFICATION OF CLUSTERS AS FACTORS---------------------
test$id<-rownames(test)
clusters<-merge(groups, test, by.x="id")
rownames(clusters)<-clusters$id
clusters$clus2<-as.character(clusters$clus2)
clusters$clus2[clusters$clus2== "1"]= "cluster1"
clusters$clus2[clusters$clus2=="2"]<-"cluster2"
plot(dend,
main = "test",
horiz = TRUE, leaflab = "none")
d_clusters<-dist(1-cor(t(clusters[,7:10])))
hc_cl=hclust(d_clusters, method="ward.D2")
annotation_col = data.frame(
Path = factor(colnames(clusters[3:12]))
)
rownames(annotation_col) = colnames(clusters[3:12])
annotation_row = data.frame(
Group = factor(clusters$clus2)
)
rownames(annotation_row) = rownames(clusters)
# Specify colors
ann_colors = list(
Path= c(Test1="darkseagreen", Test2="lavenderblush2", Test3="lightcyan3", Test4="mediumpurple", Test5="red", Test6="blue", Test7="brown", Test8="pink", Test9="black", Test10="grey"),
Group = c(cluster1="yellow", cluster2="blue")
)
require(RColorBrewer)
library(RColorBrewer)
cols <- colorRampPalette(brewer.pal(10, "RdYlBu"))(20)
library(pheatmap)
pheatmap(clusters[ ,3:12], color = rev(cols),
scale = "column",
kmeans_k = NA,
show_rownames = F, show_colnames = T,
main = "Heatmap CK14, CK5/6, GATA3 and FOXA1 n=492 SCALE",
clustering_method = "ward.D2",
cluster_rows = TRUE, cluster_cols = TRUE,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation",
annotation_row = annotation_row,
annotation_col = annotation_col,
annotation_colors=ann_colors
)
anyone with the same issue? Am I making an stupid mistake?
Thank you in advance