split frequency and percentage in tbl_summary in to two separate columns - gtsummary

I would to split the values of N and % into two separate columns ie, N and % columns
library(gtsummary)
trial %>%
select(response, grade) %>%
tbl_summary()

The easiest way to do this is with tbl_merge(), constructing two tables (one with N the other with %). Example below!
library(gtsummary)
#> #BlackLivesMatter
library(tidyverse)
tbl <-
# iterate over these two statistics
c("{n} / {N}", "{p}%") %>%
# build tbl_summary using each of the stats
map(
~trial %>%
select(response, grade) %>%
tbl_summary(
statistic = all_categorical() ~ .x,
missing = "no"
)
) %>%
# merge the two tables together
tbl_merge() %>%
# some formatting to make it cute
modify_spanning_header(everything() ~ NA) %>%
modify_footnote(everything() ~ NA) %>%
modify_header(list(stat_0_1 ~ "**n / N**", stat_0_2 ~ "**percent**"))
Created on 2021-08-06 by the reprex package (v2.0.0)

Related

show r squared, aic, bic, and deviance for multiple models using gtsummary

looking to have the r squared, aic, bic, and deviance values presented for each of the four models here in the merged output
mod0 <- lm(surv_time ~ Age + Gender + Education + `Standardized MoCA`, data = surv_tbldata_converters)
mod1 <- lm(surv_time ~ Age + Gender + Education + `Global Composite`, data = surv_tbldata_converters)
mod2 <- lm(surv_time ~ Age + Gender + Education + `Standardized MoCA`, data = surv_tbldata)
mod3 <- lm(surv_time ~ Age + Gender + Education + `Global Composite`, data = surv_tbldata)
mod0_tbl <- mod0 %>%
tbl_regression(exponentiate = F) %>%
add_glance_source_note(include = c(r.squared, AIC, BIC, deviance))
mod1_tbl <- mod1 %>%
tbl_regression(exponentiate = F) %>%
add_glance_source_note(include = c(r.squared, AIC, BIC, deviance))
mod2_tbl <- mod2 %>%
tbl_regression(exponentiate = F) %>%
add_glance_source_note(include = c(r.squared, AIC, BIC, deviance))
mod3_tbl <- mod3 %>%
tbl_regression(exponentiate = F) %>%
add_glance_source_note(include = c(r.squared, AIC, BIC, deviance))
# merge tables
tbl_merge(tbls = list(mod2_tbl, mod3_tbl, mod0_tbl, mod1_tbl),
tab_spanner = c("**MoCA Model (Full Sample)**", "**Global Composite Model (Full Sample)**", "**MoCA Model (Converters Only)**", "**Global Composite Model (Converters Only)**")) %>%
modify_header(label = "**Predictor**") %>%
modify_caption("**Table 3. Study One Model Comparisons**") %>%
bold_labels()
The add_glance_source_note() function adds the statistics as a source note, and the table may only have one source note. Use the add_glance_table() function to add the statistics to the bottom of the table, and you'll be able to merge the tables without issue. Example below!
library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.4.1'
lm1 <- lm(age ~ trt + grade, trial)
lm2 <- lm(marker ~ trt + grade, trial)
tbl1 <-
tbl_regression(lm1) %>%
add_glance_table(include = c(r.squared, AIC, BIC, deviance))
tbl2 <-
tbl_regression(lm2) %>%
add_glance_table(include = c(r.squared, AIC, BIC, deviance))
tbl_all <- tbl_merge(list(tbl1, tbl2))
Created on 2021-06-08 by the reprex package (v2.0.0)

tbl_summary ( gtsummary) transpose with p-values

I was wondering if there is a way to compose such a table :
library(gtsummary)
data(trial)
add_p_ex1 <-
trial[c("marker", "trt")] %>%
tbl_summary(by = trt) %>%
add_p()
add_p_ex2 <-
trial[c("marker", "death")] %>%
tbl_summary(by = death) %>%
add_p()
as1=add_p_ex1$table_body
as2=add_p_ex2$table_body
write.csv(rbind(as1, as2), file='temp1.csv')
With an output that is transposed version of as1 and as2 like this :
Ideally for K - continuous variable ( eg marker 1 , marker 2, ... maker k) and a P number of categorical variables.
Marker Level (ng/mL) Pvalue
trt
drug A 0.84 (0.24, 1.57) 0.084746992242773
drug B 0.52 (0.19, 1.20)
Death
0 0.73 (0.23, 1.33) 0.605276987642371
1 0.57 (0.20, 1.45)
It's possible to get your table like this, and I've provided a code example below. I am not sure if it's the easiest way to get what you're looking for, however.
library(gtsummary)
library(tidyverse)
# function to transpose a tbl_summary table
gtsummary_transpose <- function(data, con_var, cat_var) {
tbl <- data[c(con_var, cat_var)] %>%
tbl_summary(by = cat_var, missing = "no") %>%
add_p() %>%
modify_header(stat_by = "{level}",
p.value = "p.value",
label = "label") %>%
as_tibble() %>%
select(-label) %>%
pivot_longer(cols = -p.value) %>%
select(name, value, p.value)
# add a header row
bind_rows(
tibble(name = attr(data[[cat_var]], "label") %||% cat_var),
tbl
) %>%
fill(p.value, .direction = "up") %>%
mutate(p.value = ifelse(row_number() == 1, p.value, NA))
}
gtsummary_transpose(trial, "marker", "trt")
# transose multiple tables, and stack them
tibble(variable = c("trt", "grade")) %>%
mutate(
tbl = map(variable, ~gtsummary_transpose(trial, "marker", .x))
) %>%
unnest(cols = tbl) %>%
gt::gt() %>%
gt::cols_hide(vars(variable)) %>%
gt::cols_label(name = "Characteristic", value = "Marker Level", p.value = "p-value") %>%
gt::fmt_missing(columns = everything(), missing_text = "")

Tidypredict does not incorporate Recipes

I fit a model with logistic regression by tidymodels. The function tidypredict_fit extract the formula but I note that it does not incorporate the preprocess in the recipes. How can I extract the model formula with the necessary preprocess?
set.seed(123)
split <- initial_split(lepto, prop = 0.75, strata = 'composite_outcome')
lepto_train <- training(split)
letp_test <- testing(split)
rec2 <- recipe(composite_outcome ~ letargy + creatinine + cholestatic_syndrome + pallor + pulmonary_involvement +
reduced_diuresis + heart_rate + ast + hypotension + leucocytes, data = lepto_train) %>%
step_medianimpute(all_numeric()) %>%
step_BoxCox(all_numeric()) %>%
step_normalize(all_numeric()) %>%
step_corr(all_numeric(), threshold = 0.9) %>%
step_unknown(all_nominal(), -all_outcomes(), new_level = 'without_information') %>%
step_dummy(all_nominal(), -all_outcomes()) %>%
step_lincomb(all_predictors()) %>%
step_nzv(all_predictors())
## --- Model glm --------------------------------------- ##
model_glm <- logistic_reg(mode = 'classification') %>%
set_engine('glm')
## -- workflow ---------------------------------------- ##
work_glm <- workflow() %>%
add_recipe(rec2) %>%
add_model(model_glm)
## -------Fit --------------------------------------- ##
test_glm <- work_glm %>%
last_fit(split)
test_glm %>% collect_metrics()
final_glm <- work_glm %>%
fit(lepto_train)
final_glm %>%
pull_workflow_fit() %>%
tidypredict_fit()
model_logistic <- final_glm %>%
pull_workflow_fit()

How to perform a VLOOKUP in R?

I am replicating a CSV file in R (originally created in excel), that is compiled using various data sources. When i made this CSV in excel i used a vlookup to populate certain columns based on another data source/other spreadsheet.
How can i populate a column in R using something similar to a VLOOKUP? i.e. looking for a variable in an external source and matching it with another column in the df?
For example, the formula in the excel version is =
=VLOOKUP('[Spreadsheet1]Tab1'!A22,'[Spreadsheet2]Tab2'!$A$6:$B$500,2,FALSE)
How can i make this same formula in R code?
dplyr::left_join() should do the trick:
myData <- data.frame(
x = c('a', 'b', 'b', 'c')
)
lookUpData <- data.frame(
key = c('a', 'b', 'c'),
value = c(1, 2, 3)
)
library(dplyr)
myData %>%
left_join(lookUpData, by = c(x = 'key')) %>%
rename(newCol = value)
x newCol
1 a 1
2 b 2
3 b 2
4 c 3

select multiple columns with dplyr having factor "No", "Yes" levels

I want to select all factor columns having two levels ("Yes", "No").
I want to use dpylr for this but, could not fix the problem.
AB %>%
select_if(.predicate = function(x) length(levels(x))==2 & unique(x) %in% c("No", "Yes"))
unique(x) %in% c('No','Yes') returns a vector the same length as unique(x), rather than a scalar. I think your better off using setequal(x,c('No','Yes')) as shown below:
library(dplyr)
# generate the dataframe with different factor levels
n<-100
no_yes <- sample(c('No','Yes'), n, replace = T)
no_yes_maybe <- sample(c('No','Yes','Maybe'), n, replace = T)
no <- sample(c('No'), n, replace = T)
no_maybe <- sample(c('No','Maybe'), n, replace = T)
AB<-data.frame(
no_yes, # only this column should get returned
no_yes_maybe,
no,
no_maybe,
stringsAsFactors = T
)%>%as.tbl
# function to return TRUE if column has only No/Yes factors.
desired_levels <- c('No','Yes')
predicate_function <- function(x) setequal(levels(x),desired_levels)
# use dplyr to select columns with desired factor levels
AB%>%select_if(predicate_function)