# Table 1: baseline characteristics from df_nhpi, one column per level of SEX.
df_nhpi %>%
  select(
    AGE, SEX, MAR_STAT, HEIGHT, WEIGHT, BMI, HTN, HTNMED, MI, Smoking, COPD,
    CANCER, DIABETES
  ) %>%
  tbl_summary(
    by = SEX,
    # summarise these variables as categorical
    type = list(c("HTN", "HTNMED", "MI", "COPD", "CANCER") ~ "categorical"),
    # readable labels for the abbreviated column names
    label = list(
      MAR_STAT ~ "Marital Status",
      HTN ~ "Hypertension",
      HTNMED ~ "Hypertension Medication",
      MI ~ "Heart Attack",
      Smoking ~ "Smoking Status",
      COPD ~ "Chronic Obstructive Pulmonary Disease"
    ),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    percent = "column",
    # missing values are reported under the text "Unknown"
    missing = "ifany",
    missing_text = "Unknown"
  ) %>%
  add_stat_label() %>%
  add_p(
    test = all_continuous() ~ "t.test",
    # p-values formatted with three digits
    pvalue_fun = function(p) style_pvalue(p, digits = 3)
  ) %>%
  bold_p() %>%
  modify_caption("**Table 1. Baseline Characteristics**") %>%
  bold_labels()
I'm trying to generate a "Table 1". The issue is that I want the percentage of missing values shown in each column (specifically for categorical variables), but at the same time I don't want the missing values to be included when the p-values are calculated. I'm trying to do this in a single chunk of code. Is there any way to do this, or should I go for the conventional method?
I've been searching the whole internet for the past three days. But, I don't find anything that works in my case.
PS: mutate and forcats don't work, as they skew my p-values.
I prepared two solutions that both report the proportion of missing data. Hopefully one of them works for you!
library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.5.2'
# Option 1: leave the missing rows out of the summary (missing = "no") and
# report the count and percentage of missing values in a separate column
tbl1 <-
  trial %>%
  tbl_summary(
    include = response,
    by = trt,
    missing = "no",
    type = all_dichotomous() ~ "categorical"
  ) %>%
  add_p() %>%
  add_n(statistic = "{n_miss} ({p_miss}%)") %>%
  # the column added by add_n() is relabelled as "Missing"
  modify_header(n = "**Missing**")
# Option 2: make NA an explicit factor level so tbl_summary() gives it its own
# row, then merge in the p-value column taken from tbl1
# NOTE(review): fct_explicit_na() is deprecated in newer forcats releases in
# favour of fct_na_value_to_level() -- confirm against the installed version
tbl2 <-
  trial %>%
  dplyr::mutate(
    response = forcats::fct_explicit_na(factor(response))
  ) %>%
  tbl_summary(
    include = response,
    by = trt,
    label = list(response = "Tumor Response")
  ) %>%
  list(
    # from tbl1, hide the N and statistic columns before merging
    tbl1 %>% modify_column_hide(c(n, all_stat_cols()))
  ) %>%
  tbl_merge(tab_spanner = FALSE)
Created on 2022-03-22 by the reprex package (v2.0.1)
Related
I have chosen the percentage calculation by rows. What do I have to do to get only the percentage displayed for certain columns?
Thanks in advance!
# Summary table split by test_result, with percentages computed by row.
Table1234 %>%
  # drop identifier / helper columns; all remaining columns are summarised
  select(
    -c(screening_id, m07_mainsourceincome_c, m01_2_ageyears_q, r_sample)
  ) %>%
  tbl_summary(
    by = "test_result",
    label = list(
      age_group_10yrs ~ "Age Groups",
      m01_1_sex_d ~ "Sex",
      m05_qualificationmg_c ~ "Education",
      m06_indivemployment_c ~ "Profession",
      m06_indivemployment_currently_employed ~ "Employment Status"
    ),
    percent = "row",
    digits = NULL
  ) %>%
  # hide the first by-group statistic column
  modify_column_hide(columns = stat_1) %>%
  bold_labels() %>%
  add_p() %>%
  add_overall()
I've just discovered that add_nevent in gtsummary can have the option location = "level". I am rapt! But I would like it to have a percentage as well. I've tried adding statistic = "{n}({p}%)" but nothing changes.
Here is my code:
# Binomial GLM of rellife on age and gender, shown with exponentiated
# coefficients and per-level N / event counts
glm(rellife ~ age + gender, data = df, family = "binomial") %>%
  tbl_regression(exponentiate = TRUE) %>%
  # add number of events of the outcome
  add_nevent(location = "level", statistic = "{n}/{N}%") %>%
  add_n(location = "level")
And the table:
I would like to have 1601 (93.6%) in the column Event N for Age and so on.
Any help would be appreciated.
Thanks
After adding the N and N event, you can use the modify_table_body() function to calculate the event rate. Example below!
library(gtsummary)
#> #BlackLivesMatter
packageVersion("gtsummary")
#> [1] '1.5.2'
tbl <-
  glm(response ~ age + grade, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE) %>%
  add_nevent(location = "level") %>%
  add_n(location = "level") %>%
  # add a formatted event-rate column right after stat_nevent; rows without an
  # event count stay NA
  modify_table_body(
    function(body) {
      dplyr::mutate(
        body,
        stat_nevent_rate = ifelse(
          is.na(stat_nevent),
          NA,
          paste0(style_sigfig(stat_nevent / stat_n, scale = 100), "%")
        ),
        .after = stat_nevent
      )
    }
  ) %>%
  # merge the columns into a single column
  modify_cols_merge(
    rows = !is.na(stat_nevent),
    pattern = "{stat_nevent} / {stat_n} ({stat_nevent_rate})"
  ) %>%
  # update header to event rate
  modify_header(stat_nevent = "**Event Rate**")
Created on 2022-03-21 by the reprex package (v2.0.1)
I'm using the gtsummary package to generate a great summary table of the mean difference and 95% CI for paired values.
However, the default output does not format and round the mean difference and the 95% CI consistently (e.g., in my data: no digits after the decimal point for the mean difference, 1 digit after the decimal point for the lower limit, and no digits after the decimal point for the upper limit of the 95% CI).
I tried to change this using the estimate_fun= argument, but I only obtained an error message, probably due to bad syntax. Does anyone have a solution? :)
Example using the example table for paired data (for example, i try to obtain 1 digit round for difference and the 95%CI)(http://www.danieldsjoberg.com/gtsummary/articles/gallery.html)
# Build the example data: keep trt and marker, and number the rows within each
# treatment arm so that rows sharing an id can be paired
trial_paired <-
  trial %>%
  select(trt, marker) %>%
  group_by(trt) %>%
  mutate(id = seq_len(n())) %>%
  ungroup()
# Paired comparison of marker between the trt groups.
# NOTE(review): this is the call reported to fail with
# "Error in estimate_fun= argument input"; it is kept as posted.
trial_paired %>%
# keep only ids that have a non-missing marker in exactly two rows
filter(!is.na(marker)) %>%
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
tbl_summary(by = trt, include = -id, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
# estimate_fun is given a formula whose right-hand side is a style_sigfig()
# call on .x rather than a function definition
add_difference(test = list(all_continuous() ~ "paired.t.test"), group = id, estimate_fun = list(all_continuous() ~ style_sigfig(.x, digits=1)))
Result is only : Erreur : Error in estimate_fun= argument input. Select from ‘marker’
Many thanks if anybody has a solution, and sorry if the question is not so clear...
Hello and welcome to stackoverflow!
There was a bug in the add_difference(estimate_fun=) that is now fixed in the dev version of the package on GitHub. Install the version from GitHub and use the code below.
# renv::install("ddsjoberg/gtsummary")
library(gtsummary)
#> #Uighur
packageVersion("gtsummary")
#> [1] '1.4.2.9001'
# Example data: trt and marker, with a within-arm row number used as pair id
trial_paired <-
  trial %>%
  select(trt, marker) %>%
  dplyr::group_by(trt) %>%
  mutate(id = seq_len(dplyr::n())) %>%
  dplyr::ungroup()
tbl <-
  trial_paired %>%
  # keep only ids with a non-missing marker in exactly two rows
  dplyr::filter(!is.na(marker)) %>%
  dplyr::group_by(id) %>%
  dplyr::filter(dplyr::n() == 2) %>%
  dplyr::ungroup() %>%
  tbl_summary(
    include = -id,
    by = trt,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_difference(
    group = id,
    test = list(all_continuous() ~ "paired.t.test"),
    # the formatter is supplied as a function, keyed by variable name
    estimate_fun = marker ~ function(x) style_sigfig(x, digits = 1)
  )
Created on 2021-07-16 by the reprex package (v2.0.0)
I'm loving gtsummary. I write a lot of reports and love my pretty tables!
I've run into a problem when I updated.
I just updated to using all_stat_cols(TRUE) instead of stat_by.
I'm getting an error when I try to include {level} or {n} when I add overall.
It works if stat_0 isn't included, so I tried with all_stat_cols(FALSE) and a separate stat_0 ~ "Total n = {n}", but I get the error "Error: glue cannot interpolate functions into strings. * object 'n' is a function."
This works:
# Summary of year by gender with an overall column placed last; custom headers
# are applied to the by-group columns only
nr %>%
  select(gender, year) %>%
  tbl_summary(missing = "no", by = gender) %>%
  bold_labels() %>%
  add_overall(last = TRUE) %>%
  # clear the footnotes on every column
  modify_footnote(update = everything() ~ NA) %>%
  modify_header(
    update = list(
      label ~ "",
      all_stat_cols(FALSE) ~ "{level}\n n = {n}\n"
    )
  )
But I also want my overall to be changed "Total n = 17" like this:
# Same pipeline as the working snippet, plus a dedicated header for the overall
# column (stat_0).
# NOTE(review): this is the variant reported to fail with "glue cannot
# interpolate functions into strings"; it is kept exactly as posted.
nr %>%
select(gender, year) %>%
tbl_summary (by = gender, missing = "no") %>%
bold_labels() %>%
add_overall(last=TRUE) %>%
modify_footnote(update = everything() ~ NA) %>%
modify_header(update = list(label ~ "",
all_stat_cols(FALSE) ~ "{level}\n n = {n}\n",
# the only addition relative to the working call: a stat_0 header using {n}
stat_0 ~ "Total\n = {n}"))
But get this error:
Error: glue cannot interpolate functions into strings. * object 'n' is a function.
I also want to remove the first row (Year level) if anyone knows how to do that too!
Any help or ideas would be very much appreciated.
Hello! The reason you're getting an error is that little n represents the number of observations in each by= group, and big N is the overall number of observations. When you try to use little n in the header for the overall column, you get the error because it's not defined there.
Change the little n to big N, and you should be all set! Example below!
library(gtsummary)
#> #BlackLivesMatter
packageVersion("gtsummary")
#> [1] '1.4.1'
# Headers: by-group columns show the level and group n; the overall column
# (stat_0) uses the overall N
tbl <-
  trial %>%
  select(trt, age, grade) %>%
  tbl_summary(missing = "no", by = trt) %>%
  add_overall() %>%
  modify_header(
    update = list(
      label ~ "",
      all_stat_cols(FALSE) ~ "{level}\n n = {n}\n",
      stat_0 ~ "Total\n = {N}"
    )
  )
Created on 2021-06-01 by the reprex package (v2.0.0)
I like to pipe my expss tables into kable to get access to some additional formatting options. That sometimes requires some tweaking, and I'm looking for a tweak here to get rid of the row_labels text in the first column of the header in the example below.
Simple reprex:
# Simulated example data: x is binary, y and z are continuous, grp cycles
# through 1..5.
df <- data.frame(
  x = rbinom(100, 1, 0.5),
  y = rnorm(100, 1, 0.6),
  z = rnorm(100, 1, 0.2),
  grp = rep(1:5, 20)
)
# Set an empty variable label on grp.
var_lab(df$grp) <- ""
# Means of x, y and z by grp, rendered through kable with a spanning header.
df %>%
  tab_cells(x, y, z) %>%
  tab_cols(grp) %>%
  tab_stat_mean(label = "") %>%
  tab_pivot() %>%
  kable(
    caption = "Title",
    # 0 digits for the first column, 3 for the five group columns
    digits = c(0, rep(3, 5))
  ) %>%
  # FALSE spelled out: F is an ordinary variable and can be reassigned
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped")
  ) %>%
  add_header_above(c("", "Group" = 5))
Generates this:
Thanks!
It's better to use 'htmlTable' or 'huxtable' to output expss tables, because they both support complex multilevel and multinested headers.
However, if you want to use 'kable' you can set first column name to empty string just after 'tab_pivot':
library(expss)
library(knitr)
library(kableExtra)
# Blank out the name of the first element/column of `x`.
#
# x: a named object (e.g. a data frame or table) whose first name should be
#    replaced by an empty string.
# Returns `x` with its first name set to "" and all other names unchanged.
# A zero-length `x` is returned as is, since there is no first name to blank.
remove_first_name <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }
  setNames(x, c("", names(x)[-1]))
}
# Simulated example data: x is binary, y and z are continuous, grp cycles
# through 1..5.
df <- data.frame(
  x = rbinom(100, 1, 0.5),
  y = rnorm(100, 1, 0.6),
  z = rnorm(100, 1, 0.2),
  grp = rep(1:5, 20)
)
# Set an empty variable label on grp.
var_lab(df$grp) <- ""
# Means of x, y and z by grp; the first column name is blanked before the
# table is handed to kable.
df %>%
  tab_cells(x, y, z) %>%
  tab_cols(grp) %>%
  tab_stat_mean(label = "") %>%
  tab_pivot() %>%
  remove_first_name() %>% # remove 'row_labels'
  kable(
    caption = "Title",
    # 0 digits for the first column, 3 for the five group columns
    digits = c(0, rep(3, 5))
  ) %>%
  # FALSE spelled out: F is an ordinary variable and can be reassigned
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped")
  ) %>%
  add_header_above(c("", "Group" = 5))