Table Gallery

Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.

library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats); library(tidyr)

Summary Tables

Add a spanning header over the group columns for increased clarity, and modify column headers.

# Summarise age and grade within each treatment arm.
tbl_summary(
  select(trial, trt, age, grade),
  by = trt,
  statistic = all_continuous() ~ "{median} ({p25}, {p75})",
  missing = "no"
) %>%
  # each statistic column header shows the arm, its size and its percentage
  modify_header(all_stat_cols() ~ "**{level}**<br>N =  {n} ({style_percent(p)}%)") %>%
  # add the overall N column
  add_n() %>%
  bold_labels() %>%
  # place both treatment columns under a single spanning header
  modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Characteristic N Chemotherapy Treatment
Drug A
N = 98 (49%)1
Drug B
N = 102 (51%)1
Age 189 46 (37, 59) 48 (39, 56)
Grade 200
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)

1 Median (IQR); n (%)


Show continuous summary statistics on multiple lines.

# Report each continuous variable on several rows, one row per statistic.
tbl_summary(
  select(trial, trt, age, marker),
  by = trt,
  missing = "no",
  # "continuous2" is the multi-line variant of the continuous summary type
  type = all_continuous() ~ "continuous2",
  statistic = all_continuous() ~ c(
    "{N_nonmiss}",
    "{mean} ({sd})",
    "{median} ({p25}, {p75})",
    "{min}, {max}"
  )
) %>%
  italicize_levels()
Characteristic Drug A, N = 98 Drug B, N = 102
Age
N 91 98
Mean (SD) 47 (15) 47 (14)
Median (IQR) 46 (37, 59) 48 (39, 56)
Range 6, 78 9, 83
Marker Level (ng/mL)
N 92 98
Mean (SD) 1.02 (0.89) 0.82 (0.83)
Median (IQR) 0.84 (0.24, 1.57) 0.52 (0.19, 1.20)
Range 0.00, 3.87 0.00, 3.64

Modify the function that formats the p-values, change the variable labels, update the tumor response header, and add a correction for multiple testing.

# Compare age and grade between responders and non-responders.
select(trial, response, age, grade) %>%
  mutate(
    # descriptive factor labels become the column headers of the table
    response = factor(
      response,
      labels = c("No Tumor Response", "Tumor Responded")
    )
  ) %>%
  tbl_summary(
    by = response,
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade"),
    missing = "no"
  ) %>%
  # custom p-value formatter: style_pvalue() with digits = 2
  add_p(pvalue_fun = function(x) style_pvalue(x, digits = 2)) %>%
  # q-values: p-values corrected for multiple testing
  add_q()
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 p-value2 q-value3
Patient Age 46 (36, 55) 49 (43, 59) 0.091 0.18
Tumor Grade 0.93 0.93
I 46 (35%) 21 (34%)
II 44 (33%) 19 (31%)
III 42 (32%) 21 (34%)

1 Median (IQR); n (%)

2 Wilcoxon rank sum test; Pearson's Chi-squared test

3 False discovery rate correction for multiple testing


Include missing tumor response as a column using fct_explicit_na().

# Turn a missing response into its own factor level so that it gets a column.
# NOTE(review): newer forcats releases appear to deprecate fct_explicit_na()
# in favour of fct_na_value_to_level() -- confirm against the installed version.
select(trial, response, age, grade) %>%
  mutate(
    response = fct_explicit_na(
      factor(response, labels = c("No Tumor Response", "Tumor Responded")),
      na_level = "Missing Response Status"
    )
  ) %>%
  tbl_summary(
    by = response,
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  )
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 Missing Response Status, N = 71
Patient Age 46 (36, 55) 49 (43, 59) 52 (44, 57)
Unknown 7 3 1
Tumor Grade
I 46 (35%) 21 (34%) 1 (14%)
II 44 (33%) 19 (31%) 5 (71%)
III 42 (32%) 21 (34%) 1 (14%)

1 Median (IQR); n (%)


Report treatment differences between two groups. This is often needed in randomized trials. In this example, we report the difference in tumor response and marker level between two chemotherapy treatments.

# Treatment difference in tumor response and marker level between the arms.
select(trial, response, marker, trt) %>%
  tbl_summary(
    by = trt,
    missing = "no",
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{p}%"
    )
  ) %>%
  # difference between the two arms, with confidence interval and p-value
  add_difference() %>%
  add_n() %>%
  # keep only the arm name in the statistic headers and drop their footnote
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_footnote(all_stat_cols() ~ NA)
Characteristic N Drug A Drug B Difference1 95% CI1,2 p-value1
Tumor Response 193 29% 34% -4.2% -18%, 9.9% 0.6
Marker Level (ng/mL) 190 1.02 (0.89) 0.82 (0.83) 0.20 -0.05, 0.44 0.12

1 Two sample test for equality of proportions; Welch Two Sample t-test

2 CI = Confidence Interval


Paired t-test and McNemar’s test. The data is expected in a long format with 2 rows per participant.

# Pretend every patient received both Drug A and Drug B: rows are numbered
# within each arm, and that number serves as the ID linking the paired rows.
trial_paired <-
  select(trial, trt, marker, response) %>%
  group_by(trt) %>%
  mutate(id = row_number()) %>%
  ungroup()

# Incomplete pairs must be removed from the data before the table is built.
trial_paired %>%
  # drop every row that has a missing value
  filter(complete.cases(.)) %>%
  # retain only the IDs that still have both of their rows
  group_by(id) %>%
  filter(n() == 2) %>%
  ungroup() %>%
  # summarise by arm (the ID itself is not summarised)
  tbl_summary(by = trt, include = -id) %>%
  # paired tests, matching the rows on the ID
  add_p(
    test = list(
      marker ~ "paired.t.test",
      response ~ "mcnemar.test"
    ),
    group = id
  )
Characteristic Drug A, N = 831 Drug B, N = 831 p-value2
Marker Level (ng/mL) 0.82 (0.22, 1.63) 0.53 (0.18, 1.26) 0.2
Tumor Response 21 (25%) 28 (34%) 0.3

1 Median (IQR); n (%)

2 Paired t-test; McNemar's Chi-squared test with continuity correction


Include p-values comparing all groups to a single reference group.

# summary statistics by grade, without any p-values
small_trial <- select(trial, grade, age, response)
t0 <-
  tbl_summary(small_trial, by = grade, missing = "no") %>%
  modify_header(all_stat_cols() ~ "**{level}**")

# p-values comparing grade I with grade II
t1 <-
  small_trial %>%
  filter(grade %in% c("I", "II")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. II**")) %>%
  # only the p-value column is wanted, so the statistic columns are hidden
  modify_column_hide(all_stat_cols())

# p-values comparing grade I with grade III
t2 <-
  small_trial %>%
  filter(grade %in% c("I", "III")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. III**")) %>%
  # only the p-value column is wanted, so the statistic columns are hidden
  modify_column_hide(all_stat_cols())

# merge the three tables and label the two column groups
list(t0, t1, t2) %>%
  tbl_merge() %>%
  modify_spanning_header(
    list(
      all_stat_cols() ~ "**Tumor Grade**",
      starts_with("p.value") ~ "**p-values**"
    )
  )
Characteristic Tumor Grade p-values
I1 II1 III1 I vs. II2 I vs. III2
Age 47 (37, 56) 48 (37, 57) 47 (38, 58) 0.7 0.5
Tumor Response 21 (31%) 19 (30%) 21 (33%) >0.9 0.9

1 Median (IQR); n (%)

2 Wilcoxon rank sum test; Fisher's exact test


Add additional statistics as additional columns.

# Lower (ll) and upper (ul) limits of the confidence interval for the mean,
# taken from a one-sample t-test with its default settings.
ll <- function(x) {
  ci <- t.test(x)$conf.int
  ci[1]
}
ul <- function(x) {
  ci <- t.test(x)$conf.int
  ci[2]
}

# first table: mean and standard deviation
t1 <-
  select(trial, age, marker) %>%
  tbl_summary(
    statistic = all_continuous() ~ "{mean} ({sd})",
    missing = "no"
  ) %>%
  modify_header(stat_0 ~ "**Mean (SD)**")

# second table: confidence limits computed by the ll() and ul() functions
t2 <-
  select(trial, age, marker) %>%
  tbl_summary(
    statistic = all_continuous() ~ "{ll}, {ul}",
    missing = "no"
  ) %>%
  modify_header(stat_0 ~ "**95% CI for Mean**")

# place the tables side by side, removing all footnotes and spanning headers
list(t1, t2) %>%
  tbl_merge() %>%
  modify_footnote(everything() ~ NA_character_) %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic Mean (SD) 95% CI for Mean
Age 47 (14) 45, 49
Marker Level (ng/mL) 0.92 (0.86) 0.79, 1.04

It’s often necessary to summarize a continuous variable by two categorical variables. The code below has been wrapped into a single-line solution in bstfun::tbl_2way_summary(trial, grade, trt, marker).

# Marker level summarised by treatment (columns) and tumor grade (rows).
select(trial, trt, grade, marker) %>%
  # one row per tumor grade, holding that grade's data in a list column
  arrange(grade) %>%
  nest(data = -grade) %>%
  # work on one grade at a time
  rowwise() %>%
  mutate(
    tbl = list(
      # summary table for this grade; the grade level is used as the row label
      tbl_summary(
        data,
        by = trt,
        label = list(marker = as.character(grade)),
        missing = "no"
      ) %>%
        modify_header(
          list(label ~ "**Tumor Grade**", all_stat_cols() ~ "{level}")
        )
    )
  ) %>%
  # stack the per-grade tables into the final table
  pull(tbl) %>%
  tbl_stack() %>%
  modify_spanning_header(all_stat_cols() ~ "**Treatment Assignment**")
Tumor Grade Treatment Assignment
Drug A1 Drug B1
I 0.96 (0.24, 1.70) 1.05 (0.29, 1.49)
II 0.66 (0.31, 1.23) 0.21 (0.10, 0.94)
III 0.84 (0.16, 1.91) 0.58 (0.35, 1.36)

1 Median (IQR)


Build a summary table stratified by more than one variable.

# One summary table per tumor grade, each split by treatment arm.
select(trial, trt, grade, age, stage) %>%
  # prefix the level so the stratum headers read "Grade I", "Grade II", ...
  mutate(grade = paste("Grade", grade)) %>%
  tbl_strata(
    strata = grade,
    # table built within every stratum
    .tbl_fun = ~ tbl_summary(.x, by = trt, missing = "no") %>%
      modify_header(all_stat_cols() ~ "**{level}**")
  )
Characteristic Grade I Grade II Grade III
Drug A1 Drug B1 Drug A1 Drug B1 Drug A1 Drug B1
Age 46 (36, 60) 48 (42, 55) 44 (31, 54) 50 (43, 57) 52 (42, 60) 45 (36, 52)
T Stage
T1 8 (23%) 9 (27%) 14 (44%) 9 (25%) 6 (19%) 7 (21%)
T2 8 (23%) 10 (30%) 8 (25%) 9 (25%) 9 (29%) 10 (30%)
T3 11 (31%) 7 (21%) 5 (16%) 6 (17%) 6 (19%) 8 (24%)
T4 8 (23%) 7 (21%) 5 (16%) 12 (33%) 10 (32%) 8 (24%)

1 Median (IQR); n (%)


Regression Tables

Include number of observations and the number of events in a univariate regression table.

# Univariate logistic regressions of tumor response on age and on grade.
select(trial, response, age, grade) %>%
  tbl_uvregression(
    y = response,
    method = glm,
    method.args = list(family = binomial),
    # report odds ratios instead of log-odds
    exponentiate = TRUE
  ) %>%
  # add the column with the number of events
  add_nevent()
Characteristic N Event N OR1 95% CI1 p-value
Age 183 58 1.02 1.00, 1.04 0.10
Grade 193 61
I
II 0.95 0.45, 2.00 0.9
III 1.10 0.52, 2.29 0.8

1 OR = Odds Ratio, CI = Confidence Interval


Include two related models side-by-side with descriptive statistics. We also use the compact table theme that reduces cell padding and font size.

# logistic regression for tumor response, reported as odds ratios
gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)
# Cox regression for time to death, reported as hazard ratios
gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>%
  tbl_regression(exponentiate = TRUE)
# descriptive statistics for the two covariates
gt_t1 <- trial[c("trt", "grade")] %>%
  tbl_summary(missing = "no") %>%
  add_n() %>%
  modify_header(stat_0 ~ "**n (%)**") %>%
  modify_footnote(stat_0 ~ NA_character_)

# the compact theme is a global setting, so it is switched on only for this
# table and switched off again afterwards
theme_gtsummary_compact()
#> Setting theme `Compact`
tbl_merge(
  list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)

# restore the default theme so that later tables are not rendered compactly
reset_gtsummary_theme()
Characteristic N n (%) Tumor Response Time to Death
OR1 95% CI1 p-value HR1 95% CI1 p-value
Chemotherapy Treatment 200
Drug A 98 (49%)
Drug B 102 (51%) 1.21 0.66, 2.24 0.5 1.25 0.86, 1.81 0.2
Grade 200
I 68 (34%)
II 68 (34%) 0.94 0.44, 1.98 0.9 1.28 0.80, 2.06 0.3
III 64 (32%) 1.09 0.52, 2.27 0.8 1.69 1.07, 2.66 0.024

1 OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio


Include the number of events at each level of a categorical predictor.

# Univariate Cox models with the event count shown for every factor level.
select(trial, ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    y = Surv(ttdeath, death),
    method = coxph,
    # the N column is not shown
    hide_n = TRUE,
    exponentiate = TRUE
  ) %>%
  # events are reported on the level rows
  add_nevent(location = "level")
Characteristic Event N HR1 95% CI1 p-value
T Stage
T1 24
T2 27 1.18 0.68, 2.04 0.6
T3 22 1.23 0.69, 2.20 0.5
T4 39 2.48 1.49, 4.14 <0.001
Grade
I 33
II 36 1.28 0.80, 2.05 0.3
III 43 1.69 1.07, 2.66 0.024

1 HR = Hazard Ratio, CI = Confidence Interval


Regression model where the covariate remains the same, and the outcome changes.

# Treatment is the covariate in every model; each remaining column is used
# as an outcome in turn.
select(trial, age, marker, trt) %>%
  tbl_uvregression(
    x = trt,
    method = lm,
    hide_n = TRUE,
    # print the treatment effect on a single row per outcome
    show_single_row = "trt"
  ) %>%
  modify_header(
    list(
      label ~ "**Model Outcome**",
      estimate ~ "**Treatment Coef.**"
    )
  ) %>%
  modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug B group.")
Model Outcome Treatment Coef.1 95% CI2 p-value
Age 0.44 -3.7, 4.6 0.8
Marker Level (ng/mL) -0.20 -0.44, 0.05 0.12

1 Values larger than 0 indicate larger values in the Drug B group.

2 CI = Confidence Interval


Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().

# Custom tidier that reports Wald confidence intervals.
#
# Arguments:
#   x             a fitted model accepted by broom::tidy() and
#                 stats::confint.default()
#   exponentiate  if TRUE, estimates and confidence limits are exponentiated
#   conf.level    confidence level of the interval
#   ...           further arguments supplied by the caller; they are ignored
#
# Returns the broom::tidy() results with `conf.low` and `conf.high` columns
# appended.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
  # Wald limits on the scale of the model coefficients, at the requested level
  wald_ci <- stats::confint.default(x, level = conf.level)
  # keep the limits on the same scale as the (exponentiated) estimates
  if (isTRUE(exponentiate)) {
    wald_ci <- exp(wald_ci)
  }
  colnames(wald_ci) <- c("conf.low", "conf.high")

  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # the confidence limits, saved in a tibble
    tibble::as_tibble(wald_ci)
  )
}

# linear model of age, tidied with the custom Wald-interval tidier
tbl_regression(
  lm(age ~ grade + marker, data = trial),
  tidy_fun = my_tidy
)
Characteristic Beta 95% CI1 p-value
Grade
I
II 0.64 -4.6, 5.9 0.8
III 2.4 -2.8, 7.6 0.4
Marker Level (ng/mL) -0.04 -2.6, 2.5 >0.9

1 CI = Confidence Interval


Use significance stars on estimates with low p-values.

# Univariate Cox models of time to death, reported as hazard ratios.
trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death),
    exponentiate = TRUE
  ) %>%
  # flag estimates with low p-values using stars
  add_significance_stars()
Characteristic N HR1,2 SE2
T Stage 200
T1
T2 1.18 0.281
T3 1.23 0.295
T4 2.48*** 0.260
Grade 200
I
II 1.28 0.241
III 1.69* 0.232

1 *p<0.05; **p<0.01; ***p<0.001

2 HR = Hazard Ratio, SE = Standard Error