This code summarizes demographic information across studies and for each study separately.

Individual demographic data is not shared publicly due to concerns related to potential identifiability of participants, but is available upon request.

prep data

load packages

if (!require(tidyverse)) {
  install.packages('tidyverse')
}
if (!require(kableExtra)) {
  install.packages('kableExtra')
}
if (!require(usmap)) {
  install.packages('usmap')
}

define aesthetics

palette = c("#3B9AB2", "#EBCC2A", "#F21A00")

load and merge data

# demographic data
study1 = read.csv("../data/study1_demo.csv", stringsAsFactors = FALSE)

study2 = read.csv("../data/study2_demo.csv", stringsAsFactors = FALSE)

study3 = read.csv("../data/study3_demo.csv", stringsAsFactors = FALSE)

study4 = read.csv("../data/study4_demo.csv", stringsAsFactors = FALSE)

study5 = read.csv("../data/study5_demo.csv", stringsAsFactors = FALSE)

study6 = read.csv("../data/study6_demo.csv", stringsAsFactors = FALSE)

# merge
merged = bind_rows(study1, study2, study3, study4, study5 ,study6)

# tidy
demo = merged %>%
  mutate(value = ifelse(value == "Would rather not say", "Prefer not to say", value),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  group_by(study, item, value) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  filter(!item %in% c("gender_4_TEXT", "race_self")) %>%
  rename(" " = item)

geographic distribution

by study

study 1

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 1") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

study 5

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 5") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

study 6

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 6") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

combined

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

age

by study

merged %>%
  filter(item == "age") %>%
  mutate(value = as.numeric(value)) %>%
  group_by(study) %>%
  summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
            `mean age` = mean(value, na.rm = TRUE),
            `sd age` = sd(value, na.rm = TRUE)) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
study age range mean age sd age
study 1 18 - 78 38.0 12.3
study 2 19 - 81 38.8 11.8
study 4 19 - 73 38.6 13.6
study 5 21 - 70 37.7 10.8
study 6 20 - 71 38.4 10.7

combined

merged %>%
  filter(item == "age") %>%
  mutate(value = as.numeric(value)) %>%
  summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
            `mean age` = mean(value, na.rm = TRUE),
            `sd age` = sd(value, na.rm = TRUE)) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
age range mean age sd age
18 - 81 38.1 12

student year

demo %>%
  filter(` ` == "student_grade") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total, -` `) %>%
  spread(study, percent) %>%
  mutate(value = factor(value, c("First year", "Second year", "Third year",
                                 "Fourth year", "Fifth year or higher"))) %>%
  arrange(value) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value study 3
First year 30.2%
Second year 33.5%
Third year 19.8%
Fourth year 15.7%
Fifth year or higher 0.8%

gender

by study

demo %>%
  filter(` ` == "gender") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total, -` `) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value study 1 study 2 study 3 study 4 study 5 study 6
Man 50.9% 53.4% 44.0% 58.3% 61.3% 56.2%
Non-binary / third gender 0.5% 2.0% 0.3%
Not reported 0.2%
Other 0.3%
Prefer not to say 0.3% 0.4% 0.8% 1.0% 0.5%
Woman 48.4% 45.5% 53.2% 41.7% 37.5% 43.3%

combined

demo %>%
  filter(` ` == "gender") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value percent
Man 52.5%
Non-binary / third gender 0.2%
Not reported 0.0%
Other 0.2%
Prefer not to say 0.4%
Woman 46.6%

race and ethnicity

by study

hispanic_latinx = demo %>%
  filter(` ` == "Hispanic or Latinx") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  select(-n, -total) %>%
  spread(value, percent) %>%
  select(study, Yes) %>%
  rename("percent" = Yes) %>%
  mutate(`race / ethnicity` = "Hispanic or Latinx")

demo %>%
  filter(` ` == "race") %>%
  filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(study, value, percent) %>%
  rename("race / ethnicity" = value) %>%
  bind_rows(., hispanic_latinx) %>%
  arrange(`race / ethnicity`) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
race / ethnicity study 1 study 2 study 3 study 5 study 6
American Indian or Alaskan Native 0.7% 0.9% 0.8% 1.6%
Asian 8.3% 8.4% 24.6% 4.1% 4.5%
Black or African American 11.1% 8.2% 9.7% 14.6% 8.3%
Hispanic or Latinx 10.3% 13.3% 12.1% 16.5% 10.6%
More than one race 0.9% 6.0% 0.3% 2.5%
Native Hawaiian or Other Pacific Islander 0.2%
Not reported 0.2%
Other 2.6%
Prefer not to say 0.7% 4.0% 0.6% 1.5%
White 77.0% 80.6% 54.8% 78.7% 83.1%

combined

hispanic_latinx = demo %>%
  filter(` ` == "Hispanic or Latinx") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  select(-n, -total) %>%
  spread(value, percent) %>%
  select(Yes) %>%
  rename("percent" = Yes) %>%
  mutate(`race / ethnicity` = "Hispanic or Latinx")

demo %>%
  filter(` ` == "race") %>%
  filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  ungroup() %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(value, percent) %>%
  rename("race / ethnicity" = value) %>%
  bind_rows(., hispanic_latinx) %>%
  arrange(`race / ethnicity`) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
race / ethnicity percent
American Indian or Alaskan Native 0.8%
Asian 8.6%
Black or African American 10.6%
Hispanic or Latinx 11.5%
More than one race 0.9%
Native Hawaiian or Other Pacific Islander 0.1%
Not reported 0.0%
Other 1.5%
Prefer not to say 0.6%
White 76.9%

education

by study

demo %>%
  filter(` ` == "highest degree completed") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)", 
                                          "Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
                                          "Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
                                          "Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
  arrange(value) %>%
  select(study, value, percent) %>%
  rename("highest degree completed" = value) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
highest degree completed study 1 study 5 study 6
Less than high school 0.2%
High school graduate (diploma) 5.7% 4.8% 8.6%
High school graduate (GED) 3.1% 1.6% 1.8%
Some college (1-4 years, no degree) 17.3% 11.7% 13.4%
Associate’s degree (including occupational or academic degrees) 10.5% 6.7% 9.6%
Bachelor’s degree (BA, BS, etc) 44.2% 52.1% 52.6%
Master’s degree (MA, MS, MENG, MSW, etc) 15.6% 21.3% 12.6%
Professional school degree (MD, DDC, JD, etc) 1.6% 0.6% 1.3%
Doctorate degree (PhD, EdD, etc) 1.9% 1.3% 0.3%

combined

demo %>%
  filter(` ` == "highest degree completed") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)", 
                                          "Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
                                          "Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
                                          "Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
  arrange(value) %>%
  select(value, percent) %>%
  rename("highest degree completed" = value) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
highest degree completed percent
Less than high school 0.2%
High school graduate (diploma) 6.0%
High school graduate (GED) 2.7%
Some college (1-4 years, no degree) 16.1%
Associate’s degree (including occupational or academic degrees) 9.9%
Bachelor’s degree (BA, BS, etc) 46.3%
Master’s degree (MA, MS, MENG, MSW, etc) 15.8%
Professional school degree (MD, DDC, JD, etc) 1.4%
Doctorate degree (PhD, EdD, etc) 1.6%

SES

by study

demo %>%
  filter(` ` == "household income") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = ifelse(is.na(value), "Not reported", value),
         value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999", 
                                          "$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
                                          "$100,000 and greater", "Not reported"))) %>%
  arrange(value) %>%
  select(study, value, percent) %>%
  rename("household income" = value) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
household income study 1 study 5 study 6
Less than $5,000 1.3% 0.6% 1.5%
$5,000 through $11,999 2.8% 5.1% 3.5%
$12,000 through $15,999 2.6% 4.4% 2.3%
$16,000 through $24,999 6.0% 8.9% 7.3%
$25,000 through $34,999 10.1% 9.8% 10.1%
$35,000 through $49,999 13.6% 20.6% 18.9%
$50,000 through $74,999 25.3% 27.3% 31.7%
$75,000 through $99,999 17.9% 15.2% 12.6%
$100,000 and greater 18.5% 7.3% 10.8%
Not reported 1.9% 0.6% 1.3%

combined

demo %>%
  filter(` ` == "household income") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = ifelse(is.na(value), "Not reported", value),
         value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999", 
                                          "$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
                                          "$100,000 and greater", "Not reported"))) %>%
  arrange(value) %>%
  select(value, percent) %>%
  rename("household income" = value) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
household income percent
Less than $5,000 1.3%
$5,000 through $11,999 3.2%
$12,000 through $15,999 2.8%
$16,000 through $24,999 6.5%
$25,000 through $34,999 10.1%
$35,000 through $49,999 15.1%
$50,000 through $74,999 26.5%
$75,000 through $99,999 16.8%
$100,000 and greater 16.1%
Not reported 1.6%