This code summarizes demographic information across studies and for each study separately.

Individual demographic data is not shared publicly due to concerns related to potential identifiability of participants, but is available upon request.

prep data

load packages

if (!require(tidyverse)) {
  install.packages('tidyverse')
}
if (!require(kableExtra)) {
  install.packages('kableExtra')
}
if (!require(usmap)) {
  install.packages('usmap')
}

define aesthetics

palette = c("#3B9AB2", "#EBCC2A", "#F21A00")

load and merge data

# demographic data
study1 = read.csv("../data/study1_demo.csv", stringsAsFactors = FALSE)

study2 = read.csv("../data/study2_demo.csv", stringsAsFactors = FALSE)

study3 = read.csv("../data/study3_demo.csv", stringsAsFactors = FALSE)

study4 = read.csv("../data/study4_demo.csv", stringsAsFactors = FALSE)

study5 = read.csv("../data/study5_demo.csv", stringsAsFactors = FALSE)

study6 = read.csv("../data/study6_demo.csv", stringsAsFactors = FALSE)

# merge
merged = bind_rows(study1, study2, study3, study4, study5 ,study6)

# tidy
demo = merged %>%
  mutate(value = ifelse(value == "Would rather not say", "Prefer not to say", value),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  group_by(study, item, value) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  filter(!item %in% c("gender_4_TEXT", "race_self")) %>%
  rename(" " = item)

geographic distribution

by study

study 1

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 1") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

study 5

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 5") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

study 6

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  filter(study == "study 6") %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

combined

states = merged %>%
  filter(grepl("state", item)) %>%
  spread(item, value) %>%
  group_by(state) %>%
  summarize(n = n())

states %>%
  usmap::plot_usmap(data = ., values = "n", color = "grey50") +
  scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
                       name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
  theme(text = element_text(size = 12, family = "Futura Medium"),
        legend.position = "right")

age

by study

merged %>%
  filter(item == "age") %>%
  mutate(value = as.numeric(value)) %>%
  group_by(study) %>%
  summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
            `mean age` = mean(value, na.rm = TRUE),
            `sd age` = sd(value, na.rm = TRUE)) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

study	age range	mean age	sd age
study 1	18 - 78	38.0	12.3
study 2	19 - 81	38.8	11.8
study 4	19 - 73	38.6	13.6
study 5	21 - 70	37.7	10.8
study 6	20 - 71	38.4	10.7

combined

merged %>%
  filter(item == "age") %>%
  mutate(value = as.numeric(value)) %>%
  summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
            `mean age` = mean(value, na.rm = TRUE),
            `sd age` = sd(value, na.rm = TRUE)) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

age range	mean age	sd age
18 - 81	38.1	12

student year

demo %>%
  filter(` ` == "student_grade") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total, -` `) %>%
  spread(study, percent) %>%
  mutate(value = factor(value, c("First year", "Second year", "Third year",
                                 "Fourth year", "Fifth year or higher"))) %>%
  arrange(value) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

value	study 3
First year	30.2%
Second year	33.5%
Third year	19.8%
Fourth year	15.7%
Fifth year or higher	0.8%

gender

by study

demo %>%
  filter(` ` == "gender") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total, -` `) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

value	study 1	study 2	study 3	study 4	study 5	study 6
Man	50.9%	53.4%	44.0%	58.3%	61.3%	56.2%
Non-binary / third gender	—	0.5%	2.0%	—	0.3%	—
Not reported	—	0.2%	—	—	—	—
Other	0.3%	—	—	—	—	—
Prefer not to say	0.3%	0.4%	0.8%	—	1.0%	0.5%
Woman	48.4%	45.5%	53.2%	41.7%	37.5%	43.3%

combined

demo %>%
  filter(` ` == "gender") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(-n, -total) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
    kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

value	percent
Man	52.5%
Non-binary / third gender	0.2%
Not reported	0.0%
Other	0.2%
Prefer not to say	0.4%
Woman	46.6%

race and ethnicity

by study

hispanic_latinx = demo %>%
  filter(` ` == "Hispanic or Latinx") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  select(-n, -total) %>%
  spread(value, percent) %>%
  select(study, Yes) %>%
  rename("percent" = Yes) %>%
  mutate(`race / ethnicity` = "Hispanic or Latinx")

demo %>%
  filter(` ` == "race") %>%
  filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(study, value, percent) %>%
  rename("race / ethnicity" = value) %>%
  bind_rows(., hispanic_latinx) %>%
  arrange(`race / ethnicity`) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

race / ethnicity	study 1	study 2	study 3	study 5	study 6
American Indian or Alaskan Native	0.7%	0.9%	0.8%	1.6%	—
Asian	8.3%	8.4%	24.6%	4.1%	4.5%
Black or African American	11.1%	8.2%	9.7%	14.6%	8.3%
Hispanic or Latinx	10.3%	13.3%	12.1%	16.5%	10.6%
More than one race	—	0.9%	6.0%	0.3%	2.5%
Native Hawaiian or Other Pacific Islander	0.2%	—	—	—	—
Not reported	—	0.2%	—	—	—
Other	2.6%	—	—	—	—
Prefer not to say	—	0.7%	4.0%	0.6%	1.5%
White	77.0%	80.6%	54.8%	78.7%	83.1%

combined

hispanic_latinx = demo %>%
  filter(` ` == "Hispanic or Latinx") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  select(-n, -total) %>%
  spread(value, percent) %>%
  select(Yes) %>%
  rename("percent" = Yes) %>%
  mutate(`race / ethnicity` = "Hispanic or Latinx")

demo %>%
  filter(` ` == "race") %>%
  filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  ungroup() %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%"),
         value = ifelse(is.na(value), "Not reported", value)) %>%
  select(value, percent) %>%
  rename("race / ethnicity" = value) %>%
  bind_rows(., hispanic_latinx) %>%
  arrange(`race / ethnicity`) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

race / ethnicity	percent
American Indian or Alaskan Native	0.8%
Asian	8.6%
Black or African American	10.6%
Hispanic or Latinx	11.5%
More than one race	0.9%
Native Hawaiian or Other Pacific Islander	0.1%
Not reported	0.0%
Other	1.5%
Prefer not to say	0.6%
White	76.9%

education

by study

demo %>%
  filter(` ` == "highest degree completed") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)", 
                                          "Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
                                          "Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
                                          "Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
  arrange(value) %>%
  select(study, value, percent) %>%
  rename("highest degree completed" = value) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

highest degree completed	study 1	study 5	study 6
Less than high school	0.2%	—	—
High school graduate (diploma)	5.7%	4.8%	8.6%
High school graduate (GED)	3.1%	1.6%	1.8%
Some college (1-4 years, no degree)	17.3%	11.7%	13.4%
Associate’s degree (including occupational or academic degrees)	10.5%	6.7%	9.6%
Bachelor’s degree (BA, BS, etc)	44.2%	52.1%	52.6%
Master’s degree (MA, MS, MENG, MSW, etc)	15.6%	21.3%	12.6%
Professional school degree (MD, DDC, JD, etc)	1.6%	0.6%	1.3%
Doctorate degree (PhD, EdD, etc)	1.9%	1.3%	0.3%

combined

demo %>%
  filter(` ` == "highest degree completed") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)", 
                                          "Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
                                          "Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
                                          "Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
  arrange(value) %>%
  select(value, percent) %>%
  rename("highest degree completed" = value) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

highest degree completed	percent
Less than high school	0.2%
High school graduate (diploma)	6.0%
High school graduate (GED)	2.7%
Some college (1-4 years, no degree)	16.1%
Associate’s degree (including occupational or academic degrees)	9.9%
Bachelor’s degree (BA, BS, etc)	46.3%
Master’s degree (MA, MS, MENG, MSW, etc)	15.8%
Professional school degree (MD, DDC, JD, etc)	1.4%
Doctorate degree (PhD, EdD, etc)	1.6%

SES

by study

demo %>%
  filter(` ` == "household income") %>%
  group_by(study) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = ifelse(is.na(value), "Not reported", value),
         value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999", 
                                          "$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
                                          "$100,000 and greater", "Not reported"))) %>%
  arrange(value) %>%
  select(study, value, percent) %>%
  rename("household income" = value) %>%
  spread(study, percent) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

household income	study 1	study 5	study 6
Less than $5,000	1.3%	0.6%	1.5%
$5,000 through $11,999	2.8%	5.1%	3.5%
$12,000 through $15,999	2.6%	4.4%	2.3%
$16,000 through $24,999	6.0%	8.9%	7.3%
$25,000 through $34,999	10.1%	9.8%	10.1%
$35,000 through $49,999	13.6%	20.6%	18.9%
$50,000 through $74,999	25.3%	27.3%	31.7%
$75,000 through $99,999	17.9%	15.2%	12.6%
$100,000 and greater	18.5%	7.3%	10.8%
Not reported	1.9%	0.6%	1.3%

combined

demo %>%
  filter(` ` == "household income") %>%
  group_by(value) %>%
  summarize(n = sum(n)) %>%
  mutate(total = sum(n),
         percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
  mutate(value = ifelse(is.na(value), "Not reported", value),
         value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999", 
                                          "$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
                                          "$100,000 and greater", "Not reported"))) %>%
  arrange(value) %>%
  select(value, percent) %>%
  rename("household income" = value) %>%
  mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
  kable(digits = 1) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")

household income	percent
Less than $5,000	1.3%
$5,000 through $11,999	3.2%
$12,000 through $15,999	2.8%
$16,000 through $24,999	6.5%
$25,000 through $34,999	10.1%
$35,000 through $49,999	15.1%
$50,000 through $74,999	26.5%
$75,000 through $99,999	16.8%
$100,000 and greater	16.1%
Not reported	1.6%

Demographics

2021-09-07

prep data

load packages

define aesthetics

load and merge data

geographic distribution

by study

study 1

study 5

study 6

combined

age

by study

combined

student year

gender

by study

combined

race and ethnicity

by study

combined

education

by study

combined

SES

by study

combined