This code summarizes demographic information across studies and for each study separately.
Individual demographic data is not shared publicly due to concerns related to potential identifiability of participants, but is available upon request.
# demographic data
study1 = read.csv("../data/study1_demo.csv", stringsAsFactors = FALSE)
study2 = read.csv("../data/study2_demo.csv", stringsAsFactors = FALSE)
study3 = read.csv("../data/study3_demo.csv", stringsAsFactors = FALSE)
study4 = read.csv("../data/study4_demo.csv", stringsAsFactors = FALSE)
study5 = read.csv("../data/study5_demo.csv", stringsAsFactors = FALSE)
study6 = read.csv("../data/study6_demo.csv", stringsAsFactors = FALSE)
# merge
merged = bind_rows(study1, study2, study3, study4, study5 ,study6)
# tidy
demo = merged %>%
mutate(value = ifelse(value == "Would rather not say", "Prefer not to say", value),
value = ifelse(is.na(value), "Not reported", value)) %>%
group_by(study, item, value) %>%
summarize(n = n()) %>%
ungroup() %>%
filter(!item %in% c("gender_4_TEXT", "race_self")) %>%
rename(" " = item)
states = merged %>%
filter(grepl("state", item)) %>%
spread(item, value) %>%
filter(study == "study 1") %>%
group_by(state) %>%
summarize(n = n())
states %>%
usmap::plot_usmap(data = ., values = "n", color = "grey50") +
scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
theme(text = element_text(size = 12, family = "Futura Medium"),
legend.position = "right")
states = merged %>%
filter(grepl("state", item)) %>%
spread(item, value) %>%
filter(study == "study 5") %>%
group_by(state) %>%
summarize(n = n())
states %>%
usmap::plot_usmap(data = ., values = "n", color = "grey50") +
scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
theme(text = element_text(size = 12, family = "Futura Medium"),
legend.position = "right")
states = merged %>%
filter(grepl("state", item)) %>%
spread(item, value) %>%
filter(study == "study 6") %>%
group_by(state) %>%
summarize(n = n())
states %>%
usmap::plot_usmap(data = ., values = "n", color = "grey50") +
scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 10)) +
theme(text = element_text(size = 12, family = "Futura Medium"),
legend.position = "right")
states = merged %>%
filter(grepl("state", item)) %>%
spread(item, value) %>%
group_by(state) %>%
summarize(n = n())
states %>%
usmap::plot_usmap(data = ., values = "n", color = "grey50") +
scale_fill_gradient2(low = palette[1], mid = palette[2], midpoint = max(states$n) / 2, high = palette[3],
name = "", na.value = NA, limits = c(0, max(states$n)), breaks = seq(0, max(states$n), 50)) +
theme(text = element_text(size = 12, family = "Futura Medium"),
legend.position = "right")
merged %>%
filter(item == "age") %>%
mutate(value = as.numeric(value)) %>%
group_by(study) %>%
summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
`mean age` = mean(value, na.rm = TRUE),
`sd age` = sd(value, na.rm = TRUE)) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
study | age range | mean age | sd age |
---|---|---|---|
study 1 | 18 - 78 | 38.0 | 12.3 |
study 2 | 19 - 81 | 38.8 | 11.8 |
study 4 | 19 - 73 | 38.6 | 13.6 |
study 5 | 21 - 70 | 37.7 | 10.8 |
study 6 | 20 - 71 | 38.4 | 10.7 |
merged %>%
filter(item == "age") %>%
mutate(value = as.numeric(value)) %>%
summarize(`age range` = sprintf("%s - %s", min(value, na.rm = TRUE), max(value, na.rm = TRUE)),
`mean age` = mean(value, na.rm = TRUE),
`sd age` = sd(value, na.rm = TRUE)) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
age range | mean age | sd age |
---|---|---|
18 - 81 | 38.1 | 12 |
demo %>%
filter(` ` == "student_grade") %>%
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%"),
value = ifelse(is.na(value), "Not reported", value)) %>%
select(-n, -total, -` `) %>%
spread(study, percent) %>%
mutate(value = factor(value, c("First year", "Second year", "Third year",
"Fourth year", "Fifth year or higher"))) %>%
arrange(value) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value | study 3 |
---|---|
First year | 30.2% |
Second year | 33.5% |
Third year | 19.8% |
Fourth year | 15.7% |
Fifth year or higher | 0.8% |
demo %>%
filter(` ` == "gender") %>%
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%"),
value = ifelse(is.na(value), "Not reported", value)) %>%
select(-n, -total, -` `) %>%
spread(study, percent) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value | study 1 | study 2 | study 3 | study 4 | study 5 | study 6 |
---|---|---|---|---|---|---|
Man | 50.9% | 53.4% | 44.0% | 58.3% | 61.3% | 56.2% |
Non-binary / third gender | — | 0.5% | 2.0% | — | 0.3% | — |
Not reported | — | 0.2% | — | — | — | — |
Other | 0.3% | — | — | — | — | — |
Prefer not to say | 0.3% | 0.4% | 0.8% | — | 1.0% | 0.5% |
Woman | 48.4% | 45.5% | 53.2% | 41.7% | 37.5% | 43.3% |
demo %>%
filter(` ` == "gender") %>%
group_by(value) %>%
summarize(n = sum(n)) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%"),
value = ifelse(is.na(value), "Not reported", value)) %>%
select(-n, -total) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
value | percent |
---|---|
Man | 52.5% |
Non-binary / third gender | 0.2% |
Not reported | 0.0% |
Other | 0.2% |
Prefer not to say | 0.4% |
Woman | 46.6% |
hispanic_latinx = demo %>%
filter(` ` == "Hispanic or Latinx") %>%
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
select(-n, -total) %>%
spread(value, percent) %>%
select(study, Yes) %>%
rename("percent" = Yes) %>%
mutate(`race / ethnicity` = "Hispanic or Latinx")
demo %>%
filter(` ` == "race") %>%
filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%"),
value = ifelse(is.na(value), "Not reported", value)) %>%
select(study, value, percent) %>%
rename("race / ethnicity" = value) %>%
bind_rows(., hispanic_latinx) %>%
arrange(`race / ethnicity`) %>%
spread(study, percent) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
race / ethnicity | study 1 | study 2 | study 3 | study 5 | study 6 |
---|---|---|---|---|---|
American Indian or Alaskan Native | 0.7% | 0.9% | 0.8% | 1.6% | — |
Asian | 8.3% | 8.4% | 24.6% | 4.1% | 4.5% |
Black or African American | 11.1% | 8.2% | 9.7% | 14.6% | 8.3% |
Hispanic or Latinx | 10.3% | 13.3% | 12.1% | 16.5% | 10.6% |
More than one race | — | 0.9% | 6.0% | 0.3% | 2.5% |
Native Hawaiian or Other Pacific Islander | 0.2% | — | — | — | — |
Not reported | — | 0.2% | — | — | — |
Other | 2.6% | — | — | — | — |
Prefer not to say | — | 0.7% | 4.0% | 0.6% | 1.5% |
White | 77.0% | 80.6% | 54.8% | 78.7% | 83.1% |
hispanic_latinx = demo %>%
filter(` ` == "Hispanic or Latinx") %>%
group_by(value) %>%
summarize(n = sum(n)) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
select(-n, -total) %>%
spread(value, percent) %>%
select(Yes) %>%
rename("percent" = Yes) %>%
mutate(`race / ethnicity` = "Hispanic or Latinx")
demo %>%
filter(` ` == "race") %>%
filter(!value %in% c("Hispanic", "Latino")) %>% #counted already in the hispanic_latinx item
group_by(value) %>%
summarize(n = sum(n)) %>%
ungroup() %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%"),
value = ifelse(is.na(value), "Not reported", value)) %>%
select(value, percent) %>%
rename("race / ethnicity" = value) %>%
bind_rows(., hispanic_latinx) %>%
arrange(`race / ethnicity`) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
race / ethnicity | percent |
---|---|
American Indian or Alaskan Native | 0.8% |
Asian | 8.6% |
Black or African American | 10.6% |
Hispanic or Latinx | 11.5% |
More than one race | 0.9% |
Native Hawaiian or Other Pacific Islander | 0.1% |
Not reported | 0.0% |
Other | 1.5% |
Prefer not to say | 0.6% |
White | 76.9% |
demo %>%
filter(` ` == "highest degree completed") %>%
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)",
"Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
"Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
"Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
arrange(value) %>%
select(study, value, percent) %>%
rename("highest degree completed" = value) %>%
spread(study, percent) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
highest degree completed | study 1 | study 5 | study 6 |
---|---|---|---|
Less than high school | 0.2% | — | — |
High school graduate (diploma) | 5.7% | 4.8% | 8.6% |
High school graduate (GED) | 3.1% | 1.6% | 1.8% |
Some college (1-4 years, no degree) | 17.3% | 11.7% | 13.4% |
Associate’s degree (including occupational or academic degrees) | 10.5% | 6.7% | 9.6% |
Bachelor’s degree (BA, BS, etc) | 44.2% | 52.1% | 52.6% |
Master’s degree (MA, MS, MENG, MSW, etc) | 15.6% | 21.3% | 12.6% |
Professional school degree (MD, DDC, JD, etc) | 1.6% | 0.6% | 1.3% |
Doctorate degree (PhD, EdD, etc) | 1.9% | 1.3% | 0.3% |
demo %>%
filter(` ` == "highest degree completed") %>%
group_by(value) %>%
summarize(n = sum(n)) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
mutate(value = factor(value, levels = c("Less than high school", "High school graduate (diploma)", "High school graduate (GED)",
"Some college (1-4 years, no degree)", "Associate's degree (including occupational or academic degrees)",
"Bachelor's degree (BA, BS, etc)", "Master's degree (MA, MS, MENG, MSW, etc)",
"Professional school degree (MD, DDC, JD, etc)", "Doctorate degree (PhD, EdD, etc)"))) %>%
arrange(value) %>%
select(value, percent) %>%
rename("highest degree completed" = value) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
highest degree completed | percent |
---|---|
Less than high school | 0.2% |
High school graduate (diploma) | 6.0% |
High school graduate (GED) | 2.7% |
Some college (1-4 years, no degree) | 16.1% |
Associate’s degree (including occupational or academic degrees) | 9.9% |
Bachelor’s degree (BA, BS, etc) | 46.3% |
Master’s degree (MA, MS, MENG, MSW, etc) | 15.8% |
Professional school degree (MD, DDC, JD, etc) | 1.4% |
Doctorate degree (PhD, EdD, etc) | 1.6% |
demo %>%
filter(` ` == "household income") %>%
group_by(study) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
mutate(value = ifelse(is.na(value), "Not reported", value),
value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999",
"$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
"$100,000 and greater", "Not reported"))) %>%
arrange(value) %>%
select(study, value, percent) %>%
rename("household income" = value) %>%
spread(study, percent) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
household income | study 1 | study 5 | study 6 |
---|---|---|---|
Less than $5,000 | 1.3% | 0.6% | 1.5% |
$5,000 through $11,999 | 2.8% | 5.1% | 3.5% |
$12,000 through $15,999 | 2.6% | 4.4% | 2.3% |
$16,000 through $24,999 | 6.0% | 8.9% | 7.3% |
$25,000 through $34,999 | 10.1% | 9.8% | 10.1% |
$35,000 through $49,999 | 13.6% | 20.6% | 18.9% |
$50,000 through $74,999 | 25.3% | 27.3% | 31.7% |
$75,000 through $99,999 | 17.9% | 15.2% | 12.6% |
$100,000 and greater | 18.5% | 7.3% | 10.8% |
Not reported | 1.9% | 0.6% | 1.3% |
demo %>%
filter(` ` == "household income") %>%
group_by(value) %>%
summarize(n = sum(n)) %>%
mutate(total = sum(n),
percent = sprintf("%.1f%s", (n / total) * 100, "%")) %>%
mutate(value = ifelse(is.na(value), "Not reported", value),
value = factor(value, levels = c("Less than $5,000", "$5,000 through $11,999", "$12,000 through $15,999", "$16,000 through $24,999",
"$25,000 through $34,999", "$35,000 through $49,999", "$50,000 through $74,999", "$75,000 through $99,999",
"$100,000 and greater", "Not reported"))) %>%
arrange(value) %>%
select(value, percent) %>%
rename("household income" = value) %>%
mutate_if(is.character, funs(ifelse(is.na(.), "—", .))) %>%
kable(digits = 1) %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
household income | percent |
---|---|
Less than $5,000 | 1.3% |
$5,000 through $11,999 | 3.2% |
$12,000 through $15,999 | 2.8% |
$16,000 through $24,999 | 6.5% |
$25,000 through $34,999 | 10.1% |
$35,000 through $49,999 | 15.1% |
$50,000 through $74,999 | 26.5% |
$75,000 through $99,999 | 16.8% |
$100,000 and greater | 16.1% |
Not reported | 1.6% |