In these analyses, we assess the robustness of the relationships from the mega-analysis using specification curve analysis.

prep data

First, we load the relevant packages and data, and define the plotting aesthetics.

load packages

if (!require(tidyverse)) {
  install.packages('tidyverse')
}
if (!require(lmerTest)) {
  install.packages('lmerTest')
}
if (!require(purrr)) {
  install.packages('purrr')
}
if (!require(furrr)) {
  install.packages('furrr')
}
if (!require(ggpubr)) {
  install.packages('ggpubr')
}
if (!require(combinat)) {
  install.packages('combinat')
}
devtools::install_github("dcosme/specr", ref = "plotmods")
if (!require(report)) {
  install.packages('report')
}
report::cite_packages()

##   - Alboukadel Kassambara (2020). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.4.0. https://CRAN.R-project.org/package=ggpubr
##   - Davis Vaughan and Matt Dancho (2021). furrr: Apply Mapping Functions in Parallel using Futures. R package version 0.2.3. https://CRAN.R-project.org/package=furrr
##   - Douglas Bates and Martin Maechler (2021). Matrix: Sparse and Dense Matrix Classes and Methods. R package version 1.3-4. https://CRAN.R-project.org/package=Matrix
##   - Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
##   - H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
##   - Hadley Wickham (2019). stringr: Simple, Consistent Wrappers for Common String Operations. R package version 1.4.0. https://CRAN.R-project.org/package=stringr
##   - Hadley Wickham (2021). forcats: Tools for Working with Categorical Variables (Factors). R package version 0.5.1. https://CRAN.R-project.org/package=forcats
##   - Hadley Wickham and Maximilian Girlich (2022). tidyr: Tidy Messy Data. R package version 1.2.0. https://CRAN.R-project.org/package=tidyr
##   - Hadley Wickham, Jim Hester and Jennifer Bryan (2022). readr: Read Rectangular Text Data. R package version 2.1.2. https://CRAN.R-project.org/package=readr
##   - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2022). dplyr: A Grammar of Data Manipulation. R package version 1.0.8. https://CRAN.R-project.org/package=dplyr
##   - Henrik Bengtsson, A Unifying Framework for Parallel and Distributed Processing in R using Futures, The R Journal (2021) 13:2, pages 208-227, doi:10.32614/RJ-2021-048
##   - Kirill Müller and Hadley Wickham (2021). tibble: Simple Data Frames. R package version 3.1.6. https://CRAN.R-project.org/package=tibble
##   - Kuznetsova A, Brockhoff PB, Christensen RHB (2017). "lmerTest Package:Tests in Linear Mixed Effects Models." _Journal of StatisticalSoftware_, *82*(13), 1-26. doi: 10.18637/jss.v082.i13 (URL:https://doi.org/10.18637/jss.v082.i13).
##   - Lionel Henry and Hadley Wickham (2020). purrr: Functional Programming Tools. R package version 0.3.4. https://CRAN.R-project.org/package=purrr
##   - Makowski, D., Ben-Shachar, M.S., Patil, I. & Lüdecke, D. (2020). Automated Results Reporting as a Practical Tool to Improve Reproducibility and Methodological Best Practices Adoption. CRAN. Available from https://github.com/easystats/report. doi: .
##   - R Core Team (2021). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.
##   - Scott Chasalow (2012). combinat: combinatorics utilities. R package version 0.0-8. https://CRAN.R-project.org/package=combinat
##   - Wickham et al., (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686

define aesthetics

palette = c("#001219", "#005F73", "#0A9396", "#94D2BD", "#E9D8A6", "#EE9B00", "#CA6702", "#BB3E03", "#AE2012")
palette_sharing = c("#086E70", "#FFA600")
palette_relevance = c("#D14504", "#FB590E", "#004452", "#00667A")
palette_medium = c("#261132", "#FFB5B5")
palette_decisions = c("#3B9AB2", "#EBCC2A", "#F21A00")

plot_aes = theme_minimal() +
  theme(legend.position = "top",
        legend.text = element_text(size = 12),
        text = element_text(size = 16, family = "Futura Medium"),
        axis.text = element_text(color = "black"),
        axis.line = element_line(colour = "black"),
        axis.ticks.y = element_blank())

load and merge data

# demographics
study1_demo = read.csv("../data/study1_demo.csv", stringsAsFactors = FALSE)

study2_demo = read.csv("../data/study2_demo.csv", stringsAsFactors = FALSE)

study3_demo = read.csv("../data/study3_demo.csv", stringsAsFactors = FALSE)

study4_demo = read.csv("../data/study4_demo.csv", stringsAsFactors = FALSE)

study5_demo = read.csv("../data/study5_demo.csv", stringsAsFactors = FALSE)

study6_demo = read.csv("../data/study6_demo.csv", stringsAsFactors = FALSE)

merged_demo = bind_rows(study1_demo, study2_demo, study3_demo, study4_demo, study5_demo, study6_demo) %>%
  spread(item, value) %>%
  select(-gender_4_TEXT, -race_self) %>%
  rename("SES_degree" = `highest degree completed`,
         "SES_income" = `household income`,
         "hispanic_latinx" = `Hispanic or Latinx`) %>%
  mutate(age = as.numeric(age))

# load and tidy study data
study1 = read.csv("../data/study1.csv", stringsAsFactors = FALSE)

study2 = read.csv("../data/study2.csv", stringsAsFactors = FALSE)

study3 = read.csv("../data/study3.csv", stringsAsFactors = FALSE)

study4 = read.csv("../data/study4.csv", stringsAsFactors = FALSE)

study5 = read.csv("../data/study5.csv", stringsAsFactors = FALSE)

study6 = read.csv("../data/study6.csv", stringsAsFactors = FALSE)

merged = bind_rows(study1, study2, study3, study4, study5, study6) %>%
  group_by(study, sharing_type) %>%
  mutate(msg_share_std = scale(msg_share, scale = TRUE, center = TRUE),
         medium = ifelse(grepl("1|2|3", study), "social media", "newspaper")) %>% 
  left_join(., merged_demo) %>%
  mutate(gender = as.factor(gender),
         hispanic_latinx = as.factor(hispanic_latinx),
         race = ifelse(race == "Hispanic" | race == "Latino", NA, race),
         race = as.factor(race),
         SES_degree = factor(SES_degree, ordered = TRUE,
                             levels = c("Less than high school", "High school graduate (diploma)",
                                        "High school graduate (GED)",
                                        "Some college (1-4 years, no degree)",
                                        "Associate's degree (including occupational or academic degrees)",
                                        "Bachelor's degree (BA, BS, etc)",
                                        "Master's degree (MA, MS, MENG, MSW, etc)",
                                        "Professional school degree (MD, DDC, JD, etc)",
                                        "Doctorate degree (PhD, EdD, etc)")),
         SES_income = factor(SES_income, ordered = TRUE,
                             levels = c("Less than $5,000",
                                        "$5,000 through $11,999",
                                        "$12,000 through $15,999",
                                        "$16,000 through $24,999",
                                        "$25,000 through $34,999",
                                        "$35,000 through $49,999",
                                        "$50,000 through $74,999",
                                        "$75,000 through $99,999",
                                        "$100,000 and greater")))

prep for SCA

define functions

These functions are adapted from the specr package.

# specr functions
convert_formula = function(x, y, controls, random_effects, ...) {
    if (controls == "no covariates") {
        paste(y, "~", x, "+", random_effects)    
    } else {
        paste(y, "~", x, "+", controls, "+", random_effects)
    }
}

run_spec <- function(specs, df, conf.level, keep.results = FALSE) {

  # dependencies
  require(dplyr)
  require(purrr)
  

  results <- specs %>%
    mutate(formula = pmap(., convert_formula)) %>%
    tidyr::unnest(formula) %>%
    mutate(res = map2(model, formula, possibly(~ do.call(.x, list(data = df,
                                                                  formula = .y,
                                                                  control = lmerControl(optimizer = "bobyqa"))),
                                               otherwise = NULL))) %>%
    filter(!res == "NULL") %>%
    mutate(coefs = map(res,
                       broom.mixed::tidy,
                       conf.int = TRUE,
                       conf.level = .95),
           obs = map(res, nobs)) %>%
    tidyr::unnest(coefs) %>%
    tidyr::unnest(obs) %>%
    filter(term == x) %>%
    select(-term)

  if (isFALSE(keep.results)) {
    results <- results %>%
      select(-res)
  }

  return(results)
}

create_subsets <- function(df, subsets) {

  # dependencies
  require(dplyr)

  subsets %>%
    stack %>%
    purrr::pmap(~ filter(df, get(as.character(..2)) == ..1) %>%
                  mutate(filter = paste(..2, "=", ..1)))
}

tidy data

Define relevant variables and subset this data for modeling.

# specify model components and variables
dvs = "msg_share_std"
ivs = c("msg_rel_self_between", "msg_rel_self_within", "msg_rel_social_between", "msg_rel_social_within")
controls = c("age", "gender", "SES_degree", "SES_income", "race", "hispanic_latinx", "")
model = "lmer"
random_effects = "(1 + msg_rel_self_within + msg_rel_social_within | SID) + (1 | item)"
conf_level = .95

# get relevant variables
model_df = merged %>%
  select(study, SID, item, content, sharing_type, medium, !!dvs, !!ivs, !!controls[1:6]) %>%
  mutate(sharing_type = ifelse(sharing_type == 0, "broadcast", "narrowcast"))

generate model specifications from model components

models = expand.grid(x = ivs,
                                         y = dvs,
                                         model = model,
                                         controls = controls,
                                         control_ivs = paste(ivs, collapse = " _ "),
                                         random_effects = random_effects, stringsAsFactors = FALSE) %>%
  mutate(control_ivs = str_remove(control_ivs, x),
         control_ivs = str_replace(control_ivs, "^ _ ", ""),
         control_ivs = str_replace(control_ivs, " _ $", ""),
         control_ivs = str_replace(control_ivs, " _  _ ", " _ "),
         control_ivs = gsub(" _ ", " + ", control_ivs, fixed = TRUE),
         controls = ifelse(controls == "", control_ivs,
                           sprintf("%s + %s", control_ivs, controls))) %>%
  select(-control_ivs) %>%
  as_tibble()

create subsets

Create subsets of the data to estimate the models within.

# define subsets
subsets_1 = list(content = "covid",
                 sharing_type = "broadcast")
               
subsets_2 = list(sharing_type = unique(model_df$sharing_type),
                 medium = unique(model_df$medium))
               
subsets_3 = list(content = unique(model_df$content),
                 sharing_type = unique(model_df$sharing_type),
                 medium = unique(model_df$medium))

# create the subset dataframes if the models have not yet been run
if (!file.exists("models/sca_output_12.RDS")) {
  # broadcast covid
  subsets_1 = map(subsets_1, as.character)
  df_1 = subsets_1 %>%
      cross %>%
      map(~ create_subsets(subsets = .x, df = model_df) %>%
            map(~ dplyr::select(.x, -filter)) %>%
            reduce(dplyr::inner_join) %>%
            dplyr::mutate(filter = paste(names(.x),
                                         .x,
                                         collapse = " & ",
                                         sep = " = "))) %>%
    Filter(function(x) nrow(x) > 0, .) # filter out empty dataframes
  
  # medium, sharing type
  subsets_2 = map(subsets_2, as.character)
  df_2 = subsets_2 %>%
      cross %>%
      map(~ create_subsets(subsets = .x, df = model_df) %>%
            map(~ dplyr::select(.x, -filter)) %>%
            reduce(dplyr::inner_join) %>%
            dplyr::mutate(filter = paste(names(.x),
                                         .x,
                                         collapse = " & ",
                                         sep = " = "))) %>%
    Filter(function(x) nrow(x) > 0, .) %>% # filter out empty dataframes
    Filter(function(x) sum(x$filter == "sharing_type = narrowcast & medium = social media") == 0, .)

  # content, medium, sharing type
  subsets_3 = map(subsets_3, as.character)
  df_3 = subsets_3 %>%
      cross %>%
      map(~ create_subsets(subsets = .x, df = model_df) %>%
            map(~ dplyr::select(.x, -filter)) %>%
            reduce(dplyr::inner_join) %>%
            dplyr::mutate(filter = paste(names(.x),
                                         .x,
                                         collapse = " & ",
                                         sep = " = "))) %>%
    Filter(function(x) nrow(x) > 0, .) # filter out empty dataframes
  
# combine subsets 1 and 2 
  df_all = append(df_1, df_2) %>%
    append(., df_3)
}

run SCA

run models

Run models or load saved output from file.

if (!file.exists("models/sca_output.RDS")) {
  # setup parallelization with furrr
  plan(multisession, workers = 10)
  
  # run SCA
  output = furrr::future_map_dfr(df_all, ~ run_spec(models, .x,
                                                    conf.level = conf_level,
                                                    keep.results = FALSE) %>%
                          mutate(subsets = unique(.x$filter)))
  # save output
  saveRDS(output, "models/sca_output.RDS")

} else {
  
  #load output
  output = readRDS("models/sca_output.RDS")
}

tidy output

output = bind_rows(output) %>%
  mutate(x = gsub("msg_rel_", "", x),
         x = gsub("_", " ", x),
         content = ifelse(grepl("covid", subsets), "COVID-19",
                   ifelse(grepl("voting", subsets), "voting",
                   ifelse(grepl("health", subsets), "health",
                   ifelse(grepl("climate", subsets), "climate",
                   ifelse(subsets == "sharing_type = broadcast & medium = social media", "COVID-19 & voting",
                   ifelse(subsets == "sharing_type = broadcast & medium = newspaper", "COVID-19, health & climate",  
                   ifelse(subsets == "sharing_type = narrowcast & medium = newspaper", "COVID-19 & climate", "all"))))))),
         `COVID-19` = ifelse(grepl("COVID-19", content), "COVID-19", NA),
         voting = ifelse(grepl("voting", content), "voting", NA),
         health = ifelse(grepl("health", content), "health", NA),
         climate = ifelse(grepl("climate", content), "climate", NA),
         sharing = ifelse(grepl("broad", subsets), "broadcast",
                  ifelse(grepl("narrow", subsets), "narrowcast", "all")),
         medium = ifelse(grepl("media", subsets), "social media",
                  ifelse(subsets == "content = covid & sharing_type = broadcast", "social media",
                  ifelse(grepl("news", subsets), "newspaper", "all"))),
         relevance = x,
         controls = ifelse(grepl("age|SES|race|hispanic", controls), "controls", "no controls"),
         subset = case_when(subsets == "content = covid & sharing_type = broadcast" ~ "10",
                                   subsets == "sharing_type = broadcast & medium = social media" ~ "11",
                                   subsets == "sharing_type = broadcast & medium = newspaper" ~ "12",
                                   subsets == "sharing_type = narrowcast & medium = newspaper" ~ "13",
                                   subsets == "content = covid & sharing_type = broadcast & medium = social media" ~ "1",
                                   subsets == "content = voting & sharing_type = broadcast & medium = social media" ~ "4",
                                   subsets == "content = voting & sharing_type = narrowcast & medium = social media" ~ "5",
                                   subsets == "content = covid & sharing_type = broadcast & medium = newspaper" ~ "2",
                                   subsets == "content = health & sharing_type = broadcast & medium = newspaper" ~ "6",
                                   subsets == "content = climate & sharing_type = broadcast & medium = newspaper" ~ "8",
                                   subsets == "content = covid & sharing_type = narrowcast & medium = newspaper" ~ "3",
                                   subsets == "content = health & sharing_type = narrowcast & medium = newspaper" ~ "7",
                                   subsets == "content = climate & sharing_type = narrowcast & medium = newspaper" ~ "9"),
         subset = as.numeric(subset))

plot SCA

decisions

This plot depicts the set of models run in each subset of the data. It is supplementary and does not appear in the manuscript.

# define edges
l1 = expand.grid(from = dvs, to = ivs) %>%
  group_by(to) %>%
  mutate(key = "relevance variables",
         to = sprintf("%s_%s", to, row_number()))
l2 = expand.grid(from = unique(l1$to), to = controls) %>%
  group_by(to) %>%
  mutate(key = "control variables",
         to = sprintf("%s_%s", to, row_number()))
edge_list = bind_rows(l1, l2)

# plot
decision_plot = igraph::graph_from_data_frame(edge_list)

ggraph::ggraph(decision_plot, layout = 'dendrogram', circular = FALSE) + 
  ggraph::geom_edge_diagonal(aes(color = key), strength = 0) +
  ggraph::scale_edge_color_manual(name = "decision key", values = palette_decisions) +
  theme_void() +
  theme(legend.position = "top")

define functions

modified specr functions

These functions are adapted from the specr package.

format_results <- function(df, var, null = 0, desc = FALSE) {

  # rank specs
  if (isFALSE(desc)) {
    df <- df %>%
      dplyr::arrange(estimate)
  } else {
    df <- df %>%
      dplyr::arrange(desc(estimate))
  }

  # create rank variable and color significance
  df <- df %>%
    dplyr::mutate(specifications = 1:n(),
           color = case_when(conf.low > null ~ "black",
                             conf.high < null ~ "black",
                             TRUE ~ "darkgrey"))
  return(df)
}

plot_choices <- function(df,
                         var = .data$estimate,
                         choices = c("x", "y", "model", "controls", "subsets"),
                         desc = FALSE,
                         null = 0,
                         size = 5,
                         alpha_values = c(1, 1),
                         color_vars = NULL,
                         palette = NULL,
                         reorder_var = NULL,
                         rename_controls = FALSE,
                         ignore_vars = FALSE,
                         collapse_content = FALSE) {

  value <- key <- NULL

  var <- enquo(var)
  
  if (collapse_content == TRUE) {
  
    df = df %>%
      format_results(var = var, null = null, desc = desc) %>%
      select(-content) %>%
      gather(content_type, content, `COVID-19`, voting, health, climate) %>%
      filter(!is.na(content)) %>%
      select(-content_type)
  
  } else {
    df = df %>%
      format_results(var = var, null = null, desc = desc)
  }

  if (!is.null(color_vars)) {
    color_num_key = df %>%
      select(!!color_vars) %>%
      unique() %>%
      arrange(get(color_vars)) %>%
      mutate(color_num = row_number())
    

    data_df = df %>%
      left_join(., color_num_key) %>%
      mutate(controls = ifelse(grepl("[+]", controls), "all covariates", controls),
             alpha = ifelse(color == "black", "yes", "no"),
             color = sprintf("%s", eval(parse(text = "palette[color_num]")))) %>%
      tidyr::gather(key, value, choices) %>%
      mutate(key = ifelse(isFALSE(rename_controls) == FALSE & key == "controls", rename_controls, key),
             value = ifelse(isFALSE(ignore_vars) == FALSE & value %in% ignore_vars, NA, value)) %>%
      filter(!is.na(value)) %>%
      unique() %>%
      mutate(key = factor(key, levels=unique(key)))

  } else {

    data_df = df %>%
      mutate(controls = ifelse(grepl("[+]", controls), "all covariates", controls)) %>%
      tidyr::gather(key, value, choices) %>%
      mutate(key = ifelse(isFALSE(rename_controls) == FALSE & key == "controls", rename_controls, key),
             value = ifelse(isFALSE(ignore_vars) == FALSE & value %in% ignore_vars, NA, value),
             alpha = "yes") %>%
      filter(!is.na(value)) %>%
      unique() %>%
      mutate(key = factor(key, levels=unique(key)))
  }

  if (!is.null(reorder_var)) {
        
    order = data_df %>%
      select(key, value) %>%
      filter(key == !!reorder_var) %>%
      unique() %>%
      mutate(order = as.numeric(value))
    
    data_df %>%
      left_join(., order) %>%
      ggplot(aes(x = .data$specifications,
                 y = .data$value,
                 color = .data$color)) +
      geom_point(aes(x = .data$specifications,
                     y = reorder(.data$value, order),
                     alpha = alpha),
                 shape = 124,
                 size = size) +
      scale_color_identity() +
      scale_alpha_manual(values = alpha_values) +
      theme_minimal() +
      facet_grid(.data$key~1, scales = "free_y", space = "free_y") +
      theme(
        axis.line = element_line("black", size = .5),
        legend.position = "none",
        panel.spacing = unit(.75, "lines"),
        axis.text = element_text(colour = "black"),
        strip.text.x = element_blank()) +
      labs(x = "", y = "")
    
  } else {
    data_df %>%
      ggplot(aes(x = .data$specifications,
                 y = .data$value,
                 color = .data$color)) +
      geom_point(aes(x = .data$specifications,
                     y = .data$value,
                     alpha = alpha),
                 shape = 124,
                 size = size) +
      scale_color_identity() +
      scale_alpha_manual(values = alpha_values) +
      theme_minimal() +
      facet_grid(.data$key~1, scales = "free_y", space = "free_y") +
      theme(
        axis.line = element_line("black", size = .5),
        legend.position = "none",
        panel.spacing = unit(.75, "lines"),
        axis.text = element_text(colour = "black"),
        strip.text.x = element_blank()) +
      labs(x = "", y = "")
  }
}

plotting functions

plot_sca = function(data, combined = TRUE, labels = c("A", "B"),
                    title = FALSE, limits = NULL,
                    point_size = .5, point_alpha = 1,
                    ci = TRUE, ci_alpha = .5, ci_size = .5,
                    line = FALSE, line_size = 1,
                    median_alpha = 1, median_size = 1,
                    text_size = 14, title_size = 6,
                    color_vars = NULL, palette = palette, legend = TRUE,
                    choices  = c("x", "content", "medium", "sharing", "controls"),
                    alpha_values = c(0.5, 1),
                    remove_y = FALSE, remove_facet = FALSE,
                    collapse_content = FALSE, reorder_var = NULL) {
  
  medians = data %>%
    group_by(get(color_vars)) %>%
    summarize(median = median(estimate)) %>%
    ungroup() %>%
    mutate(color = sprintf("%s", palette))
  
  if (combined == TRUE) {
      p1 = specr::plot_curve(data, point_size = point_size, point_alpha = point_alpha,
                      ci = ci, ci_alpha = ci_alpha, ci_size = ci_size,
                      line = line, line_size = line_size,
                      limits = limits) +
        geom_hline(data = medians, aes(yintercept = median, color = color, linetype = `get(color_vars)`),
                   alpha = median_alpha, size = median_size, show_guide = TRUE) +
        scale_linetype_manual(name = "", values = rep(1, nrow(medians)), 
                              guide = guide_legend(override.aes = list(color = palette))) +
        labs(x = "", y = "standarized\nregression coefficient\n")  +
        theme(legend.position = "top",
              text = element_text(size = text_size, family = "Futura Medium"))
      
      if (legend == FALSE) {
        p1 = p1 +
          theme(legend.position = "none")
      }
      
      if (title == TRUE) {
        if (is.null(limits)) {
          title_range = max(data$conf.high) - min(data$conf.high)
          title_y = max(data$conf.high) - (title_range / 10)
        } else {
          title_range = limits[2] - limits[1]
          title_y = limits[2] - (title_range / 10)
        }
        p1 = p1 + annotate("text", -Inf, Inf, label = unique(data$x), fontface = 2, size = title_size,
                       x = 0.5*(1 + nrow(data)), 
                       y = title_y)
      }
      
      if (!is.null(color_vars)) {
        p2 = plot_choices(data, choices = choices,
                          alpha_values = alpha_values, color_vars = color_vars,
                          palette = palette, collapse_content = collapse_content,
                          reorder_var = reorder_var) +
          labs(x = "\nspecifications (ranked)")  +
          theme(strip.text.x = element_blank(),
                text = element_text(size = text_size, family = "Futura Medium"))
      } else {
        p2 = plot_choices(data, choices = choices,
                          alpha_values = alpha_values, collapse_content = collapse_content,
                          reorder_var = reorder_var) +
          labs(x = "\nspecifications (ranked)")  +
          theme(strip.text.x = element_blank(),
                text = element_text(size = text_size, family = "Futura Medium"))
      }
        
  } else {
      p1 = specr::plot_curve(data, point_size = point_size, point_alpha = point_alpha,
                      ci_alpha = ci_alpha, ci_size = ci_size) +
        geom_hline(yintercept = 0, linetype = "solid", color = "black", size = .5) +
        labs(x = "", y = "standarized\nregression coefficient\n") +
        theme(text = element_text(size = text_size, family = "Futura Medium"))
      
      if (title == TRUE) {
        if (is.null(limits)) {
          title_range = max(data$conf.high) - min(data$conf.high)
          title_y = max(data$conf.high) - (title_range / 10)
        } else {
          title_range = limits[2] - limits[1]
          title_y = limits[2] - (title_range / 10)
        }
        p1 = p1 + annotate("text", -Inf, Inf, label = unique(data$y), fontface = 2, size = title_size,
                       x = 0.5*(1 + nrow(data)), 
                       y = title_y)
      }
      
      p2 = plot_choices(data, choices = choices,
                        alpha_values = alpha_values, collapse_content = collapse_content,
                        reorder_var = reorder_var) +
        labs(x = "\nspecification number (ranked)") +
        theme(strip.text.x = element_blank(),
              text = element_text(size = text_size, family = "Futura Medium"))
  }
  
  if (remove_y == TRUE) {
    p1 = p1 + labs(y = "")
    
    p2 = p2 + theme(axis.text.y = element_blank(),
                    axis.ticks.y = element_blank()) +
      labs(y = "")
  }

  if (remove_facet == TRUE) {
    p2 = p2 + theme(strip.text.y = element_blank())
  }
  
  specr::plot_specs(plot_a = p1,
             plot_b = p2,
             labels = labels,
             rel_height = c(.35, .65))
}

plot_sca_compare = function(data, pointrange = TRUE, labels = c("A", "B"), 
                            rel_heights = c(.75, .25), rel_widths = c(.75, .25), 
                            title = FALSE, text_size = 14, title_size = 6, n_rows = 1, angle_text = FALSE,
                            remove_x = FALSE, remove_y = FALSE, sig = NULL) {
  
  # source raincloud plot
  source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
  
  # merge and tidy for plotting
  plot_data = data %>%
    group_by(x) %>%
    arrange(estimate) %>%
    mutate(specification = row_number()) %>%
    ungroup() %>%
    unique()
  
  # labels
  median_cl_boot = function(x, conf = 0.95, df = TRUE, ci = "low") {
  
    lconf = (1 - conf)/2
    uconf = 1 - lconf
    require(boot)
    bmedian = function(x, ind) median(x[ind])
    bt = boot(x, bmedian, 1000)
    bb = boot.ci(bt, type = "perc")
    
    if (df == TRUE){
      data.frame(y = median(x),
                 ymin = quantile(bt$t, lconf), 
                 ymax = quantile(bt$t, uconf))
      
    } else {
      if (ci == "low") {
        quantile(bt$t, lconf)
      } else {
        quantile(bt$t, uconf)
      }
    }
  }
  
  labs = plot_data %>%
    group_by(x) %>%
    summarize(med = median(estimate),
              low = median_cl_boot(estimate, df = FALSE, ci = "low"),
              high = median_cl_boot(estimate, df = FALSE, ci = "high")) %>%
    mutate(range = max(high) - min(low),
           estimate = ifelse(med > 0, high + (range / 10), low - (range / 10)),
           label = ifelse(x %in% sig, "*", ""))
  
  # plot curves
  if (pointrange == TRUE) {
    a = plot_data %>%
    ggplot(aes(specification, estimate, color = x)) +
      geom_linerange(aes(ymin = conf.low, ymax = conf.high), size = .1) +
      geom_point() +
      geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 1) +
      scale_color_manual(name = "", values = palette_relevance) +
      scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
      labs(x = "\nspecification number (ranked)", y = "standarized\negression coefficient\n") + 
      theme_minimal() + 
      theme(strip.text = element_blank(), 
            axis.line = element_line("black", size = 0.5), 
            legend.position = c(.5, .1), 
            legend.direction = "horizontal",
            panel.spacing = unit(0.75, "lines"), 
            axis.text = element_text(colour = "black"),
            text = element_text(size = text_size, family = "Futura Medium"))
    if (title == TRUE) {
      a = a + annotate("text", -Inf, Inf, label = unique(plot.data$y), fontface = 2, size = title_size,
                       x = 0.5*(min(plot.data$specification) + max(plot.data$specification)), 
                       y = max(plot.data$conf.high))
    }
    
  } else {
    a = plot_data %>%
      ggplot(aes(specification, estimate, color = x)) +
      geom_point() +
      geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 1) +
      scale_color_manual(name = "", values = palette_relevance) +
      scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
      labs(x = "\nspecification number (ranked)", y = "standarized\nregression coefficient\n") + 
      theme_minimal() + 
      theme(strip.text = element_blank(), 
            axis.line = element_line("black", size = 0.5), 
            legend.position = "none", 
            legend.direction = "horizontal",
            panel.spacing = unit(0.75, "lines"), 
            axis.text = element_text(colour = "black"),
            text = element_text(size = text_size, family = "Futura Medium"))
    if (title == TRUE) {
      a = a + annotate("text", -Inf, Inf, label = unique(plot.data$y), fontface = 2, size = title_size,
                       x = 0.5*(min(plot.data$specification) + max(plot.data$specification)), 
                       y = max(plot.data$estimate))    
      }
  }
  
    b = plot_data %>%
      group_by(x) %>%
      mutate(order = median(estimate)) %>%
      ggplot(aes(reorder(x, order), estimate, fill = x)) +
      geom_flat_violin(position = position_nudge(x = .1, y = 0), color = FALSE) +
      geom_point(aes(color = x), position = position_jitter(width = .05), size = .5, alpha = .5) + 
      geom_boxplot(width = .1, outlier.shape = NA, fill = NA) +
      geom_text(data = labs, aes(label = label, x = x, y = estimate), size = 6) +
      scale_fill_manual(name = "", values = palette_relevance) +
      scale_color_manual(name = "", values = palette_relevance) +
      scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
      labs(x = "\n", y = "standarized\nregression coefficient\n") + 
      theme_minimal() + 
      theme(strip.text = element_blank(), 
            axis.line = element_line("black", size = 0.5), 
            legend.position = "none", 
            panel.spacing = unit(0.75, "lines"), 
            axis.text = element_text(colour = "black"),
            text = element_text(size = text_size, family = "Futura Medium"))
    
    if (angle_text == TRUE) {
      b = b + theme(axis.text.x = element_text(angle = 45, hjust = 1))
    }
    
  if (n_rows == 1) {
    a = a + theme(legend.position = c(.5, .1))
    b = b + coord_flip() +
      labs(x = "\n", y = "\nmedian") + 
      theme(axis.text.x = element_text(angle = 0, hjust = 1),
            axis.text.y = element_blank())
  }     
    

  if (remove_x == TRUE) {
    a = a + labs(x = "")
    
    if (n_rows == 1) {
      b = b + labs(y = "")
    } else {
      b = b + labs(x = "")
    }
  }    
  
  if (remove_y == TRUE) {
    a = a + labs(y = "")
    
    if (n_rows == 1) {
      b = b + labs(x = "")
    } else {
      b = b + labs(y = "")
    }
  }  
    
  cowplot::plot_grid(a, b, labels = labels, rel_heights = rel_heights, rel_widths = rel_widths, nrow = n_rows)
}

main manuscript plot

This plot is reported in Figure 3.

self_between = plot_sca(data = filter(output, grepl("self between", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "medium", palette = palette_medium,
         remove_facet = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls"), limits = c(-.3, 1),
         collapse_content = TRUE)

social_between = plot_sca(data = filter(output, grepl("social between", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(.5, 1), text_size = 18,
         color_vars = "medium", palette = palette_medium,
         remove_y = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls"), limits = c(-.3, 1),
         collapse_content = TRUE)

self_within = plot_sca(data = filter(output, grepl("self within", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "sharing", palette = palette_sharing,
         remove_facet = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls"), limits = c(-0, .4),
         collapse_content = TRUE)

social_within = plot_sca(data = filter(output, grepl("social within", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "sharing", palette = palette_sharing,
         remove_y = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls"), limits = c(-0, .4),
         collapse_content = TRUE)

ggarrange(self_between, social_between, self_within, social_within, 
          ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), widths = c(.51, .49, .51, .49))

supplementary material plots

combined

This plot is reported in Figure S5.

plot_sca(data = output, combined = TRUE, title = FALSE, text_size = 20, median_size = 1.25,
         color_vars = "x", palette = palette_relevance, alpha_values = c(.2, .5),
         choices = c("relevance", "content", "medium", "sharing", "controls"),
         collapse_content = TRUE)

include subsets

between-person relevance

This plot is reported in Figure S3.

self_between_subset = plot_sca(data = filter(output, grepl("self between", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "medium", palette = palette_medium,
         remove_facet = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls", "subset"), limits = c(-.3, 1),
         collapse_content = TRUE, reorder_var = "subset")

social_between_subset= plot_sca(data = filter(output, grepl("social between", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(.5, 1), text_size = 18,
         color_vars = "medium", palette = palette_medium,
         remove_y = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls", "subset"), limits = c(-.3, 1),
         collapse_content = TRUE, reorder_var = "subset")

ggarrange(self_between_subset, social_between_subset, ncol = 2, labels = c("A", "B"), widths = c(.51, .49))

within-person relevance

This plot is reported in Figure S4.

self_within_subset = plot_sca(data = filter(output, grepl("self within", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "sharing", palette = palette_sharing,
         remove_facet = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls", "subset"), limits = c(-0, .4),
         collapse_content = TRUE, reorder_var = "subset")

social_within_subset = plot_sca(data = filter(output, grepl("social within", x)), combined = TRUE, title = TRUE,
         ci_alpha = 1, alpha_values = c(1, 1), text_size = 18,
         color_vars = "sharing", palette = palette_sharing,
         remove_y = TRUE, labels = c("", ""), median_size = .75,
         choices = c("content", "medium", "sharing", "controls", "subset"), limits = c(-0, .4),
         collapse_content = TRUE, reorder_var = "subset")

ggarrange(self_within_subset, social_within_subset, ncol = 2, labels = c("A", "B"), widths = c(.51, .49))

curve comparison

This plot is reported in Figure 2.

plot_sca_compare(data = output, pointrange = FALSE, n_rows = 2, rel_heights = c(.5, .5))

tables

Descriptive stats for the curve of each relevance variable.

combined

This table is reported in Table 4.

n_models = output %>%
  filter(relevance == "self within")

output %>%
  mutate(positive = ifelse(estimate > 0, 1, 0),
         negative = ifelse(estimate < 0, 1, 0),
         sig = case_when(conf.low > 0 ~ 1,
                             conf.high < 0 ~ 1,
                             TRUE ~ 0),
         positive_significant = ifelse(positive == 1 & sig == 1, 1, 0),
         negative_significant = ifelse(negative == 1 & sig == 1, 1, 0)) %>%
  group_by(relevance) %>%
  summarize(min = min(estimate),
            max = max(estimate),
            median = median(estimate),
            positive = (sum(positive) / nrow(n_models)) * 100,
            negative = (sum(negative) / nrow(n_models)) * 100,
            positive_significant = (sum(positive_significant) / nrow(n_models)) * 100,
            negative_significant = (sum(negative_significant) / nrow(n_models)) * 100) %>%
  mutate(range = sprintf("%.2f, %.2f", min, max)) %>%
  select(relevance, median, range, positive, negative, positive_significant, negative_significant) %>%
  knitr::kable(digits = 2)

relevance	median	range	positive	negative	positive_significant
self between	0.46	0.22, 0.74	100.00	0.00	100.00
self within	0.16	0.08, 0.22	100.00	0.00	100.00
social between	0.13	-0.08, 0.24	89.53	10.47	63.95
social within	0.14	0.10, 0.30	100.00	0.00	100.00

by sharing type

This table is reported in Table S4.

n_broadcast = output %>%
  filter(sharing == "broadcast") %>%
  filter(relevance == "self between")

n_narrowcast = output %>%
  filter(sharing == "narrowcast") %>%
  filter(relevance == "self between")

output %>%
  mutate(positive = ifelse(estimate > 0, 1, 0),
         negative = ifelse(estimate < 0, 1, 0),
         sig = case_when(conf.low > 0 ~ 1,
                             conf.high < 0 ~ 1,
                             TRUE ~ 0),
         positive_significant = ifelse(positive == 1 & sig == 1, 1, 0),
         negative_significant = ifelse(negative == 1 & sig == 1, 1, 0)) %>%
  group_by(relevance, sharing) %>%
  summarize(min = min(estimate),
            max = max(estimate),
            median = median(estimate),
            positive = sum(positive),
            negative = sum(negative),
            positive_significant = sum(positive_significant),
            negative_significant = sum(negative_significant)) %>%
  gather(item, value, contains("positive"), contains("negative")) %>%
  mutate(value = ifelse(sharing == "broadcast", (value / nrow(n_broadcast)) * 100, (value / nrow(n_narrowcast)) * 100),
         range = sprintf("%.2f, %.2f", min, max)) %>%
  spread(item, value) %>%
  select(sharing, relevance, median, range, positive, negative, positive_significant, negative_significant) %>%
  knitr::kable(digits = 2)

sharing	relevance	median	range	positive	negative	positive_significant
broadcast	self between	0.43	0.22, 0.74	100.00	0.00	100.00
narrowcast	self between	0.48	0.24, 0.53	100.00	0.00	100.00
broadcast	self within	0.17	0.08, 0.22	100.00	0.00	100.00
narrowcast	self within	0.12	0.11, 0.13	100.00	0.00	100.00
broadcast	social between	0.12	-0.08, 0.24	83.33	16.67	55.56
narrowcast	social between	0.13	0.11, 0.22	100.00	0.00	78.12
broadcast	social within	0.13	0.10, 0.19	100.00	0.00	100.00
narrowcast	social within	0.22	0.16, 0.30	100.00	0.00	100.00

by message medium

This table is reported in Table S4.

n_social = output %>%
  filter(medium == "social media") %>%
  filter(relevance == "self between")

n_newspaper = output %>%
  filter(medium == "newspaper") %>%
  filter(relevance == "self between")

output %>%
  mutate(positive = ifelse(estimate > 0, 1, 0),
         negative = ifelse(estimate < 0, 1, 0),
         sig = case_when(conf.low > 0 ~ 1,
                             conf.high < 0 ~ 1,
                             TRUE ~ 0),
         positive_significant = ifelse(positive == 1 & sig == 1, 1, 0),
         negative_significant = ifelse(negative == 1 & sig == 1, 1, 0)) %>%
  group_by(relevance, medium) %>%
  summarize(min = min(estimate),
            max = max(estimate),
            median = median(estimate),
            positive = sum(positive),
            negative = sum(negative),
            positive_significant = sum(positive_significant),
            negative_significant = sum(negative_significant)) %>%
  gather(item, value, contains("positive"), contains("negative")) %>%
  mutate(value = ifelse(medium == "social media", (value / nrow(n_social)) * 100, (value / nrow(n_newspaper)) * 100),
         range = sprintf("%.2f, %.2f", min, max)) %>%
  spread(item, value) %>%
  select(medium, relevance, median, range, positive, negative, positive_significant, negative_significant) %>%
  knitr::kable(digits = 2)

medium	relevance	median	range	positive	negative	positive_significant
newspaper	self between	0.50	0.42, 0.74	100.00	0.00	100.00
social media	self between	0.31	0.22, 0.36	100.00	0.00	100.00
newspaper	self within	0.15	0.12, 0.22	100.00	0.00	100.00
social media	self within	0.16	0.08, 0.17	100.00	0.00	100.00
newspaper	social between	0.12	-0.08, 0.20	83.93	16.07	44.64
social media	social between	0.18	0.15, 0.24	100.00	0.00	100.00
newspaper	social within	0.19	0.11, 0.30	100.00	0.00	100.00
social media	social within	0.14	0.10, 0.16	100.00	0.00	100.00

Specification curve analyses

2022-06-06

prep data

load packages

define aesthetics

load and merge data

prep for SCA

define functions

tidy data

generate model specifications from model components

create subsets

run SCA

run models

tidy output

plot SCA

decisions

define functions

modified specr functions

plotting functions

main manuscript plot

supplementary material plots

combined

include subsets

between-person relevance

within-person relevance

curve comparison

tables

combined

by message medium