Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(readxl)
library(here)
here() starts at /Users/ethanmilne/Documents/GitHub/ApproachingSignificance
library(reshape2)

Attaching package: 'reshape2'

The following object is masked from 'package:tidyr':

    smiths
library(ggtext)
library(ggpubr)

Load Data

raw <- read_csv(here(getwd(), "Data", "qualtrics_clean.csv")) |>
  rename()
Rows: 200 Columns: 40
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (40): participantId, Politics, donate, donate_foreign, animal_welfare, c...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
litreview <- read_excel(here(getwd(), "Data", "litreview.xlsx"))

Results Storage

study1_results <- list()

Cleaning

Survey Data

Transform to Numeric

clean <- raw |>
  mutate(across(
    c(3:40),
    ~ case_when(
      . == "Very liberal" ~ 1,
      . == "Moderately liberal" ~ 2,
      . == "Somewhat liberal" ~ 3,
      . == "Neither liberal nor conservative" ~ 4,
      . == "Somewhat conservative" ~ 5,
      . == "Moderately conservative" ~ 6,
      . == "Very conservative" ~ 7
    )
  )) 

Averaging Scores

Overall Scores

overall_data <- clean |>
  melt(by = c("participantId")) |>
  group_by(variable) |>
  summarize(
    overall_politics = mean(value, na.rm=T),
    overall_sd = sd(value, na.rm=T),
    overall_n = n(),
  ) |>
  mutate(
    overall_se = overall_sd/sqrt(overall_n),
    overall_low = overall_politics - qt(1-(0.05/2), overall_n-1)*overall_se,
    overall_high = overall_politics + qt(1-(0.05/2), overall_n-1)*overall_se
  ) |>
  select(
    c(variable, overall_politics, overall_low, overall_high)
  )
Using participantId, Politics as id variables

Democrat Scores

dem_data <- clean |>
  melt(by = c("participantId")) |>
  group_by(variable, Politics) |>
  filter(Politics == "Democrats") |>
  summarize(
    dem_politics = mean(value, na.rm=T),
    dem_sd = sd(value, na.rm=T),
    dem_n = n(),
  ) |>
  mutate(
    dem_se = dem_sd/sqrt(dem_n),
    dem_low = dem_politics - qt(1-(0.05/2), dem_n-1)*dem_se,
    dem_high = dem_politics + qt(1-(0.05/2), dem_n-1)*dem_se
  ) |>
  select(
    c(variable, dem_politics, dem_low, dem_high)
  )
Using participantId, Politics as id variables
`summarise()` has grouped output by 'variable'. You can override using the
`.groups` argument.

Democrat Scores

rep_data <- clean |>
  melt(by = c("participantId")) |>
  group_by(variable, Politics) |>
  filter(Politics == "Republicans") |>
  summarize(
    rep_politics = mean(value, na.rm=T),
    rep_sd = sd(value, na.rm=T),
    rep_n = n(),
  ) |>
  mutate(
    rep_se = rep_sd/sqrt(rep_n),
    rep_low = rep_politics - qt(1-(0.05/2), rep_n-1)*rep_se,
    rep_high = rep_politics + qt(1-(0.05/2), rep_n-1)*rep_se
  ) |>
  select(
    c(variable, rep_politics, rep_low, rep_high)
  )
Using participantId, Politics as id variables
`summarise()` has grouped output by 'variable'. You can override using the
`.groups` argument.

Combining Scores

joined <- inner_join(overall_data, dem_data) |>
  inner_join(rep_data)
Joining with `by = join_by(variable)`
Joining with `by = join_by(variable)`

Lit Review Data

Weightings

Stimuli-based

stimuli_weights <- litreview |>
  group_by(code) |>
  summarize(
    stimuli_weighting = n()/nrow(litreview)
  )

Study-based

study_temp <- litreview |>
  mutate(
    title_study = paste("Study", study, title, sep = " ")
  ) |>
  group_by(title_study, code) |>
  summarize(
    n = n()
  )
`summarise()` has grouped output by 'title_study'. You can override using the
`.groups` argument.
stimuli_temp <- study_temp |>
  group_by(title_study) |>
  summarize(
    total_stimuli = sum(n)
  ) 

study_weights <- inner_join(stimuli_temp, study_temp) |>
  mutate(weight = (n / total_stimuli) * (1 / length(unique(
    study_temp$title_study
  )))) |>
  group_by(code) |>
  summarize(
    study_weighting = sum(weight)
  )
Joining with `by = join_by(title_study)`

Paper-based

paper_temp <- litreview |>
  group_by(title, code) |>
  summarize(
    n = n()
  )
`summarise()` has grouped output by 'title'. You can override using the
`.groups` argument.
stimuli_temp <- paper_temp |>
  group_by(title) |>
  summarize(
    total_stimuli = sum(n)
  ) 

paper_weights <- inner_join(stimuli_temp, paper_temp) |>
  mutate(weight = (n / total_stimuli) * (1 / length(unique(
    paper_temp$title
  )))) |>
  group_by(code) |>
  summarize(
    paper_weighting = sum(weight)
  )
Joining with `by = join_by(title)`

Combining Weights

weights <- inner_join(stimuli_weights, study_weights) |>
  inner_join(paper_weights)
Joining with `by = join_by(code)`
Joining with `by = join_by(code)`

Combine Survey and Lit Review Data

Joined Variable Renaming

joined_renamed <- joined |>
  mutate(
    variable = case_when(
      variable == "donate" ~ "Charity",
      variable == "donate_foreign" ~ "Charity (foreign)",
      variable == "animal_welfare" ~ "Animal welfare",
      variable == "covid" ~ "COVID-19",
      variable == "culture_sports" ~ "Culture & Sports",
      variable == "education" ~ "Education",
      variable == "education_foreign" ~ "Education (foreign)",
      variable == "environment" ~ "Environment & Species Conservation",
      variable == "fairtrade" ~ "Fair trade",
      variable == "gunrights" ~ "Gun Rights",
      variable == "strangersmoney" ~ "Give stranger money",
      variable == "health" ~ "Health",
      variable == "health_foreign" ~ "Health (foreign)",
      variable == "health_youth" ~ "Health Youth",
      variable == "health_youth_foreign" ~ "Health Youth (foreign)",
      variable == "helpresearchers" ~ "Help Researchers",
      variable == "homeless" ~ "Homeless",
      variable == "humanrights" ~ "Human Rights",
      variable == "hunger" ~ "Hunger",
      variable == "hunger_foreign" ~ "Hunger (foreign)",
      variable == "hunger_youth" ~ "Hunger Youth",
      variable == "hunger_youth_foreign" ~ "Hunger Youth (foreign)",
      variable == "indigenous" ~ "Indigenous Rights",
      variable == "disaster" ~ "Natural disaster",
      variable == "disaster_foreign" ~ "Natural disaster (foreign)",
      variable == "localbusiness" ~ "Local Business Charity",
      variable == "organblood" ~ "Organ & Blood Donation",
      variable == "poverty" ~ "Poverty",
      variable == "poverty_foreign" ~ "Poverty (foreign)",
      variable == "poverty_youth" ~ "Poverty Youth",
      variable == "poverty_youth_foreign" ~ "Poverty Youth (foreign)",
      variable == "publicradio" ~ "Public television & radio",
      variable == "refugees_immigrants" ~ "Refugees & Immigrants",
      variable == "terrorism" ~ "Terrorism",
      variable == "terrorism_foreign" ~ "Terrorism (foreign)",
      variable == "university_foundation" ~ "University foundations",
      variable == "veterans" ~ "Veterans",
    )
  )

Create clean data file

clean_data <- inner_join(joined_renamed |>
            filter(variable != "religion"), weights |>
            rename(variable = code))
Joining with `by = join_by(variable)`

Analysis

Stimuli Count

stimuli_count <- clean_data |>
  mutate(
    variable = as.factor(variable),
    variable = fct_reorder(variable, desc(overall_politics))
  ) |>
  mutate(
    stimuli_count = stimuli_weighting*464
  ) |>
  mutate(classification = case_when(
    overall_high < 4 ~ "Liberal",
    overall_low > 4 ~ "Conservative",
    overall_high > 4 & overall_low < 4 ~ "Neutral"
  )) |>
  group_by(
    classification
  ) |>
  summarize(
    total_stimuli = sum(stimuli_count),
    cause_area_count = n()
  )

study1_results[["stimuli_count"]] <- stimuli_count

stimuli_count
# A tibble: 3 × 3
  classification total_stimuli cause_area_count
  <chr>                  <dbl>            <int>
1 Conservative              13                4
2 Liberal                  304               25
3 Neutral                  147                8

Skew by weighting method

avg_political_valence <- clean_data |>
  summarize(
    `Overall (by Stimuli)` = sum(overall_politics*stimuli_weighting),
    `Overall (by Paper)` = sum(overall_politics*paper_weighting),
    `Overall (by Study)` = sum(overall_politics*study_weighting),
  )

study1_results[["avg_political_valence"]] <- avg_political_valence

avg_political_valence
# A tibble: 1 × 3
  `Overall (by Stimuli)` `Overall (by Paper)` `Overall (by Study)`
                   <dbl>                <dbl>                <dbl>
1                   3.57                 3.60                 3.60

Diversity analysis

diversity_data <- left_join(litreview |>
            rename(variable = code), joined_renamed) |>
  group_by(title) |>
  summarize(
    count_of_stimuli = n(),
    avg_valence = mean(overall_politics)
  )
Joining with `by = join_by(variable)`
diversity_model <- lm(avg_valence ~ count_of_stimuli, data = diversity_data)
summary(diversity_model)

Call:
lm(formula = avg_valence ~ count_of_stimuli, data = diversity_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73494 -0.17188 -0.01994  0.21405  0.76909 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       3.651741   0.051914  70.342   <2e-16 ***
count_of_stimuli -0.011804   0.009209  -1.282    0.203    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.287 on 97 degrees of freedom
Multiple R-squared:  0.01666,   Adjusted R-squared:  0.006518 
F-statistic: 1.643 on 1 and 97 DF,  p-value: 0.203
study1_results[["diversity_model"]] <- diversity_model

report::report(diversity_model)
We fitted a linear model (estimated using OLS) to predict avg_valence with
count_of_stimuli (formula: avg_valence ~ count_of_stimuli). The model explains
a statistically not significant and very weak proportion of variance (R2 =
0.02, F(1, 97) = 1.64, p = 0.203, adj. R2 = 6.52e-03). The model's intercept,
corresponding to count_of_stimuli = 0, is at 3.65 (95% CI [3.55, 3.75], t(97) =
70.34, p < .001). Within this model:

  - The effect of count of stimuli is statistically non-significant and negative
(beta = -0.01, 95% CI [-0.03, 6.47e-03], t(97) = -1.28, p = 0.203; Std. beta =
-0.13, 95% CI [-0.33, 0.07])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald t-distribution approximation.

Averaging Analysis

overall_avg_data <- left_join(litreview |>
            rename(variable = code), joined_renamed)
Joining with `by = join_by(variable)`
overall_ttest <- t.test(overall_avg_data$overall_politics, mu = 4)

study1_results[["overall_ttest"]] <- overall_ttest

overall_ttest

    One Sample t-test

data:  overall_avg_data$overall_politics
t = -20.905, df = 463, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 3.531445 3.611965
sample estimates:
mean of x 
 3.571705 
overall_demrep_ttest <- t.test(overall_avg_data$dem_politics, overall_avg_data$rep_politics)

study1_results[["overall_demrep_ttest"]] <- overall_demrep_ttest

overall_demrep_ttest

    Welch Two Sample t-test

data:  overall_avg_data$dem_politics and overall_avg_data$rep_politics
t = -20.367, df = 906.01, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.6740599 -0.5555704
sample estimates:
mean of x mean of y 
 3.255110  3.869925 
overall_rep_ttest <- t.test(overall_avg_data$rep_politics, mu = 4)

study1_results[["overall_rep_ttest"]] <- overall_rep_ttest

overall_rep_ttest

    One Sample t-test

data:  overall_avg_data$rep_politics
t = -5.6861, df = 463, p-value = 2.305e-08
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 3.824972 3.914879
sample estimates:
mean of x 
 3.869925 
overall_dem_ttest <- t.test(overall_avg_data$dem_politics, mu = 4)

study1_results[["overall_dem_ttest"]] <- overall_dem_ttest

overall_dem_ttest

    One Sample t-test

data:  overall_avg_data$dem_politics
t = -37.818, df = 463, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 3.216405 3.293816
sample estimates:
mean of x 
  3.25511 

Religion Analysis

Overall

religion_ttest <- t.test(clean$religion, mu = 4)

study1_results[["religion_ttest"]] <- religion_ttest

religion_ttest

    One Sample t-test

data:  clean$religion
t = 15.509, df = 199, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 5.444575 5.865425
sample estimates:
mean of x 
    5.655 

Democrats

dem_religion_ttest <- t.test(clean[clean$Politics=="Democrats",]$religion, mu = 4)

study1_results[["dem_religion_ttest"]] <- dem_religion_ttest

dem_religion_ttest

    One Sample t-test

data:  clean[clean$Politics == "Democrats", ]$religion
t = 10.557, df = 96, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 5.314226 5.922887
sample estimates:
mean of x 
 5.618557 

Republicans

rep_religion_ttest <- t.test(clean[clean$Politics=="Republicans",]$religion, mu = 4)

study1_results[["rep_religion_ttest"]] <- rep_religion_ttest

rep_religion_ttest

    One Sample t-test

data:  clean[clean$Politics == "Republicans", ]$religion
t = 11.318, df = 102, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 5.393264 5.985377
sample estimates:
mean of x 
  5.68932 

Participant count

participant_count <- clean |>
  group_by(Politics) |>
  summarize(
    count = n()
  )

study1_results[["participant_count"]] <- participant_count

participant_count
# A tibble: 2 × 2
  Politics    count
  <chr>       <int>
1 Democrats      97
2 Republicans   103

Save results list

save(study1_results, file = here(getwd(), "Outputs", "study1_results.rdata"))

Graphing

Stimuli Graph

stimuli_graph <- clean_data |>
  mutate(
    variable = as.factor(variable),
    variable = fct_reorder(variable, desc(overall_politics))
  ) |>
  mutate(
    stimuli_count = stimuli_weighting*464
  ) |>
  ggplot(
    aes(y = variable, x = overall_politics)
  ) +
  geom_point(size = 2) +
  geom_errorbar(aes(xmin=overall_low, xmax=overall_high)) +
  geom_point(aes(x = dem_politics, y = variable), colour = "#0072B2", alpha = 0.5) +
  geom_point(aes(x = rep_politics, y = variable), colour = "#D55E00", alpha = 0.5) +
  geom_vline(xintercept = 4, linetype = "dashed") +
  theme_classic() +
  labs(
    x = 'Political Valence \n (1 = "Very liberal" — 7 = "Very conservative")',
    subtitle = "Overall ratings with group means for <span style = 'color:#0072B2;'>Democrats</span>
or <span style = 'color:#D55E00;'>Republicans</span>"
  ) +
  theme(
    axis.title = element_markdown(),
    plot.title.position = "plot",
    plot.title = element_markdown(),
    plot.subtitle = element_markdown(),
    axis.title.y = element_blank()
  )
  
ggsave(stimuli_graph, file = here(getwd(), "Outputs", "stimuli_graph.png"), height = 6, width = 8, dpi=300)

stimuli_graph

Pie Graph

politics_graph <- clean_data |>
  mutate(
    classify = case_when(
      overall_high < 4 ~ "Liberal",
      overall_low > 4 ~ "Conservative",
      overall_high < 4 & overall_low > 4 ~ "Neutral",
      overall_low < 4 & overall_high > 4 ~ "Neutral"
    )
  ) |>
  group_by(classify) |>
  rename(Politics = classify) |>
  summarize(
    `Cause Areas` = n() / nrow(clean_data),
    `Stimuli` = sum(stimuli_weighting)
  ) |>
  pivot_longer(cols = c(`Cause Areas`, `Stimuli`)) |>
  ggplot(aes(x="", y=value, fill=Politics)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  facet_wrap(~name) +
  theme_void()

ggsave(politics_graph, file = here(getwd(), "Outputs", "pie_chart.png"), height = 5, width = 8, dpi=300)

politics_graph