1 Section 1: Data Preparation

In this section, we clean our data to make it ready for checking, plotting, and analysis. In the code chunk below, you can find the script along with comments.

# Load the raw survey export from the project's raw_data folder
raw_data <- read_csv(
  file = here("raw_data", "smarvus_crt_empty.csv")
)

# Print the size of the raw table as c(rows, columns)
dim(raw_data)
## [1] 12570    39
# i.e. 12570 rows and 39 columns

# Total number of attention-check items; used further down to derive the
# number of failed checks from the number of passed ones
n_attention_checks <- 6


# Build the analysis-ready table from `raw_data`:
#   1. drop unneeded columns and give the survey items readable names,
#   2. (temporarily) fill the blinded outcome columns with simulated values,
#   3. score the CRT items and the attention checks,
#   4. shorten long country labels and convert character columns to factors.
cleaned_data <-
  raw_data %>%
  # Drop the columns that are not needed.
  # NOTE(review): `progress` has no leading minus, so it is NOT dropped; a
  # positive selection of a column that is already retained changes nothing.
  # Confirm whether `-progress` was intended.
  select(
    -survey_id, -incentive:-degree_year, -spld, -do_q_crt_1:-do_q_crt_3,
    progress
  ) %>%
  # Give the survey items readable names (attention_check1..6, crt1..3, ...)
  rename(
    attention_check1 = Q7.1_24,
    attention_check2 = Q8.1_21,
    attention_check3 = Q9.1_22,
    attention_check4 = Q11.1_9,
    attention_check5 = Q13.1_17,
    attention_check6 = Q15.1_9,
    crt1 = Q17.1,
    crt2 = Q17.2,
    crt3 = Q17.3,
    # Belief in God
    big = Q18.1,
    # How many of the reasoning Qs do you think you answered correctly?
    crt_perceived_corr = crt_check1,
    # You have just answered three reasoning problems. Did you look any of
    # the answers up online?
    crt_cheat = crt_check2,
    # Whether the CRT block was presented before or after the BiG (belief in
    # God/s) block: 1 = before; 2 = after; NA = question was not presented
    crt_order = do_block_crt,
    # Whether the BiG (belief in God/s) block was presented before or after
    # the CRT block
    big_order = do_block_big
  ) %>%
  # The main columns are empty to keep ourselves away from bias while
  # preregistering our hypotheses, so we simulate some data for now in order
  # to work with the dataset. This block of code should be removed when we
  # are using the actual dataset.
  # NOTE(review): no seed is set here, so the simulated values differ between
  # runs; call set.seed() beforehand if reproducible output is needed.
  mutate(
    crt1 = sample(
      c(4, 8, NA, 12, 1),
      size = n(), replace = TRUE, prob = c(0.2, 0.6, 0.1, 0.05, 0.05)
    ),
    crt2 = sample(
      c(10, 50, NA, 15, 45),
      size = n(), replace = TRUE, prob = c(0.2, 0.6, 0.1, 0.05, 0.05)
    ),
    crt3 = sample(
      c(39, 20, NA, 40, 15),
      size = n(), replace = TRUE, prob = c(0.2, 0.6, 0.1, 0.05, 0.05)
    ),
    crt_perceived_corr = sample(
      c(0, 1, NA, 2, 3),
      size = n(), replace = TRUE, prob = c(0.2, 0.6, 0.1, 0.05, 0.05)
    ),
    crt_cheat = sample(
      c("Yes", "No"),
      size = n(), replace = TRUE, prob = c(0.1, 0.9)
    ),
    big = round(runif(n(), min = 0, max = 100))
  ) %>%
  # Score the CRT items per participant. Answer keys used below:
  #   correct:   crt1 = 4, crt2 = 10, crt3 = 39
  #   intuitive: crt1 = 8, crt2 = 50, crt3 = 20
  # Everything else, including a missing answer, counts as "other".
  mutate(
    crt_n_correct = rowSums(
      cbind(crt1 == 4, crt2 == 10, crt3 == 39),
      na.rm = TRUE
    ),
    crt_n_intuitive = rowSums(
      cbind(crt1 == 8, crt2 == 50, crt3 == 20),
      na.rm = TRUE
    ),
    # `%in%` never returns NA, so a missing answer is neither correct nor
    # intuitive and is therefore counted as "other"
    crt_n_other = rowSums(
      cbind(
        !(crt1 %in% c(4, 8)),
        !(crt2 %in% c(10, 50)),
        !(crt3 %in% c(39, 20))
      )
    )
  ) %>%
  # Place the three CRT scores right after the raw CRT answers
  relocate(crt_n_correct, crt_n_intuitive, crt_n_other, .after = crt3) %>%
  # Recode each attention check to 1 (correct) / 0 (incorrect). Comparing a
  # missing response yields NA, so unanswered checks stay NA.
  mutate(
    attention_check1 = as.numeric(attention_check1 == 1),
    attention_check2 = as.numeric(attention_check2 == 5),
    attention_check3 = as.numeric(attention_check3 == 1),
    attention_check4 = as.numeric(attention_check4 == 3),
    attention_check5 = as.numeric(attention_check5 == 2),
    attention_check6 = as.numeric(attention_check6 == 4)
  ) %>%
  # n_att_pass:  number of attention checks passed.
  # n_att_fails: number of attention checks not passed.
  # NOTE(review): because fails = total - passes, an unanswered (NA) check is
  # counted as a fail; confirm this is the intended exclusion rule.
  mutate(
    n_att_pass = rowSums(
      select(., starts_with("attention_check")) == 1,
      na.rm = TRUE
    ),
    n_att_fails = n_attention_checks - n_att_pass
  ) %>%
  relocate(n_att_pass, n_att_fails, .after = attention_check6) %>%
  # Shorten "Northern Ireland" and "Republic of Ireland" so the labels fit
  # our plots nicely; all other countries are left unchanged
  mutate(
    country = case_when(
      country == "Northern Ireland" ~ "North. Ireland",
      country == "Republic of Ireland" ~ "Rep. Ireland",
      TRUE ~ country
    )
  ) %>%
  # Convert every character column to a factor
  mutate(across(where(is.character), as.factor))

1.1 Things to be considered

So far, we have decided to exclude participants if:

  1. They have failed more than 3 attention checks.

  2. They have missed the BiG question (like those in Saudi Arabia) or have entered weird values.

  3. They have received CRTs with the wrong translation: “If the most common response isn’t the intuitive response and the second most common response isn’t the correct response (compared to all other responses pooled together) then we will investigate a country and consider removing their data if we find evidence for a translation problem”

  4. They admitted to having looked up the answers to the CRT questions online. We have a few options: a. removing all of them; b. removing those who are more accurate than honest participants; c. removing or keeping the whole sample based on the difference (if significant, excluding them).

  5. They are from countries with N < 30 after applying our criteria.

  6. They have NA for attention_amnesty.