Chapter 2 Create Data Set

This chapter describes the process of loading and pooling the six panel data sets.

2.1 Load packages or install them if not already installed

if(!require("ggplot2")){install.packages("ggplot2");  library(ggplot2)}
if(!require("haven")){install.packages("haven");  library(haven)}
## Loading required package: haven
if(!require("here")){install.packages("here");  library(here)}
## Loading required package: here
## here() starts at C:/Users/Sveinung/OneDrive/NORCE 2018-/wiggle 2/bookdown-wiggle2
if(!require("kableExtra")){install.packages("kableExtra");  library(kableExtra)}
if(!require("knitr")){install.packages("knitr");  library(knitr)}
if(!require("naniar")){install.packages("naniar");  library(naniar)}
## Loading required package: naniar
if(!require("readxl")){install.packages("readxl");  library(readxl)}
if(!require("tidyverse")){install.packages("tidyverse");  library(tidyverse)}

# Global chunk defaults: hide code, disable caching, suppress warnings and
# messages, and keep rendering (showing the error) when a chunk fails.
knitr::opts_chunk$set(echo = FALSE, cache = FALSE, warning = FALSE, message = FALSE, error = TRUE)

# `knitr.kable.NA` is a global R option read by knitr::kable(), not a chunk
# option, so it has to be set through options() to blank out NA cells.
options(knitr.kable.NA = "")

2.2 Load raw EIPS data

Select the variables of interest for the good loser experiment, recode them, and create a new data set in .sav and .csv formats.

## Raw panel files are resolved relative to the project root reported by
## here() rather than through a user-specific absolute path, so the chapter
## can be rendered from any checkout of the project.
## NOTE(review): the md5sum calls recorded below point at a Dropbox copy of the
## files, not the project copy that is read here -- confirm they are identical.

## EIPS Iceland.
## Md5sum: 492e6210db4b3fcb047819bdf0bd2955
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Iceland/EIPS_is.sav")

is_raw <- read_sav(here("Data", "EIPS2017-data", "Iceland", "EIPS_is.sav"))

## EIPS Netherlands.
## Md5sum: 2fcad34e4ddda3c8ff18d17f94878b8a
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Netherlands/L_PanelCollaboration_wave4_4p_EN.sav")

nl_raw <- read_sav(here("Data", "EIPS2017-data", "Netherlands", "L_PanelCollaboration_wave4_4p_EN.sav"))

## EIPS Norway.
## Md5sum: 74d22c43548599e69ea50b78f8630ce3
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Norway/Norwegian Citizen Panel - wave 9 EN.sav")

no_raw <- read_sav(here("Data", "EIPS2017-data", "Norway", "Norwegian Citizen Panel - wave 9 EN.sav"))

## EIPS Sweden (Stata file, hence read_dta).
## Md5sum: 2ebae375d080fb29e4d2ed3586144ce3
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Sweden/EIPS2017_Swedish_Citizen_Panel_20180112.dta")

se_raw <- read_dta(here("Data", "EIPS2017-data", "Sweden", "EIPS2017_Swedish_Citizen_Panel_20180112.dta"))

## EIPS France.
## Md5sum: d910c074786ce467fc4aa9293d1689f1
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/France/spss/EIPS2017-France.sav")

fr_raw <- read_sav(here("Data", "EIPS2017-data", "France", "spss", "EIPS2017-France.sav"))

## EIPS Germany
## NOTE(review): no md5sum has been recorded for this file.

de_raw <- read_sav(here("Data", "EIPS2017-data", "Germany", "EIPS2017-Germany.sav"))

2.3 Merge data sets

## Prepare Iceland ---------------------------------------------------

# Iceland, step 1: derive the harmonised respondent (rsp_*) variables.
is_01 <- is_raw %>%
  mutate(
    # Respondent id: the prefix 100 pasted in front of the row number.
    rsp_id = as.numeric(paste0("100", row_number())),
    rsp_country = "Iceland",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      EIPS2017D %in% 0:4  ~ "Oppose EU membership",
      EIPS2017D == 5      ~ "Neither support nor oppose EU membership",
      EIPS2017D %in% 6:10 ~ "Support EU membership"
    ),
    # NOTE(review): Iceland is the only panel where gender is stored as a
    # factor; the other panels keep it as character.
    rsp_gender = factor(
      case_when(
        kyn == 1 ~ "Male",
        kyn == 2 ~ "Female"
      )
    ),
    rsp_age = as.numeric(aldur),
    # Education collapsed to three levels: 1 / 2-3 / 4-8.
    rsp_edu = case_when(
      menntun == 1     ~ "Lower",
      menntun %in% 2:3 ~ "Intermediate",
      menntun %in% 4:8 ~ "Higher"
    )
  )

# Iceland, step 2: recode the referendum experiment (ref_*) variables and keep
# only the harmonised rsp_* and ref_* columns.
is_02 <-
  is_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"),
    # Codes 3 and 4 are mapped here as in every other country's recode;
    # previously they silently became NA for Iceland.
    # NOTE(review): confirm against the Icelandic codebook that codes 3 and 4
    # carry the same meaning as in the other panels.
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%",
      EIPS2017E_majoritysize == 3 ~ "55%",
      EIPS2017E_majoritysize == 4 ~ "70%"),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"),
    # Outcome favourability: the respondent's own EU attitude compared with
    # the shown winner. Respondents at the midpoint (5) who were shown a
    # winner match no condition and stay NA.
    ref_outcome = case_when(
      EIPS2017D %in% 0:4 & EIPS2017E_winner == 2 ~ "Unfavorable outcome",
      EIPS2017D %in% 6:10 & EIPS2017E_winner == 3 ~ "Unfavorable outcome",
      EIPS2017D %in% 0:4 & EIPS2017E_winner == 3 ~ "Favorable outcome",
      EIPS2017D %in% 6:10 & EIPS2017E_winner == 2 ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # Binary dependent variable; values other than 0/1 become NA.
    ref_post = case_when(
      EIPS2017E_dv == 1 ~ 1,
      EIPS2017E_dv == 0 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

is <- is_02


## Prepare Netherlands -----------------------------------------------

# Netherlands, step 1: derive the harmonised respondent (rsp_*) variables and
# keep respondents aged 18 or older (rows with a missing age are dropped too).
nl_01 <- nl_raw %>%
  mutate(
    # Respondent id: the prefix 200 pasted in front of the panel member id.
    rsp_id = as.numeric(paste0("200", nomem_encr)),
    rsp_country = "Netherlands",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      EIPS2017D %in% 0:4  ~ "Oppose EU membership",
      EIPS2017D == 5      ~ "Neither support nor oppose EU membership",
      EIPS2017D %in% 6:10 ~ "Support EU membership"
    ),
    rsp_gender = case_when(
      geslacht == 1 ~ "Male",
      geslacht == 2 ~ "Female"
    ),
    rsp_age = as.numeric(leeftijd),
    # Reverse-codes `sted` (6 minus the original value).
    rsp_popdensity = 6 - as.numeric(sted),
    # Education collapsed to three levels: 1 / 2-4 / 5-6.
    rsp_edu = case_when(
      oplcat == 1     ~ "Lower",
      oplcat %in% 2:4 ~ "Intermediate",
      oplcat %in% 5:6 ~ "Higher"
    )
  ) %>%
  filter(rsp_age >= 18)



# Netherlands, step 2: recode the referendum experiment (ref_*) variables and
# keep only the harmonised rsp_* and ref_* columns.
nl_02 <- nl_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%",
      EIPS2017E_majoritysize == 3 ~ "55%",
      EIPS2017E_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"
    ),
    # Outcome favourability: own EU attitude compared with the shown winner.
    # Midpoint respondents (5) who were shown a winner stay NA.
    ref_outcome = case_when(
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 2) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 3) ~ "Unfavorable outcome",
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 3) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 2) ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # Binary dependent variable: 1 stays 1, 2 becomes 0, anything else is NA.
    ref_post = case_when(
      EIPS2017E == 1 ~ 1,
      EIPS2017E == 2 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

nl <- nl_02


## Prepare Norway ----------------------------------------------------

# Norway, step 1: derive the harmonised respondent (rsp_*) variables.
# Age is only available in bands here and is stored as rsp_age_10a.
no_01 <- no_raw %>%
  mutate(
    # Respondent id: the prefix 300 pasted in front of the panel response id.
    rsp_id = as.numeric(paste0("300", responseid)),
    rsp_country = "Norway",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      eips2017d %in% 0:4  ~ "Oppose EU membership",
      eips2017d == 5      ~ "Neither support nor oppose EU membership",
      eips2017d %in% 6:10 ~ "Support EU membership"
    ),
    rsp_gender = case_when(
      R9P1 == 1 ~ "Male",
      R9P1 == 2 ~ "Female"
    ),
    rsp_age_10a = case_when(
      R9P5_1 == 1 ~ "18-25",
      R9P5_1 == 2 ~ "26-35",
      R9P5_1 == 3 ~ "36-45",
      R9P5_1 == 4 ~ "46-55",
      R9P5_1 == 5 ~ "56-65",
      R9P5_1 == 6 ~ "66-75",
      R9P5_1 == 7 ~ "75 <"
    ),
    rsp_edu = case_when(
      R9P4_1 == 1 ~ "Lower",
      R9P4_1 == 2 ~ "Intermediate",
      R9P4_1 == 3 ~ "Higher"
    )
  )


# Norway, step 2: recode the referendum experiment (ref_*) variables and keep
# only the harmonised rsp_* and ref_* columns.
no_02 <- no_01 %>%
  mutate(
    ref_turnout = case_when(
      eips2017e_turnout == 1 ~ "Not shown",
      eips2017e_turnout == 2 ~ "35%",
      eips2017e_turnout == 3 ~ "47%",
      eips2017e_turnout == 4 ~ "53%",
      eips2017e_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      eips2017e_majoritysize == 1 ~ "Not shown",
      eips2017e_majoritysize == 2 ~ "51%",
      eips2017e_majoritysize == 3 ~ "55%",
      eips2017e_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      eips2017e_winner == 1 ~ "Not shown",
      eips2017e_winner == 2 ~ "Pro EU",
      eips2017e_winner == 3 ~ "Anti-EU"
    ),
    # Outcome favourability: own EU attitude compared with the shown winner.
    # Midpoint respondents (5) who were shown a winner stay NA.
    ref_outcome = case_when(
      (eips2017d %in% 0:4 & eips2017e_winner == 2) |
        (eips2017d %in% 6:10 & eips2017e_winner == 3) ~ "Unfavorable outcome",
      (eips2017d %in% 0:4 & eips2017e_winner == 3) |
        (eips2017d %in% 6:10 & eips2017e_winner == 2) ~ "Favorable outcome",
      eips2017e_winner == 1 ~ "Not shown"
    ),
    # Binary dependent variable; values other than 0/1 become NA.
    ref_post = case_when(
      eips2017e_dv == 1 ~ 1,
      eips2017e_dv == 0 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

no <- no_02


## Prepare Sweden ----------------------------------------------------

# Sweden, step 1: derive the harmonised respondent (rsp_*) variables.
se_01 <- se_raw %>%
  mutate(
    # Respondent id: the prefix 400 pasted in front of the panel id.
    rsp_id = as.numeric(paste0("400", id)),
    rsp_country = "Sweden",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      EIPS2017D %in% 0:4  ~ "Oppose EU membership",
      EIPS2017D == 5      ~ "Neither support nor oppose EU membership",
      EIPS2017D %in% 6:10 ~ "Support EU membership"
    ),
    # NOTE(review): the coding is reversed relative to the other panels
    # (2 = Male, 1 = Female) -- confirm against the Swedish codebook.
    rsp_gender = case_when(
      sex == 2 ~ "Male",
      sex == 1 ~ "Female"
    ),
    rsp_age = as.numeric(age),
    rsp_edu = case_when(
      edu3 == 1 ~ "Lower",
      edu3 == 2 ~ "Intermediate",
      edu3 == 3 ~ "Higher"
    )
  )


# Sweden, step 2: recode the referendum experiment (ref_*) variables and keep
# only the harmonised rsp_* and ref_* columns.
se_02 <- se_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%",
      EIPS2017E_majoritysize == 3 ~ "55%",
      EIPS2017E_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"
    ),
    # Outcome favourability: own EU attitude compared with the shown winner.
    # Midpoint respondents (5) who were shown a winner stay NA.
    ref_outcome = case_when(
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 2) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 3) ~ "Unfavorable outcome",
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 3) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 2) ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # Binary dependent variable; values other than 0/1 become NA.
    ref_post = case_when(
      EIPS2017E_dv == 1 ~ 1,
      EIPS2017E_dv == 0 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

se <- se_02


## Prepare France ----------------------------------------------------

# France, step 1: derive the harmonised respondent (rsp_*) variables.
# Age is only available in bands: a fine-grained version (rsp_age_5) and a
# coarser version (rsp_age_10b).
fr_01 <-
  fr_raw %>%
  mutate(
    # Respondent id: the prefix 500 pasted in front of the panel id.
    rsp_id = as.numeric(paste0(500, UID_pe03)),
    rsp_country = "France",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      pe03_EIPS2017D %in% 0:4 ~ "Oppose EU membership",
      pe03_EIPS2017D == 5 ~ "Neither support nor oppose EU membership",
      pe03_EIPS2017D %in% 6:10 ~ "Support EU membership"),
    rsp_gender = case_when(
      ea17_A1 == 1 ~ "Male",
      ea17_A1 == 2 ~ "Female"),
    rsp_age_5 = case_when(
      ea17_A2A_rec == 4  ~ "18-24",
      ea17_A2A_rec == 5  ~ "25-29",
      ea17_A2A_rec == 6  ~ "30-34",
      ea17_A2A_rec == 7  ~ "35-39",
      ea17_A2A_rec == 8  ~ "40-44",
      # Was labelled "45-59": a typo, since this band sits between "40-44"
      # and "50-54", and the typo label is never matched by the pooled
      # age recode.
      ea17_A2A_rec == 9  ~ "45-49",
      ea17_A2A_rec == 10 ~ "50-54",
      ea17_A2A_rec == 11 ~ "55-59",
      ea17_A2A_rec == 12 ~ "60-64",
      ea17_A2A_rec == 13 ~ "65-69",
      ea17_A2A_rec == 14 ~ "70 <"),
    # NOTE(review): these band labels are uneven ("18-22", "23-34", ...,
    # "65-75", "76-79") -- confirm against the CAL_AGE10 codebook.
    rsp_age_10b = case_when(
      CAL_AGE10 == 1 ~ "18-22",
      CAL_AGE10 == 2 ~ "23-34",
      CAL_AGE10 == 3 ~ "35-44",
      CAL_AGE10 == 4 ~ "45-54",
      CAL_AGE10 == 5 ~ "55-64",
      CAL_AGE10 == 6 ~ "65-75",
      CAL_AGE10 == 7 ~ "76-79"),
    # Education collapsed to three levels: 1 / 2 / 3-4.
    rsp_edu = case_when(
      CAL_DIPLOME == 1     ~ "Lower",
      CAL_DIPLOME == 2     ~ "Intermediate",
      CAL_DIPLOME %in% 3:4 ~ "Higher")
  )


# France, step 2: recode the referendum experiment (ref_*) variables and keep
# only the harmonised rsp_* and ref_* columns.
fr_02 <- fr_01 %>%
  mutate(
    ref_turnout = case_when(
      pe03_EIPS2017E_TURNOUT == 1 ~ "Not shown",
      pe03_EIPS2017E_TURNOUT == 2 ~ "35%",
      pe03_EIPS2017E_TURNOUT == 3 ~ "47%",
      pe03_EIPS2017E_TURNOUT == 4 ~ "53%",
      pe03_EIPS2017E_TURNOUT == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      pe03_EIPS2017E_MAJORITYSIZE == 1 ~ "Not shown",
      pe03_EIPS2017E_MAJORITYSIZE == 2 ~ "51%",
      pe03_EIPS2017E_MAJORITYSIZE == 3 ~ "55%",
      pe03_EIPS2017E_MAJORITYSIZE == 4 ~ "70%"
    ),
    ref_winner = case_when(
      pe03_EIPS2017E_WINNER == 1 ~ "Not shown",
      pe03_EIPS2017E_WINNER == 2 ~ "Pro EU",
      pe03_EIPS2017E_WINNER == 3 ~ "Anti-EU"
    ),
    # Outcome favourability: own EU attitude compared with the shown winner.
    # Midpoint respondents (5) who were shown a winner stay NA.
    ref_outcome = case_when(
      (pe03_EIPS2017D %in% 0:4 & pe03_EIPS2017E_WINNER == 2) |
        (pe03_EIPS2017D %in% 6:10 & pe03_EIPS2017E_WINNER == 3) ~ "Unfavorable outcome",
      (pe03_EIPS2017D %in% 0:4 & pe03_EIPS2017E_WINNER == 3) |
        (pe03_EIPS2017D %in% 6:10 & pe03_EIPS2017E_WINNER == 2) ~ "Favorable outcome",
      pe03_EIPS2017E_WINNER == 1 ~ "Not shown"
    ),
    # Binary dependent variable; values other than 0/1 become NA.
    ref_post = case_when(
      pe03_EIPS2017E_DV == 1 ~ 1,
      pe03_EIPS2017E_DV == 0 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

fr <- fr_02

## Prepare Germany ---------------------------------------------

# Germany, step 1: derive the harmonised respondent (rsp_*) variables.
# Both age variables are built from the same birth-year category item, where
# higher codes are mapped to younger age bands.
de_01 <- de_raw %>%
  mutate(
    # Respondent id: the prefix 600 pasted in front of the panel id.
    rsp_id = as.numeric(paste0("600", id_g)),
    rsp_country = "Germany",
    # EU attitude: 0-4 oppose, 5 neither, 6-10 support; anything else is NA.
    rsp_eu = case_when(
      ZH29053 %in% 0:4  ~ "Oppose EU membership",
      ZH29053 == 5      ~ "Neither support nor oppose EU membership",
      ZH29053 %in% 6:10 ~ "Support EU membership"
    ),
    rsp_gender = case_when(
      gender_16 == 1 ~ "Male",
      gender_16 == 2 ~ "Female"
    ),
    # NOTE TO SELF: CHECK AGE CATEGORIES ONCE MORE
    rsp_age_5 = case_when(
      year_of_birth_cat_16 == 13 ~ "18-23",
      year_of_birth_cat_16 == 12 ~ "24-29",
      year_of_birth_cat_16 == 11 ~ "30-34",
      year_of_birth_cat_16 == 10 ~ "35-39",
      year_of_birth_cat_16 == 9  ~ "40-44",
      year_of_birth_cat_16 == 8  ~ "45-49",
      year_of_birth_cat_16 == 7  ~ "50-54",
      year_of_birth_cat_16 == 6  ~ "55-59",
      year_of_birth_cat_16 == 5  ~ "60-64",
      year_of_birth_cat_16 == 4  ~ "65-69",
      year_of_birth_cat_16 == 3  ~ "70-74",
      year_of_birth_cat_16 == 2  ~ "75-79",
      year_of_birth_cat_16 == 1  ~ "80-84"
    ),
    # Pairs of adjacent birth-year categories merged into wider bands.
    rsp_age_10b = case_when(
      year_of_birth_cat_16 %in% 12:13 ~ "18-27",
      year_of_birth_cat_16 %in% 10:11 ~ "28-37",
      year_of_birth_cat_16 %in% 8:9   ~ "38-47",
      year_of_birth_cat_16 %in% 6:7   ~ "48-57",
      year_of_birth_cat_16 %in% 4:5   ~ "58-67",
      year_of_birth_cat_16 %in% 2:3   ~ "68-77",
      year_of_birth_cat_16 == 1       ~ "78-87"
    ),
    # Education collapsed to three levels: 1-3 / 4 / 5-6.
    rsp_edu = case_when(
      educ_school_16 %in% 1:3 ~ "Lower",
      educ_school_16 == 4     ~ "Intermediate",
      educ_school_16 %in% 5:6 ~ "Higher"
    )
  )


# Germany, step 2: recode the referendum experiment (ref_*) variables and keep
# only the harmonised rsp_* and ref_* columns. The three treatment variables
# are expZH29054_1 (turnout), expZH29054_2 (majority size) and expZH29054_3
# (winner).
de_02 <- de_01 %>%
  mutate(
    ref_turnout = case_when(
      expZH29054_1 == 1 ~ "Not shown",
      expZH29054_1 == 2 ~ "35%",
      expZH29054_1 == 3 ~ "47%",
      expZH29054_1 == 4 ~ "53%",
      expZH29054_1 == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      expZH29054_2 == 1 ~ "Not shown",
      expZH29054_2 == 2 ~ "51%",
      expZH29054_2 == 3 ~ "55%",
      expZH29054_2 == 4 ~ "70%"
    ),
    ref_winner = case_when(
      expZH29054_3 == 1 ~ "Not shown",
      expZH29054_3 == 2 ~ "Pro EU",
      expZH29054_3 == 3 ~ "Anti-EU"
    ),
    # Outcome favourability: own EU attitude compared with the shown winner.
    # Midpoint respondents (5) who were shown a winner stay NA.
    ref_outcome = case_when(
      (ZH29053 %in% 0:4 & expZH29054_3 == 2) |
        (ZH29053 %in% 6:10 & expZH29054_3 == 3) ~ "Unfavorable outcome",
      (ZH29053 %in% 0:4 & expZH29054_3 == 3) |
        (ZH29053 %in% 6:10 & expZH29054_3 == 2) ~ "Favorable outcome",
      expZH29054_3 == 1 ~ "Not shown"
    ),
    # Binary dependent variable: 1 stays 1, 2 becomes 0, anything else is NA.
    ref_post = case_when(
      ZH29054 == 1 ~ 1,
      ZH29054 == 2 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

de <- de_02


## Combine EIPS ------------------------------------------------------


# Pool the six country data sets, drop respondents without a value on the
# dependent variable (ref_post), and add harmonised education and age groups.
eips_raw <- bind_rows(is, nl, no, se, fr, de) %>%

  filter(!is.na(ref_post)) %>%

  mutate(

    # Two-level education: "Intermediate" is folded into "Lower".
    rsp_edu_2 = case_when(

      rsp_edu == "Higher" ~ "Higher",

      rsp_edu %in% c("Intermediate", "Lower") ~ "Lower"),

    # Three-level age group built from whichever age variable a country
    # provides: exact age (rsp_age) or one of the banded variables
    # (rsp_age_10a, rsp_age_10b, rsp_age_5). case_when() uses the first
    # matching condition, so the order below is kept deliberately.
    # The rsp_age_5 lists cover both the German labels ("18-23", "24-29", ...)
    # and the French labels ("18-24", "25-29", ..., "70 <"); the French
    # labels were previously missing, so French respondents without
    # rsp_age_10b fell through to NA.
    # NOTE(review): the source bands do not line up exactly with the target
    # groups (e.g. rsp_age_10b "35-44" goes to "36-55").
    rsp_age_cat_3 = case_when(

      rsp_age_10a %in% c("18-25", "26-35")          ~ "18-35",

      rsp_age_10b %in% c("18-22", "23-34")          ~ "18-35",

      rsp_age %in% 18:35                            ~ "18-35",

      rsp_age_5 %in% c("18-23", "24-29", "30-34", "18-24", "25-29") ~ "18-35",

      rsp_age_10a %in% c("36-45", "46-55")          ~ "36-55",

      rsp_age_10b %in% c("35-44", "45-54")          ~ "36-55",

      rsp_age %in% 36:55                            ~ "36-55",

      rsp_age_5 %in% c("35-39", "40-44", "45-49", "50-54") ~ "36-55",

      rsp_age_10a %in% c("56-65", "66-75", "75 <")  ~ "55<",

      rsp_age_10b %in% c("55-64", "65-75", "76-79") ~ "55<",

      rsp_age >= 56                                 ~ "55<",

      rsp_age_5 %in% c("55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "70 <") ~ "55<")

)