Chapter 2 Create Data Set
This chapter describes the process of loading and pooling the six panel data sets.
2.1 Load packages or install them if not already installed
# Attach every package used in this chapter, in the order listed. A package
# that cannot be attached is installed first and then attached with library().
invisible(lapply(
  c(
    "ggplot2", "haven", "here", "kableExtra",
    "knitr", "naniar", "readxl", "tidyverse"
  ),
  function(pkg) {
    # require() returns FALSE (instead of erroring) when the package is absent.
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg)
      library(pkg, character.only = TRUE)
    }
  }
))
# Default chunk options for the rest of the document: hide code, do not cache,
# suppress warnings and messages, and keep knitting when a chunk errors.
# (`echo = FALSE` was previously passed twice; once is enough.)
knitr::opts_chunk$set(
  echo = FALSE,
  cache = FALSE,
  warning = FALSE,
  message = FALSE,
  error = TRUE
)
# `knitr.kable.NA` is a global R option read by knitr::kable(), not a chunk
# option, so it has to be set through options() to blank out NA table cells.
options(knitr.kable.NA = "")
2.2 Load raw EIPS data
Select the variables of interest for the good loser experiment, recode them, and create a new data set in .sav and .csv formats.
## Read the six raw EIPS 2017 country files.
## Paths are resolved with here::here() relative to the project root instead of
## one user's absolute directory, so the chapter also knits on other machines.
## NOTE(review): assumes the here() root is the bookdown project directory that
## contains "Data/EIPS2017-data" -- confirm when running from another location.

## EIPS Iceland.
## Md5sum: 492e6210db4b3fcb047819bdf0bd2955
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Iceland/EIPS_is.sav")
is_raw <- read_sav(
  here::here("Data", "EIPS2017-data", "Iceland", "EIPS_is.sav")
)

## EIPS Netherlands.
## Md5sum: 2fcad34e4ddda3c8ff18d17f94878b8a
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Netherlands/L_PanelCollaboration_wave4_4p_EN.sav")
nl_raw <- read_sav(
  here::here(
    "Data", "EIPS2017-data", "Netherlands",
    "L_PanelCollaboration_wave4_4p_EN.sav"
  )
)

## EIPS Norway.
## Md5sum: 74d22c43548599e69ea50b78f8630ce3
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Norway/Norwegian Citizen Panel - wave 9 EN.sav")
no_raw <- read_sav(
  here::here(
    "Data", "EIPS2017-data", "Norway",
    "Norwegian Citizen Panel - wave 9 EN.sav"
  )
)

## EIPS Sweden (Stata file, hence read_dta()).
## Md5sum: 2ebae375d080fb29e4d2ed3586144ce3
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/Sweden/EIPS2017_Swedish_Citizen_Panel_20180112.dta")
se_raw <- read_dta(
  here::here(
    "Data", "EIPS2017-data", "Sweden",
    "EIPS2017_Swedish_Citizen_Panel_20180112.dta"
  )
)

## EIPS France.
## Md5sum: d910c074786ce467fc4aa9293d1689f1
## tools::md5sum("C:\\Users/Sveinung/Dropbox/WhatAuthority/EIPS/EIPS2017-data/France/spss/EIPS2017-France.sav")
fr_raw <- read_sav(
  here::here("Data", "EIPS2017-data", "France", "spss", "EIPS2017-France.sav")
)

## EIPS Germany
## TODO: no Md5sum is recorded for the German file.
de_raw <- read_sav(
  here::here("Data", "EIPS2017-data", "Germany", "EIPS2017-Germany.sav")
)
2.3 Merge data sets
## Prepare Iceland ---------------------------------------------------
# Step 1: respondent-level (rsp_*) variables.
is_01 <- mutate(
  is_raw,
  # Row number prefixed with 100.
  rsp_id = as.numeric(paste0(100, row_number())),
  rsp_country = "Iceland",
  # EIPS2017D codes 0-4 / 5 / 6-10 become oppose / neither / support.
  rsp_eu = case_when(
    EIPS2017D %in% 0:4 ~ "Oppose EU membership",
    EIPS2017D == 5 ~ "Neither support nor oppose EU membership",
    EIPS2017D %in% 6:10 ~ "Support EU membership"
  ),
  # Gender is stored as a factor for Iceland.
  rsp_gender = factor(
    case_when(
      kyn == 1 ~ "Male",
      kyn == 2 ~ "Female"
    )
  ),
  rsp_age = as.numeric(aldur),
  # Eight education codes collapsed into three levels.
  rsp_edu = case_when(
    menntun == 1 ~ "Lower",
    menntun %in% 2:3 ~ "Intermediate",
    menntun %in% 4:8 ~ "Higher"
  )
)

# Step 2: experiment-level (ref_*) variables; afterwards only the rsp_* and
# ref_* columns are kept.
is_02 <- is_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"
    ),
    # NOTE(review): only codes 1 and 2 are mapped here, whereas the other
    # countries in this file also map 3 ("55%") and 4 ("70%") -- confirm
    # against the Icelandic codebook.
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%"
    ),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"
    ),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 2) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 3) ~ "Unfavorable outcome",
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 3) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 2) ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # Dependent variable: only the values 1 and 0 are kept, anything else is NA.
    ref_post = case_when(
      EIPS2017E_dv == 1 ~ 1,
      EIPS2017E_dv == 0 ~ 0
    )
  ) %>%
  select(starts_with("rsp_"), starts_with("ref_"))

is <- is_02
## Prepare Netherlands -----------------------------------------------
# Step 1: respondent-level (rsp_*) variables; respondents whose age is below
# 18 (or missing) are dropped by the final filter().
nl_01 <- nl_raw %>%
  mutate(
    # Panel member number prefixed with 200.
    rsp_id = as.numeric(paste0(200, nomem_encr)),
    rsp_country = "Netherlands"
  ) %>%
  mutate(
    rsp_eu = case_when(
      EIPS2017D %in% 0:4 ~ "Oppose EU membership",
      EIPS2017D == 5 ~ "Neither support nor oppose EU membership",
      EIPS2017D %in% 6:10 ~ "Support EU membership"
    ),
    rsp_gender = case_when(
      geslacht == 1 ~ "Male",
      geslacht == 2 ~ "Female"
    ),
    rsp_age = as.numeric(leeftijd),
    # 6 minus the numeric `sted` code.
    rsp_popdensity = 6 - as.numeric(sted),
    rsp_edu = case_when(
      oplcat == 1 ~ "Lower",
      oplcat %in% 2:4 ~ "Intermediate",
      oplcat %in% 5:6 ~ "Higher"
    )
  ) %>%
  filter(rsp_age >= 18)

# Step 2: experiment-level (ref_*) variables; afterwards only the rsp_* and
# ref_* columns are kept.
nl_02 <- nl_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%",
      EIPS2017E_majoritysize == 3 ~ "55%",
      EIPS2017E_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"
    ),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 2) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 3) ~ "Unfavorable outcome",
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 3) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 2) ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # In the Dutch file the dependent variable is EIPS2017E, with 1 recoded
    # to 1 and 2 recoded to 0.
    ref_post = case_when(
      EIPS2017E == 1 ~ 1,
      EIPS2017E == 2 ~ 0
    )
  ) %>%
  select(starts_with("rsp_"), starts_with("ref_"))

nl <- nl_02
## Prepare Norway ----------------------------------------------------
# Step 1: respondent-level (rsp_*) variables. Age is only recoded from the
# banded variable R9P5_1, so it is stored as rsp_age_10a rather than rsp_age.
no_01 <- mutate(
  no_raw,
  # Response id prefixed with 300.
  rsp_id = as.numeric(paste0(300, responseid)),
  rsp_country = "Norway",
  rsp_eu = case_when(
    eips2017d %in% 0:4 ~ "Oppose EU membership",
    eips2017d == 5 ~ "Neither support nor oppose EU membership",
    eips2017d %in% 6:10 ~ "Support EU membership"
  ),
  rsp_gender = case_when(
    R9P1 == 1 ~ "Male",
    R9P1 == 2 ~ "Female"
  ),
  rsp_age_10a = case_when(
    R9P5_1 == 1 ~ "18-25",
    R9P5_1 == 2 ~ "26-35",
    R9P5_1 == 3 ~ "36-45",
    R9P5_1 == 4 ~ "46-55",
    R9P5_1 == 5 ~ "56-65",
    R9P5_1 == 6 ~ "66-75",
    R9P5_1 == 7 ~ "75 <"
  ),
  rsp_edu = case_when(
    R9P4_1 == 1 ~ "Lower",
    R9P4_1 == 2 ~ "Intermediate",
    R9P4_1 == 3 ~ "Higher"
  )
)

# Step 2: experiment-level (ref_*) variables (lower-case names in the
# Norwegian file); afterwards only the rsp_* and ref_* columns are kept.
no_02 <- no_01 %>%
  mutate(
    ref_turnout = case_when(
      eips2017e_turnout == 1 ~ "Not shown",
      eips2017e_turnout == 2 ~ "35%",
      eips2017e_turnout == 3 ~ "47%",
      eips2017e_turnout == 4 ~ "53%",
      eips2017e_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      eips2017e_majoritysize == 1 ~ "Not shown",
      eips2017e_majoritysize == 2 ~ "51%",
      eips2017e_majoritysize == 3 ~ "55%",
      eips2017e_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      eips2017e_winner == 1 ~ "Not shown",
      eips2017e_winner == 2 ~ "Pro EU",
      eips2017e_winner == 3 ~ "Anti-EU"
    ),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      (eips2017d %in% 0:4 & eips2017e_winner == 2) |
        (eips2017d %in% 6:10 & eips2017e_winner == 3) ~ "Unfavorable outcome",
      (eips2017d %in% 0:4 & eips2017e_winner == 3) |
        (eips2017d %in% 6:10 & eips2017e_winner == 2) ~ "Favorable outcome",
      eips2017e_winner == 1 ~ "Not shown"
    ),
    # Dependent variable: only the values 1 and 0 are kept, anything else is NA.
    ref_post = case_when(
      eips2017e_dv == 1 ~ 1,
      eips2017e_dv == 0 ~ 0
    )
  ) %>%
  select(starts_with("rsp_"), starts_with("ref_"))

no <- no_02
## Prepare Sweden ----------------------------------------------------
# Step 1: respondent-level (rsp_*) variables.
se_01 <- se_raw %>%
  mutate(
    # Panel id prefixed with 400.
    rsp_id = as.numeric(paste0(400, id)),
    rsp_country = "Sweden"
  ) %>%
  mutate(
    rsp_eu = case_when(
      EIPS2017D %in% 0:4 ~ "Oppose EU membership",
      EIPS2017D == 5 ~ "Neither support nor oppose EU membership",
      EIPS2017D %in% 6:10 ~ "Support EU membership"
    ),
    # NOTE(review): here 2 is "Male" and 1 is "Female", the reverse of the
    # coding used for the other countries in this file -- confirm against
    # the Swedish codebook.
    rsp_gender = case_when(
      sex == 2 ~ "Male",
      sex == 1 ~ "Female"
    ),
    rsp_age = as.numeric(age),
    rsp_edu = case_when(
      edu3 == 1 ~ "Lower",
      edu3 == 2 ~ "Intermediate",
      edu3 == 3 ~ "Higher"
    )
  )

# Step 2: experiment-level (ref_*) variables; afterwards only the rsp_* and
# ref_* columns are kept.
se_02 <- se_01 %>%
  mutate(
    ref_turnout = case_when(
      EIPS2017E_turnout == 1 ~ "Not shown",
      EIPS2017E_turnout == 2 ~ "35%",
      EIPS2017E_turnout == 3 ~ "47%",
      EIPS2017E_turnout == 4 ~ "53%",
      EIPS2017E_turnout == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      EIPS2017E_majoritysize == 1 ~ "Not shown",
      EIPS2017E_majoritysize == 2 ~ "51%",
      EIPS2017E_majoritysize == 3 ~ "55%",
      EIPS2017E_majoritysize == 4 ~ "70%"
    ),
    ref_winner = case_when(
      EIPS2017E_winner == 1 ~ "Not shown",
      EIPS2017E_winner == 2 ~ "Pro EU",
      EIPS2017E_winner == 3 ~ "Anti-EU"
    ),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 2) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 3) ~ "Unfavorable outcome",
      (EIPS2017D %in% 0:4 & EIPS2017E_winner == 3) |
        (EIPS2017D %in% 6:10 & EIPS2017E_winner == 2) ~ "Favorable outcome",
      EIPS2017E_winner == 1 ~ "Not shown"
    ),
    # Dependent variable: only the values 1 and 0 are kept, anything else is NA.
    ref_post = case_when(
      EIPS2017E_dv == 1 ~ 1,
      EIPS2017E_dv == 0 ~ 0
    )
  ) %>%
  select(starts_with("rsp_"), starts_with("ref_"))

se <- se_02
## Prepare France ----------------------------------------------------
# Step 1: respondent-level (rsp_*) variables. Age is recoded from two banded
# variables: five-year bands (rsp_age_5) and wider bands (rsp_age_10b).
fr_01 <-
  fr_raw %>%
  mutate(
    # Respondent id prefixed with 500.
    rsp_id = as.numeric(paste0(500, UID_pe03)),
    rsp_country = "France",
    rsp_eu = case_when(
      pe03_EIPS2017D %in% 0:4 ~ "Oppose EU membership",
      pe03_EIPS2017D == 5 ~ "Neither support nor oppose EU membership",
      pe03_EIPS2017D %in% 6:10 ~ "Support EU membership"),
    rsp_gender = case_when(
      ea17_A1 == 1 ~ "Male",
      ea17_A1 == 2 ~ "Female"),
    rsp_age_5 = case_when(
      ea17_A2A_rec == 4 ~ "18-24",
      ea17_A2A_rec == 5 ~ "25-29",
      ea17_A2A_rec == 6 ~ "30-34",
      ea17_A2A_rec == 7 ~ "35-39",
      ea17_A2A_rec == 8 ~ "40-44",
      # Was "45-59": a typo, since this band sits between "40-44" and
      # "50-54", and the pooled age recode expects the label "45-49".
      ea17_A2A_rec == 9 ~ "45-49",
      ea17_A2A_rec == 10 ~ "50-54",
      ea17_A2A_rec == 11 ~ "55-59",
      ea17_A2A_rec == 12 ~ "60-64",
      ea17_A2A_rec == 13 ~ "65-69",
      ea17_A2A_rec == 14 ~ "70 <"),
    rsp_age_10b = case_when(
      CAL_AGE10 == 1 ~ "18-22",
      CAL_AGE10 == 2 ~ "23-34",
      CAL_AGE10 == 3 ~ "35-44",
      CAL_AGE10 == 4 ~ "45-54",
      CAL_AGE10 == 5 ~ "55-64",
      CAL_AGE10 == 6 ~ "65-75",
      CAL_AGE10 == 7 ~ "76-79"),
    rsp_edu = case_when(
      CAL_DIPLOME == 1 ~ "Lower",
      CAL_DIPLOME == 2 ~ "Intermediate",
      CAL_DIPLOME %in% 3:4 ~ "Higher")
  )

# Step 2: experiment-level (ref_*) variables (pe03_-prefixed, upper-case
# suffixes in the French file); afterwards only rsp_* and ref_* are kept.
fr_02 <-
  fr_01 %>%
  mutate(
    ref_turnout = case_when(
      pe03_EIPS2017E_TURNOUT == 1 ~ "Not shown",
      pe03_EIPS2017E_TURNOUT == 2 ~ "35%",
      pe03_EIPS2017E_TURNOUT == 3 ~ "47%",
      pe03_EIPS2017E_TURNOUT == 4 ~ "53%",
      pe03_EIPS2017E_TURNOUT == 5 ~ "85%"),
    ref_majoritysize = case_when(
      pe03_EIPS2017E_MAJORITYSIZE == 1 ~ "Not shown",
      pe03_EIPS2017E_MAJORITYSIZE == 2 ~ "51%",
      pe03_EIPS2017E_MAJORITYSIZE == 3 ~ "55%",
      pe03_EIPS2017E_MAJORITYSIZE == 4 ~ "70%"),
    ref_winner = case_when(
      pe03_EIPS2017E_WINNER == 1 ~ "Not shown",
      pe03_EIPS2017E_WINNER == 2 ~ "Pro EU",
      pe03_EIPS2017E_WINNER == 3 ~ "Anti-EU"),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      pe03_EIPS2017D %in% 0:4 & pe03_EIPS2017E_WINNER == 2 ~ "Unfavorable outcome",
      pe03_EIPS2017D %in% 6:10 & pe03_EIPS2017E_WINNER == 3 ~ "Unfavorable outcome",
      pe03_EIPS2017D %in% 0:4 & pe03_EIPS2017E_WINNER == 3 ~ "Favorable outcome",
      pe03_EIPS2017D %in% 6:10 & pe03_EIPS2017E_WINNER == 2 ~ "Favorable outcome",
      pe03_EIPS2017E_WINNER == 1 ~ "Not shown"
    ),
    # Dependent variable: only the values 1 and 0 are kept, anything else is NA.
    ref_post = case_when(
      pe03_EIPS2017E_DV == 1 ~ 1,
      pe03_EIPS2017E_DV == 0 ~ 0
    )
  ) %>%
  select(matches("^rsp_"), matches("^ref_"))

fr <- fr_02
## Prepare Germany ---------------------------------------------
# Step 1: respondent-level (rsp_*) variables. Both age variables are recoded
# from the same banded birth-year variable, year_of_birth_cat_16.
de_01 <- mutate(
  de_raw,
  # Respondent id prefixed with 600.
  rsp_id = as.numeric(paste0(600, id_g)),
  rsp_country = "Germany",
  rsp_eu = case_when(
    ZH29053 %in% 0:4 ~ "Oppose EU membership",
    ZH29053 == 5 ~ "Neither support nor oppose EU membership",
    ZH29053 %in% 6:10 ~ "Support EU membership"
  ),
  rsp_gender = case_when(
    gender_16 == 1 ~ "Male",
    gender_16 == 2 ~ "Female"
  ),
  # TODO (author's note to self): check the age categories once more.
  # Higher codes correspond to younger age bands.
  rsp_age_5 = case_when(
    year_of_birth_cat_16 == 13 ~ "18-23",
    year_of_birth_cat_16 == 12 ~ "24-29",
    year_of_birth_cat_16 == 11 ~ "30-34",
    year_of_birth_cat_16 == 10 ~ "35-39",
    year_of_birth_cat_16 == 9 ~ "40-44",
    year_of_birth_cat_16 == 8 ~ "45-49",
    year_of_birth_cat_16 == 7 ~ "50-54",
    year_of_birth_cat_16 == 6 ~ "55-59",
    year_of_birth_cat_16 == 5 ~ "60-64",
    year_of_birth_cat_16 == 4 ~ "65-69",
    year_of_birth_cat_16 == 3 ~ "70-74",
    year_of_birth_cat_16 == 2 ~ "75-79",
    year_of_birth_cat_16 == 1 ~ "80-84"
  ),
  # Pairs of adjacent birth-year codes merged into wider bands.
  rsp_age_10b = case_when(
    year_of_birth_cat_16 %in% 12:13 ~ "18-27",
    year_of_birth_cat_16 %in% 10:11 ~ "28-37",
    year_of_birth_cat_16 %in% 8:9 ~ "38-47",
    year_of_birth_cat_16 %in% 6:7 ~ "48-57",
    year_of_birth_cat_16 %in% 4:5 ~ "58-67",
    year_of_birth_cat_16 %in% 2:3 ~ "68-77",
    year_of_birth_cat_16 == 1 ~ "78-87"
  ),
  rsp_edu = case_when(
    educ_school_16 %in% 1:3 ~ "Lower",
    educ_school_16 == 4 ~ "Intermediate",
    educ_school_16 %in% 5:6 ~ "Higher"
  )
)

# Step 2: experiment-level (ref_*) variables. In the German file the
# treatments are expZH29054_1 (turnout), _2 (majority size) and _3 (winner);
# afterwards only the rsp_* and ref_* columns are kept.
de_02 <- de_01 %>%
  mutate(
    ref_turnout = case_when(
      expZH29054_1 == 1 ~ "Not shown",
      expZH29054_1 == 2 ~ "35%",
      expZH29054_1 == 3 ~ "47%",
      expZH29054_1 == 4 ~ "53%",
      expZH29054_1 == 5 ~ "85%"
    ),
    ref_majoritysize = case_when(
      expZH29054_2 == 1 ~ "Not shown",
      expZH29054_2 == 2 ~ "51%",
      expZH29054_2 == 3 ~ "55%",
      expZH29054_2 == 4 ~ "70%"
    ),
    ref_winner = case_when(
      expZH29054_3 == 1 ~ "Not shown",
      expZH29054_3 == 2 ~ "Pro EU",
      expZH29054_3 == 3 ~ "Anti-EU"
    ),
    # The outcome is unfavorable when the winning side differs from the
    # respondent's own EU position and favorable when the two agree.
    ref_outcome = case_when(
      (ZH29053 %in% 0:4 & expZH29054_3 == 2) |
        (ZH29053 %in% 6:10 & expZH29054_3 == 3) ~ "Unfavorable outcome",
      (ZH29053 %in% 0:4 & expZH29054_3 == 3) |
        (ZH29053 %in% 6:10 & expZH29054_3 == 2) ~ "Favorable outcome",
      expZH29054_3 == 1 ~ "Not shown"
    ),
    # The dependent variable is ZH29054, with 1 recoded to 1 and 2 to 0.
    ref_post = case_when(
      ZH29054 == 1 ~ 1,
      ZH29054 == 2 ~ 0
    )
  ) %>%
  select(starts_with("rsp_"), starts_with("ref_"))

de <- de_02
## Combine EIPS ------------------------------------------------------
# Stack the six country data sets, keep only respondents with a non-missing
# dependent variable (ref_post), and add two pooled covariates:
#   rsp_edu_2     - "Higher" vs "Lower" (Intermediate and Lower combined)
#   rsp_age_cat_3 - "18-35" / "36-55" / "55<", taken from whichever age
#                   variable a country provides (rsp_age, rsp_age_10a,
#                   rsp_age_10b or rsp_age_5). Bands that straddle a cut-off
#                   (e.g. "35-44") are assigned to a single category.
eips_raw <- bind_rows(is, nl, no, se, fr, de) %>%
  filter(!is.na(ref_post)) %>%
  mutate(
    rsp_edu_2 = case_when(
      rsp_edu == "Higher" ~ "Higher",
      rsp_edu %in% c("Intermediate", "Lower") ~ "Lower"),
    rsp_age_cat_3 = case_when(
      rsp_age_10a %in% c("18-25", "26-35") ~ "18-35",
      rsp_age_10b %in% c("18-22", "23-34") ~ "18-35",
      rsp_age %in% 18:35 ~ "18-35",
      rsp_age_5 %in% c("18-23", "24-29", "30-34") ~ "18-35",
      rsp_age_10a %in% c("36-45", "46-55") ~ "36-55",
      rsp_age_10b %in% c("35-44", "45-54") ~ "36-55",
      rsp_age %in% 36:55 ~ "36-55",
      rsp_age_5 %in% c("35-39", "40-44", "45-49", "50-54") ~ "36-55",
      rsp_age_10a %in% c("56-65", "66-75", "75 <") ~ "55<",
      rsp_age_10b %in% c("55-64", "65-75", "76-79") ~ "55<",
      rsp_age >= 56 ~ "55<",
      rsp_age_5 %in% c("55-59", "60-64", "65-69", "70-74", "75-79", "80-84") ~ "55<",
      # Fallback for the French five-year labels that differ from the German
      # ones ("18-24", "25-29", "70 <"). These rules come last, so they only
      # fill rows that every rule above left as NA, e.g. French respondents
      # with an rsp_age_5 band but no rsp_age_10b band.
      rsp_age_5 %in% c("18-24", "25-29") ~ "18-35",
      rsp_age_5 %in% c("70 <") ~ "55<")
  )