Part 5 Week 2
Getting Started with Data Visualization in R > Week 2
5.1 Introduction to the tidyverse
install.packages("tidyverse")
5.2 Data import and structure in the tidyverse
5.3 Filtering, selecting, recoding, renaming, and piping
5.4 Recoding, Renaming, and Calculating Columns
5.5 Grouping and summarizing data
#####Data Wrangling with the tidyverse
### install the tidyverse if you don't have it installed. You only have to do this once.
#install.packages("tidyverse")
### Load the tidyverse functions. Do this every time you want to use tidyverse commands.
library(tidyverse)
#### Use read_csv instead of read.csv
#### Make sure you have the file in your working directory, or use the
#### complete file path. Use setwd() if you need to.
# setwd("path/to/your/project")
cces <- read_csv("week2/cces_sample_coursera.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#### read_csv produces a tibble rather than a dataframe.
class(cces)
#> [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
# vignette("tibble")
##### If you need to switch back and forth between tibble and dataframe:
##### as.data.frame() converts the tibble to a plain data frame, and
##### as_tibble() converts that data frame back to a tibble.
cces_dataframe <- as.data.frame(cces)
cces_tibble <- as_tibble(cces_dataframe)
#### Drop rows with missing data and overwrite cces with the result
cces <- drop_na(cces)
##### Use the filter function
#### Keeps only women respondents (rows where gender == 2)
women <- filter(cces, gender == 2)
####remember the other logical operators
# >
# <
# <=
# >=
# &
# |
# %in%
#### Compare the dimensions before and after filtering
dim(cces)
#> [1] 869 25
dim(women)
#> [1] 478 25
table(cces$gender)
#>
#> 1 2
#> 391 478
#### Every value left in the filtered gender column should be 2
women$gender
#> [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [38] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [334] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [371] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [408] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [445] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#### Combine conditions with &: rows where gender == 2 and pid7 > 4
republican_women <- filter(cces, gender == 2 & pid7 > 4)
dim(republican_women)
#> [1] 154 25
table(republican_women$pid7)
#>
#> 5 6 7
#> 28 36 90
republican_women$gender
#> [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [38] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [149] 2 2 2 2 2 2
#### Only pid7 values above 4 remain after the filter
republican_women$pid7
#> [1] 7 5 7 5 7 7 7 7 6 7 6 6 7 7 7 7 5 7 6 5 7 6 7 5 7 5 7 6 7 7 7 7 6 7 7 6 5
#> [38] 6 7 5 7 5 7 6 5 7 7 7 6 7 7 5 5 7 7 5 6 7 7 7 7 7 7 7 5 7 6 7 5 7 7 7 7 7
#> [75] 7 7 5 7 7 7 7 7 7 7 6 7 7 7 7 6 6 7 6 7 6 5 6 7 6 7 7 7 6 7 5 7 7 7 7 7 5
#> [112] 6 7 7 6 6 6 7 7 7 7 7 6 5 6 6 5 6 6 5 6 6 6 6 7 5 7 7 5 6 7 7 7 7 5 7 7 6
#> [149] 7 7 7 5 5 5
head(republican_women)
#> # A tibble: 6 x 25
#> caseid region gender educ edloan race hispanic employ marstat pid7 ideo5
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 412873566 2 2 3 2 1 2 7 1 7 5
#> 2 416479672 3 2 2 2 1 2 5 4 5 3
#> 3 412985437 3 2 4 2 1 2 6 3 7 3
#> 4 413425778 4 2 3 2 1 2 1 5 5 4
#> 5 413859987 3 2 6 1 1 2 1 1 7 5
#> 6 412380857 4 2 6 1 1 2 1 1 7 3
#> # … with 14 more variables: pew_religimp <dbl>, newsint <dbl>,
#> # faminc_new <dbl>, union <dbl>, investor <dbl>, CC18_308a <dbl>,
#> # CC18_310a <dbl>, CC18_310b <dbl>, CC18_310c <dbl>, CC18_310d <dbl>,
#> # CC18_325a <dbl>, CC18_325b <dbl>, CC18_325c <dbl>, CC18_325d <dbl>
#### Select certain columns from the data
#### (keeps only the educ and employ columns of republican_women)
select(republican_women, "educ", "employ")
#> # A tibble: 154 x 2
#> educ employ
#> <dbl> <dbl>
#> 1 3 7
#> 2 2 5
#> 3 4 6
#> 4 3 1
#> 5 6 1
#> 6 6 1
#> # … with 148 more rows
#### Combine multiple commands using piping
# x %>% f(y) is the same as f(x, y)
# y %>% f(x, ., z) is the same as f(x, y, z)
#### The filter and select steps from above, chained into one statement
women_republicans_educ_employ <- cces %>%
  filter(gender == 2 & pid7 > 4) %>%
  select("educ", "employ")
#### Recode variables
#### pid7 codes 1-3 become "Democrat", 4 becomes "Independent",
#### and 5-7 become "Republican"
party <- recode(
  cces$pid7,
  `1` = "Democrat",
  `2` = "Democrat",
  `3` = "Democrat",
  `4` = "Independent",
  `5` = "Republican",
  `6` = "Republican",
  `7` = "Republican"
)
#### Store the recoded vector as a new column in cces
cces$party <- party
#### Rename variables (syntax is new_name = old_name)
test <- rename(cces, trump_approval = CC18_308a)
test$trump_approval
#> [1] 2 4 4 4 4 1 1 4 1 4 4 1 4 4 4 4 4 1 4 4 2 2 1 4 1 4 1 4 4 2 4 4 4 4 4 4 2
#> [38] 4 1 4 2 1 1 3 3 1 1 4 4 4 1 4 4 3 2 3 4 4 4 4 4 4 4 4 4 4 4 4 4 2 1 1 1 1
#> [75] 4 1 2 4 4 3 4 4 4 2 4 4 4 4 4 4 4 3 2 4 2 4 1 1 4 1 4 4 1 1 1 4 1 1 1 4 4
#> [112] 4 1 4 4 1 4 4 4 4 1 2 4 4 4 4 4 4 1 4 3 4 1 1 4 4 3 1 1 4 4 1 2 4 4 4 4 3
#> [149] 4 3 4 4 4 1 4 4 3 1 2 2 4 4 4 1 4 4 3 4 4 4 1 1 4 3 1 1 4 4 2 4 3 4 4 4 4
#> [186] 4 4 3 3 1 4 4 2 4 4 4 1 2 3 1 4 4 4 4 4 1 1 3 4 2 4 4 4 4 1 1 2 1 3 1 4 2
#> [223] 1 4 4 1 4 2 3 4 1 4 1 4 1 4 3 1 4 4 4 2 1 4 2 1 1 4 2 4 2 4 1 4 1 4 1 4 2
#> [260] 1 4 4 3 2 1 4 3 4 3 1 2 4 4 1 3 2 2 1 1 4 2 2 1 4 4 4 1 3 4 1 4 1 1 3 4 1
#> [297] 4 2 4 4 1 1 1 1 4 1 2 1 3 1 1 3 4 1 2 4 4 3 4 4 4 3 1 2 4 4 1 4 4 4 1 4 2
#> [334] 4 4 4 4 4 4 4 2 2 1 4 2 2 1 4 1 2 3 4 2 1 3 2 2 1 2 4 1 1 3 1 4 1 1 4 4 1
#> [371] 4 3 1 1 1 4 1 4 2 3 2 4 4 1 1 1 1 4 1 4 1 2 1 1 4 1 4 1 4 1 2 4 2 2 4 4 4
#> [408] 1 2 4 4 1 1 4 1 1 1 4 2 4 2 4 1 4 4 4 1 4 4 3 4 4 4 2 4 1 3 3 4 2 4 1 4 1
#> [445] 4 1 4 4 4 1 3 1 2 3 1 3 4 1 2 1 2 3 1 4 3 4 1 2 4 1 1 2 3 2 1 4 4 4 3 4 1
#> [482] 4 4 4 1 4 1 4 4 4 2 4 4 4 2 1 1 4 3 1 2 2 1 4 2 4 4 1 1 2 4 3 1 4 4 1 3 4
#> [519] 4 1 2 4 4 4 2 4 4 2 1 2 4 2 2 2 4 4 1 3 4 4 4 2 4 1 2 1 2 1 3 4 2 4 4 2 2
#> [556] 4 2 1 1 1 4 4 4 1 4 4 1 4 2 1 2 2 2 4 1 4 1 4 2 4 4 4 1 3 2 4 1 4 4 4 1 4
#> [593] 4 4 1 4 4 4 3 4 4 1 4 4 4 4 4 1 1 4 4 4 3 2 4 4 4 4 1 4 4 3 1 4 1 4 4 4 4
#> [630] 1 4 4 2 3 1 4 4 4 4 4 4 1 4 2 4 1 1 4 1 4 1 2 4 4 4 1 1 4 4 2 4 3 4 4 1 4
#> [667] 1 4 1 2 1 4 2 2 4 4 4 4 4 4 2 4 2 3 4 2 4 1 4 4 4 2 4 2 4 1 2 4 1 4 4 4 1
#> [704] 4 4 1 4 1 1 4 2 4 1 4 4 2 2 2 1 4 4 4 4 2 4 1 2 4 4 2 4 1 4 4 1 4 2 1 2 4
#> [741] 4 1 4 3 4 1 4 1 2 4 4 1 1 1 4 2 4 4 1 2 2 4 4 1 2 3 3 3 4 4 3 1 4 1 4 1 1
#> [778] 3 4 4 4 3 1 4 2 4 1 2 4 2 4 4 4 3 3 1 4 4 2 2 4 2 1 1 4 4 1 1 3 1 3 4 1 4
#> [815] 4 4 3 4 4 3 1 4 1 1 4 2 1 4 4 2 4 4 4 4 4 4 1 4 2 4 1 4 2 1 1 1 1 4 2 1 2
#> [852] 1 3 2 4 1 4 4 4 1 1 2 2 2 2 4 4 4 4
#### Overwrite cces with the renamed copy
cces <- test
cces$trump_approval
#> [1] 2 4 4 4 4 1 1 4 1 4 4 1 4 4 4 4 4 1 4 4 2 2 1 4 1 4 1 4 4 2 4 4 4 4 4 4 2
#> [38] 4 1 4 2 1 1 3 3 1 1 4 4 4 1 4 4 3 2 3 4 4 4 4 4 4 4 4 4 4 4 4 4 2 1 1 1 1
#> [75] 4 1 2 4 4 3 4 4 4 2 4 4 4 4 4 4 4 3 2 4 2 4 1 1 4 1 4 4 1 1 1 4 1 1 1 4 4
#> [112] 4 1 4 4 1 4 4 4 4 1 2 4 4 4 4 4 4 1 4 3 4 1 1 4 4 3 1 1 4 4 1 2 4 4 4 4 3
#> [149] 4 3 4 4 4 1 4 4 3 1 2 2 4 4 4 1 4 4 3 4 4 4 1 1 4 3 1 1 4 4 2 4 3 4 4 4 4
#> [186] 4 4 3 3 1 4 4 2 4 4 4 1 2 3 1 4 4 4 4 4 1 1 3 4 2 4 4 4 4 1 1 2 1 3 1 4 2
#> [223] 1 4 4 1 4 2 3 4 1 4 1 4 1 4 3 1 4 4 4 2 1 4 2 1 1 4 2 4 2 4 1 4 1 4 1 4 2
#> [260] 1 4 4 3 2 1 4 3 4 3 1 2 4 4 1 3 2 2 1 1 4 2 2 1 4 4 4 1 3 4 1 4 1 1 3 4 1
#> [297] 4 2 4 4 1 1 1 1 4 1 2 1 3 1 1 3 4 1 2 4 4 3 4 4 4 3 1 2 4 4 1 4 4 4 1 4 2
#> [334] 4 4 4 4 4 4 4 2 2 1 4 2 2 1 4 1 2 3 4 2 1 3 2 2 1 2 4 1 1 3 1 4 1 1 4 4 1
#> [371] 4 3 1 1 1 4 1 4 2 3 2 4 4 1 1 1 1 4 1 4 1 2 1 1 4 1 4 1 4 1 2 4 2 2 4 4 4
#> [408] 1 2 4 4 1 1 4 1 1 1 4 2 4 2 4 1 4 4 4 1 4 4 3 4 4 4 2 4 1 3 3 4 2 4 1 4 1
#> [445] 4 1 4 4 4 1 3 1 2 3 1 3 4 1 2 1 2 3 1 4 3 4 1 2 4 1 1 2 3 2 1 4 4 4 3 4 1
#> [482] 4 4 4 1 4 1 4 4 4 2 4 4 4 2 1 1 4 3 1 2 2 1 4 2 4 4 1 1 2 4 3 1 4 4 1 3 4
#> [519] 4 1 2 4 4 4 2 4 4 2 1 2 4 2 2 2 4 4 1 3 4 4 4 2 4 1 2 1 2 1 3 4 2 4 4 2 2
#> [556] 4 2 1 1 1 4 4 4 1 4 4 1 4 2 1 2 2 2 4 1 4 1 4 2 4 4 4 1 3 2 4 1 4 4 4 1 4
#> [593] 4 4 1 4 4 4 3 4 4 1 4 4 4 4 4 1 1 4 4 4 3 2 4 4 4 4 1 4 4 3 1 4 1 4 4 4 4
#> [630] 1 4 4 2 3 1 4 4 4 4 4 4 1 4 2 4 1 1 4 1 4 1 2 4 4 4 1 1 4 4 2 4 3 4 4 1 4
#> [667] 1 4 1 2 1 4 2 2 4 4 4 4 4 4 2 4 2 3 4 2 4 1 4 4 4 2 4 2 4 1 2 4 1 4 4 4 1
#> [704] 4 4 1 4 1 1 4 2 4 1 4 4 2 2 2 1 4 4 4 4 2 4 1 2 4 4 2 4 1 4 4 1 4 2 1 2 4
#> [741] 4 1 4 3 4 1 4 1 2 4 4 1 1 1 4 2 4 4 1 2 2 4 4 1 2 3 3 3 4 4 3 1 4 1 4 1 1
#> [778] 3 4 4 4 3 1 4 2 4 1 2 4 2 4 4 4 3 3 1 4 4 2 2 4 2 1 1 4 4 1 1 3 1 3 4 1 4
#> [815] 4 4 3 4 4 3 1 4 1 1 4 2 1 4 4 2 4 4 4 4 4 4 1 4 2 4 1 4 2 1 1 1 1 4 2 1 2
#> [852] 1 3 2 4 1 4 4 4 1 1 2 2 2 2 4 4 4 4
#### Calculate new numeric variables
#### Recode CC18_310b and CC18_310c to 0/1: codes 1 and 5 become 0,
#### codes 2, 3 and 4 become 1
rec_sen1_01 <- recode(
  cces$CC18_310b,
  `1` = 0,
  `5` = 0,
  `2` = 1,
  `3` = 1,
  `4` = 1
)
rec_sen2_01 <- recode(
  cces$CC18_310c,
  `1` = 0,
  `5` = 0,
  `2` = 1,
  `3` = 1,
  `4` = 1
)
#### Add the recoded vectors to cces as columns
cces$rec_sen1_01 <- rec_sen1_01
cces$rec_sen2_01 <- rec_sen2_01
#### know_sens is the sum of the two 0/1 columns, so it ranges from 0 to 2
cces <- mutate(cces, know_sens = rec_sen1_01 + rec_sen2_01)
cces$know_sens
#> [1] 2 1 2 2 2 2 2 2 2 2 2 2 0 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [38] 2 2 0 2 2 2 0 2 2 2 0 2 1 1 0 2 2 2 2 1 2 1 2 2 2 2 1 0 0 2 2 2 2 2 2 2 2
#> [75] 0 2 2 2 1 0 0 2 1 1 2 2 2 2 2 0 2 1 2 0 2 2 1 2 1 2 0 2 2 1 2 2 2 2 2 1 2
#> [112] 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 0 2 2 2 2 2 2 0 2 2 2 2 2 1 2 2 2 2 2
#> [149] 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1
#> [186] 2 0 0 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 1 2
#> [223] 2 2 2 2 2 2 2 1 2 2 0 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 1 2 2 2
#> [260] 2 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 0
#> [297] 1 1 2 0 2 2 2 2 0 0 0 2 2 1 2 2 1 2 2 2 2 2 2 2 1 0 0 1 2 2 2 2 2 2 2 2 2
#> [334] 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2
#> [371] 2 1 2 2 2 2 0 2 2 0 2 1 2 2 1 2 2 1 2 1 2 2 2 2 2 0 2 2 2 2 1 2 2 2 2 2 2
#> [408] 2 1 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 0 2 2 2 0 2 2 2 2 2 2 1 0 2 2
#> [445] 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 0 1 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2
#> [482] 2 2 2 1 1 2 2 0 2 0 2 2 2 1 2 0 2 0 1 0 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2
#> [519] 2 2 2 2 2 2 2 0 2 2 2 2 2 1 2 2 2 2 2 0 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 0
#> [556] 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 2
#> [593] 2 0 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 0 2 2
#> [630] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 1 2 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 2 2
#> [667] 1 2 2 1 2 2 2 2 2 0 2 2 1 2 2 2 1 2 2 0 0 2 2 2 2 1 2 0 2 2 2 2 2 0 2 2 1
#> [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 1 2 0 2 2 2 1 2 1 2 0 2 2 2 2 2 2 0 2
#> [741] 0 0 2 2 0 2 0 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 1 2 2 2 2 0 1 0 2 0 2 2 2 2 1
#> [778] 1 2 2 2 1 2 2 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2
#> [815] 2 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 0 2 2 2 2 0 2 2 2 1 2 2 2 0
#> [852] 2 2 1 1 2 2 2 2 2 2 2 2 2 0 2 2 2 2
##### Reorder rows by column values
#### Sort by gender, then by pid7 within gender (both ascending)
sorted_by_gender_and_party <- cces %>% arrange(gender, pid7)
# View(sorted_by_gender_and_party)
#### desc() reverses the order for pid7 only
sorted_by_gender_and_party <- cces %>% arrange(gender, desc(pid7))
# View(sorted_by_gender_and_party)
##### Add grouping to data
#### The rows are unchanged; the printed header gains a "Groups:" line
grouped_gender_pid7 <- cces %>% group_by(gender, pid7)
grouped_gender_pid7
#> # A tibble: 869 x 29
#> # Groups: gender, pid7 [14]
#> caseid region gender educ edloan race hispanic employ marstat pid7 ideo5
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 417614315 3 1 2 2 1 2 5 3 6 3
#> 2 415490556 1 2 6 2 1 1 1 1 2 2
#> 3 414351505 3 2 3 2 2 2 1 4 2 3
#> 4 411855339 1 2 5 2 6 2 5 3 3 1
#> 5 420208067 2 1 3 2 1 2 1 1 4 5
#> 6 412517331 1 1 2 2 1 2 5 5 5 4
#> # … with 863 more rows, and 18 more variables: pew_religimp <dbl>,
#> # newsint <dbl>, faminc_new <dbl>, union <dbl>, investor <dbl>,
#> # trump_approval <dbl>, CC18_310a <dbl>, CC18_310b <dbl>, CC18_310c <dbl>,
#> # CC18_310d <dbl>, CC18_325a <dbl>, CC18_325b <dbl>, CC18_325c <dbl>,
#> # CC18_325d <dbl>, party <chr>, rec_sen1_01 <dbl>, rec_sen2_01 <dbl>,
#> # know_sens <dbl>
###remove grouping with ungroup
#ungroup(grouped_gender_pid7)
##### Summarize the data
# ?summarise
#### With ungrouped data, summarise() returns a single row
summarise(
  cces,
  mean_pid7 = mean(pid7),
  mean_faminc = mean(faminc_new)
)
#> # A tibble: 1 x 2
#> mean_pid7 mean_faminc
#> <dbl> <dbl>
#> 1 3.62 6.58
#####you could do the same thing with piping
#cces %>% summarise(mean_pid7=mean(pid7),mean_faminc=mean(faminc_new))
### When you summarise grouped data, you get summaries by group
grouped_gender <- cces %>% group_by(gender)
summarise(
  grouped_gender,
  mean_pid7 = mean(pid7),
  mean_faminc = mean(faminc_new)
)
#> # A tibble: 2 x 3
#> gender mean_pid7 mean_faminc
#> <dbl> <dbl> <dbl>
#> 1 1 3.93 7.18
#> 2 2 3.37 6.09
####combine all of this with piping if you want to look like a pro and have fewer lines of code
#cces %>% group_by(gender) %>% summarise(mean=mean(pid7))
5.6 Practices
library(tidyverse)
#### Read the survey data from the course URL and drop rows with missing data
dat <- drop_na(
  read_csv(url("https://www.dropbox.com/s/uhfstf6g36ghxwp/cces_sample_coursera.csv?raw=1"))
)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Problem 1
# The object "dat" created in the assignment code will import the survey data for the assignment using read_csv, thereby creating a tibble. Using that object as your data, use select() to create a new tibble that includes only the columns for educational level, whether the respondent has an educational loan, employment status, and Trump approval. Display that object. Hint: consult the codebook to identify the correct column names.
#### Write your code below:
#### Preview the first rows of the imported data
dat %>% head()
#> # A tibble: 6 x 25
#> caseid region gender educ edloan race hispanic employ marstat pid7 ideo5
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 417614315 3 1 2 2 1 2 5 3 6 3
#> 2 415490556 1 2 6 2 1 1 1 1 2 2
#> 3 414351505 3 2 3 2 2 2 1 4 2 3
#> 4 411855339 1 2 5 2 6 2 5 3 3 1
#> 5 420208067 2 1 3 2 1 2 1 1 4 5
#> 6 412517331 1 1 2 2 1 2 5 5 5 4
#> # … with 14 more variables: pew_religimp <dbl>, newsint <dbl>,
#> # faminc_new <dbl>, union <dbl>, investor <dbl>, CC18_308a <dbl>,
#> # CC18_310a <dbl>, CC18_310b <dbl>, CC18_310c <dbl>, CC18_310d <dbl>,
#> # CC18_325a <dbl>, CC18_325b <dbl>, CC18_325c <dbl>, CC18_325d <dbl>
#### Keep only the four columns requested in Problem 1, then preview them
df <- dat %>% select(educ, edloan, employ, CC18_308a)
df %>% head()
#> # A tibble: 6 x 4
#> educ edloan employ CC18_308a
#> <dbl> <dbl> <dbl> <dbl>
#> 1 2 2 5 2
#> 2 6 2 1 4
#> 3 3 2 1 4
#> 4 5 2 5 4
#> 5 3 2 1 4
#> 6 2 2 5 1
# Problem 2
# Continuing to use the new data table you created in Problem 1, use recode() to create a new column named "trump_approve_disapprove" that recodes the column for President Trump's job approval. A value of "1" should mean that the respondent either "strongly" or "somewhat" approves of the President, and a value of 0 should mean that the respondent either "strongly" or "somewhat" DISapproves of the president. Display the resulting object.
#### Write your code below:
CC18_308a Job approval by President Trump
Do you approve or disapprove of the way each is doing their job…
1 Strongly approve
2 Somewhat approve
3 Somewhat disapprove
4 Strongly disapprove
#### Codes 1 and 2 (approve) become 1; codes 3 and 4 (disapprove) become 0
df$trump_approve_disapprove <- df$CC18_308a %>%
  recode(
    `1` = 1,
    `2` = 1,
    `3` = 0,
    `4` = 0
  )
df %>% head()
#> # A tibble: 6 x 5
#> educ edloan employ CC18_308a trump_approve_disapprove
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2 2 5 2 1
#> 2 6 2 1 4 0
#> 3 3 2 1 4 0
#> 4 5 2 5 4 0
#> 5 3 2 1 4 0
#> 6 2 2 5 1 1
# Problem 3
# Use summarise() to create a summary table for survey respondents who are employed full time and are married. The table should have the mean and median for the importance of religion column.
#### Write your code below:
employ: Which of the following best describes your current employment status?
1 Full-time
2 Part-time
3 Temporarily laid off
4 Unemployed
5 Retired
6 Permanently disabled
7 Homemaker
8 Student
9 Other
marstat: What is your marital status?
1 Married
2 Separated
3 Divorced
4 Widowed
5 Never married
6 Domestic/civil partnership
pew_religimp: How important is religion in your life?
1 Very important
2 Somewhat important
3 Not too important
4 Not at all important
#### Keep respondents with employ == 1 (full-time) and marstat == 1 (married),
#### then compute the mean and median of pew_religimp
dat %>%
  filter(employ == 1 & marstat == 1) %>%
  summarise(
    `Mean Importance of Religion` = mean(pew_religimp),
    `Median Importance of Religion` = median(pew_religimp)
  )
#> # A tibble: 1 x 2
#> `Mean Importance of Religion` `Median Importance of Religion`
#> <dbl> <dbl>
#> 1 2.19 2