Part 5 Week 2 Asynchronous
Getting Started with Data Visualization in R > Week 2
5.1 Introduction to the tidyverse
install.packages("tidyverse")
5.2 Data import and structure in the tidyverse
5.3 Filtering, selecting, recoding, renaming, and piping
5.4 Recoding, Renaming, and Calculating Columns
5.5 Grouping and summarizing data
#####Data Wrangling with the tidyverse
### install the tidyverse if you don't have it installed. You only have to do this once.
#install.packages("tidyverse")
###load the tidyverse functions #### Do this every time you want to use tidyverse commands
library(tidyverse)
####Use read_csv instead of read.csv
#### make sure you have the file in your working directory, or use the complete file path. Use setwd() if you need to.
# setwd("path/to/your/working/directory")
# Import the survey sample with readr; the path is relative to the working directory.
cces <- readr::read_csv("week2/cces_sample_coursera.csv") #> 
#> ── Column specification ─────────────────────────────────────────────
#> cols(
#>   .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#### read_csv returns a tibble, which is still a data.frame underneath.
class(cces) #> [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
# vignette("tibble")
##### converting between a plain data.frame and a tibble, should you ever need to
cces_dataframe <- base::as.data.frame(cces)
cces_tibble <- tibble::as_tibble(cces_dataframe)
#### keep only the rows that have no missing values in any column
cces <- tidyr::drop_na(cces)
##### Use the filter function
#### keep only the rows where gender == 2 (women respondents)
# dplyr:: is spelled out because stats::filter() has the same name.
women <- dplyr::filter(cces, gender == 2)
####remember the other logical operators
# >
# <
# <=
# >=
# &
# |
# %in%
dim(cces)#> [1] 869  25
dim(women)#> [1] 478  25
table(cces$gender)#> 
#>   1   2 
#> 391 478
women$gender#>   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [33] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [65] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [97] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [129] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [161] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [193] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [225] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [257] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [289] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [321] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [353] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [385] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [417] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [449] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
# Women (gender == 2) whose pid7 is above 4.
# Conditions separated by commas in filter() are combined with AND.
republican_women <- filter(cces, gender == 2, pid7 > 4)
dim(republican_women) #> [1] 154  25
table(republican_women$pid7) #> 
#>  5  6  7 
#> 28 36 90
republican_women$gender#>   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [33] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [65] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [97] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [129] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
republican_women$pid7#>   [1] 7 5 7 5 7 7 7 7 6 7 6 6 7 7 7 7 5 7 6 5 7 6 7 5 7 5 7 6 7 7 7 7
#>  [33] 6 7 7 6 5 6 7 5 7 5 7 6 5 7 7 7 6 7 7 5 5 7 7 5 6 7 7 7 7 7 7 7
#>  [65] 5 7 6 7 5 7 7 7 7 7 7 7 5 7 7 7 7 7 7 7 6 7 7 7 7 6 6 7 6 7 6 5
#>  [97] 6 7 6 7 7 7 6 7 5 7 7 7 7 7 5 6 7 7 6 6 6 7 7 7 7 7 6 5 6 6 5 6
#> [129] 6 5 6 6 6 6 7 5 7 7 5 6 7 7 7 7 5 7 7 6 7 7 7 5 5 5
head(republican_women)#> # A tibble: 6 x 25
#>      caseid region gender  educ edloan  race hispanic employ marstat
#>       <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>    <dbl>  <dbl>   <dbl>
#> 1 412873566      2      2     3      2     1        2      7       1
#> 2 416479672      3      2     2      2     1        2      5       4
#> 3 412985437      3      2     4      2     1        2      6       3
#> 4 413425778      4      2     3      2     1        2      1       5
#> 5 413859987      3      2     6      1     1        2      1       1
#> 6 412380857      4      2     6      1     1        2      1       1
#> # … with 16 more variables: pid7 <dbl>, ideo5 <dbl>,
#> #   pew_religimp <dbl>, newsint <dbl>, faminc_new <dbl>,
#> #   union <dbl>, investor <dbl>, CC18_308a <dbl>, CC18_310a <dbl>,
#> #   CC18_310b <dbl>, CC18_310c <dbl>, CC18_310d <dbl>,
#> #   CC18_325a <dbl>, CC18_325b <dbl>, CC18_325c <dbl>,
#> #   CC18_325d <dbl>
####select certain columns from the data
# Column names may be given bare (unquoted) as well as quoted.
select(republican_women, educ, employ) #> # A tibble: 154 x 2
#>    educ employ
#>   <dbl>  <dbl>
#> 1     3      7
#> 2     2      5
#> 3     4      6
#> 4     3      1
#> 5     6      1
#> 6     6      1
#> # … with 148 more rows
####combine multiple commands using piping
#x %>% f(y) is the same as f(x, y)
#y %>% f(x, ., z) is the same as f(x, y, z)
# One step per line: filter the rows first, then keep two columns.
women_republicans_educ_employ <- cces %>%
  filter(gender == 2, pid7 > 4) %>%
  select(educ, employ)
####recode variables
# Collapse the seven pid7 codes into three labels:
# 1-3 -> "Democrat", 4 -> "Independent", 5-7 -> "Republican".
# Numeric codes must be wrapped in backticks to be used as argument names.
party <- cces$pid7 %>%
  recode(
    `1` = "Democrat",
    `2` = "Democrat",
    `3` = "Democrat",
    `4` = "Independent",
    `5` = "Republican",
    `6` = "Republican",
    `7` = "Republican"
  )
# Attach the recoded vector to the data as a new column.
cces[["party"]] <- party
####rename variables
# rename() takes new_name = old_name; the result is stored in a separate object.
test <- cces %>%
  rename(trump_approval = CC18_308a)
test$trump_approval#>   [1] 2 4 4 4 4 1 1 4 1 4 4 1 4 4 4 4 4 1 4 4 2 2 1 4 1 4 1 4 4 2 4 4
#>  [33] 4 4 4 4 2 4 1 4 2 1 1 3 3 1 1 4 4 4 1 4 4 3 2 3 4 4 4 4 4 4 4 4
#>  [65] 4 4 4 4 4 2 1 1 1 1 4 1 2 4 4 3 4 4 4 2 4 4 4 4 4 4 4 3 2 4 2 4
#>  [97] 1 1 4 1 4 4 1 1 1 4 1 1 1 4 4 4 1 4 4 1 4 4 4 4 1 2 4 4 4 4 4 4
#> [129] 1 4 3 4 1 1 4 4 3 1 1 4 4 1 2 4 4 4 4 3 4 3 4 4 4 1 4 4 3 1 2 2
#> [161] 4 4 4 1 4 4 3 4 4 4 1 1 4 3 1 1 4 4 2 4 3 4 4 4 4 4 4 3 3 1 4 4
#> [193] 2 4 4 4 1 2 3 1 4 4 4 4 4 1 1 3 4 2 4 4 4 4 1 1 2 1 3 1 4 2 1 4
#> [225] 4 1 4 2 3 4 1 4 1 4 1 4 3 1 4 4 4 2 1 4 2 1 1 4 2 4 2 4 1 4 1 4
#> [257] 1 4 2 1 4 4 3 2 1 4 3 4 3 1 2 4 4 1 3 2 2 1 1 4 2 2 1 4 4 4 1 3
#> [289] 4 1 4 1 1 3 4 1 4 2 4 4 1 1 1 1 4 1 2 1 3 1 1 3 4 1 2 4 4 3 4 4
#> [321] 4 3 1 2 4 4 1 4 4 4 1 4 2 4 4 4 4 4 4 4 2 2 1 4 2 2 1 4 1 2 3 4
#> [353] 2 1 3 2 2 1 2 4 1 1 3 1 4 1 1 4 4 1 4 3 1 1 1 4 1 4 2 3 2 4 4 1
#> [385] 1 1 1 4 1 4 1 2 1 1 4 1 4 1 4 1 2 4 2 2 4 4 4 1 2 4 4 1 1 4 1 1
#> [417] 1 4 2 4 2 4 1 4 4 4 1 4 4 3 4 4 4 2 4 1 3 3 4 2 4 1 4 1 4 1 4 4
#> [449] 4 1 3 1 2 3 1 3 4 1 2 1 2 3 1 4 3 4 1 2 4 1 1 2 3 2 1 4 4 4 3 4
#> [481] 1 4 4 4 1 4 1 4 4 4 2 4 4 4 2 1 1 4 3 1 2 2 1 4 2 4 4 1 1 2 4 3
#> [513] 1 4 4 1 3 4 4 1 2 4 4 4 2 4 4 2 1 2 4 2 2 2 4 4 1 3 4 4 4 2 4 1
#> [545] 2 1 2 1 3 4 2 4 4 2 2 4 2 1 1 1 4 4 4 1 4 4 1 4 2 1 2 2 2 4 1 4
#> [577] 1 4 2 4 4 4 1 3 2 4 1 4 4 4 1 4 4 4 1 4 4 4 3 4 4 1 4 4 4 4 4 1
#> [609] 1 4 4 4 3 2 4 4 4 4 1 4 4 3 1 4 1 4 4 4 4 1 4 4 2 3 1 4 4 4 4 4
#> [641] 4 1 4 2 4 1 1 4 1 4 1 2 4 4 4 1 1 4 4 2 4 3 4 4 1 4 1 4 1 2 1 4
#> [673] 2 2 4 4 4 4 4 4 2 4 2 3 4 2 4 1 4 4 4 2 4 2 4 1 2 4 1 4 4 4 1 4
#> [705] 4 1 4 1 1 4 2 4 1 4 4 2 2 2 1 4 4 4 4 2 4 1 2 4 4 2 4 1 4 4 1 4
#> [737] 2 1 2 4 4 1 4 3 4 1 4 1 2 4 4 1 1 1 4 2 4 4 1 2 2 4 4 1 2 3 3 3
#> [769] 4 4 3 1 4 1 4 1 1 3 4 4 4 3 1 4 2 4 1 2 4 2 4 4 4 3 3 1 4 4 2 2
#> [801] 4 2 1 1 4 4 1 1 3 1 3 4 1 4 4 4 3 4 4 3 1 4 1 1 4 2 1 4 4 2 4 4
#> [833] 4 4 4 4 1 4 2 4 1 4 2 1 1 1 1 4 2 1 2 1 3 2 4 1 4 4 4 1 1 2 2 2
#> [865] 2 4 4 4 4
cces <- test
cces$trump_approval#>   [1] 2 4 4 4 4 1 1 4 1 4 4 1 4 4 4 4 4 1 4 4 2 2 1 4 1 4 1 4 4 2 4 4
#>  [33] 4 4 4 4 2 4 1 4 2 1 1 3 3 1 1 4 4 4 1 4 4 3 2 3 4 4 4 4 4 4 4 4
#>  [65] 4 4 4 4 4 2 1 1 1 1 4 1 2 4 4 3 4 4 4 2 4 4 4 4 4 4 4 3 2 4 2 4
#>  [97] 1 1 4 1 4 4 1 1 1 4 1 1 1 4 4 4 1 4 4 1 4 4 4 4 1 2 4 4 4 4 4 4
#> [129] 1 4 3 4 1 1 4 4 3 1 1 4 4 1 2 4 4 4 4 3 4 3 4 4 4 1 4 4 3 1 2 2
#> [161] 4 4 4 1 4 4 3 4 4 4 1 1 4 3 1 1 4 4 2 4 3 4 4 4 4 4 4 3 3 1 4 4
#> [193] 2 4 4 4 1 2 3 1 4 4 4 4 4 1 1 3 4 2 4 4 4 4 1 1 2 1 3 1 4 2 1 4
#> [225] 4 1 4 2 3 4 1 4 1 4 1 4 3 1 4 4 4 2 1 4 2 1 1 4 2 4 2 4 1 4 1 4
#> [257] 1 4 2 1 4 4 3 2 1 4 3 4 3 1 2 4 4 1 3 2 2 1 1 4 2 2 1 4 4 4 1 3
#> [289] 4 1 4 1 1 3 4 1 4 2 4 4 1 1 1 1 4 1 2 1 3 1 1 3 4 1 2 4 4 3 4 4
#> [321] 4 3 1 2 4 4 1 4 4 4 1 4 2 4 4 4 4 4 4 4 2 2 1 4 2 2 1 4 1 2 3 4
#> [353] 2 1 3 2 2 1 2 4 1 1 3 1 4 1 1 4 4 1 4 3 1 1 1 4 1 4 2 3 2 4 4 1
#> [385] 1 1 1 4 1 4 1 2 1 1 4 1 4 1 4 1 2 4 2 2 4 4 4 1 2 4 4 1 1 4 1 1
#> [417] 1 4 2 4 2 4 1 4 4 4 1 4 4 3 4 4 4 2 4 1 3 3 4 2 4 1 4 1 4 1 4 4
#> [449] 4 1 3 1 2 3 1 3 4 1 2 1 2 3 1 4 3 4 1 2 4 1 1 2 3 2 1 4 4 4 3 4
#> [481] 1 4 4 4 1 4 1 4 4 4 2 4 4 4 2 1 1 4 3 1 2 2 1 4 2 4 4 1 1 2 4 3
#> [513] 1 4 4 1 3 4 4 1 2 4 4 4 2 4 4 2 1 2 4 2 2 2 4 4 1 3 4 4 4 2 4 1
#> [545] 2 1 2 1 3 4 2 4 4 2 2 4 2 1 1 1 4 4 4 1 4 4 1 4 2 1 2 2 2 4 1 4
#> [577] 1 4 2 4 4 4 1 3 2 4 1 4 4 4 1 4 4 4 1 4 4 4 3 4 4 1 4 4 4 4 4 1
#> [609] 1 4 4 4 3 2 4 4 4 4 1 4 4 3 1 4 1 4 4 4 4 1 4 4 2 3 1 4 4 4 4 4
#> [641] 4 1 4 2 4 1 1 4 1 4 1 2 4 4 4 1 1 4 4 2 4 3 4 4 1 4 1 4 1 2 1 4
#> [673] 2 2 4 4 4 4 4 4 2 4 2 3 4 2 4 1 4 4 4 2 4 2 4 1 2 4 1 4 4 4 1 4
#> [705] 4 1 4 1 1 4 2 4 1 4 4 2 2 2 1 4 4 4 4 2 4 1 2 4 4 2 4 1 4 4 1 4
#> [737] 2 1 2 4 4 1 4 3 4 1 4 1 2 4 4 1 1 1 4 2 4 4 1 2 2 4 4 1 2 3 3 3
#> [769] 4 4 3 1 4 1 4 1 1 3 4 4 4 3 1 4 2 4 1 2 4 2 4 4 4 3 3 1 4 4 2 2
#> [801] 4 2 1 1 4 4 1 1 3 1 3 4 1 4 4 4 3 4 4 3 1 4 1 1 4 2 1 4 4 2 4 4
#> [833] 4 4 4 4 1 4 2 4 1 4 2 1 1 1 1 4 2 1 2 1 3 2 4 1 4 4 4 1 1 2 2 2
#> [865] 2 4 4 4 4
####calculate new numeric variables
# Recode CC18_310b and CC18_310c to 0/1 (codes 1 and 5 -> 0; codes 2, 3, 4 -> 1)
# and add the two indicators together, so know_sens ranges from 0 to 2.
# NOTE(review): presumably 1 means the respondent named a party for that
# senator -- confirm against the codebook.
cces <- cces %>%
  mutate(
    rec_sen1_01 = recode(
      CC18_310b,
      `1` = 0, `5` = 0,
      `2` = 1, `3` = 1, `4` = 1
    ),
    rec_sen2_01 = recode(
      CC18_310c,
      `1` = 0, `5` = 0,
      `2` = 1, `3` = 1, `4` = 1
    ),
    # mutate() can refer to columns created earlier in the same call
    know_sens = rec_sen1_01 + rec_sen2_01
  )
# Keep standalone copies of the two indicators as well.
rec_sen1_01 <- cces$rec_sen1_01
rec_sen2_01 <- cces$rec_sen2_01
cces$know_sens#>   [1] 2 1 2 2 2 2 2 2 2 2 2 2 0 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [33] 2 2 2 2 2 2 2 0 2 2 2 0 2 2 2 0 2 1 1 0 2 2 2 2 1 2 1 2 2 2 2 1
#>  [65] 0 0 2 2 2 2 2 2 2 2 0 2 2 2 1 0 0 2 1 1 2 2 2 2 2 0 2 1 2 0 2 2
#>  [97] 1 2 1 2 0 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2
#> [129] 2 0 2 2 2 2 2 2 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1
#> [161] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2 0 0 2 2 2 2
#> [193] 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2
#> [225] 2 2 2 2 2 1 2 2 0 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 1
#> [257] 2 2 2 2 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2
#> [289] 2 2 2 2 2 1 2 0 1 1 2 0 2 2 2 2 0 0 0 2 2 1 2 2 1 2 2 2 2 2 2 2
#> [321] 1 0 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2
#> [353] 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 0 2 2 0 2 1 2 2
#> [385] 1 2 2 1 2 1 2 2 2 2 2 0 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2
#> [417] 1 2 2 2 2 1 2 2 2 2 2 2 2 0 2 2 2 0 2 2 2 2 2 2 1 0 2 2 2 2 2 2
#> [449] 2 2 2 0 2 2 2 2 2 2 2 2 0 1 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 2 2
#> [481] 2 2 2 2 1 1 2 2 0 2 0 2 2 2 1 2 0 2 0 1 0 2 2 2 2 2 2 2 2 2 2 0
#> [513] 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 1 2 2 2 2 2 0 2 2 2 1 1 2
#> [545] 2 2 2 2 2 2 2 2 2 2 0 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2
#> [577] 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 2 2 0 2 1 2 1 2 2 2 2 2 2 2 2 2 2
#> [609] 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 1
#> [641] 2 2 2 2 2 2 1 1 2 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 1 2 2
#> [673] 2 2 2 0 2 2 1 2 2 2 1 2 2 0 0 2 2 2 2 1 2 0 2 2 2 2 2 0 2 2 1 2
#> [705] 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 1 2 0 2 2 2 1 2 1 2 0 2 2 2 2
#> [737] 2 2 0 2 0 0 2 2 0 2 0 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 1 2 2 2 2 0
#> [769] 1 0 2 0 2 2 2 2 1 1 2 2 2 1 2 2 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 2
#> [801] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [833] 1 2 2 2 2 0 2 2 2 2 0 2 2 2 1 2 2 2 0 2 2 1 1 2 2 2 2 2 2 2 2 2
#> [865] 0 2 2 2 2
##### reorder rows by column values
# Ascending by gender, then by pid7 within gender.
sorted_by_gender_and_party <- arrange(cces, gender, pid7)
# View(sorted_by_gender_and_party)
# Same sort, but pid7 descending within gender; this replaces the object above.
sorted_by_gender_and_party <- arrange(cces, gender, desc(pid7))
# View(sorted_by_gender_and_party)
##### add grouping to data
# Rows and columns are unchanged; the tibble only records the grouping keys.
grouped_gender_pid7 <- group_by(cces, gender, pid7)
grouped_gender_pid7#> # A tibble: 869 x 29
#> # Groups:   gender, pid7 [14]
#>      caseid region gender  educ edloan  race hispanic employ marstat
#>       <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>    <dbl>  <dbl>   <dbl>
#> 1 417614315      3      1     2      2     1        2      5       3
#> 2 415490556      1      2     6      2     1        1      1       1
#> 3 414351505      3      2     3      2     2        2      1       4
#> 4 411855339      1      2     5      2     6        2      5       3
#> 5 420208067      2      1     3      2     1        2      1       1
#> 6 412517331      1      1     2      2     1        2      5       5
#> # … with 863 more rows, and 20 more variables: pid7 <dbl>,
#> #   ideo5 <dbl>, pew_religimp <dbl>, newsint <dbl>,
#> #   faminc_new <dbl>, union <dbl>, investor <dbl>,
#> #   trump_approval <dbl>, CC18_310a <dbl>, CC18_310b <dbl>,
#> #   CC18_310c <dbl>, CC18_310d <dbl>, CC18_325a <dbl>,
#> #   CC18_325b <dbl>, CC18_325c <dbl>, CC18_325d <dbl>, party <chr>,
#> #   rec_sen1_01 <dbl>, rec_sen2_01 <dbl>, know_sens <dbl>
###remove grouping with ungroup
#ungroup(grouped_gender_pid7)
##### summarize the data
# ?summarise
# On ungrouped data, summarise() collapses everything to a single row.
summarise(
  cces,
  mean_pid7 = mean(pid7),
  mean_faminc = mean(faminc_new)
) #> # A tibble: 1 x 2
#>   mean_pid7 mean_faminc
#>       <dbl>       <dbl>
#> 1      3.62        6.58
#####you could do the same thing with piping
#cces %>% summarise(mean_pid7=mean(pid7),mean_faminc=mean(faminc_new))
###when you summarise grouped data, you get summaries by group
grouped_gender <- group_by(cces, gender)
# One output row per gender value.
summarise(
  grouped_gender,
  mean_pid7 = mean(pid7),
  mean_faminc = mean(faminc_new)
) #> # A tibble: 2 x 3
#>   gender mean_pid7 mean_faminc
#>    <dbl>     <dbl>       <dbl>
#> 1      1      3.93        7.18
#> 2      2      3.37        6.09
####combine all of this with piping if you want to look like a pro and have fewer lines of code
#cces %>% group_by(gender) %>% summarise(mean=mean(pid7))
5.6 Practices
# Load the tidyverse (needed for read_csv, drop_na and the dplyr verbs below).
library(tidyverse)
# Read the assignment data from the course URL and drop every row that
# contains a missing value. read_csv() returns a tibble.
dat <- drop_na(
  read_csv(
    url("https://www.dropbox.com/s/uhfstf6g36ghxwp/cces_sample_coursera.csv?raw=1")
  )
) #> 
#> ── Column specification ─────────────────────────────────────────────
#> cols(
#>   .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Problem 1
# The object "dat" created in the assignment code will import the survey data for the assignment using read_csv, thereby creating a tibble. Using that object as your data, use select() to create a new tibble that includes only the columns for educational level, whether the respondent has an educational loan, employment status, and Trump approval. Display that object. Hint: consult the codebook to identify the correct column names.
#### Write your code below:
dat %>% head()#> # A tibble: 6 x 25
#>      caseid region gender  educ edloan  race hispanic employ marstat
#>       <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>    <dbl>  <dbl>   <dbl>
#> 1 417614315      3      1     2      2     1        2      5       3
#> 2 415490556      1      2     6      2     1        1      1       1
#> 3 414351505      3      2     3      2     2        2      1       4
#> 4 411855339      1      2     5      2     6        2      5       3
#> 5 420208067      2      1     3      2     1        2      1       1
#> 6 412517331      1      1     2      2     1        2      5       5
#> # … with 16 more variables: pid7 <dbl>, ideo5 <dbl>,
#> #   pew_religimp <dbl>, newsint <dbl>, faminc_new <dbl>,
#> #   union <dbl>, investor <dbl>, CC18_308a <dbl>, CC18_310a <dbl>,
#> #   CC18_310b <dbl>, CC18_310c <dbl>, CC18_310d <dbl>,
#> #   CC18_325a <dbl>, CC18_325b <dbl>, CC18_325c <dbl>,
#> #   CC18_325d <dbl>
# Keep only the education, educational-loan, employment and Trump-approval columns.
df <- select(dat, educ, edloan, employ, CC18_308a)
# Show the first rows of the reduced tibble.
head(df) #> # A tibble: 6 x 4
#>    educ edloan employ CC18_308a
#>   <dbl>  <dbl>  <dbl>     <dbl>
#> 1     2      2      5         2
#> 2     6      2      1         4
#> 3     3      2      1         4
#> 4     5      2      5         4
#> 5     3      2      1         4
#> 6     2      2      5         1
# Problem 2
# Continuing to use the new data table you created in Problem 1, use recode() to create a new column named "trump_approve_disapprove" that recodes the column for President Trump's job approval. A value of "1" should mean that the respondent either "strongly" or "somewhat" approves of the President, and a value of 0 should mean that the respondent either "strongly" or "somewhat" DISapproves of the president. Display the resulting object. 
#### Write your code below:
# Codebook -- CC18_308a: Job approval of President Trump
# Do you approve or disapprove of the way each is doing their job…
# 1 Strongly approve
# 2 Somewhat approve
# 3 Somewhat disapprove
# 4 Strongly disapprove
# Collapse the four-point approval scale into a binary indicator:
# codes 1 and 2 (strongly/somewhat approve) become 1,
# codes 3 and 4 (somewhat/strongly disapprove) become 0.
df$trump_approve_disapprove <- df$CC18_308a %>%
  recode(
    `1` = 1,
    `2` = 1,
    `3` = 0,
    `4` = 0
  )
# Display the first rows, now including the new column.
df %>% head() #> # A tibble: 6 x 5
#>    educ edloan employ CC18_308a trump_approve_disapprove
#>   <dbl>  <dbl>  <dbl>     <dbl>                    <dbl>
#> 1     2      2      5         2                        1
#> 2     6      2      1         4                        0
#> 3     3      2      1         4                        0
#> 4     5      2      5         4                        0
#> 5     3      2      1         4                        0
#> 6     2      2      5         1                        1
# Problem 3
# Use summarise() to create a summary table for survey respondents who are employed full time and are married. The table should have the mean and median for the importance of religion column.
#### Write your code below:
# employ: Which of the following best describes your current employment status?
# 1 Full-time
# 2 Part-time
# 3 Temporarily laid off
# 4 Unemployed
# 5 Retired
# 6 Permanently disabled
# 7 Homemaker
# 8 Student
# 9 Other
# marstat: What is your marital status?
# 1 Married
# 2 Separated
# 3 Divorced
# 4 Widowed
# 5 Never married
# 6 Domestic/civil partnership
# pew_religimp: How important is religion in your life?
# 1 Very important
# 2 Somewhat important
# 3 Not too important
# 4 Not at all important
# Restrict to respondents who are employed full time (employ == 1) and
# married (marstat == 1), then report the mean and median of pew_religimp.
dat %>%
  filter(employ == 1, marstat == 1) %>%
  summarise(
    `Mean Importance of Religion` = mean(pew_religimp),
    `Median Importance of Religion` = median(pew_religimp)
  ) #> # A tibble: 1 x 2
#>   `Mean Importance of Religion` `Median Importance of Religion`
#>                           <dbl>                           <dbl>
#> 1                          2.19                               2