# Part 9 Week 4

# Data Visualization in R with ggplot2 > Week 1

# 9.1 Getting Started with ggplot, Parts 1 and 2

library(tidyverse)

# Load the cel data. It is downloaded from Dropbox here; reading a local copy
# of the same CSV from your computer works just as well.

cel <-
  "https://www.dropbox.com/s/4ebgnkdhhxo5rac/cel_volden_wiseman%20_coursera.csv?raw=1" %>%
  url() %>%
  read_csv()
#> 
#> ── Column specification ───────────────────────────────────────────────────────────────
#> cols(
#>   .default = col_double(),
#>   thomas_name = col_character(),
#>   st_name = col_character()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Column names of the loaded data
colnames(cel)
#>  [1] "thomas_num"     "thomas_name"    "icpsr"          "congress"      
#>  [5] "year"           "st_name"        "cd"             "dem"           
#>  [9] "elected"        "female"         "votepct"        "dwnom1"        
#> [13] "deleg_size"     "speaker"        "subchr"         "afam"          
#> [17] "latino"         "votepct_sq"     "power"          "chair"         
#> [21] "state_leg"      "state_leg_prof" "majority"       "maj_leader"    
#> [25] "min_leader"     "meddist"        "majdist"        "all_bills"     
#> [29] "all_aic"        "all_abc"        "all_pass"       "all_law"       
#> [33] "les"            "seniority"      "benchmark"      "expectation"   
#> [37] "TotalInParty"   "RankInParty"
# Number of rows, then number of columns
c(nrow(cel), ncol(cel))
#> [1] 10262    38
# How many rows there are for each year
table(cel[["year"]])
#> 
#> 1973 1975 1977 1979 1981 1983 1985 1987 1989 1991 1993 1995 1997 1999 2001 2003 2005 
#>  444  444  443  442  447  444  445  446  449  447  446  445  449  442  447  444  445 
#> 2007 2009 2011 2013 2015 2017 
#>  452  451  449  450  443  448
# Five-number summary (plus mean) of the all_bills column
summary(cel[["all_bills"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>     0.0     7.0    12.0    16.8    21.0   258.0
# Prepare the data for a scatterplot

# Keep only the rows for congress 115, then only the two columns to plot
fig115 <- select(filter(cel, congress == 115), seniority, all_pass)

# The same result written as a pipeline, one verb per line
fig115 <- cel %>%
  filter(congress == 115) %>%
  select(seniority, all_pass)

# Look at the first rows to make sure the filter worked properly
fig115 %>% head()
#> # A tibble: 6 x 2
#>   seniority all_pass
#>       <dbl>    <dbl>
#> 1         2        1
#> 2         3        2
#> 3        11        0
#> 4         2        3
#> 5         2        1
#> 6         4        1
# Start with only the data and the aesthetic mappings (no marks drawn yet)
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass))

# Add the marks: one point per row
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass)) +
  geom_point()

# Jitter adds random noise to the positions to avoid overplotting
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass)) +
  geom_jitter()

# Add axis labels and a title
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )

# Rebuild the plotting data, this time also keeping the "dem" column

fig115 <- select(filter(cel, congress == 115), seniority, all_pass, dem)

# Print the dem values to see how the column is coded
fig115[["dem"]]
#>   [1] 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 0 0 1
#>  [42] 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 1 1 0 0 0 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 0 1
#>  [83] 1 1 0 1 0 0 1 1 1 0 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 1
#> [124] 1 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0
#> [165] 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0
#> [206] 0 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1
#> [247] 1 0 0 0 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 0 0 1 1
#> [288] 1 0 1 0 1 1 1 0 0 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 1 0
#> [329] 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 1 1 1 0 1 0 1 1 1 1 1
#> [370] 0 0 1 1 0 1 0 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 0 0 0 1 1 1
#> [411] 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0
# Same jittered scatterplot, with point colour mapped to the dem column
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass, color = dem)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )

# The colors are strange because dem is stored as a number (0/1), so fix that
# by making dem a categorical variable called "party"
party <- recode(fig115$dem, `1` = "Democrat", `0` = "Republican")

# Assign the column directly instead of using add_column(): add_column()
# stops with an error when a "party" column already exists, so re-running
# this chunk would fail. Direct assignment appends the column on the first
# run and simply overwrites it on later runs.
fig115$party <- party

# Print the new column to confirm the recoding
fig115$party
#>   [1] "Republican" "Democrat"   "Republican" "Democrat"   "Republican" "Republican"
#>   [7] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#>  [13] "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"  
#>  [19] "Republican" "Democrat"   "Republican" "Republican" "Republican" "Republican"
#>  [25] "Democrat"   "Republican" "Republican" "Republican" "Democrat"   "Democrat"  
#>  [31] "Democrat"   "Democrat"   "Republican" "Democrat"   "Republican" "Democrat"  
#>  [37] "Republican" "Republican" "Republican" "Republican" "Democrat"   "Democrat"  
#>  [43] "Republican" "Republican" "Republican" "Republican" "Republican" "Democrat"  
#>  [49] "Democrat"   "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"  
#>  [55] "Democrat"   "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"  
#>  [61] "Republican" "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"  
#>  [67] "Democrat"   "Democrat"   "Democrat"   "Democrat"   "Republican" "Democrat"  
#>  [73] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#>  [79] "Democrat"   "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"  
#>  [85] "Republican" "Democrat"   "Republican" "Republican" "Democrat"   "Democrat"  
#>  [91] "Democrat"   "Republican" "Democrat"   "Republican" "Republican" "Republican"
#>  [97] "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"   "Democrat"  
#> [103] "Republican" "Democrat"   "Democrat"   "Democrat"   "Democrat"   "Republican"
#> [109] "Republican" "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"  
#> [115] "Republican" "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [121] "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"   "Republican"
#> [127] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [133] "Republican" "Republican" "Republican" "Democrat"   "Republican" "Democrat"  
#> [139] "Republican" "Republican" "Democrat"   "Democrat"   "Republican" "Republican"
#> [145] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [151] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Democrat"  
#> [157] "Republican" "Republican" "Republican" "Republican" "Republican" "Democrat"  
#> [163] "Democrat"   "Republican" "Democrat"   "Republican" "Republican" "Democrat"  
#> [169] "Democrat"   "Republican" "Republican" "Republican" "Republican" "Democrat"  
#> [175] "Democrat"   "Republican" "Republican" "Republican" "Democrat"   "Republican"
#> [181] "Republican" "Democrat"   "Republican" "Republican" "Democrat"   "Republican"
#> [187] "Democrat"   "Republican" "Republican" "Republican" "Republican" "Republican"
#> [193] "Democrat"   "Democrat"   "Democrat"   "Republican" "Republican" "Republican"
#> [199] "Democrat"   "Democrat"   "Republican" "Republican" "Democrat"   "Republican"
#> [205] "Republican" "Republican" "Democrat"   "Republican" "Democrat"   "Republican"
#> [211] "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"   "Democrat"  
#> [217] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [223] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [229] "Democrat"   "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"  
#> [235] "Republican" "Democrat"   "Democrat"   "Democrat"   "Republican" "Democrat"  
#> [241] "Republican" "Democrat"   "Democrat"   "Democrat"   "Republican" "Democrat"  
#> [247] "Democrat"   "Republican" "Republican" "Republican" "Democrat"   "Democrat"  
#> [253] "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"   "Republican"
#> [259] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [265] "Republican" "Democrat"   "Republican" "Republican" "Republican" "Democrat"  
#> [271] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Democrat"  
#> [277] "Republican" "Republican" "Republican" "Democrat"   "Democrat"   "Republican"
#> [283] "Republican" "Republican" "Republican" "Democrat"   "Democrat"   "Democrat"  
#> [289] "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"  
#> [295] "Republican" "Republican" "Democrat"   "Democrat"   "Republican" "Democrat"  
#> [301] "Republican" "Democrat"   "Democrat"   "Republican" "Republican" "Democrat"  
#> [307] "Republican" "Democrat"   "Democrat"   "Republican" "Democrat"   "Republican"
#> [313] "Democrat"   "Democrat"   "Republican" "Democrat"   "Democrat"   "Democrat"  
#> [319] "Republican" "Democrat"   "Democrat"   "Republican" "Republican" "Democrat"  
#> [325] "Republican" "Democrat"   "Democrat"   "Republican" "Democrat"   "Republican"
#> [331] "Republican" "Republican" "Republican" "Democrat"   "Republican" "Democrat"  
#> [337] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [343] "Republican" "Republican" "Republican" "Democrat"   "Republican" "Republican"
#> [349] "Republican" "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"  
#> [355] "Democrat"   "Republican" "Republican" "Republican" "Democrat"   "Democrat"  
#> [361] "Democrat"   "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"  
#> [367] "Democrat"   "Democrat"   "Democrat"   "Republican" "Republican" "Democrat"  
#> [373] "Democrat"   "Republican" "Democrat"   "Republican" "Democrat"   "Democrat"  
#> [379] "Democrat"   "Republican" "Republican" "Republican" "Democrat"   "Democrat"  
#> [385] "Democrat"   "Democrat"   "Republican" "Republican" "Republican" "Republican"
#> [391] "Republican" "Democrat"   "Democrat"   "Republican" "Republican" "Republican"
#> [397] "Democrat"   "Democrat"   "Democrat"   "Republican" "Republican" "Democrat"  
#> [403] "Republican" "Democrat"   "Republican" "Republican" "Republican" "Democrat"  
#> [409] "Democrat"   "Democrat"   "Republican" "Democrat"   "Republican" "Republican"
#> [415] "Republican" "Democrat"   "Democrat"   "Democrat"   "Democrat"   "Democrat"  
#> [421] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [427] "Democrat"   "Democrat"   "Democrat"   "Democrat"   "Republican" "Republican"
#> [433] "Democrat"   "Republican" "Republican" "Democrat"   "Republican" "Democrat"  
#> [439] "Republican" "Republican" "Republican" "Republican" "Democrat"   "Republican"
#> [445] "Republican" "Republican" "Republican" "Republican"
# Jittered scatterplot with point colour mapped to the categorical party column
fig115 %>%
  ggplot(aes(x = seniority, y = all_pass, color = party)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )

# Make the colors match the traditional blue for Democrats and red for
# Republicans. The values are named so each color is tied to its party by
# label; an unnamed c("blue", "red") is assigned in level order and would
# silently swap if the labels or their ordering ever changed.

ggplot(fig115, aes(x = seniority, y = all_pass, color = party)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  ) +
  scale_color_manual(values = c(Democrat = "blue", Republican = "red"))

# Make two separate panels, one per party, using facet_wrap. The manual colors
# are named so each party keeps its color regardless of level order (an
# unnamed vector is matched by position only).

ggplot(fig115, aes(x = seniority, y = all_pass, color = party)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  ) +
  scale_color_manual(values = c(Democrat = "blue", Republican = "red")) +
  facet_wrap(~party)

# 9.2 Distributions

library(tidyverse)

# Read the cces data; the path is relative to the current working directory
cces <- read_csv(file.path("week4", "cces.csv"))
#> 
#> ── Column specification ───────────────────────────────────────────────────────────────
#> cols(
#>   .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Boxplots

# A single boxplot of faminc_new
cces %>%
  ggplot(aes(y = faminc_new)) +
  geom_boxplot()

# Break the boxplot up by education group by adding an aesthetic mapping
# for group
cces %>%
  ggplot(aes(y = faminc_new, group = educ)) +
  geom_boxplot()

# Add axis labels and a title. The boxes are split by education
# (group = educ) while income is on the y axis, so the x axis is labelled
# as education rather than income.
ggplot(cces, aes(y = faminc_new, group = educ)) +
  geom_boxplot() +
  labs(
    x = "Education Level",
    y = "Family Income",
    title = "Family Inc. by Respondent Ed. Level"
  )

# Collapse educ into a two-level categorical variable: codes 1-4 become
# "<4 yr Degree" (two-year college degree or less) and codes 5-6 become
# "4 yr Deg. +" (four-year college graduates or more)

cces <- cces %>%
  mutate(
    educ_category = recode(
      educ,
      `1` = "<4 yr Degree",
      `2` = "<4 yr Degree",
      `3` = "<4 yr Degree",
      `4` = "<4 yr Degree",
      `5` = "4 yr Deg. +",
      `6` = "4 yr Deg. +"
    )
  )

# Make sure the new categorical variable is mapped to "x" rather than "group".
# The x axis now shows the education categories, so it is labelled as
# education; income stays on the y axis.

ggplot(cces, aes(y = faminc_new, x = educ_category)) +
  geom_boxplot() +
  labs(
    x = "Education Level",
    y = "Family Income",
    title = "Family Inc. by Respondent Ed. Level"
  )

# Histogram of faminc_new. No bins or binwidth are given, so ggplot2 falls
# back to its default of 30 bins and prints the message shown below.
cces %>%
  ggplot(aes(x = faminc_new)) +
  geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density plot of the same variable
cces %>%
  ggplot(aes(x = faminc_new)) +
  geom_density()