Part 9 Week 4
Data Visualization in R with ggplot2 > Week 1
9.1 Getting Started with ggplot Part 1, 2
library(tidyverse)
#####Load the data (if you want, you could do this locally from your computer rather than download from Dropbox)
# Download the CSV from the Dropbox link and parse it with readr;
# the resulting tibble is stored as `cel` and used by the steps below.
cel <- read_csv(
  url(
    "https://www.dropbox.com/s/4ebgnkdhhxo5rac/cel_volden_wiseman%20_coursera.csv?raw=1"
  )
)
#>
#> ── Column specification ───────────────────────────────────────────────────────────────
#> cols(
#> .default = col_double(),
#> thomas_name = col_character(),
#> st_name = col_character()
#> )
#> ℹ Use `spec()` for the full column specifications.
# Quick look at the loaded data before plotting.
# Column names of `cel`.
names(cel)
#> [1] "thomas_num" "thomas_name" "icpsr" "congress"
#> [5] "year" "st_name" "cd" "dem"
#> [9] "elected" "female" "votepct" "dwnom1"
#> [13] "deleg_size" "speaker" "subchr" "afam"
#> [17] "latino" "votepct_sq" "power" "chair"
#> [21] "state_leg" "state_leg_prof" "majority" "maj_leader"
#> [25] "min_leader" "meddist" "majdist" "all_bills"
#> [29] "all_aic" "all_abc" "all_pass" "all_law"
#> [33] "les" "seniority" "benchmark" "expectation"
#> [37] "TotalInParty" "RankInParty"
# Number of rows and columns.
dim(cel)
#> [1] 10262 38
# Number of rows for each value of `year`.
table(cel$year)
#>
#> 1973 1975 1977 1979 1981 1983 1985 1987 1989 1991 1993 1995 1997 1999 2001 2003 2005
#> 444 444 443 442 447 444 445 446 449 447 446 445 449 442 447 444 445
#> 2007 2009 2011 2013 2015 2017
#> 452 451 449 450 443 448
# Min / quartiles / mean / max of the `all_bills` column.
summary(cel$all_bills)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.0 7.0 12.0 16.8 21.0 258.0
#for making a scatterplot
####filter the data we want
# Step 1: keep only the rows of `cel` where congress == 115.
fig115 <- filter(cel, congress == 115)
# Step 2: keep only the two columns used in the scatterplot.
fig115 <- select(fig115, "seniority", "all_pass")
####these commands do the same thing as above, just with piping
# `cel` is piped into filter(), that result is piped into select(),
# and the final tibble is assigned to `fig115`.
fig115 <- cel %>%
  filter(congress == 115) %>%
  select("seniority", "all_pass")
###check to make sure the filter worked properly
head(fig115)
#> # A tibble: 6 x 2
#> seniority all_pass
#> <dbl> <dbl>
#> 1 2 1
#> 2 3 2
#> 3 11 0
#> 4 2 3
#> 5 2 1
#> 6 4 1
####set up the data and aesthetics
# Only the data and the x/y mapping: this draws empty axes with no marks.
ggplot(data = fig115, mapping = aes(seniority, all_pass))

####add the marks
# One point per row of `fig115`.
ggplot(data = fig115, mapping = aes(seniority, all_pass)) +
  geom_point()

####jitter adds random noise to the data to avoid overplotting
ggplot(data = fig115, mapping = aes(seniority, all_pass)) +
  geom_jitter()

####add some labels and a title
ggplot(data = fig115, mapping = aes(seniority, all_pass)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )
#####modify filter and select to grab "dem"
# Rebuild `fig115` from `cel`, this time also keeping the `dem` column.
fig115 <- cel %>%
  filter(congress == 115) %>%
  select("seniority", "all_pass", "dem")
# Print the `dem` column to inspect its values.
fig115$dem
#> [1] 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 0 0 1
#> [42] 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 1 1 0 0 0 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 0 1
#> [83] 1 1 0 1 0 0 1 1 1 0 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 1
#> [124] 1 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0
#> [165] 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0
#> [206] 0 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1
#> [247] 1 0 0 0 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 0 0 1 1
#> [288] 1 0 1 0 1 1 1 0 0 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 1 0
#> [329] 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 1 1 1 0 1 0 1 1 1 1 1
#> [370] 0 0 1 1 0 1 0 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 0 0 0 1 1 1
#> [411] 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0
# Same jittered scatterplot, with the numeric `dem` column mapped to colour.
ggplot(
  data = fig115,
  mapping = aes(seniority, all_pass, color = dem)
) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )
####colors are strange, let's fix
#####make dem a categorical variable called "party"
# recode() maps dem == 1 to "Democrat" and dem == 0 to "Republican",
# producing a character vector with one entry per row of `fig115`.
party <- recode(fig115$dem, `1` = "Democrat", `0` = "Republican")
# Attach that vector to the tibble as a new column named `party`.
fig115 <- add_column(fig115, party)
# Print the new column to confirm the recoding.
fig115$party
#> [1] "Republican" "Democrat" "Republican" "Democrat" "Republican" "Republican"
#> [7] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [13] "Republican" "Democrat" "Republican" "Democrat" "Democrat" "Democrat"
#> [19] "Republican" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [25] "Democrat" "Republican" "Republican" "Republican" "Democrat" "Democrat"
#> [31] "Democrat" "Democrat" "Republican" "Democrat" "Republican" "Democrat"
#> [37] "Republican" "Republican" "Republican" "Republican" "Democrat" "Democrat"
#> [43] "Republican" "Republican" "Republican" "Republican" "Republican" "Democrat"
#> [49] "Democrat" "Republican" "Republican" "Democrat" "Democrat" "Democrat"
#> [55] "Democrat" "Republican" "Republican" "Democrat" "Democrat" "Democrat"
#> [61] "Republican" "Republican" "Republican" "Democrat" "Democrat" "Democrat"
#> [67] "Democrat" "Democrat" "Democrat" "Democrat" "Republican" "Democrat"
#> [73] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [79] "Democrat" "Democrat" "Republican" "Democrat" "Democrat" "Democrat"
#> [85] "Republican" "Democrat" "Republican" "Republican" "Democrat" "Democrat"
#> [91] "Democrat" "Republican" "Democrat" "Republican" "Republican" "Republican"
#> [97] "Democrat" "Republican" "Democrat" "Democrat" "Democrat" "Democrat"
#> [103] "Republican" "Democrat" "Democrat" "Democrat" "Democrat" "Republican"
#> [109] "Republican" "Republican" "Democrat" "Republican" "Democrat" "Democrat"
#> [115] "Republican" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [121] "Democrat" "Republican" "Democrat" "Democrat" "Democrat" "Republican"
#> [127] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [133] "Republican" "Republican" "Republican" "Democrat" "Republican" "Democrat"
#> [139] "Republican" "Republican" "Democrat" "Democrat" "Republican" "Republican"
#> [145] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [151] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Democrat"
#> [157] "Republican" "Republican" "Republican" "Republican" "Republican" "Democrat"
#> [163] "Democrat" "Republican" "Democrat" "Republican" "Republican" "Democrat"
#> [169] "Democrat" "Republican" "Republican" "Republican" "Republican" "Democrat"
#> [175] "Democrat" "Republican" "Republican" "Republican" "Democrat" "Republican"
#> [181] "Republican" "Democrat" "Republican" "Republican" "Democrat" "Republican"
#> [187] "Democrat" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [193] "Democrat" "Democrat" "Democrat" "Republican" "Republican" "Republican"
#> [199] "Democrat" "Democrat" "Republican" "Republican" "Democrat" "Republican"
#> [205] "Republican" "Republican" "Democrat" "Republican" "Democrat" "Republican"
#> [211] "Democrat" "Republican" "Democrat" "Democrat" "Democrat" "Democrat"
#> [217] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [223] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [229] "Democrat" "Republican" "Republican" "Democrat" "Democrat" "Democrat"
#> [235] "Republican" "Democrat" "Democrat" "Democrat" "Republican" "Democrat"
#> [241] "Republican" "Democrat" "Democrat" "Democrat" "Republican" "Democrat"
#> [247] "Democrat" "Republican" "Republican" "Republican" "Democrat" "Democrat"
#> [253] "Republican" "Republican" "Democrat" "Democrat" "Democrat" "Republican"
#> [259] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [265] "Republican" "Democrat" "Republican" "Republican" "Republican" "Democrat"
#> [271] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Democrat"
#> [277] "Republican" "Republican" "Republican" "Democrat" "Democrat" "Republican"
#> [283] "Republican" "Republican" "Republican" "Democrat" "Democrat" "Democrat"
#> [289] "Republican" "Democrat" "Republican" "Democrat" "Democrat" "Democrat"
#> [295] "Republican" "Republican" "Democrat" "Democrat" "Republican" "Democrat"
#> [301] "Republican" "Democrat" "Democrat" "Republican" "Republican" "Democrat"
#> [307] "Republican" "Democrat" "Democrat" "Republican" "Democrat" "Republican"
#> [313] "Democrat" "Democrat" "Republican" "Democrat" "Democrat" "Democrat"
#> [319] "Republican" "Democrat" "Democrat" "Republican" "Republican" "Democrat"
#> [325] "Republican" "Democrat" "Democrat" "Republican" "Democrat" "Republican"
#> [331] "Republican" "Republican" "Republican" "Democrat" "Republican" "Democrat"
#> [337] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [343] "Republican" "Republican" "Republican" "Democrat" "Republican" "Republican"
#> [349] "Republican" "Republican" "Democrat" "Republican" "Democrat" "Democrat"
#> [355] "Democrat" "Republican" "Republican" "Republican" "Democrat" "Democrat"
#> [361] "Democrat" "Republican" "Democrat" "Republican" "Democrat" "Democrat"
#> [367] "Democrat" "Democrat" "Democrat" "Republican" "Republican" "Democrat"
#> [373] "Democrat" "Republican" "Democrat" "Republican" "Democrat" "Democrat"
#> [379] "Democrat" "Republican" "Republican" "Republican" "Democrat" "Democrat"
#> [385] "Democrat" "Democrat" "Republican" "Republican" "Republican" "Republican"
#> [391] "Republican" "Democrat" "Democrat" "Republican" "Republican" "Republican"
#> [397] "Democrat" "Democrat" "Democrat" "Republican" "Republican" "Democrat"
#> [403] "Republican" "Democrat" "Republican" "Republican" "Republican" "Democrat"
#> [409] "Democrat" "Democrat" "Republican" "Democrat" "Republican" "Republican"
#> [415] "Republican" "Democrat" "Democrat" "Democrat" "Democrat" "Democrat"
#> [421] "Republican" "Republican" "Republican" "Republican" "Republican" "Republican"
#> [427] "Democrat" "Democrat" "Democrat" "Democrat" "Republican" "Republican"
#> [433] "Democrat" "Republican" "Republican" "Democrat" "Republican" "Democrat"
#> [439] "Republican" "Republican" "Republican" "Republican" "Democrat" "Republican"
#> [445] "Republican" "Republican" "Republican" "Republican"
# Jittered scatterplot with the categorical `party` column mapped to colour.
ggplot(
  data = fig115,
  mapping = aes(seniority, all_pass, color = party)
) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  )
####let's make the colors match traditional blue democrats and red republicans
# The colours are given as a named vector so each party is matched by name
# rather than by the (alphabetical) order of the values in `party`.
ggplot(fig115, aes(x = seniority, y = all_pass, color = party)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  ) +
  scale_color_manual(values = c("Democrat" = "blue", "Republican" = "red"))
#####make two separate plots using facet_wrap
# facet_wrap(~ party) draws one panel per value of `party`.
# The colours are given as a named vector so each party is matched by name
# rather than by the (alphabetical) order of the values in `party`.
ggplot(fig115, aes(x = seniority, y = all_pass, color = party)) +
  geom_jitter() +
  labs(
    x = "Seniority",
    y = "Bills Passed",
    title = "Seniority and Bills Passed in the 115th Congress"
  ) +
  scale_color_manual(values = c("Democrat" = "blue", "Republican" = "red")) +
  facet_wrap(~ party)
9.2 Distributions
library(tidyverse)
# Read the CCES data from a local CSV (path is relative to the working
# directory) into the tibble `cces`.
cces <- read_csv("week4/cces.csv")
#>
#> ── Column specification ───────────────────────────────────────────────────────────────
#> cols(
#> .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#####boxplots
###make a basic boxplot
# One boxplot of `faminc_new` over all rows of `cces`.
ggplot(cces, aes(y = faminc_new)) +
  geom_boxplot()
####break up boxplots by education group -- add an aesthetic mapping for group
ggplot(cces, aes(y = faminc_new, group = educ)) +
  geom_boxplot()
####add labels and a title
# The boxes are split by `educ`, so the x-axis is labelled as education;
# income is on the y-axis.
ggplot(cces, aes(y = faminc_new, group = educ)) +
  geom_boxplot() +
  labs(
    x = "Education Level",
    y = "Family Income",
    title = "Family Inc. by Respondent Ed. Level"
  )
####reformat the data to create a dichotomous categorical variable for four-year college grads or more, and then all respondents with 2 year college degrees or less
# recode() collapses the numeric `educ` codes into two labels:
# codes 1-4 become "<4 yr Degree", codes 5-6 become "4 yr Deg. +".
# The result is stored as a new column `educ_category` on `cces`.
cces$educ_category <- recode(
  cces$educ,
  `1` = "<4 yr Degree",
  `2` = "<4 yr Degree",
  `3` = "<4 yr Degree",
  `4` = "<4 yr Degree",
  `5` = "4 yr Deg. +",
  `6` = "4 yr Deg. +"
)
###make sure you change the aesthetic mapping so the new categorical variable is mapped to "x" rather than "group"
# `educ_category` is on the x-axis, so that axis is labelled as education;
# income is on the y-axis.
ggplot(cces, aes(y = faminc_new, x = educ_category)) +
  geom_boxplot() +
  labs(
    x = "Education Level",
    y = "Family Income",
    title = "Family Inc. by Respondent Ed. Level"
  )
####make a histogram
# Histogram of `faminc_new`; no bin settings are given, so the default
# binning is used.
ggplot(data = cces, mapping = aes(faminc_new)) +
  geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
####make a density plot
# Density curve of `faminc_new`.
ggplot(data = cces, mapping = aes(faminc_new)) +
  geom_density()