Chapter 9 Appendix
9.1 Graphs
9.1.1 Lahman
%>%
Batting group_by(yearID) %>%
summarize(totalHR = sum(HR)) %>%
filter(yearID >= 1990) %>%
ggplot(aes(x = yearID, y = totalHR, fill = factor(yearID))) +
geom_col() +
labs(title = "Total Home Runs from 1990-2021", x = "Year", y = "Total HRs") +
theme_bw() +
theme(text = element_text(family = "serif")) +
theme(legend.position = "none")
%>%
Pitching group_by(yearID) %>%
filter(yearID >= 1921, !is.na(ERA)) %>%
summarize(avgERA = mean(ERA)) %>%
ggplot(aes(x = yearID, y = avgERA)) +
geom_line(size = 1, color = "dodgerblue") +
labs(title = "Average ERA in the Last 100 Years",
x = "Year", y = "Average ERA") +
theme_bw() +
theme(text = element_text(family = "serif"))
%>%
Fielding filter(yearID >= 2012) %>%
ggplot(aes(x = factor(POS), y = DP)) +
geom_jitter(alpha = 0.4, aes(color = factor(yearID)),
position = position_jitter(0.2)) +
labs(title = "Number of Double Plays by Position",
x = "Position", y = "Double Plays", color = "Year") +
theme_bw() +
theme(text = element_text(family = "serif"))
%>%
Salaries group_by(yearID) %>%
summarize(Low = min(salary), Median = median(salary),
Mean = mean(salary), High = max(salary)) %>%
ggplot(aes(x = yearID)) +
geom_ribbon(aes(ymin = Low, ymax = High),
fill = "lightblue", color = "black", alpha = 0.5) +
geom_line(aes(y = Median), color = "black", linetype = "dashed") +
geom_line(aes(y = Mean), color = "black", linetype = "dotted") +
scale_y_continuous(labels = scales::label_dollar()) +
labs(title = "MLB Salary Range Over Time",
subtitle = "Median = Dashed Line, Mean = Dotted Line",
y = "Salary",
x = "Season") +
theme_bw() +
theme(text = element_text(family = "serif"))
9.1.2 FanGraphs
%>%
fg_update ggplot(aes(x = BB_rate, y = K_rate, label = Team, fill = WAR)) +
geom_label(alpha = 0.6, size = 3, family = "serif") +
labs(title = "K% and BB% For Each MLB Team",
x = "Walk Rate(%)", y = "Strikeout Rate(%)") +
scale_fill_viridis_c() +
theme_bw() +
theme(text = element_text(family = "serif"))
%>%
judge_filtered ggplot(aes(x = EV, LA, color = factor(RBI))) +
geom_point(alpha = 0.6) +
labs(title = "Aaron Judge Batting",
x = "Exit Velocity", y = "Launch Angle", color = "RBI") +
scale_color_viridis_d() +
theme_bw() +
theme(text = element_text(family = "serif"))
# pl_filtered %>%
# ggplot(aes(x = factor(Season), y = ERA)) +
# geom_jitter(alpha = 0.6, aes(color = Age),
# position=position_jitter(0.2)) +
# labs(title = "Pitchers", x = "Season") +
# scale_color_viridis_b() +
# theme_bw() +
# theme(text = element_text(family = "serif"))
9.1.3 Statcast
%>%
sc_download ggplot(aes(x = xwoba)) +
geom_density(fill = "cornflowerblue", alpha = .5) +
theme_bw() +
theme(text = element_text(family = "serif"))
%>%
judge_sc group_by(description, pitch_type) %>%
summarize(count = n()) %>%
ggplot(aes(x = description, y = pitch_type, fill = count)) +
geom_tile() +
labs(x = "Description", y = "Pitch Type", color = "Release Speed") +
scale_fill_viridis_b() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(text = element_text(family = "serif"))
%>%
sc_lead_evb group_by(player_id) %>%
ggplot(aes(x = max_distance, y = max_hit_speed, color = avg_distance)) +
geom_point(alpha = .5, aes(size = avg_hit_speed)) +
geom_label(data = sc_lead_evb %>%
filter(max_distance %in% range(max_distance) |
%in% range(max_hit_speed)),
max_hit_speed aes(label = `last_name, first_name`), size = 3,
show.legend = FALSE, family = "serif") +
labs(title = "Hitting Leaders", x = "Max Distance", y = "Max Hit Speed",
color = "Average Distance", size = "Average Hit Speed") +
scale_color_viridis_b() +
theme_bw() +
theme(text = element_text(family = "serif"))
%>%
sc_lead_exp ggplot(aes(x = ba, y = est_ba, color = pa)) +
geom_point(alpha = .5) +
geom_abline() +
labs(x = "Batting Average", y = "Expected Batting Average",
color = "Plate Appearances") +
scale_color_viridis_b() +
theme_bw() +
theme(text = element_text(family = "serif"))
9.1.4 Restructuring Data
%>%
join_all_keys ggplot(aes(x = IPouts, y = salary, color = IPouts)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(title = "Pitcher Salary by Outs Pitched",
x = "Outs Pitched", y = "Salary") +
scale_color_viridis_c() +
scale_y_continuous(labels = scales::label_dollar()) +
theme_bw() +
theme(legend.position = "none") +
theme(text = element_text(family = "serif"))
9.1.5 Modeling
%>%
team_pred ggplot(aes(x = predict(model2), y = R)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, color = "cornflowerblue", size = 1) +
labs(x = "Predicted Runs", y = "Actual Runs") +
theme_bw() +
theme(text = element_text(family = "serif"))
ggplot(all_draft, aes(x = OvPck, y = prediction)) +
geom_line(linewidth = 1) +
theme_bw() +
lims(y = c(0, 1)) +
labs(title = "Chances a First Round Draft Pick Achieves Positive WAR",
caption = "Data collected from Baseball Reference (2004-2013)",
x = "Overall Pick Number",
y = "Predicted Probability")
ggplot(all_draft, aes(x = OvPck, y = prediction2, color = Type)) +
geom_line(linewidth = 1) +
theme_bw() +
lims(y = c(0, 1)) +
labs(title = "Chances a First Round Draft Pick Achieves Positive WAR by Draft Type",
caption = "Data collected from Baseball Reference (2004-2013)",
x = "Overall Pick Number",
y = "Predicted Probability")
9.2 Functions
9.2.1 Data Scraping and Functions
<- function(year, round) {
scrape_draft
require(rvest)
<- paste0("https://www.baseball-reference.com/draft/?year_ID=",
url
year,"&draft_round=",
round,"&draft_type=junreg&query_type=year_round&from_type_jc=0&from_type_hs=0&from_type_4y=0&from_type_unk=0")
<- url %>%
data read_html()
<- data %>%
draft_data html_element("table") %>%
html_table()
draft_data }
9.2.2 Visualizations
<- function(top = 3.75, bottom = 1.5, linecolor = "gray60"){
geom_zone geom_rect(xmin = -.7083, xmax = .7083, ymin = bottom, ymax = top,
alpha = 0, color = linecolor, linewidth = 1.5)
}
<- function(){
geom_plate <- data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, -.25, -.5, -.25))
df <- geom_polygon(data = df, aes(x = x, y = y), fill = "white", color = "gray60", linewidth = 1.25)
g
g }
<- function(...) {
geom_spray ggplot(...) +
geom_curve(x = 33, xend = 223, y = -100, yend = -100, curvature = -.65) +
geom_segment(x = 128, xend = 33, y = -208, yend = -100) +
geom_segment(x = 128, xend = 223, y = -208, yend = -100) +
geom_curve(x = 83, xend = 173, y = -155, yend = -156, curvature = -.65, linetype = "dotted") +
coord_fixed() +
scale_x_continuous(NULL, limits = c(25, 225)) +
scale_y_continuous(NULL, limits = c(-225, -25))
}