Chapter 8 Modeling
8.1 Correlation
To compute the correlation between two variables, we use the `cor()` function:
# Read the exam time/score data from the course web site into `df`,
# then inspect its structure (column names, types, first values).
df <- fread("http://citadel.sjfc.edu/faculty/ageraci/data/ExamTimesScores.txt")
str(df)
## Classes 'data.table' and 'data.frame': 30 obs. of 2 variables:
## $ time : int 30 41 41 43 47 48 51 54 54 56 ...
## $ score: int 100 84 94 90 88 99 85 84 94 100 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Correlation between score and time, using only complete observations.
# The outer parentheses print the value in addition to assigning it.
(correlation <- cor(score ~ time, data = df, use = "complete.obs"))
## [1] -0.125
8.2 Scatterplot
# Scatterplot of exam score against time.
xyplot(
  score ~ time,
  data = df,
  type = c("p", "r"),  # "p" draws the points, "r" adds a regression line
  pch = 16,
  cex = 1.2,
  main = "Exam time vs. Score",
  xlab = "Time (in minutes)",
  ylab = "Score on Exam"
)
8.3 Linear Regression
# Fit a simple linear regression of score on time.
# The outer parentheses print the fitted model in addition to assigning it.
(mod <- lm(score ~ time, data = df))
##
## Call:
## lm(formula = score ~ time, data = df)
##
## Coefficients:
## (Intercept) time
## 88.8751 -0.0996
8.4 Residual Diagnostic Graphs
# Residuals of `mod` plotted against its fitted values.
xyplot(
  resid(mod) ~ predict(mod),
  data = df,
  pch = 16,
  cex = 1.5,
  main = "Residual vs. Fitted Diagnostic Graph",
  xlab = "Fitted Values",
  ylab = "Residual Values"
)
# Fitted values plotted against the observed response.
# With `data = mod`, the formula is evaluated inside the lm object, so
# `fitted.values` and `model` refer to components of `mod`; `model` is the
# model frame holding the `score` and `time` columns used in the fit.
# The x variable is the observed `score` (matching the "Observed Values"
# label and the "Fitted vs. Actual" title), not the predictor `time`:
# fitted values are an exact linear function of `time`, so plotting against
# it would always give a perfect line and diagnose nothing.
xyplot(
  fitted.values ~ model$score,
  data = mod,
  type = c("p", "r"),  # "p" draws the points, "r" adds a regression line
  pch = 16,
  cex = 1.5,
  main = "Fitted vs. Actual Diagnostic Graph",
  xlab = "Observed Values",
  ylab = "Fitted Values"
)
8.5 Making predictions
"Score Prediction for time = 45 minutes"
## [1] "Score Prediction for time = 45 minutes"
# Predicted score from the fitted model `mod` at time = 45.
predict(mod, newdata = data.frame(time = 45))
## 1
## 84.393
"Score Prediction for time = 70 minutes"
## [1] "Score Prediction for time = 70 minutes"
# Predicted score from the fitted model `mod` at time = 70.
predict(mod, newdata = data.frame(time = 70))
## 1
## 81.904