# attach the packages `dplyr` and `readr`
# use `read_csv()` to import the dataset and assign the data to `Titanic_2`
# get an overview of the data and drop `Name`
# change the column names
# attach the package `corrplot`
# check correlations using `corrplot()`
# Solution: import the Titanic data, tidy it up and inspect the correlations
# between its numeric columns.

# attach the packages `dplyr` and `readr`
library(readr)
library(dplyr)

# use `read_csv()` to import the dataset and assign the data to `Titanic_2`
# (the CSV is fetched from the URL below, so network access is required)
Titanic_2 <- read_csv("https://stanford.io/2O9RUCF")

# get an overview of the data (`summary()`, `str()` and `head()` are
# alternative ways of doing so)
summary(Titanic_2)
# or
str(Titanic_2)
# or
head(Titanic_2)

# drop `Name`; selecting the column by name rather than by position keeps
# this correct even if the column order of the source file changes, and
# fails loudly if the column is missing
Titanic_2 <- select(Titanic_2, -Name)

# change the column names
colnames(Titanic_2) <- c(
  "Survived", "Class", "Sex", "Age", "Siblings", "Parents", "Fare"
)

# attach the package `corrplot`
library(corrplot)

# check correlations using `corrplot()`; `cor()` needs numeric input, so only
# the numeric columns are passed on
corrplot(cor(select_if(Titanic_2, is.numeric)))
# (the highest correlation is between fare and passenger class)
# Submission checks. The order of the calls is kept as-is because it
# determines which feedback is reported first.

# `Titanic_2` must exist and match the solution's object
test_object("Titanic_2")

# an overview via any ONE of `summary()`, `head()` or `str()` is accepted
test_or(
  {
    test_function("summary", args = "object")
  },
  {
    test_function("head", args = "x")
  },
  {
    test_function("str", args = "object")
  }
)

# `library()` must have been called
test_function("library")

# `corrplot()` must have been called; its `corr` argument is checked
test_function("corrplot", args = "corr")

# feedback shown once all checks above pass
success_msg("Correct. Unsurprisingly, the highest correlation (-0.55) is between Fare and Class so collinearity is not an issue here.")