William’s Portfolio
2023-12-31
Chapter 1 Model Analysis
# Load necessary libraries
library(class)
library(rpart)
library(e1071)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load the Iris dataset
data(iris)
# Function to evaluate model accuracy
#
# Predicts a class label for every row of `test_data` from its first four
# columns and returns the share of rows whose predicted label equals
# `test_data$Species`.
#
# model:     a fitted model object accepted by predict(), e.g. the result of
#            rpart() or svm().
# test_data: data frame with the predictors in columns 1:4 and the true
#            labels in a `Species` column.
# Returns a single numeric accuracy (NaN when `test_data` has no rows).
evaluate_model <- function(model, test_data) {
  features <- test_data[, 1:4]
  if (inherits(model, "rpart")) {
    # predict() on a classification tree returns a class-probability matrix
    # unless type = "class" is requested; coercing that matrix to character
    # never matches a species name, which made the accuracy come out as 0.
    predictions <- predict(model, features, type = "class")
  } else {
    predictions <- predict(model, features)
  }
  predicted_labels <- as.character(predictions)
  accuracy <- sum(predicted_labels == test_data$Species) / nrow(test_data)
  accuracy
}
# Function for bootstrapping
#
# Repeats `num_bootstrap` times: draw a bootstrap sample of the rows of
# `data` (with replacement), fit the requested model on it, and measure
# accuracy on the rows that were NOT drawn (the out-of-bag rows).
#
# model:         "knn" for k-nearest neighbours (k = 3), "svm" for a radial
#                SVM (cost = 1); any other value fits an rpart
#                classification tree.
# data:          data frame with the predictors in columns 1:4 and the class
#                label in a `Species` column.
# num_bootstrap: number of bootstrap iterations.
# Returns a numeric vector of length `num_bootstrap` holding one accuracy per
# iteration; an iteration with no out-of-bag rows yields NA.
bootstrapped_accuracy <- function(model, data, num_bootstrap) {
  stopifnot(is.character(model), length(model) == 1L)

  accuracies <- numeric(num_bootstrap)
  # seq_len() keeps the loop from running at all when num_bootstrap is 0.
  for (i in seq_len(num_bootstrap)) {
    # Create a bootstrap sample
    bootstrap_indices <- sample(nrow(data), replace = TRUE)
    bootstrap_sample <- data[bootstrap_indices, ]
    # Rows never drawn into the bootstrap sample serve as the test set.
    out_of_bag <- data[-bootstrap_indices, ]

    if (nrow(out_of_bag) == 0L) {
      # Nothing left to score against; avoid a 0 / 0 accuracy.
      accuracies[i] <- NA_real_
      next
    }

    # Train the model on the bootstrap sample and predict the out-of-bag rows
    if (model == "knn") {
      # knn() has no separate fit step: it returns the predictions directly.
      predictions <- knn(
        train = bootstrap_sample[, 1:4],
        test = out_of_bag[, 1:4],
        cl = bootstrap_sample$Species,
        k = 3
      )
    } else if (model == "svm") {
      model_fit <- svm(
        Species ~ .,
        data = bootstrap_sample, kernel = "radial", cost = 1
      )
      predictions <- predict(model_fit, out_of_bag[, 1:4])
    } else {
      model_fit <- rpart(Species ~ ., data = bootstrap_sample, method = "class")
      # type = "class" is required: the default returns a probability matrix,
      # which never compares equal to a species label (accuracy was always 0).
      predictions <- predict(model_fit, out_of_bag[, 1:4], type = "class")
    }

    # Evaluate model accuracy on the out-of-bag rows
    predicted_labels <- as.character(predictions)
    accuracies[i] <- sum(predicted_labels == out_of_bag$Species) /
      nrow(out_of_bag)
  }
  accuracies
}
# Split the dataset into training and testing sets
# 70% of the rows are drawn at random for training; the rest are for testing.
# The seed is fixed so the split is reproducible.
set.seed(123)
sample_indices <- sample(seq_len(nrow(iris)), 0.7 * nrow(iris))
train_data <- iris[sample_indices, ]
test_data <- iris[-sample_indices, ]
# K-Nearest Neighbors (KNN) model
# knn() returns the predicted labels for the test rows directly, so the
# accuracy is computed inline rather than through evaluate_model().
knn_predictions <- knn(
  train = train_data[, 1:4],
  test = test_data[, 1:4],
  cl = train_data$Species,
  k = 3
)
knn_accuracy <- sum(knn_predictions == test_data$Species) / nrow(test_data)

# Decision Trees model
decision_tree_accuracy <- evaluate_model(
  rpart(Species ~ ., data = train_data, method = "class"),
  test_data
)

# Support Vector Machines (SVM) model
svm_accuracy <- evaluate_model(
  svm(Species ~ ., data = train_data, kernel = "radial", cost = 1),
  test_data
)
# Number of bootstrap iterations
num_bootstrap <- 100

# Evaluate models using bootstrapping
# Each call returns one accuracy per bootstrap iteration, resampled from the
# training rows only.
knn_bootstrapped_accuracies <- bootstrapped_accuracy(
  "knn", train_data, num_bootstrap
)
decision_tree_bootstrapped_accuracies <- bootstrapped_accuracy(
  "rpart", train_data, num_bootstrap
)
svm_bootstrapped_accuracies <- bootstrapped_accuracy(
  "svm", train_data, num_bootstrap
)
## K-Nearest Neighbors (KNN) Accuracy: 0.9777778
## Decision Trees Accuracy: 0
## Support Vector Machines (SVM) Accuracy: 0.9777778
## Bootstrapped K-Nearest Neighbors (KNN) Mean Accuracy: 0.94177
## Bootstrapped Decision Trees Mean Accuracy: 0
## Bootstrapped Support Vector Machines (SVM) Mean Accuracy: 0.9528486