William’s Portfolio
2023-12-31
Chapter 1 Model Analysis
# Load necessary libraries
library(class)
library(rpart)
library(e1071)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load the Iris dataset
data(iris)
# Compute the classification accuracy of a fitted model on a labelled data set.
#
# Arguments:
#   model      A fitted model object that has a predict() method.
#   test_data  A data frame whose first four columns are the predictors and
#              which has a `Species` column holding the true labels.
#
# Returns the fraction of rows whose predicted label equals `Species`
# (NaN when `test_data` has no rows).
evaluate_model <- function(model, test_data) {
  features <- test_data[, 1:4]

  # predict() on an rpart classification tree returns a matrix of class
  # probabilities unless type = "class" is requested; comparing that matrix
  # with the labels made every comparison FALSE and the accuracy 0.
  predictions <- if (inherits(model, "rpart")) {
    predict(model, features, type = "class")
  } else {
    predict(model, features)
  }

  mean(as.character(predictions) == as.character(test_data$Species))
}
# Estimate classification accuracy by bootstrap resampling.
#
# For each iteration a bootstrap sample (rows drawn with replacement) is used
# for training, and accuracy is measured on the out-of-bag rows, i.e. the rows
# of `data` that were not drawn into that sample.
#
# Arguments:
#   model          "knn" or "svm"; any other value fits an rpart
#                  classification tree.
#   data           A data frame whose first four columns are the predictors
#                  and which has a `Species` column holding the labels.
#   num_bootstrap  Number of bootstrap iterations.
#
# Returns a numeric vector of length `num_bootstrap` with one out-of-bag
# accuracy per iteration.
bootstrapped_accuracy <- function(model, data, num_bootstrap) {
  accuracies <- numeric(num_bootstrap)

  # seq_len() gives an empty loop for num_bootstrap == 0, whereas
  # 1:num_bootstrap would iterate over c(1, 0).
  for (i in seq_len(num_bootstrap)) {
    # Draw the bootstrap sample and collect the rows left out of it
    bootstrap_indices <- sample(nrow(data), replace = TRUE)
    bootstrap_sample <- data[bootstrap_indices, ]
    oob_data <- data[-bootstrap_indices, ]
    oob_features <- oob_data[, 1:4]

    # Train on the bootstrap sample and predict the out-of-bag rows
    if (model == "knn") {
      # knn() has no separate fit step: it returns the predicted labels
      predictions <- knn(
        train = bootstrap_sample[, 1:4],
        test = oob_features,
        cl = bootstrap_sample$Species,
        k = 3
      )
    } else if (model == "svm") {
      model_fit <- svm(
        Species ~ .,
        data = bootstrap_sample,
        kernel = "radial",
        cost = 1
      )
      predictions <- predict(model_fit, oob_features)
    } else {
      model_fit <- rpart(Species ~ ., data = bootstrap_sample, method = "class")
      # Without type = "class", predict() on an rpart classification tree
      # returns a probability matrix, which made every accuracy 0.
      predictions <- predict(model_fit, oob_features, type = "class")
    }

    accuracies[i] <- mean(
      as.character(predictions) == as.character(oob_data$Species)
    )
  }

  accuracies
}
# Hold out 30% of iris for testing; the seed makes the split reproducible.
# Statement order below is kept as-is because later steps draw from the same
# random number stream.
set.seed(123)
sample_indices <- sample(seq_len(nrow(iris)), size = 0.7 * nrow(iris))
train_data <- iris[sample_indices, ]
test_data <- iris[-sample_indices, ]

# K-Nearest Neighbors (KNN): share of test rows whose predicted label matches
knn_accuracy <- sum(
  knn(
    train = train_data[, 1:4],
    test = test_data[, 1:4],
    cl = train_data$Species,
    k = 3
  ) == test_data$Species
) / nrow(test_data)

# Decision tree fitted on the training rows, scored on the held-out rows
decision_tree_accuracy <- evaluate_model(
  model = rpart(Species ~ ., data = train_data, method = "class"),
  test_data = test_data
)

# Support Vector Machine (radial kernel), scored on the held-out rows
svm_accuracy <- evaluate_model(
  model = svm(Species ~ ., data = train_data, kernel = "radial", cost = 1),
  test_data = test_data
)

# Bootstrap evaluation: 100 resamples of the training rows per model
num_bootstrap <- 100
knn_bootstrapped_accuracies <- bootstrapped_accuracy(
  model = "knn",
  data = train_data,
  num_bootstrap = num_bootstrap
)
decision_tree_bootstrapped_accuracies <- bootstrapped_accuracy(
  model = "rpart",
  data = train_data,
  num_bootstrap = num_bootstrap
)
svm_bootstrapped_accuracies <- bootstrapped_accuracy(
  model = "svm",
  data = train_data,
  num_bootstrap = num_bootstrap
)
## K-Nearest Neighbors (KNN) Accuracy: 0.9777778
## Decision Trees Accuracy: 0
## Support Vector Machines (SVM) Accuracy: 0.9777778
## Bootstrapped K-Nearest Neighbors (KNN) Mean Accuracy: 0.94177
## Bootstrapped Decision Trees Mean Accuracy: 0
## Bootstrapped Support Vector Machines (SVM) Mean Accuracy: 0.9528486