第 32 章 机器学习

Rstudio工厂的 Max Kuhn 大神正主持机器学习的开发,日臻成熟了,感觉很强大啊。

library(tidyverse)
library(tidymodels)

32.1 数据

penguins <- read_csv("./demo_data/penguins.csv") %>%
  janitor::clean_names() %>% 
  drop_na()

penguins %>%
  head()
penguins %>%
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, 
             color = species, shape = species)
         ) +
  geom_point()

32.2 机器学习

split <- penguins %>% 
  mutate(species = as_factor(species)) %>% 
  mutate(species = fct_lump(species, 1)) %>% 
  initial_split()

split

training_data <- training(split)
training_data
testing_data <- testing(split)
testing_data

32.3 model01

model_logistic <- parsnip::logistic_reg() %>% 
  set_engine("glm") %>% 
  set_mode("classification") %>% 
  fit(species ~ bill_length_mm + bill_depth_mm, data = training_data)


bind_cols(
  predict(model_logistic, new_data = testing_data, type = "class"),
  predict(model_logistic, new_data = testing_data, type = "prob"),
  testing_data
)


predict(model, new_data = testing_data) %>% 
  bind_cols(testing_data) %>% 
  count(.pred_class, species)

32.4 model02

model_neighbor <- parsnip::nearest_neighbor(neighbors = 10) %>% 
  set_engine("kknn") %>% 
  set_mode("classification") %>% 
  fit(species ~ bill_length_mm, data = training_data)

predict(model_neighbor, new_data = testing_data) %>% 
  bind_cols(testing_data) %>% 
  count(.pred_class, species)

32.5 model03

model_multinom <- parsnip::multinom_reg() %>% 
  set_engine("nnet") %>% 
  set_mode("classification") %>% 
  fit(species ~ bill_length_mm, data = training_data)

predict(model_multinom, new_data = testing_data) %>% 
  bind_cols(testing_data) %>% 
  count(.pred_class, species)

32.6 model04

model_decision <- parsnip::decision_tree() %>% 
  set_engine("rpart") %>% 
  set_mode("classification") %>% 
  fit(species ~ bill_length_mm, data = training_data)

predict(model_decision, new_data = testing_data) %>% 
  bind_cols(testing_data) %>% 
  count(.pred_class, species)