Chapter 37 purrr Package

What You’ll Learn:

  • map() family functions
  • Type-safe iteration
  • Error handling in iteration
  • Advanced patterns
  • Advantages over apply family

Key Errors Covered: 15+ purrr errors

Difficulty: ⭐⭐⭐ Advanced

37.1 Introduction

purrr provides modern, type-safe iteration:

library(purrr)

# map() instead of lapply()
map(1:3, ~ . * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6

# Type-safe variants
map_dbl(1:3, ~ . * 2)
#> [1] 2 4 6

37.2 map() Family

💡 Key Insight: Type-Safe Mapping

library(purrr)

# map() returns list (like lapply)
map(1:3, sqrt)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 1.414214
#> 
#> [[3]]
#> [1] 1.732051

# Type-specific variants
map_dbl(1:3, sqrt)      # numeric vector
#> [1] 1.000000 1.414214 1.732051
map_chr(1:3, as.character)  # character vector
#> [1] "1" "2" "3"
map_int(1:3, ~ .x)      # integer vector
#> [1] 1 2 3
map_lgl(c(TRUE, FALSE, TRUE), ~ .x)  # logical vector
#> [1]  TRUE FALSE  TRUE

# Error if wrong type
# map_dbl(1:3, as.character)  # Error!

# Row-bind the per-element data frames into a single data frame.
# list_rbind() is the purrr 1.0 replacement for the superseded map_df().
list_rbind(map(1:3, ~ data.frame(x = .x, y = .x^2)))
#>   x y
#> 1 1 1
#> 2 2 4
#> 3 3 9

37.3 Formula Syntax

💡 Key Insight: Convenient Formula Syntax

# Three ways to write functions

# 1. Regular function
map(1:3, function(x) x * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6

# 2. Formula with ~ (one argument: .x or .)
map(1:3, ~ .x * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6
map(1:3, ~ . * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6

# 3. Formula with ~ (two arguments: .x and .y)
map2(1:3, 4:6, ~ .x + .y)
#> [[1]]
#> [1] 5
#> 
#> [[2]]
#> [1] 7
#> 
#> [[3]]
#> [1] 9

# Complex expressions
map(1:3, ~ {
  squared <- .x^2
  sqrt(squared)
})
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 3

# Accessing list elements
lst <- list(
  list(a = 1, b = 2),
  list(a = 3, b = 4)
)

map(lst, ~ .x$a)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 3
# Or simpler:
map(lst, "a")
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 3

37.4 Error #1: Type Mismatch

⭐⭐ INTERMEDIATE 🔢 TYPE

37.4.1 The Error

# Function returns character, but expecting numeric
map_dbl(1:3, ~ as.character(.x))
#> Error in `map_dbl()`:
#> ℹ In index: 1.
#> Caused by error:
#> ! Can't coerce from a string to a double.

🔴 ERROR

Error: Can't coerce from a string to a double.

37.4.2 Solutions

SOLUTION: Use Correct Type

# Use map_chr for character output
map_chr(1:3, ~ as.character(.x))
#> [1] "1" "2" "3"

# Or use map() and get list
map(1:3, ~ as.character(.x))
#> [[1]]
#> [1] "1"
#> 
#> [[2]]
#> [1] "2"
#> 
#> [[3]]
#> [1] "3"

# Convert inside function
map_dbl(1:3, ~ as.numeric(as.character(.x)))
#> [1] 1 2 3

37.5 map2() and pmap()

💡 Key Insight: Multiple Inputs

# map2() for two inputs
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9

map2_chr(c("a", "b", "c"), 1:3, ~ paste(.x, .y))
#> [1] "a 1" "b 2" "c 3"

# pmap() for multiple inputs (list of inputs)
inputs <- list(
  x = 1:3,
  y = 4:6,
  z = 7:9
)

pmap_dbl(inputs, function(x, y, z) x + y + z)
#> [1] 12 15 18

# With formula
pmap_dbl(inputs, ~ ..1 + ..2 + ..3)
#> [1] 12 15 18

# Named arguments: the list names (name, age) are matched to the
# function's parameters by name, so their order in the list does not matter
pmap_chr(
  list(
    name = c("Alice", "Bob"),
    age = c(25, 30)
  ),
  function(name, age) paste(name, "is", age, "years old")
)
#> [1] "Alice is 25 years old" "Bob is 30 years old"

37.6 Error Handling

🎯 Best Practice: Safe Iteration

# Data with potential errors
data <- list("1", "2", "not_a_number", "4")

# as.numeric() only *warns* on unparseable input (it returns NA), so wrap it
# in a strict parser that raises a real error for the wrappers to catch
parse_strict <- function(x) {
  value <- suppressWarnings(as.numeric(x))
  if (is.na(value)) {
    stop("Can't parse '", x, "' as a number", call. = FALSE)
  }
  value
}

# A plain map_dbl() aborts at the first failing element
# map_dbl(data, parse_strict)  # Error!

# possibly() returns default on error
safe_numeric <- possibly(parse_strict, otherwise = NA_real_)
map_dbl(data, safe_numeric)
#> [1]  1  2 NA  4

# safely() returns list of result and error
# (exactly one of the two is NULL for each element)
safe_parse <- safely(parse_strict)
map(data, safe_parse)
#> [[1]]
#> [[1]]$result
#> [1] 1
#> 
#> [[1]]$error
#> NULL
#> 
#> 
#> [[2]]
#> [[2]]$result
#> [1] 2
#> 
#> [[2]]$error
#> NULL
#> 
#> 
#> [[3]]
#> [[3]]$result
#> NULL
#> 
#> [[3]]$error
#> <simpleError: Can't parse 'not_a_number' as a number>
#> 
#> 
#> [[4]]
#> [[4]]$result
#> [1] 4
#> 
#> [[4]]$error
#> NULL

# quietly() captures printed output, messages, and warnings
quiet_sqrt <- quietly(sqrt)
map(c(4, -1, 9), quiet_sqrt)
#> [[1]]
#> [[1]]$result
#> [1] 2
#> 
#> [[1]]$output
#> [1] ""
#> 
#> [[1]]$warnings
#> character(0)
#> 
#> [[1]]$messages
#> character(0)
#> 
#> 
#> [[2]]
#> [[2]]$result
#> [1] NaN
#> 
#> [[2]]$output
#> [1] ""
#> 
#> [[2]]$warnings
#> [1] "NaNs produced"
#> 
#> [[2]]$messages
#> character(0)
#> 
#> 
#> [[3]]
#> [[3]]$result
#> [1] 3
#> 
#> [[3]]$output
#> [1] ""
#> 
#> [[3]]$warnings
#> character(0)
#> 
#> [[3]]$messages
#> character(0)

# Try each function in turn until one returns a non-NA value
try_functions <- list(
  as.numeric,
  function(x) 0
)

map(data, ~ reduce(try_functions, function(val, f) {
  if (!is.na(val)) return(val)
  try(f(.x), silent = TRUE)
}, .init = NA))
#> Warning in f(.x): NAs introduced by coercion
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 0
#> 
#> [[4]]
#> [1] 4

37.7 Advanced Patterns

🎯 Best Practice: Complex Patterns

# Keep/discard based on condition
numbers <- list(1, "a", 3, "b", 5)

keep(numbers, is.numeric)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 5
discard(numbers, is.character)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 5

# Detect if any/all meet condition
# some() is namespace-qualified because other attached packages (e.g. car)
# export their own some(), which masks purrr's and fails on this input
purrr::some(numbers, is.numeric)
#> [1] TRUE
every(numbers, is.numeric)
#> [1] FALSE

# Find the first matching element and its position
detect(numbers, is.character)
#> [1] "a"
detect_index(numbers, is.character)
#> [1] 2

# Reduce (fold)
reduce(1:5, `+`)
#> [1] 15
reduce(1:5, `*`)
#> [1] 120

# Accumulate (show intermediate steps)
accumulate(1:5, `+`)
#> [1]  1  3  6 10 15

# Modify elements
modify(list(1, 2, 3), ~ .x * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6
modify_if(list(1, "a", 3), is.numeric, ~ .x * 2)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] "a"
#> 
#> [[3]]
#> [1] 6
modify_at(list(a = 1, b = 2, c = 3), "b", ~ .x * 10)
#> $a
#> [1] 1
#> 
#> $b
#> [1] 20
#> 
#> $c
#> [1] 3

37.8 Nested Data

💡 Key Insight: Working with Nested Lists

# Nested list
nested <- list(
  person1 = list(name = "Alice", age = 25, scores = c(90, 85, 92)),
  person2 = list(name = "Bob", age = 30, scores = c(88, 92, 87)),
  person3 = list(name = "Charlie", age = 35, scores = c(95, 89, 91))
)

# Extract single element
map(nested, "name")
#> $person1
#> [1] "Alice"
#> 
#> $person2
#> [1] "Bob"
#> 
#> $person3
#> [1] "Charlie"
map(nested, "age")
#> $person1
#> [1] 25
#> 
#> $person2
#> [1] 30
#> 
#> $person3
#> [1] 35

# Extract nested element
map(nested, list("scores", 1))  # First score
#> $person1
#> [1] 90
#> 
#> $person2
#> [1] 88
#> 
#> $person3
#> [1] 95

# Complex extraction
map(nested, ~ mean(.x$scores))
#> $person1
#> [1] 89
#> 
#> $person2
#> [1] 89
#> 
#> $person3
#> [1] 91.66667

# Modify nested structure
map(nested, ~ {
  .x$avg_score <- mean(.x$scores)
  .x
})
#> $person1
#> $person1$name
#> [1] "Alice"
#> 
#> $person1$age
#> [1] 25
#> 
#> $person1$scores
#> [1] 90 85 92
#> 
#> $person1$avg_score
#> [1] 89
#> 
#> 
#> $person2
#> $person2$name
#> [1] "Bob"
#> 
#> $person2$age
#> [1] 30
#> 
#> $person2$scores
#> [1] 88 92 87
#> 
#> $person2$avg_score
#> [1] 89
#> 
#> 
#> $person3
#> $person3$name
#> [1] "Charlie"
#> 
#> $person3$age
#> [1] 35
#> 
#> $person3$scores
#> [1] 95 89 91
#> 
#> $person3$avg_score
#> [1] 91.66667

# Flatten nested lists
nested_scores <- map(nested, "scores")
# Drop the outer person names first: outer names cannot be combined with
# the unnamed length-3 score vectors. list_c() is the purrr 1.0 replacement
# for the superseded flatten_dbl().
list_c(unname(nested_scores))
#> [1] 90 85 92 88 92 87 95 89 91

37.9 Comparison with Base R

🎯 Best Practice: purrr vs Base R

# Get means of each column
df <- mtcars[, 1:3]

# Base R
lapply(df, mean)
#> $mpg
#> [1] 20.09062
#> 
#> $cyl
#> [1] 6.1875
#> 
#> $disp
#> [1] 230.7219
sapply(df, mean)
#>       mpg       cyl      disp 
#>  20.09062   6.18750 230.72188

# purrr (type-safe)
map(df, mean)
#> $mpg
#> [1] 20.09062
#> 
#> $cyl
#> [1] 6.1875
#> 
#> $disp
#> [1] 230.7219
map_dbl(df, mean)
#>       mpg       cyl      disp 
#>  20.09062   6.18750 230.72188

# Multiple inputs
# Base R
mapply(function(x, y) x + y, 1:3, 4:6)
#> [1] 5 7 9

# purrr (cleaner)
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9

# Error handling
# log("a") raises a genuine error (as.numeric("a") would only warn),
# so both handlers below are actually exercised.
# Base R
tryCatch(log("a"), error = function(e) NA_real_)
#> [1] NA

# purrr (composable)
possibly(log, otherwise = NA_real_)("a")
#> [1] NA

# Advantages of purrr:
# - Type safety
# - Consistent interface
# - Better error handling
# - Cleaner syntax
# - Composable functions

37.10 Real-World Examples

🎯 Best Practice: Practical Uses

library(dplyr)

# 1. Read multiple files
# files <- list.files(pattern = "\\.csv$")
# data <- map_df(files, read_csv, .id = "file")

# 2. Fit multiple models
models <- mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ hp, data = .))

# Get R-squared from each
map_dbl(models, ~ summary(.)$r.squared)
#>          4          6          8 
#> 0.27405583 0.01614624 0.08044919

# 3. Extract nested information
results <- list(
  model1 = list(coef = c(1.5, 2.3), r2 = 0.85),
  model2 = list(coef = c(1.8, 2.1), r2 = 0.90),
  model3 = list(coef = c(1.2, 2.5), r2 = 0.78)
)

map_dbl(results, "r2")
#> model1 model2 model3 
#>   0.85   0.90   0.78
map(results, "coef")
#> $model1
#> [1] 1.5 2.3
#> 
#> $model2
#> [1] 1.8 2.1
#> 
#> $model3
#> [1] 1.2 2.5

# 4. Validate data
data_list <- list(
  df1 = data.frame(x = 1:3, y = 4:6),
  df2 = data.frame(x = 1:3),  # Missing y
  df3 = data.frame(x = 1:3, y = 4:6)
)

# Check all have 'y' column
map_lgl(data_list, ~ "y" %in% names(.))
#>   df1   df2   df3 
#>  TRUE FALSE  TRUE

# Filter to valid ones
valid_data <- keep(data_list, ~ "y" %in% names(.))
length(valid_data)
#> [1] 2

37.11 Summary

Key Takeaways:

  1. Type-safe mapping - Use map_dbl(), map_chr(), etc.
  2. Formula syntax - Clean with ~ .x
  3. Error handling - possibly(), safely(), quietly()
  4. Multiple inputs - map2(), pmap()
  5. Powerful helpers - keep(), discard(), reduce()
  6. Nested data - Easy extraction with list indexing
  7. Consistent interface - All functions work similarly

Quick Reference:

Function Purpose Returns
map() Apply to each List
map_dbl() Apply to each Numeric vector
map_chr() Apply to each Character vector
map_int() Apply to each Integer vector
map_lgl() Apply to each Logical vector
map2() Two inputs List/vector
pmap() Multiple inputs List/vector
walk() Side effects only Input invisibly

Common Patterns:

# Basic mapping
map(list, function)
map_dbl(list, ~ .x * 2)

# Multiple inputs
map2(x, y, ~ .x + .y)
pmap(list(x, y, z), function(x, y, z) x + y + z)

# Error handling
possibly(function, otherwise = NA)
safely(function)

# Filtering
keep(list, is.numeric)
discard(list, is.na)

# Detection
some(list, is.numeric)
every(list, is.numeric)

# Reduction
reduce(list, `+`)
accumulate(list, `+`)

# Extraction
map(nested_list, "element_name")
map(nested_list, list("level1", "level2"))

Best Practices:

# ✅ Good
Use type-specific map variants (map_dbl, etc.)
Use formula syntax for clarity
Handle errors with possibly/safely
Use descriptive function names

# ❌ Avoid
Using map() when type matters
Ignoring potential errors
Complex nested anonymous functions