Chapter 37 purrr Package
What You’ll Learn:
- map() family functions
- Type-safe iteration
- Error handling in iteration
- Advanced patterns
- Advantages over apply family
Key Errors Covered: 15+ purrr errors
Difficulty: ⭐⭐⭐ Advanced
37.2 map() Family
💡 Key Insight: Type-Safe Mapping
library(purrr)
# map() returns list (like lapply)
map(1:3, sqrt)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 1.414214
#>
#> [[3]]
#> [1] 1.732051
# Type-specific variants
map_dbl(1:3, sqrt) # numeric vector
#> [1] 1.000000 1.414214 1.732051
map_chr(1:3, as.character) # character vector
#> [1] "1" "2" "3"
map_int(1:3, ~ .x) # integer vector
#> [1] 1 2 3
map_lgl(c(TRUE, FALSE, TRUE), ~ .x) # logical vector
#> [1] TRUE FALSE TRUE
# Error if wrong type
# map_dbl(1:3, as.character) # Error!
# map_df for data frames
map_df(1:3, ~ data.frame(x = .x, y = .x^2))
#> x y
#> 1 1 1
#> 2 2 4
#> 3 3 937.3 Formula Syntax
💡 Key Insight: Convenient Formula Syntax
# Three ways to write functions
# 1. Regular function
map(1:3, function(x) x * 2)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 6
# 2. Formula with ~ (one argument: .x or .)
map(1:3, ~ .x * 2)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 6
map(1:3, ~ . * 2)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 6
# 3. Formula with ~ (two arguments: .x and .y)
map2(1:3, 4:6, ~ .x + .y)
#> [[1]]
#> [1] 5
#>
#> [[2]]
#> [1] 7
#>
#> [[3]]
#> [1] 9
# Complex expressions
map(1:3, ~ {
squared <- .x^2
sqrt(squared)
})
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] 3
# Accessing list elements
lst <- list(
list(a = 1, b = 2),
list(a = 3, b = 4)
)
map(lst, ~ .x$a)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 3
# Or simpler:
map(lst, "a")
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 337.4 Error #1: Type Mismatch
⭐⭐ INTERMEDIATE 🔢 TYPE
37.4.1 The Error
# Function returns character, but expecting numeric
map_dbl(1:3, ~ as.character(.x))
#> Error in `map_dbl()`:
#> ℹ In index: 1.
#> Caused by error:
#> ! Can't coerce from a string to a double.🔴 ERROR
Error: Can't coerce element 1 from a character to a double
37.5 map2() and pmap()
💡 Key Insight: Multiple Inputs
# map2() for two inputs
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9
map2_chr(c("a", "b", "c"), 1:3, ~ paste(.x, .y))
#> [1] "a 1" "b 2" "c 3"
# pmap() for multiple inputs (list of inputs)
inputs <- list(
x = 1:3,
y = 4:6,
z = 7:9
)
pmap_dbl(inputs, function(x, y, z) x + y + z)
#> [1] 12 15 18
# With formula
pmap_dbl(inputs, ~ ..1 + ..2 + ..3)
#> [1] 12 15 18
# Named arguments
# pmap() passes each list element under its own name, so a function whose
# parameters are called `name` and `age` receives them by name rather than
# by position.
pmap_chr(
  list(
    name = c("Alice", "Bob"),
    age = c(25, 30)
  ),
  function(name, age) paste(name, "is", age, "years old")
)
#> [1] "Alice is 25 years old" "Bob is 30 years old"37.6 Error Handling
🎯 Best Practice: Safe Iteration
# Data with potential errors
data <- list("1", "2", "not_a_number", "4")
# as.numeric() does not raise an error on unparseable input: it returns NA
# and signals a warning. To demonstrate error handling, use a strict parser
# that calls stop() when the input is not a number.
strict_numeric <- function(x) {
  value <- suppressWarnings(as.numeric(x))
  if (is.na(value)) {
    stop("Cannot parse '", x, "' as a number", call. = FALSE)
  }
  value
}
# map() will fail
# map_dbl(data, strict_numeric) # Error!
# possibly() returns default on error
safe_numeric <- possibly(strict_numeric, otherwise = NA_real_)
map_dbl(data, safe_numeric)
#> [1] 1 2 NA 4
# safely() returns list of result and error
safe_parse <- safely(as.numeric)
map(data, safe_parse)
#> Warning in .Primitive("as.double")(x, ...): NAs introduced by coercion
#> [[1]]
#> [[1]]$result
#> [1] 1
#>
#> [[1]]$error
#> NULL
#>
#>
#> [[2]]
#> [[2]]$result
#> [1] 2
#>
#> [[2]]$error
#> NULL
#>
#>
#> [[3]]
#> [[3]]$result
#> [1] NA
#>
#> [[3]]$error
#> NULL
#>
#>
#> [[4]]
#> [[4]]$result
#> [1] 4
#>
#> [[4]]$error
#> NULL
# quietly() captures messages/warnings
quiet_sqrt <- quietly(sqrt)
map(c(4, -1, 9), quiet_sqrt)
#> [[1]]
#> [[1]]$result
#> [1] 2
#>
#> [[1]]$output
#> [1] ""
#>
#> [[1]]$warnings
#> character(0)
#>
#> [[1]]$messages
#> character(0)
#>
#>
#> [[2]]
#> [[2]]$result
#> [1] NaN
#>
#> [[2]]$output
#> [1] ""
#>
#> [[2]]$warnings
#> [1] "NaNs produced"
#>
#> [[2]]$messages
#> character(0)
#>
#>
#> [[3]]
#> [[3]]$result
#> [1] 3
#>
#> [[3]]$output
#> [1] ""
#>
#> [[3]]$warnings
#> character(0)
#>
#> [[3]]$messages
#> character(0)
# Try each until one works
try_functions <- list(
as.numeric,
function(x) 0
)
map(data, ~ reduce(try_functions, function(val, f) {
if (!is.na(val)) return(val)
try(f(.x), silent = TRUE)
}, .init = NA))
#> Warning in f(.x): NAs introduced by coercion
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] 0
#>
#> [[4]]
#> [1] 437.7 Advanced Patterns
🎯 Best Practice: Complex Patterns
# Keep/discard based on condition
numbers <- list(1, "a", 3, "b", 5)
keep(numbers, is.numeric)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 5
discard(numbers, is.character)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 5
# Detect if any/all meet condition
# Call purrr's some() explicitly: other attached packages (such as car) also
# export a function named some(), and a masked call fails with an unrelated
# error instead of returning a logical.
purrr::some(numbers, is.numeric)
#> [1] TRUE
every(numbers, is.numeric)
#> [1] FALSE
# Find position
detect(numbers, is.character)
#> [1] "a"
detect_index(numbers, is.character)
#> [1] 2
# Reduce (fold)
reduce(1:5, `+`)
#> [1] 15
reduce(1:5, `*`)
#> [1] 120
# Accumulate (show intermediate steps)
accumulate(1:5, `+`)
#> [1] 1 3 6 10 15
# Modify elements
modify(list(1, 2, 3), ~ .x * 2)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 6
modify_if(list(1, "a", 3), is.numeric, ~ .x * 2)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] "a"
#>
#> [[3]]
#> [1] 6
modify_at(list(a = 1, b = 2, c = 3), "b", ~ .x * 10)
#> $a
#> [1] 1
#>
#> $b
#> [1] 20
#>
#> $c
#> [1] 337.8 Nested Data
💡 Key Insight: Working with Nested Lists
# Nested list
nested <- list(
person1 = list(name = "Alice", age = 25, scores = c(90, 85, 92)),
person2 = list(name = "Bob", age = 30, scores = c(88, 92, 87)),
person3 = list(name = "Charlie", age = 35, scores = c(95, 89, 91))
)
# Extract single element
map(nested, "name")
#> $person1
#> [1] "Alice"
#>
#> $person2
#> [1] "Bob"
#>
#> $person3
#> [1] "Charlie"
map(nested, "age")
#> $person1
#> [1] 25
#>
#> $person2
#> [1] 30
#>
#> $person3
#> [1] 35
# Extract nested element
map(nested, list("scores", 1)) # First score
#> $person1
#> [1] 90
#>
#> $person2
#> [1] 88
#>
#> $person3
#> [1] 95
# Complex extraction
map(nested, ~ mean(.x$scores))
#> $person1
#> [1] 89
#>
#> $person2
#> [1] 89
#>
#> $person3
#> [1] 91.66667
# Modify nested structure
map(nested, ~ {
.x$avg_score <- mean(.x$scores)
.x
})
#> $person1
#> $person1$name
#> [1] "Alice"
#>
#> $person1$age
#> [1] 25
#>
#> $person1$scores
#> [1] 90 85 92
#>
#> $person1$avg_score
#> [1] 89
#>
#>
#> $person2
#> $person2$name
#> [1] "Bob"
#>
#> $person2$age
#> [1] 30
#>
#> $person2$scores
#> [1] 88 92 87
#>
#> $person2$avg_score
#> [1] 89
#>
#>
#> $person3
#> $person3$name
#> [1] "Charlie"
#>
#> $person3$age
#> [1] 35
#>
#> $person3$scores
#> [1] 95 89 91
#>
#> $person3$avg_score
#> [1] 91.66667
# Flatten nested lists
nested_scores <- map(nested, "scores")
# Drop the outer names before combining: each element holds several scores,
# so a single outer name cannot be assigned to them. list_c() is the
# replacement for the deprecated flatten_dbl(); ptype pins the result type.
list_c(unname(nested_scores), ptype = double())
#> [1] 90 85 92 88 92 87 95 89 9137.9 Comparison with Base R
🎯 Best Practice: purrr vs Base R
# Get means of each column
df <- mtcars[, 1:3]
# Base R
lapply(df, mean)
#> $mpg
#> [1] 20.09062
#>
#> $cyl
#> [1] 6.1875
#>
#> $disp
#> [1] 230.7219
sapply(df, mean)
#> mpg cyl disp
#> 20.09062 6.18750 230.72188
# purrr (type-safe)
map(df, mean)
#> $mpg
#> [1] 20.09062
#>
#> $cyl
#> [1] 6.1875
#>
#> $disp
#> [1] 230.7219
map_dbl(df, mean)
#> mpg cyl disp
#> 20.09062 6.18750 230.72188
# Multiple inputs
# Base R
mapply(function(x, y) x + y, 1:3, 4:6)
#> [1] 5 7 9
# purrr (cleaner)
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9
# Error handling
# Base R
tryCatch(as.numeric("a"), error = function(e) NA)
#> Warning in doTryCatch(return(expr), name, parentenv, handler): NAs introduced
#> by coercion
#> [1] NA
# purrr (composable)
possibly(as.numeric, NA)("a")
#> Warning in .f(...): NAs introduced by coercion
#> [1] NA
# Advantages of purrr:
# - Type safety
# - Consistent interface
# - Better error handling
# - Cleaner syntax
# - Composable functions37.10 Real-World Examples
🎯 Best Practice: Practical Uses
library(dplyr)
# 1. Read multiple files
# files <- list.files(pattern = "\\.csv$")
# data <- map_df(files, read_csv, .id = "file")
# 2. Fit multiple models
models <- mtcars %>%
split(.$cyl) %>%
map(~ lm(mpg ~ hp, data = .))
# Get R-squared from each
map_dbl(models, ~ summary(.)$r.squared)
#> 4 6 8
#> 0.27405583 0.01614624 0.08044919
# 3. Extract nested information
results <- list(
model1 = list(coef = c(1.5, 2.3), r2 = 0.85),
model2 = list(coef = c(1.8, 2.1), r2 = 0.90),
model3 = list(coef = c(1.2, 2.5), r2 = 0.78)
)
map_dbl(results, "r2")
#> model1 model2 model3
#> 0.85 0.90 0.78
map(results, "coef")
#> $model1
#> [1] 1.5 2.3
#>
#> $model2
#> [1] 1.8 2.1
#>
#> $model3
#> [1] 1.2 2.5
# 4. Validate data
data_list <- list(
df1 = data.frame(x = 1:3, y = 4:6),
df2 = data.frame(x = 1:3), # Missing y
df3 = data.frame(x = 1:3, y = 4:6)
)
# Check all have 'y' column
map_lgl(data_list, ~ "y" %in% names(.))
#> df1 df2 df3
#> TRUE FALSE TRUE
# Filter to valid ones
valid_data <- keep(data_list, ~ "y" %in% names(.))
length(valid_data)
#> [1] 237.11 Summary
Key Takeaways:
- Type-safe mapping - Use map_dbl(), map_chr(), etc.
- Formula syntax - Clean with ~ .x
- Error handling - possibly(), safely(), quietly()
- Multiple inputs - map2(), pmap()
- Powerful helpers - keep(), discard(), reduce()
- Nested data - Easy extraction with list indexing
- Consistent interface - All functions work similarly
Quick Reference:
| Function | Purpose | Returns |
|---|---|---|
| map() | Apply to each | List |
| map_dbl() | Apply to each | Numeric vector |
| map_chr() | Apply to each | Character vector |
| map_int() | Apply to each | Integer vector |
| map_lgl() | Apply to each | Logical vector |
| map2() | Two inputs | List/vector |
| pmap() | Multiple inputs | List/vector |
| walk() | Side effects only | Input invisibly |
Common Patterns:
# Basic mapping
map(list, function)
map_dbl(list, ~ .x * 2)
# Multiple inputs
map2(x, y, ~ .x + .y)
pmap(list(x, y, z), function(x, y, z) x + y + z)
# Error handling
possibly(function, otherwise = NA)
safely(function)
# Filtering
keep(list, is.numeric)
discard(list, is.na)
# Detection
some(list, is.numeric)
every(list, is.numeric)
# Reduction
reduce(list, `+`)
accumulate(list, `+`)
# Extraction
map(nested_list, "element_name")
map(nested_list, list("level1", "level2"))Best Practices: