Chapter 36 apply Family & Iteration
What You’ll Learn:
- apply, lapply, sapply, mapply
- Common iteration errors
- When to use each function
- Performance considerations
- Alternative approaches
Key Errors Covered: 20+ iteration errors
Difficulty: ⭐⭐⭐ Advanced
36.1 Introduction
R’s apply family replaces explicit loops with function calls that iterate over vectors, lists, and arrays for you:
# Instead of loops
result <- numeric(nrow(mtcars))
for (i in 1:nrow(mtcars)) {
result[i] <- mean(as.numeric(mtcars[i, ]))
}
# Use apply
# NOTE: apply() first converts the data frame to a matrix, which can hold only
# one type. The warnings and NA results below indicate that mtcars contains a
# non-numeric column in this session (a cyl_factor column appears in later
# output), so every row reaches mean() as character data. Keeping only the
# numeric columns first, e.g. mtcars[sapply(mtcars, is.numeric)], avoids this.
result <- apply(mtcars, 1, mean)
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
#> Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
#> returning NA
head(result)
#> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
#> NA NA NA NA
#> Hornet Sportabout Valiant
#> NA NA
Let’s master the apply family!
36.2 apply() - Arrays and Matrices
💡 Key Insight: apply() for Matrices
# Create matrix
mat <- matrix(1:12, nrow = 3, ncol = 4)
mat
#> [,1] [,2] [,3] [,4]
#> [1,] 1 4 7 10
#> [2,] 2 5 8 11
#> [3,] 3 6 9 12
# Apply to rows (MARGIN = 1)
row_sums <- apply(mat, 1, sum)
row_sums
#> [1] 22 26 30
# Apply to columns (MARGIN = 2)
col_means <- apply(mat, 2, mean)
col_means
#> [1] 2 5 8 11
# Custom function
apply(mat, 1, function(x) max(x) - min(x))
#> [1] 9 9 9
# With additional arguments
apply(mat, 2, sum, na.rm = TRUE)
#> [1] 6 15 24 33
# Both dimensions
apply(mat, c(1, 2), sqrt)
#> [,1] [,2] [,3] [,4]
#> [1,] 1.000000 2.000000 2.645751 3.162278
#> [2,] 1.414214 2.236068 2.828427 3.316625
#> [3,] 1.732051 2.449490 3.000000 3.464102
36.3 Error #1: dim(X) must have a positive length
⭐ BEGINNER 🔢 TYPE
36.3.1 The Error
# Try apply on vector
vec <- 1:10
apply(vec, 1, sum)
#> Error in apply(vec, 1, sum): dim(X) must have a positive length
🔴 ERROR
Error in apply(vec, 1, sum) : dim(X) must have a positive length
36.3.3 Solutions
✅ SOLUTION 1: Use lapply() or sapply()
# For vectors, use lapply/sapply
vec <- 1:10
# Wrong: apply(vec, 1, sqrt)
# Right: use sapply
sapply(vec, sqrt)
#> [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
#> [9] 3.000000 3.162278
# Or vectorized operation
sqrt(vec)
#> [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
#> [9] 3.000000 3.162278
36.4 lapply() - Lists
💡 Key Insight: lapply() Always Returns List
# Create list
my_list <- list(a = 1:5, b = 6:10, c = 11:15)
# Apply function to each element
result <- lapply(my_list, mean)
result
#> $a
#> [1] 3
#>
#> $b
#> [1] 8
#>
#> $c
#> [1] 13
# With custom function
lapply(my_list, function(x) sum(x^2))
#> $a
#> [1] 55
#>
#> $b
#> [1] 330
#>
#> $c
#> [1] 855
# On data frame (df is a list of columns)
lapply(mtcars[, 1:3], mean)
#> $mpg
#> [1] 20.09062
#>
#> $cyl
#> [1] 6.1875
#>
#> $disp
#> [1] 230.7219
# Extract elements
lapply(my_list, `[`, 1:2)
#> $a
#> [1] 1 2
#>
#> $b
#> [1] 6 7
#>
#> $c
#> [1] 11 12
# With multiple arguments
lapply(my_list, sum, na.rm = TRUE)
#> $a
#> [1] 15
#>
#> $b
#> [1] 40
#>
#> $c
#> [1] 65
36.5 sapply() - Simplified
💡 Key Insight: sapply() Simplifies Output
my_list <- list(a = 1:5, b = 6:10, c = 11:15)
# lapply returns list
lapply(my_list, mean)
#> $a
#> [1] 3
#>
#> $b
#> [1] 8
#>
#> $c
#> [1] 13
# sapply returns vector
sapply(my_list, mean)
#> a b c
#> 3 8 13
# sapply with matrix output
sapply(my_list, function(x) c(mean = mean(x), sd = sd(x)))
#> a b c
#> mean 3.000000 8.000000 13.000000
#> sd 1.581139 1.581139 1.581139
# sapply() only falls back to a list when the results cannot be combined
# (e.g. they differ in length). Every summary() here has six values, so the
# results ARE simplified - into a matrix with one column per list element.
sapply(my_list, summary) # Returns a matrix here, not a list
#> a b c
#> Min. 1 6 11
#> 1st Qu. 2 7 12
#> Median 3 8 13
#> Mean 3 8 13
#> 3rd Qu. 4 9 14
#> Max. 5 10 15
36.6 Error #2: Unexpected Output Type
⭐⭐ INTERMEDIATE 🔢 TYPE
36.6.1 The Problem
# sapply behavior depends on output
result1 <- sapply(1:3, function(x) x)
result1
#> [1] 1 2 3
class(result1) # vector
#> [1] "integer"
result2 <- sapply(1:3, function(x) c(x, x^2))
result2
#> [,1] [,2] [,3]
#> [1,] 1 2 3
#> [2,] 1 4 9
class(result2) # matrix
#> [1] "matrix" "array"
result3 <- sapply(1:3, function(x) list(x, x^2))
result3
#> [,1] [,2] [,3]
#> [1,] 1 2 3
#> [2,] 1 4 9
class(result3) # not a plain list: the list results were simplified into a matrix
#> [1] "matrix" "array"
36.6.2 Solutions
✅ SOLUTION 1: Use vapply() for Type Safety
# Specify output type
result <- vapply(1:3, function(x) x * 2, FUN.VALUE = numeric(1))
result
#> [1] 2 4 6
# Will error if output doesn't match
# vapply(1:3, function(x) c(x, x^2), FUN.VALUE = numeric(1))
# For multiple values
result <- vapply(1:3, function(x) c(x, x^2), FUN.VALUE = numeric(2))
result
#> [,1] [,2] [,3]
#> [1,] 1 2 3
#> [2,] 1 4 9
36.7 mapply() - Multiple Arguments
💡 Key Insight: mapply() for Parallel Iteration
# Apply function to multiple vectors in parallel
mapply(function(x, y) x + y,
x = 1:5,
y = 6:10)
#> [1] 7 9 11 13 15
# Multiple arguments
mapply(rep,
x = 1:4,
times = 4:1)
#> [[1]]
#> [1] 1 1 1 1
#>
#> [[2]]
#> [1] 2 2 2
#>
#> [[3]]
#> [1] 3 3
#>
#> [[4]]
#> [1] 4
# With data frames
df1 <- data.frame(a = 1:3, b = 4:6)
df2 <- data.frame(c = 7:9, d = 10:12)
mapply(function(x, y) x + y, df1$a, df2$c)
#> [1] 8 10 12
# MoreArgs for constant arguments
mapply(function(x, y, z) x + y + z,
x = 1:3,
y = 4:6,
MoreArgs = list(z = 10))
#> [1] 15 17 19
36.8 tapply() - Grouped Apply
💡 Key Insight: tapply() for Split-Apply-Combine
# Apply function by groups
tapply(mtcars$mpg, mtcars$cyl, mean)
#> 4 6 8
#> 26.66364 19.74286 15.10000
# Multiple grouping variables
tapply(mtcars$mpg,
list(Cyl = mtcars$cyl, Gear = mtcars$gear),
mean)
#> Gear
#> Cyl 3 4 5
#> 4 21.50 26.925 28.2
#> 6 19.75 19.750 19.7
#> 8 15.05 NA 15.4
# With custom function
tapply(mtcars$mpg, mtcars$cyl,
function(x) c(mean = mean(x), sd = sd(x)))
#> $`4`
#> mean sd
#> 26.663636 4.509828
#>
#> $`6`
#> mean sd
#> 19.742857 1.453567
#>
#> $`8`
#> mean sd
#> 15.100000 2.560048
# Like dplyr group_by + summarize
library(dplyr)
mtcars %>%
group_by(cyl) %>%
summarize(mean_mpg = mean(mpg))
#> # A tibble: 3 × 2
#> cyl mean_mpg
#> <dbl> <dbl>
#> 1 4 26.7
#> 2 6 19.7
#> 3 8 15.1
36.9 Common Patterns
🎯 Best Practice: Choose Right Function
# Pattern 1: Apply to each column of data frame
lapply(mtcars[, 1:3], mean)
#> $mpg
#> [1] 20.09062
#>
#> $cyl
#> [1] 6.1875
#>
#> $disp
#> [1] 230.7219
sapply(mtcars[, 1:3], mean)
#> mpg cyl disp
#> 20.09062 6.18750 230.72188
# Pattern 2: Apply to each row
apply(mtcars[, 1:3], 1, sum)
#> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
#> 187.0 187.0 134.8 285.4
#> Hornet Sportabout Valiant Duster 360 Merc 240D
#> 386.7 249.1 382.3 175.1
#> Merc 230 Merc 280 Merc 280C Merc 450SE
#> 167.6 192.8 191.4 300.2
#> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
#> 301.1 299.0 490.4 478.4
#> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
#> 462.7 115.1 110.1 109.0
#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
#> 145.6 341.5 327.2 371.3
#> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
#> 427.2 110.3 150.3 129.5
#> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
#> 374.8 170.7 324.0 146.4
# Pattern 3: Apply with multiple inputs
mapply(function(x, y) x / y,
x = mtcars$hp,
y = mtcars$wt)
#> [1] 41.98473 38.26087 40.08621 34.21462 50.87209 30.34682 68.62745 19.43574
#> [9] 30.15873 35.75581 35.75581 44.22604 48.25737 47.61905 39.04762 39.63864
#> [17] 43.03087 30.00000 32.19814 35.42234 39.35091 42.61364 43.66812 63.80208
#> [25] 45.51365 34.10853 42.52336 74.68605 83.28076 63.17690 93.83754 39.20863
# Pattern 4: Apply by groups
tapply(mtcars$mpg, mtcars$cyl, mean)
#> 4 6 8
#> 26.66364 19.74286 15.10000
# Pattern 5: Nested lists
nested <- list(
a = list(x = 1:3, y = 4:6),
b = list(x = 7:9, y = 10:12)
)
# Get all 'x' elements
lapply(nested, `[[`, "x")
#> $a
#> [1] 1 2 3
#>
#> $b
#> [1] 7 8 9
# Apply to nested structure
lapply(nested, function(sublist) {
lapply(sublist, mean)
})
#> $a
#> $a$x
#> [1] 2
#>
#> $a$y
#> [1] 5
#>
#>
#> $b
#> $b$x
#> [1] 8
#>
#> $b$y
#> [1] 11
36.10 Performance Considerations
🎯 Best Practice: Vectorize When Possible
# Compare performance
n <- 10000
# Loop (slow)
system.time({
result <- numeric(n)
for (i in 1:n) {
result[i] <- sqrt(i)
}
})
#> user system elapsed
#> 0.008 0.001 0.005
# sapply (better)
system.time({
result <- sapply(1:n, sqrt)
})
#> user system elapsed
#> 0.003 0.000 0.003
# Vectorized (best)
system.time({
result <- sqrt(1:n)
})
#> user system elapsed
#> 0 0 0
# When apply family is appropriate
df <- data.frame(matrix(rnorm(1000), ncol = 10))
# Row-wise operations (apply is good)
system.time({
apply(df, 1, mean)
})
#> user system elapsed
#> 0.000 0.000 0.001
# Column-wise operations (vectorized is better)
system.time({
colMeans(df)
})
#> user system elapsed
#> 0 0 0
36.11 Error #3: Function Not Vectorized
⭐⭐ INTERMEDIATE 🧠 LOGIC
36.11.1 The Problem
# Custom function expecting single value
my_function <- function(x) {
  # Label a single number by its sign. A plain `if` inspects exactly one
  # condition value, so `x` is expected to be a length-one input.
  if (x > 0) {
    return("positive")
  }
  # Zero and negative values both end up here
  "negative"
}
# Doesn't work with vectors
my_function(c(-1, 2, -3, 4))
#> Error in if (x > 0) {: the condition has length > 1
🔴 ERROR
Error in if (x > 0) { : the condition has length > 1 (this was only a warning before R 4.2.0; it has been an error since)
36.11.2 Solutions
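✅ SOLUTION 1: Use sapply/vapply
# Call the scalar function once per element
vapply(c(-1, 2, -3, 4), my_function, character(1))
#> [1] "negative" "positive" "negative" "positive"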
✅ SOLUTION 2: Vectorize the Function
# Make function vectorized
my_function_vec <- Vectorize(my_function)
my_function_vec(c(-1, 2, -3, 4))
#> [1] "negative" "positive" "negative" "positive"
# Or rewrite using ifelse
my_function_better <- function(x) {
  # ifelse() checks the condition element by element, so inputs of any
  # length are labelled in a single call
  is_positive <- x > 0
  ifelse(is_positive, "positive", "negative")
}
my_function_better(c(-1, 2, -3, 4))
#> [1] "negative" "positive" "negative" "positive"
36.12 Alternative: purrr Package
💡 Key Insight: purrr for Modern Iteration
library(purrr)
# map() family (like lapply/sapply)
map(1:3, ~ . * 2) # Returns list
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 6
map_dbl(1:3, ~ . * 2) # Returns numeric
#> [1] 2 4 6
map_chr(1:3, ~ as.character(.)) # Returns character
#> [1] "1" "2" "3"
# map2() for two inputs (like mapply)
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9
# pmap() for multiple inputs
pmap_dbl(list(x = 1:3, y = 4:6, z = 7:9),
function(x, y, z) x + y + z)
#> [1] 12 15 18
# Useful helpers
list(a = 1:3, b = 4:6, c = 7:9) %>%
map_dbl(mean)
#> a b c
#> 2 5 8
# Safe operations
map(c("1", "2", "not_a_number"),
possibly(as.numeric, otherwise = NA))
#> Warning in .Primitive("as.double")(x, ...): NAs introduced by coercion
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] NA
36.13 Summary
Key Takeaways:
- apply() - For matrices/arrays
- lapply() - Always returns list
- sapply() - Simplifies output
- vapply() - Type-safe sapply
- mapply() - Multiple inputs
- tapply() - Grouped operations
- Vectorize when possible - Faster than apply
Quick Reference:
| Function | Input | Output | Use Case |
|---|---|---|---|
| apply() | Matrix/array | Vector/list | Row/column operations |
| lapply() | List/vector | List | Any operation |
| sapply() | List/vector | Vector/matrix (list if it cannot simplify) | When simplified OK |
| vapply() | List/vector | Specified type | Type safety |
| mapply() | Multiple vectors | Vector/list | Parallel iteration |
| tapply() | Vector + groups | Array | Split-apply-combine |
Usage Patterns:
# Matrices
apply(matrix, 1, function) # By row
apply(matrix, 2, function) # By column
# Lists
lapply(list, function) # Returns list
sapply(list, function) # Simplified
vapply(list, function, type) # Type-safe
# Multiple inputs
mapply(function, x, y)
# Grouped
tapply(values, groups, function)
# Modern alternative
library(purrr)
map(list, function) # Like lapply
map_dbl(list, function) # Like sapply, but always returns a numeric vector
Best Practices:
36.14 Exercises
📝 Exercise 1: Apply Practice
Using mtcars:
1. Calculate row means using apply
2. Calculate column medians using apply
3. Find max value in each row
4. Compare performance with vectorized versions
📝 Exercise 2: Custom Function with Apply
Write a function to:
1. Take a data frame
2. For each numeric column, calculate mean, sd, min, max
3. Return the result as a data frame
4. Use an appropriate apply function
📝 Exercise 3: mapply Practice
Create two vectors and:
1. Add them element-wise with mapply
2. Use a custom function with multiple arguments
3. Compare with the vectorized approach
36.15 Exercise Answers
Click to see answers
Exercise 1:
# apply() and rowMeans() both need purely numeric input: apply() converts the
# data frame to a single-type matrix, so one non-numeric column (for example a
# factor copy of cyl added earlier in the session) turns every value into a
# string and all the arithmetic below fails. Keep only the numeric columns.
mtcars_num <- mtcars[vapply(mtcars, is.numeric, logical(1))]

# 1. Row means
row_means_apply <- apply(mtcars_num, 1, mean)
row_means_vec <- rowMeans(mtcars_num)
all.equal(row_means_apply, row_means_vec)
#> [1] TRUE
# 2. Column medians
col_medians <- apply(mtcars_num, 2, median)
col_medians
#>     mpg     cyl    disp      hp    drat      wt    qsec      vs      am    gear
#>  19.200   6.000 196.300 123.000   3.695   3.325  17.710   0.000   0.000   4.000
#>    carb
#>   2.000
# 3. Max in each row
row_max <- apply(mtcars_num, 1, max)
head(row_max)
#>         Mazda RX4     Mazda RX4 Wag        Datsun 710    Hornet 4 Drive
#>               160               160               108               258
#> Hornet Sportabout           Valiant
#>               360               225
# 4. Performance comparison
# microbenchmark is an add-on package; skip the timing if it is not installed.
# Timings depend on the machine, so no output is reproduced here.
if (requireNamespace("microbenchmark", quietly = TRUE)) {
  library(microbenchmark)
  print(microbenchmark(
    apply = apply(mtcars_num, 1, mean),
    rowMeans = rowMeans(mtcars_num),
    times = 100
  ))
}
# Vectorized is much faster!
Exercise 2:
# Summarise every numeric column of a data frame.
#
# Args:
#   df: a data frame; non-numeric columns are ignored.
# Returns:
#   A data frame with one row per numeric column and the columns
#   variable, mean, sd, min, max (NAs are removed before each statistic).
#   If df has no numeric columns, a zero-row data frame with the same
#   columns is returned.
summarize_numeric <- function(df) {
  stopifnot(is.data.frame(df))

  # vapply() always yields a logical vector, even for a zero-column input
  numeric_cols <- vapply(df, is.numeric, logical(1))
  # List-style indexing keeps a data frame even when only one column matches;
  # df[, numeric_cols] would collapse a single column to a bare vector and the
  # statistics below would then be computed per element instead of per column
  df_numeric <- df[numeric_cols]

  # Nothing to summarise: return the documented shape with zero rows
  if (length(df_numeric) == 0) {
    return(data.frame(
      variable = character(0),
      mean = numeric(0),
      sd = numeric(0),
      min = numeric(0),
      max = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # Calculate statistics for each column
  stats <- lapply(df_numeric, function(col) {
    c(
      mean = mean(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      min = min(col, na.rm = TRUE),
      max = max(col, na.rm = TRUE)
    )
  })

  # Stack the per-column vectors into rows; row names carry the column names
  result <- as.data.frame(do.call(rbind, stats))
  result$variable <- rownames(result)
  rownames(result) <- NULL
  result[, c("variable", "mean", "sd", "min", "max")]
}
# Test
summarize_numeric(mtcars)
#> variable mean sd min max
#> 1 mpg 20.090625 6.0269481 10.400 33.900
#> 2 cyl 6.187500 1.7859216 4.000 8.000
#> 3 disp 230.721875 123.9386938 71.100 472.000
#> 4 hp 146.687500 68.5628685 52.000 335.000
#> 5 drat 3.596563 0.5346787 2.760 4.930
#> 6 wt 3.217250 0.9784574 1.513 5.424
#> 7 qsec 17.848750 1.7869432 14.500 22.900
#> 8 vs 0.437500 0.5040161 0.000 1.000
#> 9 am 0.406250 0.4989909 0.000 1.000
#> 10 gear 3.687500 0.7378041 3.000 5.000
#> 11 carb 2.812500 1.6152000 1.000 8.000
# Alternative using vapply for type safety
# Type-safe variant of the numeric summary: vapply() enforces that every
# column yields exactly four numeric statistics.
#
# Args:
#   df: a data frame; non-numeric columns are ignored.
# Returns:
#   A data frame with one row per numeric column and the columns
#   variable, mean, sd, min, max (NAs are removed before each statistic).
#   If df has no numeric columns, a zero-row data frame with the same
#   columns is returned.
summarize_numeric_safe <- function(df) {
  stopifnot(is.data.frame(df))

  numeric_cols <- vapply(df, is.numeric, logical(1))
  # Keep a data frame even for a single match; df[, numeric_cols] would drop
  # to a vector and vapply() would then iterate over individual values
  df_numeric <- df[numeric_cols]

  # Nothing to summarise: return the documented shape with zero rows
  if (length(df_numeric) == 0) {
    return(data.frame(
      variable = character(0),
      mean = numeric(0),
      sd = numeric(0),
      min = numeric(0),
      max = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # Result is a 4 x n matrix: one column per variable, one row per statistic
  stats <- vapply(df_numeric, function(col) {
    c(
      mean = mean(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      min = min(col, na.rm = TRUE),
      max = max(col, na.rm = TRUE)
    )
  }, FUN.VALUE = numeric(4))

  # Transpose so that each variable becomes a row
  result <- as.data.frame(t(stats))
  result$variable <- rownames(result)
  rownames(result) <- NULL
  result[, c("variable", "mean", "sd", "min", "max")]
}
summarize_numeric_safe(iris)
#> variable mean sd min max
#> 1 Sepal.Length 5.843333 0.8280661 4.3 7.9
#> 2 Sepal.Width 3.057333 0.4358663 2.0 4.4
#> 3 Petal.Length 3.758000 1.7652982 1.0 6.9
#> 4 Petal.Width 1.199333 0.7622377 0.1 2.5
Exercise 3:
# Create vectors
x <- 1:10
y <- 11:20
# 1. Add with mapply
result_mapply <- mapply(function(a, b) a + b, x, y)
result_mapply
#> [1] 12 14 16 18 20 22 24 26 28 30
# 2. Custom function
weighted_sum <- function(a, b, weight = 0.5) {
  # Blend a and b: `weight` is the share given to a, the remainder goes to b
  share_a <- a * weight
  share_b <- b * (1 - weight)
  share_a + share_b
}
result_custom <- mapply(weighted_sum, x, y, MoreArgs = list(weight = 0.3))
result_custom
#> [1] 8 9 10 11 12 13 14 15 16 17
# 3. Compare with vectorized
result_vec <- x + y
all.equal(result_mapply, result_vec)
#> [1] TRUE
# Performance
microbenchmark(
mapply = mapply(`+`, x, y),
vectorized = x + y,
times = 1000
)
#> Unit: nanoseconds
#> expr min lq mean median uq max neval
#> mapply 11878 12636 14675.229 13797.5 14260.5 107160 1000
#> vectorized 150 187 242.601 202.0 221.0 25582 1000
# Vectorized is MUCH faster