Chapter 36 apply Family & Iteration

What You’ll Learn:

  • apply, lapply, sapply, mapply
  • Common iteration errors
  • When to use each function
  • Performance considerations
  • Alternative approaches

Key Errors Covered: 20+ iteration errors

Difficulty: ⭐⭐⭐ Advanced

36.1 Introduction

R’s apply family replaces explicit loops with functions that iterate for you (true vectorized functions, covered later, are faster still):

# apply() coerces a data frame to a matrix, and a single non-numeric column
# (e.g. a factor added earlier in the session) turns that matrix into
# character, making every mean NA. Keep only the numeric columns first.
mtcars_num <- mtcars[vapply(mtcars, is.numeric, logical(1))]

# Instead of loops
result <- numeric(nrow(mtcars_num))
# seq_len() is safe even for a zero-row data frame (1:0 would run twice)
for (i in seq_len(nrow(mtcars_num))) {
  result[i] <- mean(as.numeric(mtcars_num[i, ]))
}

# Use apply
result <- apply(mtcars_num, 1, mean)
head(result)
#>         Mazda RX4     Mazda RX4 Wag        Datsun 710    Hornet 4 Drive 
#>          29.90727          29.98136          23.59818          38.73955 
#> Hornet Sportabout           Valiant 
#>          53.66455          35.04909

Let’s master the apply family!

36.2 apply() - Arrays and Matrices

💡 Key Insight: apply() for Matrices

# Create matrix
mat <- matrix(1:12, nrow = 3, ncol = 4)
mat
#>      [,1] [,2] [,3] [,4]
#> [1,]    1    4    7   10
#> [2,]    2    5    8   11
#> [3,]    3    6    9   12

# Apply to rows (MARGIN = 1)
row_sums <- apply(mat, 1, sum)
row_sums
#> [1] 22 26 30

# Apply to columns (MARGIN = 2)
col_means <- apply(mat, 2, mean)
col_means
#> [1]  2  5  8 11

# Custom function
apply(mat, 1, function(x) max(x) - min(x))
#> [1] 9 9 9

# With additional arguments
apply(mat, 2, sum, na.rm = TRUE)
#> [1]  6 15 24 33

# Both dimensions
apply(mat, c(1, 2), sqrt)
#>          [,1]     [,2]     [,3]     [,4]
#> [1,] 1.000000 2.000000 2.645751 3.162278
#> [2,] 1.414214 2.236068 2.828427 3.316625
#> [3,] 1.732051 2.449490 3.000000 3.464102

36.3 Error #1: dim(X) must have a positive length

⭐ BEGINNER 🔢 TYPE

36.3.1 The Error

# Try apply on vector
vec <- 1:10
apply(vec, 1, sum)
#> Error in apply(vec, 1, sum): dim(X) must have a positive length

🔴 ERROR

Error in apply(vec, 1, sum) : dim(X) must have a positive length

36.3.2 What It Means

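apply() requires an object with a dim attribute (a matrix, array, or data frame); a plain vector has no dimensions, so there is no margin to iterate over.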

36.3.3 Solutions

SOLUTION 1: Use lapply() or sapply()

# For vectors, use lapply/sapply
vec <- 1:10

# Wrong: apply(vec, 1, sqrt)

# Right: use sapply
sapply(vec, sqrt)
#>  [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
#>  [9] 3.000000 3.162278

# Or vectorized operation
sqrt(vec)
#>  [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
#>  [9] 3.000000 3.162278

SOLUTION 2: Convert to Matrix

# If you really need apply
vec <- 1:10
mat <- matrix(vec, ncol = 1)
apply(mat, 1, function(x) x * 2)
#>  [1]  2  4  6  8 10 12 14 16 18 20

# But this is unnecessary - use vectorization
vec * 2
#>  [1]  2  4  6  8 10 12 14 16 18 20

36.4 lapply() - Lists

💡 Key Insight: lapply() Always Returns List

# Create list
my_list <- list(a = 1:5, b = 6:10, c = 11:15)

# Apply function to each element
result <- lapply(my_list, mean)
result
#> $a
#> [1] 3
#> 
#> $b
#> [1] 8
#> 
#> $c
#> [1] 13

# With custom function
lapply(my_list, function(x) sum(x^2))
#> $a
#> [1] 55
#> 
#> $b
#> [1] 330
#> 
#> $c
#> [1] 855

# On data frame (df is a list of columns)
lapply(mtcars[, 1:3], mean)
#> $mpg
#> [1] 20.09062
#> 
#> $cyl
#> [1] 6.1875
#> 
#> $disp
#> [1] 230.7219

# Extract elements
lapply(my_list, `[`, 1:2)
#> $a
#> [1] 1 2
#> 
#> $b
#> [1] 6 7
#> 
#> $c
#> [1] 11 12

# With multiple arguments
lapply(my_list, sum, na.rm = TRUE)
#> $a
#> [1] 15
#> 
#> $b
#> [1] 40
#> 
#> $c
#> [1] 65

36.5 sapply() - Simplified

💡 Key Insight: sapply() Simplifies Output

my_list <- list(a = 1:5, b = 6:10, c = 11:15)

# lapply returns list
lapply(my_list, mean)
#> $a
#> [1] 3
#> 
#> $b
#> [1] 8
#> 
#> $c
#> [1] 13

# sapply returns vector
sapply(my_list, mean)
#>  a  b  c 
#>  3  8 13

# sapply with matrix output
sapply(my_list, function(x) c(mean = mean(x), sd = sd(x)))
#>             a        b         c
#> mean 3.000000 8.000000 13.000000
#> sd   1.581139 1.581139  1.581139

# Equal-length results are also simplified to a matrix:
# summary() returns six values for every element
sapply(my_list, summary)  # Returns matrix
#>         a  b  c
#> Min.    1  6 11
#> 1st Qu. 2  7 12
#> Median  3  8 13
#> Mean    3  8 13
#> 3rd Qu. 4  9 14
#> Max.    5 10 15

# When can't simplify (results differ in length), returns list
sapply(my_list, function(x) x[x %% 2 == 0])  # Returns list
#> $a
#> [1] 2 4
#> 
#> $b
#> [1]  6  8 10
#> 
#> $c
#> [1] 12 14

36.6 Error #2: Unexpected Output Type

⭐⭐ INTERMEDIATE 🔢 TYPE

36.6.1 The Problem

# sapply behavior depends on output
result1 <- sapply(1:3, function(x) x)
result1
#> [1] 1 2 3
class(result1)  # vector
#> [1] "integer"

result2 <- sapply(1:3, function(x) c(x, x^2))
result2
#>      [,1] [,2] [,3]
#> [1,]    1    2    3
#> [2,]    1    4    9
class(result2)  # matrix
#> [1] "matrix" "array"

result3 <- sapply(1:3, function(x) list(x, x^2))
result3
#>      [,1] [,2] [,3]
#> [1,] 1    2    3   
#> [2,] 1    4    9
class(result3)  # NOT a plain list: a matrix whose cells are list elements (typeof(result3) is "list")
#> [1] "matrix" "array"

36.6.2 Solutions

SOLUTION 1: Use vapply() for Type Safety

# Specify output type
result <- vapply(1:3, function(x) x * 2, FUN.VALUE = numeric(1))
result
#> [1] 2 4 6

# Will error if output doesn't match
# vapply(1:3, function(x) c(x, x^2), FUN.VALUE = numeric(1))

# For multiple values
result <- vapply(1:3, function(x) c(x, x^2), FUN.VALUE = numeric(2))
result
#>      [,1] [,2] [,3]
#> [1,]    1    2    3
#> [2,]    1    4    9

SOLUTION 2: Use lapply() and Post-Process

# Always get list, then convert
result <- lapply(1:3, function(x) x * 2)
unlist(result)
#> [1] 2 4 6

# Or use do.call. do.call() evaluates its first argument like any other
# value, so a non-function variable named `c` created earlier in the session
# would be picked up instead of the function and trigger
# "'what' must be a function or character string". Spelling out base::c
# avoids that shadowing.
do.call(base::c, result)
#> [1] 2 4 6

36.7 mapply() - Multiple Arguments

💡 Key Insight: mapply() for Parallel Iteration

# Apply function to multiple vectors in parallel
mapply(function(x, y) x + y, 
       x = 1:5, 
       y = 6:10)
#> [1]  7  9 11 13 15

# Multiple arguments
mapply(rep, 
       x = 1:4, 
       times = 4:1)
#> [[1]]
#> [1] 1 1 1 1
#> 
#> [[2]]
#> [1] 2 2 2
#> 
#> [[3]]
#> [1] 3 3
#> 
#> [[4]]
#> [1] 4

# With data frames
df1 <- data.frame(a = 1:3, b = 4:6)
df2 <- data.frame(c = 7:9, d = 10:12)

mapply(function(x, y) x + y, df1$a, df2$c)
#> [1]  8 10 12

# MoreArgs for constant arguments
mapply(function(x, y, z) x + y + z,
       x = 1:3,
       y = 4:6,
       MoreArgs = list(z = 10))
#> [1] 15 17 19

36.8 tapply() - Grouped Apply

💡 Key Insight: tapply() for Split-Apply-Combine

# Apply function by groups
tapply(mtcars$mpg, mtcars$cyl, mean)
#>        4        6        8 
#> 26.66364 19.74286 15.10000

# Multiple grouping variables
tapply(mtcars$mpg, 
       list(Cyl = mtcars$cyl, Gear = mtcars$gear), 
       mean)
#>    Gear
#> Cyl     3      4    5
#>   4 21.50 26.925 28.2
#>   6 19.75 19.750 19.7
#>   8 15.05     NA 15.4

# With custom function
tapply(mtcars$mpg, mtcars$cyl, 
       function(x) c(mean = mean(x), sd = sd(x)))
#> $`4`
#>      mean        sd 
#> 26.663636  4.509828 
#> 
#> $`6`
#>      mean        sd 
#> 19.742857  1.453567 
#> 
#> $`8`
#>      mean        sd 
#> 15.100000  2.560048

# Like dplyr group_by + summarize
library(dplyr)
mtcars %>%
  group_by(cyl) %>%
  summarize(mean_mpg = mean(mpg))
#> # A tibble: 3 × 2
#>     cyl mean_mpg
#>   <dbl>    <dbl>
#> 1     4     26.7
#> 2     6     19.7
#> 3     8     15.1

36.9 Common Patterns

🎯 Best Practice: Choose Right Function

# Pattern 1: Apply to each column of data frame
lapply(mtcars[, 1:3], mean)
#> $mpg
#> [1] 20.09062
#> 
#> $cyl
#> [1] 6.1875
#> 
#> $disp
#> [1] 230.7219
sapply(mtcars[, 1:3], mean)
#>       mpg       cyl      disp 
#>  20.09062   6.18750 230.72188

# Pattern 2: Apply to each row
apply(mtcars[, 1:3], 1, sum)
#>           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
#>               187.0               187.0               134.8               285.4 
#>   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
#>               386.7               249.1               382.3               175.1 
#>            Merc 230            Merc 280           Merc 280C          Merc 450SE 
#>               167.6               192.8               191.4               300.2 
#>          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
#>               301.1               299.0               490.4               478.4 
#>   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
#>               462.7               115.1               110.1               109.0 
#>       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
#>               145.6               341.5               327.2               371.3 
#>    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
#>               427.2               110.3               150.3               129.5 
#>      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
#>               374.8               170.7               324.0               146.4

# Pattern 3: Apply with multiple inputs
mapply(function(x, y) x / y,
       x = mtcars$hp,
       y = mtcars$wt)
#>  [1] 41.98473 38.26087 40.08621 34.21462 50.87209 30.34682 68.62745 19.43574
#>  [9] 30.15873 35.75581 35.75581 44.22604 48.25737 47.61905 39.04762 39.63864
#> [17] 43.03087 30.00000 32.19814 35.42234 39.35091 42.61364 43.66812 63.80208
#> [25] 45.51365 34.10853 42.52336 74.68605 83.28076 63.17690 93.83754 39.20863

# Pattern 4: Apply by groups
tapply(mtcars$mpg, mtcars$cyl, mean)
#>        4        6        8 
#> 26.66364 19.74286 15.10000

# Pattern 5: Nested lists
nested <- list(
  a = list(x = 1:3, y = 4:6),
  b = list(x = 7:9, y = 10:12)
)

# Get all 'x' elements
lapply(nested, `[[`, "x")
#> $a
#> [1] 1 2 3
#> 
#> $b
#> [1] 7 8 9

# Apply to nested structure
lapply(nested, function(sublist) {
  lapply(sublist, mean)
})
#> $a
#> $a$x
#> [1] 2
#> 
#> $a$y
#> [1] 5
#> 
#> 
#> $b
#> $b$x
#> [1] 8
#> 
#> $b$y
#> [1] 11

36.10 Performance Considerations

🎯 Best Practice: Vectorize When Possible

# Compare performance
n <- 10000

# Loop (slow)
system.time({
  result <- numeric(n)
  for (i in 1:n) {
    result[i] <- sqrt(i)
  }
})
#>    user  system elapsed 
#>   0.008   0.001   0.005

# sapply (better)
system.time({
  result <- sapply(1:n, sqrt)
})
#>    user  system elapsed 
#>   0.003   0.000   0.003

# Vectorized (best)
system.time({
  result <- sqrt(1:n)
})
#>    user  system elapsed 
#>       0       0       0

# When apply family is appropriate
df <- data.frame(matrix(rnorm(1000), ncol = 10))

# Row-wise operations (apply is good)
system.time({
  apply(df, 1, mean)
})
#>    user  system elapsed 
#>   0.000   0.000   0.001

# Column-wise operations (vectorized is better)
system.time({
  colMeans(df)
})
#>    user  system elapsed 
#>       0       0       0

36.11 Error #3: Function Not Vectorized

⭐⭐ INTERMEDIATE 🧠 LOGIC

36.11.1 The Problem

# Custom function expecting single value
my_function <- function(x) {
  # Scalar-only classifier: `if` needs a single TRUE/FALSE, so passing a
  # vector of length > 1 fails. Zero falls into the "negative" branch.
  if (x > 0) {
    label <- "positive"
  } else {
    label <- "negative"
  }
  label
}

# Doesn't work with vectors
my_function(c(-1, 2, -3, 4))
#> Error in if (x > 0) {: the condition has length > 1

🔴 ERROR

Error in if (x > 0) { : the condition has length > 1

(Before R 4.2.0 this was only a warning, and `if` silently used the first element.)

36.11.2 Solutions

SOLUTION 1: Use sapply/vapply

# Apply to each element
sapply(c(-1, 2, -3, 4), my_function)
#> [1] "negative" "positive" "negative" "positive"

# Type-safe version
vapply(c(-1, 2, -3, 4), my_function, FUN.VALUE = character(1))
#> [1] "negative" "positive" "negative" "positive"

SOLUTION 2: Vectorize the Function

# Make function vectorized
my_function_vec <- Vectorize(my_function)
my_function_vec(c(-1, 2, -3, 4))
#> [1] "negative" "positive" "negative" "positive"

# Or rewrite using ifelse
my_function_better <- function(x) {
  # Vectorized classifier: ifelse() picks a label for every element of x
  is_positive <- x > 0
  ifelse(is_positive, yes = "positive", no = "negative")
}

my_function_better(c(-1, 2, -3, 4))
#> [1] "negative" "positive" "negative" "positive"

36.12 Alternative: purrr Package

💡 Key Insight: purrr for Modern Iteration

library(purrr)

# map() family (like lapply/sapply)
map(1:3, ~ . * 2)           # Returns list
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 6
map_dbl(1:3, ~ . * 2)       # Returns numeric
#> [1] 2 4 6
map_chr(1:3, ~ as.character(.))  # Returns character
#> [1] "1" "2" "3"

# map2() for two inputs (like mapply)
map2_dbl(1:3, 4:6, ~ .x + .y)
#> [1] 5 7 9

# pmap() for multiple inputs
pmap_dbl(list(x = 1:3, y = 4:6, z = 7:9), 
         function(x, y, z) x + y + z)
#> [1] 12 15 18

# Useful helpers
list(a = 1:3, b = 4:6, c = 7:9) %>%
  map_dbl(mean)
#> a b c 
#> 2 5 8

# Safe operations
# NOTE: as.numeric() only *warns* on unparseable text (see the warning below),
# so the NA in element 3 comes from the coercion itself. possibly() swaps in
# `otherwise` only when the wrapped function signals an error, which never
# happens in this example.
map(c("1", "2", "not_a_number"), 
    possibly(as.numeric, otherwise = NA))
#> Warning in .Primitive("as.double")(x, ...): NAs introduced by coercion
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] NA

36.13 Summary

Key Takeaways:

  1. apply() - For matrices/arrays
  2. lapply() - Always returns list
  3. sapply() - Simplifies output
  4. vapply() - Type-safe sapply
  5. mapply() - Multiple inputs
  6. tapply() - Grouped operations
  7. Vectorize when possible - Faster than apply

Quick Reference:

Function Input Output Use Case
apply() Matrix/array Vector/list Row/column operations
lapply() List/vector List Any operation
sapply() List/vector Vector/matrix When simplified OK
vapply() List/vector Specified type Type safety
mapply() Multiple vectors Vector/list Parallel iteration
tapply() Vector + groups Array Split-apply-combine

Usage Patterns:

# Matrices
apply(matrix, 1, function)    # By row
apply(matrix, 2, function)    # By column

# Lists
lapply(list, function)        # Returns list
sapply(list, function)        # Simplified
vapply(list, function, type)  # Type-safe

# Multiple inputs
mapply(function, x, y)

# Grouped
tapply(values, groups, function)

# Modern alternative
library(purrr)
map(list, function)           # Like lapply
map_dbl(list, function)       # Like sapply with numeric

Best Practices:

# ✅ Good
Use vectorized operations when possible
Use vapply() for type safety
Choose right function for task
Consider purrr for complex operations

# ❌ Avoid
Using apply() on vectors
Using sapply() when type matters
Growing objects in loops
Unnecessary apply when vectorized solution exists

36.14 Exercises

📝 Exercise 1: Apply Practice

Using the numeric columns of mtcars:

  1. Calculate row means using apply
  2. Calculate column medians using apply
  3. Find max value in each row
  4. Compare performance with vectorized versions

📝 Exercise 2: Custom Function with Apply

Write a function to:

  1. Take a data frame
  2. For each numeric column, calculate mean, sd, min, max
  3. Return as data frame
  4. Use appropriate apply function

📝 Exercise 3: mapply Practice

Create two vectors and:

  1. Add them element-wise with mapply
  2. Use custom function with multiple arguments
  3. Compare with vectorized approach

36.15 Exercise Answers

Click to see answers

Exercise 1:

# apply() converts a data frame to a matrix and rowMeans() insists on numeric
# input, so a single non-numeric column (e.g. a factor added earlier in the
# session) yields NA means, character maxima, or an outright error.
# Work on the numeric columns only.
mtcars_num <- mtcars[vapply(mtcars, is.numeric, logical(1))]

# 1. Row means
row_means_apply <- apply(mtcars_num, 1, mean)
row_means_vec <- rowMeans(mtcars_num)

all.equal(row_means_apply, row_means_vec)
#> [1] TRUE

# 2. Column medians
col_medians <- apply(mtcars_num, 2, median)
col_medians
#>     mpg     cyl    disp      hp    drat      wt    qsec      vs      am    gear 
#>  19.200   6.000 196.300 123.000   3.695   3.325  17.710   0.000   0.000   4.000 
#>    carb 
#>   2.000

# 3. Max in each row
row_max <- apply(mtcars_num, 1, max)
head(row_max)
#>         Mazda RX4     Mazda RX4 Wag        Datsun 710    Hornet 4 Drive 
#>               160               160               108               258 
#> Hornet Sportabout           Valiant 
#>               360               225

# 4. Performance comparison
library(microbenchmark)

# Both expressions need all-numeric input: rowMeans() errors on a data frame
# with a non-numeric column, which would abort the whole benchmark.
mtcars_num <- mtcars[vapply(mtcars, is.numeric, logical(1))]

microbenchmark(
  apply = apply(mtcars_num, 1, mean),
  rowMeans = rowMeans(mtcars_num),
  times = 100
)
# Timings depend on the machine, so no output is shown here; compare the
# median column of the two rows when you run it.

# Vectorized is much faster!

Exercise 2:

# Summarise every numeric column of a data frame.
#
# Args:
#   df: a data frame; non-numeric columns are ignored.
#
# Returns:
#   A data frame with one row per numeric column and the columns
#   variable, mean, sd, min, max (zero rows if df has no numeric columns).
summarize_numeric <- function(df) {
  # vapply() guarantees a logical vector even for a zero-column data frame
  numeric_cols <- vapply(df, is.numeric, FUN.VALUE = logical(1))
  # drop = FALSE keeps a data frame when exactly one column is numeric;
  # without it the column collapses to a vector and lapply() would walk
  # over its individual values instead of over columns
  df_numeric <- df[, numeric_cols, drop = FALSE]

  # Nothing to summarise: return an empty frame with the usual columns
  if (ncol(df_numeric) == 0L) {
    return(data.frame(
      variable = character(0),
      mean = numeric(0),
      sd = numeric(0),
      min = numeric(0),
      max = numeric(0)
    ))
  }

  # Calculate statistics for each column
  stats <- lapply(df_numeric, function(col) {
    c(
      mean = mean(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      min = min(col, na.rm = TRUE),
      max = max(col, na.rm = TRUE)
    )
  })

  # Stack the per-column vectors into rows; row names carry the column names
  result <- as.data.frame(do.call(rbind, stats))
  result$variable <- rownames(result)
  rownames(result) <- NULL

  result[, c("variable", "mean", "sd", "min", "max")]
}

# Test
summarize_numeric(mtcars)
#>    variable       mean          sd    min     max
#> 1       mpg  20.090625   6.0269481 10.400  33.900
#> 2       cyl   6.187500   1.7859216  4.000   8.000
#> 3      disp 230.721875 123.9386938 71.100 472.000
#> 4        hp 146.687500  68.5628685 52.000 335.000
#> 5      drat   3.596563   0.5346787  2.760   4.930
#> 6        wt   3.217250   0.9784574  1.513   5.424
#> 7      qsec  17.848750   1.7869432 14.500  22.900
#> 8        vs   0.437500   0.5040161  0.000   1.000
#> 9        am   0.406250   0.4989909  0.000   1.000
#> 10     gear   3.687500   0.7378041  3.000   5.000
#> 11     carb   2.812500   1.6152000  1.000   8.000

# Alternative using vapply for type safety
# Type-safe variant of the numeric summary, built on vapply().
#
# Args:
#   df: a data frame; non-numeric columns are ignored.
#
# Returns:
#   A data frame with one row per numeric column and the columns
#   variable, mean, sd, min, max (zero rows if df has no numeric columns).
summarize_numeric_safe <- function(df) {
  numeric_cols <- vapply(df, is.numeric, FUN.VALUE = logical(1))
  # drop = FALSE: a single numeric column must stay a data frame, otherwise
  # vapply() below would iterate over its values rather than over columns
  df_numeric <- df[, numeric_cols, drop = FALSE]

  # Nothing to summarise: return an empty frame with the usual columns
  if (ncol(df_numeric) == 0L) {
    return(data.frame(
      variable = character(0),
      mean = numeric(0),
      sd = numeric(0),
      min = numeric(0),
      max = numeric(0)
    ))
  }

  # vapply() errors unless every column yields exactly four numbers
  stats <- vapply(df_numeric, function(col) {
    c(mean = mean(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      min = min(col, na.rm = TRUE),
      max = max(col, na.rm = TRUE))
  }, FUN.VALUE = numeric(4))

  # stats is 4 x n_columns; transpose so each variable becomes a row
  result <- as.data.frame(t(stats))
  result$variable <- rownames(result)
  rownames(result) <- NULL

  result[, c("variable", "mean", "sd", "min", "max")]
}

summarize_numeric_safe(iris)
#>       variable     mean        sd min max
#> 1 Sepal.Length 5.843333 0.8280661 4.3 7.9
#> 2  Sepal.Width 3.057333 0.4358663 2.0 4.4
#> 3 Petal.Length 3.758000 1.7652982 1.0 6.9
#> 4  Petal.Width 1.199333 0.7622377 0.1 2.5

Exercise 3:

# Create vectors
x <- 1:10
y <- 11:20

# 1. Add with mapply
result_mapply <- mapply(function(a, b) a + b, x, y)
result_mapply
#>  [1] 12 14 16 18 20 22 24 26 28 30

# 2. Custom function
weighted_sum <- function(a, b, weight = 0.5) {
  # Blend a and b: `weight` goes to a, the remainder (1 - weight) to b
  complement <- 1 - weight
  weight * a + complement * b
}

result_custom <- mapply(weighted_sum, x, y, MoreArgs = list(weight = 0.3))
result_custom
#>  [1]  8  9 10 11 12 13 14 15 16 17

# 3. Compare with vectorized
result_vec <- x + y
all.equal(result_mapply, result_vec)
#> [1] TRUE

# Performance
microbenchmark(
  mapply = mapply(`+`, x, y),
  vectorized = x + y,
  times = 1000
)
#> Unit: nanoseconds
#>        expr   min    lq      mean  median      uq    max neval
#>      mapply 11878 12636 14675.229 13797.5 14260.5 107160  1000
#>  vectorized   150   187   242.601   202.0   221.0  25582  1000

# Vectorized is MUCH faster