elegant code
get the average Sepal.Length per Species
■ Consider a better alternative:
# Loop-based computation of the mean Sepal.Length for each level of
# iris$Species; the result is a numeric vector named by species.
groups <- levels(iris$Species)
# Start from an empty (NULL) vector that gains one named element per pass.
averages <- c()
# The assignment below continues onto the next line: the mean of the
# Sepal.Length values belonging to species g is stored under the name g.
for(g in groups) averages[g] <-
mean(iris$Sepal.Length[iris$Species==g])
# Remove the loop helpers so that only `averages` is left behind.
rm(g, groups)
# Show the result.
averages
conditionally set invalid records to NA
■ Consider a better alternative:
# Row-by-row pass over DF: wherever column1 is not "valid", column2 is
# overwritten with NA. DF is defined outside this snippet.
# NOTE(review): 1:nrow(DF) yields c(1, 0) when DF has no rows, and the `if`
# condition is NA (an error) for rows where column1 itself is NA.
for(i in c(1:nrow(DF))){
if(DF[i, "column1"] != "valid"){ # column1 is not "valid" for this row ...
DF[i, "column2"] <- NA # ... so column2 is set to NA
} # rows that are "valid" are left unchanged
}
scale numeric columns
■ Consider a better alternative:
# Standardise every numeric column of DF: subtract the column mean and divide
# by the column standard deviation, one cell at a time.
# NOTE(review): assumes DF is a base data.frame, so that DF[ ,col] yields a
# plain vector -- TODO confirm.
for(col in names(DF)) {
# Non-numeric columns are skipped and stay untouched.
if(is.numeric(DF[ ,col])) {
# Mean and standard deviation are computed once per column, ignoring NAs.
mean_val <- mean(DF[ ,col], na.rm=TRUE)
sd_val <- sd(DF[ ,col], na.rm=TRUE)
# The inner loop rescales each row's value individually.
for(i in 1:nrow(DF)) {
DF[i, col] <- (DF[i, col] - mean_val) / sd_val
}
}
}
Try to improve it yourself first, then click here to see the elegant approach:
read multiple csv files
■ Consider a better alternative:
# Read each CSV file into its own variable, one hand-written line per file.
file1 <- read.csv("data1.csv")
file2 <- read.csv("data2.csv")
file3 <- read.csv("data3.csv")
# ... repeat for 50 files
# Stack the individual data frames row-wise into a single data frame;
# rbind() on data frames needs the files to share the same column names.
combined <- rbind(file1, file2, file3) # ... and so on
Try to improve it yourself first, then click here to see the elegant approach:
count occurrences
■ Consider a better alternative:
# Tally how often each distinct value of DF$category occurs, using a named
# counter vector that is incremented once per row.
categories <- unique(DF$category)
# One zero-initialised counter per distinct category, named after it.
counts <- numeric(length(categories))
names(counts) <- categories
# NOTE(review): 1:nrow(DF) yields c(1, 0) when DF has no rows.
for(i in 1:nrow(DF)) {
# NOTE(review): this local variable shares its name with base::cat().
cat <- DF$category[i]
# The counter is looked up by name; this assumes category is character --
# a factor would index by its integer code instead. TODO confirm.
counts[cat] <- counts[cat] + 1
}
create age groups
■ Consider a better alternative:
# Derive DF$age_group from DF$age one row at a time, using the half-open
# intervals [0,18), [18,30), [30,50), [50,65) and an open-ended 65+ bucket.
# Every row starts out unassigned.
DF$age_group <- NA
# NOTE(review): a missing age makes the first `if` condition NA, which is an
# error; 1:nrow(DF) also yields c(1, 0) when DF has no rows.
for(i in 1:nrow(DF)) {
if(DF$age[i] >= 0 & DF$age[i] < 18) DF$age_group[i] <- "0-17"
else if(DF$age[i] >= 18 & DF$age[i] < 30) DF$age_group[i] <- "18-29"
else if(DF$age[i] >= 30 & DF$age[i] < 50) DF$age_group[i] <- "30-49"
else if(DF$age[i] >= 50 & DF$age[i] < 65) DF$age_group[i] <- "50-64"
else if(DF$age[i] >= 65) DF$age_group[i] <- "65+"
# Only ages below 0 fall through to this final branch.
else DF$age_group[i] <- NA
}
Try to improve it yourself first, then click here to see the elegant approach:
More examples are shown in the full R course tutorial slides.