4 How to deal with NA’s
“NA” stands for “Not Available”; it marks a missing value. The following R functions are useful for dealing with NA’s: is.na(), na.omit(), complete.cases(), colSums(), rowSums(), tidyr::replace_na(), and dplyr::na_if().
Example:
# Example script: counting, dropping and replacing missing values (NA).
# Run it in a fresh R session; rm(list = ls()) is deliberately not used
# because it silently wipes whatever is in the caller's workspace.

# load packages
library(dplyr) # for na_if(), mutate(), across()
library(tidyr) # for replace_na()

# create a fake data set (-99 in v2 is later treated as a missing-value code)
fk_data <- data.frame(
  v1 = 1:5,
  v2 = c(1, 3, NA, 8, -99),
  v3 = c(NA, NA, rnorm(3))
)

# which variables have NA's and how many NA's (one count per column)
colSums(is.na(fk_data))

# which rows have NA's and how many NA's (one count per row)
rowSums(is.na(fk_data))

# get all the rows which have NA's
fk_data_na_rows <- fk_data[rowSums(is.na(fk_data)) > 0, ]

# get all the rows which do not have NA's
fk_data_without_na_rows <- fk_data[rowSums(is.na(fk_data)) == 0, ]
# or (complete.cases() already returns a logical vector, no `== TRUE` needed)
fk_data_without_na_rows <- fk_data[complete.cases(fk_data), ]
# or
fk_data_without_na_rows <- na.omit(fk_data)

# replace all the NA's with 0
fk_data_replace <- fk_data
fk_data_replace[is.na(fk_data_replace)] <- 0
# or directly fk_data[is.na(fk_data)] <- 0, if we don't need fk_data anymore

# replace NA in v3 with 1 (other columns keep their NA's)
fk_data_replace <- fk_data %>%
  replace_na(list(v3 = 1))

# replace -99 in fk_data by NA
# na_if() works on vectors; since dplyr 1.1.0 passing a whole data frame is
# an error, so apply it to every column with across().
fk_data_1 <- fk_data %>%
  mutate(across(everything(), function(x) na_if(x, -99)))

# find the mean of v2 in fk_data, ignoring NA's
# (-99 is still an ordinary value in fk_data; use fk_data_1$v2 to exclude it)
the_mean <- mean(fk_data$v2, na.rm = TRUE)