Data structure
Vectors
A vector is a collection of values that all have the same data type. It can be a numeric or character vector depending on the data type of the elements.
- Creating vectors
The command c(…) creates vectors of all data types.
# Build two example vectors with c(): one numeric, one character.
oddnum <- c(1, 3, 5, 7) # numeric vector
oddnum
## [1] 1 3 5 7
colors <- c("red", "yellow", "blue", "red", "blue") # character vector
colors
## [1] "red" "yellow" "blue" "red" "blue"
is.vector(oddnum) # returns TRUE if 'oddnum' is a vector
## [1] TRUE
is.vector(colors) # returns TRUE if 'colors' is a vector
## [1] TRUE
# as.vector(object) : this attempts to coerce 'object' into a vector
To create a patterned numeric vector, use the seq() or rep() function.
# Patterned numeric vectors built with seq(), the colon operator and rep().
# seq(from=value1, to=value2, length.out=value3) creates a sequence from 'value1' to 'value2' with length of 'value3'
# ('length.out' is the full argument name; 'length' only works through partial matching)
s <- seq(from = 1, to = 20, length.out = 5)
s
## [1] 1.00 5.75 10.50 15.25 20.00
# or seq(from=value1, to=value2, by=value3) creates a sequence from 'value1' to 'value2' with step size 'value3'
s2 <- seq(from = 1, to = 20, by = 2)
s2
## [1] 1 3 5 7 9 11 13 15 17 19
# using colon(:) creates a sequence of integers
numbers <- 1:8 # numeric vector
numbers
## [1] 1 2 3 4 5 6 7 8
# rep(x, times=value) replicates x, 'value' times
r <- rep(1:3, times = 2)
r
## [1] 1 2 3 1 2 3
# or rep(x, each=value) replicates each element of x 'value' times before moving on to the next
r2 <- rep(1:3, each = 2)
r2
## [1] 1 1 2 2 3 3
To select a subset of elements in the vector, use square brackets ([]). Below are some examples.
# Subsetting with [ ]: a single index, then a vector of indices.
numbers[3] # selects the third element in 'numbers'
## [1] 3
colors[c(1, 4)] # selects the first and fourth elements in 'colors'
## [1] "red" "red"
Factors
A factor can be viewed as a special case of a vector. We usually use factors to represent categorical data (which has a fixed set of possible values). The set of possible categories in the data is referred to as the levels of the factor.
To create a factor, use the command factor().
# factor(vector) creates the vector (character or numeric) as a factor
size <- factor(c("small", "large", "small", "medium", "medium"))
size
## [1] small large small medium medium
## Levels: large medium small
is.factor(size) # returns TRUE if 'size' is a factor
## [1] TRUE
is.factor(colors) # vector
## [1] FALSE
# To coerce 'colors' vector into a factor, use as.factor()
# (wrapping the assignment in parentheses also prints the assigned value)
(colors <- as.factor(colors))
## [1] red yellow blue red blue
## Levels: blue red yellow
is.factor(colors)
## [1] TRUE
Matrices
A matrix is a two-dimensional generalization of a vector. The values are arranged in rows and columns, and the elements must have the same data type.
To create a matrix, use the command matrix().
# matrix(data=vector, nrow=value1, ncol=value2) converts a vector into a matrix (value1 x value2)
m <- matrix(1:8, nrow = 4, ncol = 2) # creates a matrix with 4 rows and 2 columns
m
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
## [3,] 3 7
## [4,] 4 8
is.matrix(m) # returns TRUE if 'm' is a matrix
## [1] TRUE
dim(m) # both dimensions: rows, then columns
## [1] 4 2
nrow(m) # number of rows
## [1] 4
ncol(m) # number of columns
## [1] 2
is.matrix(size) # factor
## [1] FALSE
# To coerce 'size' factor into a matrix, use as.matrix()
# (wrapping the assignment in parentheses also prints the assigned value)
(size <- as.matrix(size))
## [,1]
## [1,] "small"
## [2,] "large"
## [3,] "small"
## [4,] "medium"
## [5,] "medium"
is.matrix(size)
## [1] TRUE
By default, values are stored by columns. To store values by rows, use the optional argument byrow=TRUE.
# Fill the matrix row by row instead of the default column-by-column order.
# TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned.
m2 <- matrix(1:8, nrow = 4, ncol = 2, byrow = TRUE)
m2
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
## [3,] 5 6
## [4,] 7 8
You can also combine vectors by rows or columns to create a matrix by using rbind() or cbind() functions.
# rbind(arguments) binds together vectors in arguments "row-wise"
a <- 1:5
b <- 11:15
ab2 <- rbind(a, b) # combine 'a' and 'b' by rows
ab2
## [,1] [,2] [,3] [,4] [,5]
## a 1 2 3 4 5
## b 11 12 13 14 15
is.matrix(ab2)
## [1] TRUE
# cbind(arguments) binds together vectors in arguments "column-wise"
ab <- cbind(a, b) # combine 'a' and 'b' by columns
ab
## a b
## [1,] 1 11
## [2,] 2 12
## [3,] 3 13
## [4,] 4 14
## [5,] 5 15
is.matrix(ab)
## [1] TRUE
To select a subset of elements in the matrix, you can do:
# Matrix subsetting uses [row, column]; leaving one side empty selects everything along it.
ab[3, 2] # selects the element in row 3 and column 2 of 'ab'
## b
## 13
ab[1:3, ] # selects rows 1-3 of 'ab'
## a b
## [1,] 1 11
## [2,] 2 12
## [3,] 3 13
ab[, 1:2] # selects columns 1-2 of 'ab'
## a b
## [1,] 1 11
## [2,] 2 12
## [3,] 3 13
## [4,] 4 14
## [5,] 5 15
Dataframes
A dataframe is a collection of vectors with the same length (but they can be of different data types). We usually use a dataframe to represent an entire dataset.
To create a dataframe, use the data.frame() function.
# data.frame(name1=object1, name2=object2, ..., name_m=object_m) takes a number of objects (e.g., vectors, factors) and returns a single object containing all the variables.
# stringsAsFactors = TRUE stores the character column 'shape' as a factor, which is what the
# factor output (with Levels) shown for dataset$shape in this tutorial relies on; since R 4.0.0
# the default is FALSE and 'shape' would otherwise stay a character vector.
dataset <- data.frame(
  shape = c("circle", "triangle", "rectangle", "circle", "circle"),
  size = size, colors = colors,
  score = c(5, 4, 2, 9, 8),
  stringsAsFactors = TRUE
)
dataset
## shape size colors score
## 1 circle small red 5
## 2 triangle large yellow 4
## 3 rectangle small blue 2
## 4 circle medium red 9
## 5 circle medium blue 8
is.data.frame(dataset)
## [1] TRUE
is.data.frame(ab) # matrix
## [1] FALSE
# To coerce 'ab' matrix into a dataframe, use as.data.frame() function
# (wrapping the assignment in parentheses also prints the assigned value)
(ab <- as.data.frame(ab))
## a b
## 1 1 11
## 2 2 12
## 3 3 13
## 4 4 14
## 5 5 15
is.data.frame(ab)
## [1] TRUE
To select a specific variable (vector) or a subset of the dataframe, do:
# Ways to pull single values, whole variables, or filtered rows out of 'dataset'.
dataset[4, 3] # selects the element in row 4 and column 3
## [1] red
## Levels: blue red yellow
dataset[[3]] # selects the third variable in 'dataset'
## [1] red yellow blue red blue
## Levels: blue red yellow
dataset[["colors"]] # selects 'colors' variable in 'dataset'
## [1] red yellow blue red blue
## Levels: blue red yellow
dataset$shape # selects 'shape' variable in 'dataset'
## [1] circle triangle rectangle circle circle
## Levels: circle rectangle triangle
dataset$shape[3] # selects the third element of 'shape' variable in 'dataset'
## [1] rectangle
## Levels: circle rectangle triangle
subset(dataset, size == "medium") # selects a subset of data that satisfies 'size=medium'
## shape size colors score
## 4 circle medium red 9
## 5 circle medium blue 8
Lists
A list is a collection of data objects. The components can have different data types and lengths.
To create a list, use list().
# list(name1=object1, name2=object2, ..., name_m=object_m) creates a list of m components
# Here the components differ in type and length: two vectors of length 2, an integer
# vector of length 8, and a whole data frame.
all_combined <- list(
  names = c("Bob", "Anne"),
  age = c(26, 43),
  numbers = numbers, samples = dataset
)
all_combined
## $names
## [1] "Bob" "Anne"
##
## $age
## [1] 26 43
##
## $numbers
## [1] 1 2 3 4 5 6 7 8
##
## $samples
## shape size colors score
## 1 circle small red 5
## 2 triangle large yellow 4
## 3 rectangle small blue 2
## 4 circle medium red 9
## 5 circle medium blue 8
is.list(all_combined)
## [1] TRUE
length(all_combined) # number of components
## [1] 4
names(all_combined) # component names
## [1] "names" "age" "numbers" "samples"
You can extract specific components/elements of the list in various ways.
# $name and [[ ]] both return the component itself; a trailing [i] then indexes into it.
all_combined$names # returns 'names' component
## [1] "Bob" "Anne"
all_combined[["names"]] # same component, selected by name
## [1] "Bob" "Anne"
all_combined[[1]] # returns the first component
## [1] "Bob" "Anne"
all_combined$names[1] # returns the first element of 'names'
## [1] "Bob"
all_combined[["names"]][2] # returns the second element of 'names'
## [1] "Anne"
all_combined[[2]][1] # returns the first element of the second component
## [1] 26