Chapter 6 Advanced Data Structures
Lander's chapter 5 - Advanced Data Structures
Data come in many types and structures, which can pose a problem for some analysis environments, but R handles them with aplomb. The most common data structure is the one-dimensional vector, which forms the basis of everything in R. The most powerful structure is the data.frame--something special in R that most other languages do not have--which handles mixed data types in a spreadsheet-like format. Lists are useful for storing collections of items, like a hash in Perl.
The main difference between an array and a matrix is that matrices are restricted to two dimensions while arrays can have an arbitrary number.
####################################
#chapter 5 advanced data structures#
####################################
# data.frame: a spreadsheet-like structure whose columns may hold different
# types. Build one from three equal-length vectors, naming columns in the call.
x <- 10:1
y <- -4:5 # spaced out so `<-` and the unary minus are not read as one token
q <- c(
  "hockey", "football", "basketball", "curling", "rugby",
  "lacrosse", "basketball", "tennis", "cricket", "soccer"
)
(theDF <- data.frame(First = x, second = y, sport = q))
## First second sport
## 1 10 -4 hockey
## 2 9 -3 football
## 3 8 -2 basketball
## 4 7 -1 curling
## 5 6 0 rugby
## 6 5 1 lacrosse
## 7 4 2 basketball
## 8 3 3 tennis
## 9 2 4 cricket
## 10 1 5 soccer
# Basic shape and naming queries.
nrow(theDF)
## [1] 10
ncol(theDF)
## [1] 3
dim(theDF)
## [1] 10 3
names(theDF)
## [1] "First" "second" "sport"
rownames(theDF)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
# First n rows (n given explicitly) and the last rows (default n).
head(theDF, n = 6)
## First second sport
## 1 10 -4 hockey
## 2 9 -3 football
## 3 8 -2 basketball
## 4 7 -1 curling
## 5 6 0 rugby
## 6 5 1 lacrosse
tail(theDF)
## First second sport
## 5 6 0 rugby
## 6 5 1 lacrosse
## 7 4 2 basketball
## 8 3 3 tennis
## 9 2 4 cricket
## 10 1 5 soccer
# Rebuild with consistently capitalised column names.
theDF <- data.frame(First = x, Second = y, Sport = q)
theDF[, "Sport"]
## [1] "hockey" "football" "basketball" "curling" "rugby" "lacrosse" "basketball"
## [8] "tennis" "cricket" "soccer"
# Selecting a single column drops to a plain vector. It is a character vector
# here (see output) because data.frame() defaults to stringsAsFactors = FALSE
# since R 4.0; older versions returned a factor.
class(theDF[, "Sport"])
## [1] "character"
# drop = FALSE keeps the one-column result as a data.frame. Spell out FALSE:
# `F` is an ordinary variable that can be reassigned.
class(theDF[, "Sport", drop = FALSE])
## [1] "data.frame"
# Factors and model.matrix(): expand a factor into indicator (dummy) columns.
# `levels` fixes the level order, which is also the column order of the
# indicator matrix. `ordered = TRUE` additionally makes this an ordered factor,
# which is why "contr.poly" is recorded in the contrasts attribute below.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
(newFactor <- factor(
  c("red", "blue", "green", "green"),
  levels = c("red", "blue", "green"),
  ordered = TRUE
))
## [1] red blue green green
## Levels: red < blue < green
# `- 1` removes the intercept so that every level gets its own 0/1 column.
model.matrix(~ newFactor - 1)
## newFactorred newFactorblue newFactorgreen
## 1 1 0 0
## 2 0 1 0
## 3 0 0 1
## 4 0 0 1
## attr(,"assign")
## [1] 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$newFactor
## [1] "contr.poly"
# Lists: containers that hold any number of elements, each of any type.
# A two-element list of integer vectors, assigned and printed in one step.
(list3 <- list(1:3, 3:7))
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] 3 4 5 6 7
# Elements can themselves be a data.frame, a vector, or another list.
list5 <- list(theDF, 1:10, list3)
list5
## [[1]]
## First Second Sport
## 1 10 -4 hockey
## 2 9 -3 football
## 3 8 -2 basketball
## 4 7 -1 curling
## 5 6 0 rugby
## 6 5 1 lacrosse
## 7 4 2 basketball
## 8 3 3 tennis
## 9 2 4 cricket
## 10 1 5 soccer
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10
##
## [[3]]
## [[3]][[1]]
## [1] 1 2 3
##
## [[3]][[2]]
## [1] 3 4 5 6 7
# Pre-allocate a list of a given length; every slot starts out as NULL.
(emptyList <- vector("list", 4))
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
# Single brackets return a sub-list (still a list of length one) ...
list3[2]
## [[1]]
## [1] 3 4 5 6 7
# ... whereas double brackets extract the element itself.
list3[[2]]
## [1] 3 4 5 6 7
# Number of top-level elements.
length(list3)
## [1] 2
# Matrices: two-dimensional structures of a single type, filled column-wise.
# Ten values in two columns give a 5 x 2 matrix.
A <- matrix(1:10, ncol = 2)
A
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
B <- matrix(21:30, ncol = 2)
B
## [,1] [,2]
## [1,] 21 26
## [2,] 22 27
## [3,] 23 28
## [4,] 24 29
## [5,] 25 30
# Twenty values in ten columns give a 2 x 10 matrix.
C <- matrix(21:40, ncol = 10)
C
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 21 23 25 27 29 31 33 35 37 39
## [2,] 22 24 26 28 30 32 34 36 38 40
# A %*% B is left commented out: both operands are 5 x 2, so their inner
# dimensions do not match. Transposing B makes the product (5 x 2) %*% (2 x 5).
# A %*% B
A %*% t(B)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 177 184 191 198 205
## [2,] 224 233 242 251 260
## [3,] 271 282 293 304 315
## [4,] 318 331 344 357 370
## [5,] 365 380 395 410 425
# No dimension names were supplied, so both queries return NULL.
rownames(A)
## NULL
colnames(B)
## NULL
# Arrays: single-type vectors with any number of dimensions.
# Twelve integers arranged as 2 rows x 3 columns x 2 slices, filled with the
# first index varying fastest; assigned and printed in one step.
(theArray <- array(seq_len(12), dim = c(2, 3, 2)))
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12