R语言基础
Filter
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE
head(cars[cars$speed > 15,]) #filter cars with speed > 15
## speed dist
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
Show variable names
names(cars) #show names of variables
## [1] "speed" "dist"
colnames(cars) #same result; colnames() also works on matrices
## [1] "speed" "dist"
Create Sequence
seq(from = 7, to = 18, by = 2) #create sequence
## [1] 7 9 11 13 15 17
rep(c(3,5,6),times = 3) #replicate full sequence three times
## [1] 3 5 6 3 5 6 3 5 6
rep(c(3,5,6),1:3) #rep 3 once, 5 twice, 6 three times
## [1] 3 5 5 6 6 6
rep(c(3,5,6), c(1,4,5)) #rep 3 once, 5 four times, 6 five times
## [1] 3 5 5 5 5 6 6 6 6 6
rep(c(3,5,6), c(2,2,2)) #equivalent to each = 2
## [1] 3 3 5 5 6 6
rep(c(3,5,6), each = 2) #repeat each element twice
## [1] 3 3 5 5 6 6
Create Matrix
x <- 1:12
dim(x) <- c(3,4) #reshape into a 3 x 4 matrix, filled column by column
x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
matrix(1:12, nrow = 3, byrow = T) #create a matrix filled row by row
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
y <- matrix(1:12, nrow = 3, byrow = T)
rownames(y) <- letters[1:3] #give row names
t(y) #transpose matrix
## a b c
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
cbind(a = 1:4,b = 5:8, c = 9:12) #bind multiple vectors by column
## a b c
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
rbind(a = 1:4, b = 5:8, c = 9:12) #bind vectors by row
## [,1] [,2] [,3] [,4]
## a 1 2 3 4
## b 5 6 7 8
## c 9 10 11 12
Factors
pain <- c(0,3,2,2,1)
fpain <- factor(pain, levels = 0:3) #convert pain scores to a factor
levels(fpain) <- c("none","mild", "medium", "severe") #label the pain levels
fpain
## [1] none severe medium medium mild
## Levels: none mild medium severe
as.numeric(fpain) #extract the integer level codes, which always start from 1
## [1] 1 4 3 3 2
List
x1 = 1:10
x2 = 5:14
mylist = list(first = x1, second = x2) #combine two vectors to a list
mylist
## $first
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $second
## [1] 5 6 7 8 9 10 11 12 13 14
mylist$first #extract values by list name
## [1] 1 2 3 4 5 6 7 8 9 10
Dataframe
df = data.frame(x1,x2) #create a dataframe
head(df)
## x1 x2
## 1 1 5
## 2 2 6
## 3 3 7
## 4 4 8
## 5 5 9
## 6 6 10
df$x1 #extract values by column name
## [1] 1 2 3 4 5 6 7 8 9 10
Index and Select
x1[5] #select the fifth number in x1
## [1] 5
x1[c(3,5,7)] #select the third, fifth, and seventh number in x1
## [1] 3 5 7
x1[x1 > 5] #select the number greater than 5
## [1] 6 7 8 9 10
x1[6] <- 700 #give the value 700 to the sixth number in x1
x1
## [1] 1 2 3 4 5 700 7 8 9 10
x1[-c(1,3,5)] #remove the first, third, and fifth number by negative index
## [1] 2 4 700 7 8 9 10
Select for Matrix
head(cars) #view the first six obs
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
tail(cars) #view the last six obs
## speed dist
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
## [1] 7
cars[3,] #get the third row data
## speed dist
## 3 7 4
cars[,2] #get the second column
## [1] 2 10 4 22 16 10 18 26 34 17 28 14 20 24 28 26 34 34 46 26 36 60 80
## [24] 20 26 54 32 40 32 40 50 42 56 76 84 36 46 68 32 48 52 56 64 66 54 70
## [47] 92 93 120 85
cars[cars$speed>9,] #select observations with speed > 9
## speed dist
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
mtcars$mpg[mtcars$gear==4] #get mpg for gear = 4
## [1] 21.0 21.0 22.8 24.4 22.8 19.2 17.8 32.4 30.4 33.9 27.3 21.4
split(mtcars$mpg, mtcars$gear) #split mpg for gear type
## $`3`
## [1] 21.4 18.7 18.1 14.3 16.4 17.3 15.2 10.4 10.4 14.7 21.5 15.5 15.2 13.3 19.2
##
## $`4`
## [1] 21.0 21.0 22.8 24.4 22.8 19.2 17.8 32.4 30.4 33.9 27.3 21.4
##
## $`5`
## [1] 26.0 30.4 15.8 19.7 15.0
Lapply and Sapply
sapply(mtcars, mean, na.rm = T) #get mean value for all variables
## mpg cyl disp hp drat wt qsec vs
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750 0.437500
## am gear carb
## 0.406250 3.687500 2.812500
lapply(mtcars, mean, na.rm = TRUE) #same means, returned as a list
## $mpg
## [1] 20.09062
##
## $cyl
## [1] 6.1875
##
## $disp
## [1] 230.7219
##
## $hp
## [1] 146.6875
##
## $drat
## [1] 3.596563
##
## $wt
## [1] 3.21725
##
## $qsec
## [1] 17.84875
##
## $vs
## [1] 0.4375
##
## $am
## [1] 0.40625
##
## $gear
## [1] 3.6875
##
## $carb
## [1] 2.8125
replicate(10, mean(rnorm(10))) #replicate a process
## [1] 0.3458089 -0.5549575 -0.1988860 0.4008482 -0.1877146 -0.4651017 -0.4552833 0.1834721
## [9] -0.1747121 -0.3867673
m <- matrix(rnorm(12), nrow = 4)
apply(m, 2, min) #get column min
## [1] -2.464909 -1.554084 -1.064148
tapply(mtcars$mpg, mtcars$gear, mean) # mean mpg value by gear
## 3 4 5
## 16.10667 24.53333 21.38000
Sort and Order
sort(mtcars$mpg) #sort mpg in ascending order
## [1] 10.4 10.4 13.3 14.3 14.7 15.0 15.2 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.2 19.7
## [19] 21.0 21.0 21.4 21.4 21.5 22.8 22.8 24.4 26.0 27.3 30.4 30.4 32.4 33.9
order(mtcars$mpg) #get the indices that sort mpg ascending: 15th obs first, then 16th, 24th, ...
## [1] 15 16 24 7 17 31 14 23 22 29 12 13 11 6 5 10 25 30 1 2 4 32 21 3 9 8 27 26 19 28
## [31] 18 20
mtcars[order(mtcars$mpg),] #reorder the rows by ascending mpg
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1