Chapter 1 基础知识

1.1 初始步骤

# Scatter plot of stopping distance against speed from the cars data set.
plot( x = cars$speed, y = cars$dist,pch = 2) 
# pch selects the plotting symbol

# Overlay line segments that join the points in row order.
lines(cars$speed,cars$dist)

# Draw the scatter plot again so the next overlay starts on a fresh figure.
plot(cars$speed,cars$dist,pch = 2)

# Straight line dist = 2 * speed + 2.1. The slope and intercept are
# hard-coded in this call; they are not estimated with lm().
lines(cars$speed,2*(cars$speed)+2.1) # linear model

1.2 R语言基础

Filter

# Logical mask: TRUE for every car whose speed exceeds 20
cars[["speed"]] > 20
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [46]  TRUE  TRUE  TRUE  TRUE  TRUE
# Keep the rows with speed above 15, then show the first six of them
head(cars[cars[["speed"]] > 15, ])
##    speed dist
## 27    16   32
## 28    16   40
## 29    17   32
## 30    17   40
## 31    17   50
## 32    18   42

Show variable names

names(cars) # column (variable) names of the data frame
## [1] "speed" "dist"
colnames(cars) # same result here; colnames() can also be used on a matrix
## [1] "speed" "dist"

Create Sequence

# Arithmetic sequence starting at 7 in steps of 2, never going past 18
seq(7, 18, by = 2)
## [1]  7  9 11 13 15 17
# Repeat the whole vector three times
rep(c(3, 5, 6), times = 3)
## [1] 3 5 6 3 5 6 3 5 6
# Per-element counts: 3 once, 5 twice, 6 three times
rep(c(3, 5, 6), times = 1:3)
## [1] 3 5 5 6 6 6
# Per-element counts: 3 once, 5 four times, 6 five times
rep(c(3, 5, 6), times = c(1, 4, 5))
##  [1] 3 5 5 5 5 6 6 6 6 6
# A count of 2 for every element gives the same result as each = 2
rep(c(3, 5, 6), times = c(2, 2, 2))
## [1] 3 3 5 5 6 6
rep(c(3, 5, 6), each = 2)
## [1] 3 3 5 5 6 6

Create Matrix

# Turn the vector 1:12 into a 3 x 4 matrix by setting its dim attribute;
# the values fill the matrix column by column.
x <- 1:12
dim(x) <- c(3, 4)
x
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
# byrow = TRUE fills the matrix row by row instead. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
matrix(1:12, nrow = 3, byrow = TRUE)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
y <- matrix(1:12, nrow = 3, byrow = TRUE)
rownames(y) <- letters[1:3] # label the rows a, b, c

t(y) # transpose: the row names of y become the column names
##      a b  c
## [1,] 1 5  9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
cbind(a = 1:4, b = 5:8, c = 9:12) # bind the vectors together as columns
##      a b  c
## [1,] 1 5  9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
rbind(a = 1:4, b = 5:8, c = 9:12) # bind the vectors together as rows
##   [,1] [,2] [,3] [,4]
## a    1    2    3    4
## b    5    6    7    8
## c    9   10   11   12

Factors

# Pain scores coded 0 to 3
pain <- c(0, 3, 2, 2, 1)
# Build the factor and attach the level labels in a single call
fpain <- factor(
  pain,
  levels = 0:3,
  labels = c("none", "mild", "medium", "severe")
)
fpain
## [1] none   severe medium medium mild  
## Levels: none mild medium severe
# Underlying integer codes of the levels; the codes start at 1, not 0
as.numeric(fpain)
## [1] 1 4 3 3 2

List

x1 <- 1:10
x2 <- 5:14
# Combine the two vectors into a list whose elements are named first/second
mylist <- list(first = x1, second = x2)
mylist
## $first
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $second
##  [1]  5  6  7  8  9 10 11 12 13 14
mylist$first # extract one element by its name
##  [1]  1  2  3  4  5  6  7  8  9 10

Dataframe

# Build a data frame from x1 and x2; the columns take the variable names
df <- data.frame(x1, x2)
head(df)
##   x1 x2
## 1  1  5
## 2  2  6
## 3  3  7
## 4  4  8
## 5  5  9
## 6  6 10
df$x1 # extract one column by its name
##  [1]  1  2  3  4  5  6  7  8  9 10

Index and Select

x1[5] # fifth element of x1
## [1] 5
x1[c(3,5,7)] # third, fifth and seventh elements
## [1] 3 5 7
x1[x1 > 5] # elements greater than 5, picked with a logical index
## [1]  6  7  8  9 10
x1[6] <- 700 # overwrite the sixth element with 700
x1
##  [1]   1   2   3   4   5 700   7   8   9  10
x1[-c(1,3,5)] # negative indices drop the first, third and fifth elements
## [1]   2   4 700   7   8   9  10

Select for Data Frame (matrix-style indexing)

# First six observations
head(cars, n = 6)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
# Last six observations
tail(cars, n = 6)
##    speed dist
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
# Single cell: row 3, column 1
cars[3, 1]
## [1] 7
# Entire third row
cars[3, ]
##   speed dist
## 3     7    4
# Entire second column, returned as a plain vector
cars[, 2]
##  [1]   2  10   4  22  16  10  18  26  34  17  28  14  20  24  28  26  34  34  46  26  36  60  80
## [24]  20  26  54  32  40  32  40  50  42  56  76  84  36  46  68  32  48  52  56  64  66  54  70
## [47]  92  93 120  85
# All observations whose speed is above 9
cars[cars$speed > 9, ]
##    speed dist
## 7     10   18
## 8     10   26
## 9     10   34
## 10    11   17
## 11    11   28
## 12    12   14
## 13    12   20
## 14    12   24
## 15    12   28
## 16    13   26
## 17    13   34
## 18    13   34
## 19    13   46
## 20    14   26
## 21    14   36
## 22    14   60
## 23    14   80
## 24    15   20
## 25    15   26
## 26    15   54
## 27    16   32
## 28    16   40
## 29    17   32
## 30    17   40
## 31    17   50
## 32    18   42
## 33    18   56
## 34    18   76
## 35    18   84
## 36    19   36
## 37    19   46
## 38    19   68
## 39    20   32
## 40    20   48
## 41    20   52
## 42    20   56
## 43    20   64
## 44    22   66
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
# mpg values of the cars that have 4 gears
with(mtcars, mpg[gear == 4])
##  [1] 21.0 21.0 22.8 24.4 22.8 19.2 17.8 32.4 30.4 33.9 27.3 21.4
# mpg values grouped into one list element per gear count
with(mtcars, split(mpg, gear))
## $`3`
##  [1] 21.4 18.7 18.1 14.3 16.4 17.3 15.2 10.4 10.4 14.7 21.5 15.5 15.2 13.3 19.2
## 
## $`4`
##  [1] 21.0 21.0 22.8 24.4 22.8 19.2 17.8 32.4 30.4 33.9 27.3 21.4
## 
## $`5`
## [1] 26.0 30.4 15.8 19.7 15.0

Lapply and Sapply

# Mean of every column, simplified to a named numeric vector. na.rm is
# written as TRUE because T is an ordinary variable that can be reassigned.
sapply(mtcars, mean, na.rm = TRUE)
##        mpg        cyl       disp         hp       drat         wt       qsec         vs 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750   0.437500 
##         am       gear       carb 
##   0.406250   3.687500   2.812500
# The same means, returned as a list with one element per column
lapply(mtcars, mean)
## $mpg
## [1] 20.09062
## 
## $cyl
## [1] 6.1875
## 
## $disp
## [1] 230.7219
## 
## $hp
## [1] 146.6875
## 
## $drat
## [1] 3.596563
## 
## $wt
## [1] 3.21725
## 
## $qsec
## [1] 17.84875
## 
## $vs
## [1] 0.4375
## 
## $am
## [1] 0.40625
## 
## $gear
## [1] 3.6875
## 
## $carb
## [1] 2.8125
# Evaluate the expression 10 times; each run draws 10 normal random values
# and takes their mean. No seed is set in the lines shown here, so the
# printed numbers will differ from run to run.
replicate(10, mean(rnorm(10))) # replicate a process
##  [1]  0.3458089 -0.5549575 -0.1988860  0.4008482 -0.1877146 -0.4651017 -0.4552833  0.1834721
##  [9] -0.1747121 -0.3867673
# 4 x 3 matrix filled with 12 normal random values
m <- matrix(rnorm(12), nrow = 4)
apply(m, 2, min) # minimum of each column (margin 2 = columns)
## [1] -2.464909 -1.554084 -1.064148
tapply(mtcars$mpg, mtcars$gear, mean) # mean mpg within each gear group
##        3        4        5 
## 16.10667 24.53333 21.38000

1.2.1 Sort and Order

# The mpg values themselves, arranged in ascending order
sort(mtcars$mpg)
##  [1] 10.4 10.4 13.3 14.3 14.7 15.0 15.2 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.2 19.7
## [19] 21.0 21.0 21.4 21.4 21.5 22.8 22.8 24.4 26.0 27.3 30.4 30.4 32.4 33.9
# Row positions that put mpg in ascending order (row 15 first, then rows
# 16, 24, ...); these are positions to index with, not the ranks of each row.
order(mtcars$mpg)
##  [1] 15 16 24  7 17 31 14 23 22 29 12 13 11  6  5 10 25 30  1  2  4 32 21  3  9  8 27 26 19 28
## [31] 18 20
mtcars[order(mtcars$mpg),] # whole data frame with rows reordered by ascending mpg
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1