1 Import date, filtrare, statistici descriptive simple (exemple)

Cu tabelul stateData.csv:

  1. Importăm tabelul în R
# Load the CSV file (path is relative to the working directory) into a data frame.
state <- read.csv(file = "stateData.csv")
  1. Afișăm dimensiunea tabelului:
# Number of rows followed by number of columns of the data frame.
dim(state)
## [1] 50 12
  1. Afișăm structura tabelului:
# Compact overview: each column's name, type and first few values.
str(state)
## 'data.frame':    50 obs. of  12 variables:
##  $ X             : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ state.abb     : chr  "AL" "AK" "AZ" "AR" ...
##  $ state.area    : int  51609 589757 113909 53104 158693 104247 5009 2057 58560 58876 ...
##  $ state.region  : int  2 4 4 2 4 4 1 2 2 2 ...
##  $ population    : int  3615 365 2212 2110 21198 2541 3100 579 8277 4931 ...
##  $ income        : int  3624 6315 4530 3378 5114 4884 5348 4809 4815 4091 ...
##  $ illiteracy    : num  2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
##  $ life.exp      : num  69 69.3 70.5 70.7 71.7 ...
##  $ murder        : num  15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
##  $ highSchoolGrad: num  41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
##  $ frost         : int  20 152 15 65 20 166 139 103 11 60 ...
##  $ area          : int  50708 566432 113417 51945 156361 103766 4862 1982 54090 58073 ...
  1. Afișăm primele 6 observații
# Show the first six rows of the data frame.
head(state, n = 6)
##            X state.abb state.area state.region population income illiteracy
## 1    Alabama        AL      51609            2       3615   3624        2.1
## 2     Alaska        AK     589757            4        365   6315        1.5
## 3    Arizona        AZ     113909            4       2212   4530        1.8
## 4   Arkansas        AR      53104            2       2110   3378        1.9
## 5 California        CA     158693            4      21198   5114        1.1
## 6   Colorado        CO     104247            4       2541   4884        0.7
##   life.exp murder highSchoolGrad frost   area
## 1    69.05   15.1           41.3    20  50708
## 2    69.31   11.3           66.7   152 566432
## 3    70.55    7.8           58.1    15 113417
## 4    70.66   10.1           39.9    65  51945
## 5    71.71   10.3           62.6    20 156361
## 6    72.06    6.8           63.9   166 103766
  1. Afișăm ultimele 4 observații
# Show the last four rows of the data frame.
tail(state, n = 4)
##                X state.abb state.area state.region population income illiteracy
## 47    Washington        WA      68192            4       3559   4864        0.6
## 48 West Virginia        WV      24181            2       1799   3617        1.4
## 49     Wisconsin        WI      56154            3       4589   4468        0.7
## 50       Wyoming        WY      97914            4        376   4566        0.6
##    life.exp murder highSchoolGrad frost  area
## 47    71.72    4.3           63.5    32 66570
## 48    69.48    6.7           41.6   100 24070
## 49    72.48    3.0           54.5   149 54464
## 50    70.29    6.9           62.9   173 97203
  1. Calculăm media variabilei population.
# Arithmetic mean of the population column.
mean(state$population)
## [1] 4246.42
  1. Calculăm mediana variabilei population.
# Median of the population column.
median(state$population)
## [1] 2838.5
  1. Calculăm valoarea maximă a variabilei life.exp
# Largest value in the life.exp column.
max(state$life.exp)
## [1] 73.6
  1. Calculăm valoarea minimă a variabilei state.area
# Smallest value in the state.area column.
min(state$state.area)
## [1] 1214
  1. Selectăm statele cu populație peste 2000 și illiteracy sub 1.
# Keep the rows where both conditions hold; column names are evaluated
# inside the data frame.
subset(state, population > 2000 & illiteracy < 1)
##             X state.abb state.area state.region population income illiteracy
## 6    Colorado        CO     104247            4       2541   4884        0.7
## 13   Illinois        IL      56400            3      11197   5107        0.9
## 14    Indiana        IN      36291            3       5313   4458        0.7
## 15       Iowa        IA      56290            3       2861   4628        0.5
## 16     Kansas        KS      82264            3       2280   4669        0.6
## 20   Maryland        MD      10577            2       4122   5299        0.9
## 22   Michigan        MI      58216            3       9111   4751        0.9
## 23  Minnesota        MN      84068            3       3921   4675        0.6
## 25   Missouri        MO      69686            3       4767   4254        0.8
## 35       Ohio        OH      41222            3      10735   4561        0.8
## 37     Oregon        OR      96981            4       2284   4660        0.6
## 47 Washington        WA      68192            4       3559   4864        0.6
## 49  Wisconsin        WI      56154            3       4589   4468        0.7
##    life.exp murder highSchoolGrad frost   area
## 6     72.06    6.8           63.9   166 103766
## 13    70.14   10.3           52.6   127  55748
## 14    70.88    7.1           52.9   122  36097
## 15    72.56    2.3           59.0   140  55941
## 16    72.58    4.5           59.9   114  81787
## 20    70.22    8.5           52.3   101   9891
## 22    70.63   11.1           52.8   125  56817
## 23    72.96    2.3           57.6   160  79289
## 25    70.69    9.3           48.8   108  68995
## 35    70.82    7.4           53.2   124  40975
## 37    72.13    4.2           60.0    44  96184
## 47    71.72    4.3           63.5    32  66570
## 49    72.48    3.0           54.5   149  54464
  1. Câte sunt?
# Store the filtered rows so the result can be inspected further.
pt11 <- subset(state, population > 2000 & illiteracy < 1)
# Rows and columns of the filtered data frame.
dim(pt11)
## [1] 13 12

sau pentru a vedea doar numărul de linii:

# nrow() returns the row count directly, without indexing into dim().
nrow(pt11)
## [1] 13

sau direct dimensiunea:

# Dimensions of the filtered rows, computed without an intermediate variable.
dim(subset(state, population > 2000 & illiteracy < 1))
## [1] 13 12
  1. Selectăm statele cu populație sub 2000 și income sub 4000
# Keep the rows with population below 2000 and income below 4000.
subset(state, population < 2000 & income < 4000)
##                X state.abb state.area state.region population income illiteracy
## 19         Maine        ME      33215            1       1058   3694        0.7
## 31    New Mexico        NM     121666            4       1144   3601        2.2
## 45       Vermont        VT       9609            1        472   3907        0.6
## 48 West Virginia        WV      24181            2       1799   3617        1.4
##    life.exp murder highSchoolGrad frost   area
## 19    70.39    2.7           54.7   161  30920
## 31    70.32    9.7           55.2   120 121412
## 45    71.64    5.5           57.1   168   9267
## 48    69.48    6.7           41.6   100  24070

sau

# Select rows with a logical index; the empty slot after the comma keeps
# every column.
state[state$population < 2000 & state$income < 4000, ]
##                X state.abb state.area state.region population income illiteracy
## 19         Maine        ME      33215            1       1058   3694        0.7
## 31    New Mexico        NM     121666            4       1144   3601        2.2
## 45       Vermont        VT       9609            1        472   3907        0.6
## 48 West Virginia        WV      24181            2       1799   3617        1.4
##    life.exp murder highSchoolGrad frost   area
## 19    70.39    2.7           54.7   161  30920
## 31    70.32    9.7           55.2   120 121412
## 45    71.64    5.5           57.1   168   9267
## 48    69.48    6.7           41.6   100  24070

sau doar prima coloană:

# Logical row index combined with column position 1; a single selected
# column is returned as a plain vector.
state[state$population < 2000 & state$income < 4000, 1]
## [1] "Maine"         "New Mexico"    "Vermont"       "West Virginia"