# 15 Manipulare de date

# Load the example data set from the CSV file into a data frame and show it.
c4 <- read.csv(file = 'curs4.csv')
c4
##    Var1 Var2
## 1     A    1
## 2     A    2
## 3     B    3
## 4     B    4
## 5     B    5
## 6     C    6
## 7     C    7
## 8     C    8
## 9     C    9
## 10    D   10
## 11    D   11
# Group the rows of c4 by Var1. The values are unchanged; the result only
# carries grouping metadata that later verbs (e.g. summarise) act on.
c4_grupat <- c4 %>%
  group_by(Var1)
c4_grupat
## # A tibble: 11 × 2
## # Groups:   Var1 [4]
##    Var1   Var2
##    <chr> <int>
##  1 A         1
##  2 A         2
##  3 B         3
##  4 B         4
##  5 B         5
##  6 C         6
##  7 C         7
##  8 C         8
##  9 C         9
## 10 D        10
## 11 D        11
# Inspect the internal structure of the grouped table, including the
# "groups" attribute that records which rows belong to each group.
c4_grupat %>% str()
## gropd_df [11 × 2] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ Var1: chr [1:11] "A" "A" "B" "B" ...
##  $ Var2: int [1:11] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "groups")= tibble [4 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Var1 : chr [1:4] "A" "B" "C" "D"
##   ..$ .rows: list<int> [1:4] 
##   .. ..$ : int [1:2] 1 2
##   .. ..$ : int [1:3] 3 4 5
##   .. ..$ : int [1:4] 6 7 8 9
##   .. ..$ : int [1:2] 10 11
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
# Group the gapminder data by continent, then inspect the structure of the
# grouped result (one entry in the "groups" attribute per continent).
grupe_continent <- gapminder %>%
  group_by(continent)
grupe_continent %>% str()
## gropd_df [1,704 × 9] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ country      : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent    : Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year         : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp      : num [1:1704] 28.8 30.3 32 34 36.1 ...
##  $ pop          : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap    : num [1:1704] 779 821 853 836 740 ...
##  $ tara_mica    : logi [1:1704] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ tara_mica_f  : Factor w/ 2 levels "mare","mica": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tara_mica_f_o: Ord.factor w/ 2 levels "mica"<"mare": 2 2 2 2 2 2 2 2 2 2 ...
##  - attr(*, "groups")= tibble [5 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ continent: Factor w/ 5 levels "Africa","Americas",..: 1 2 3 4 5
##   ..$ .rows    : list<int> [1:5] 
##   .. ..$ : int [1:624] 25 26 27 28 29 30 31 32 33 34 ...
##   .. ..$ : int [1:300] 49 50 51 52 53 54 55 56 57 58 ...
##   .. ..$ : int [1:396] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:360] 13 14 15 16 17 18 19 20 21 22 ...
##   .. ..$ : int [1:24] 61 62 63 64 65 66 67 68 69 70 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
# Collapse each Var1 group to a single row holding the mean and median of
# Var2 together with the number of rows in the group.
c4_sumar <- c4_grupat %>%
  summarise(
    MediaVar2 = mean(Var2),
    MedianaVar2 = median(Var2),
    n = n()
  )
c4_sumar
## # A tibble: 4 × 4
##   Var1  MediaVar2 MedianaVar2     n
##   <chr>     <dbl>       <dbl> <int>
## 1 A           1.5         1.5     2
## 2 B           4           4       3
## 3 C           7.5         7.5     4
## 4 D          10.5        10.5     2
# One row per continent: lowest and highest life expectancy, mean
# population, and the number of observations in the group.
continente_sumar <- grupe_continent %>%
  summarise(
    minLifeExp = min(lifeExp),
    maxLifeExp = max(lifeExp),
    MediaPop = mean(pop),
    n = n()
  )
continente_sumar
## # A tibble: 5 × 5
##   continent minLifeExp maxLifeExp  MediaPop     n
##   <fct>          <dbl>      <dbl>     <dbl> <int>
## 1 Africa          23.6       76.4  9916003.   624
## 2 Americas        37.6       80.7 24504795.   300
## 3 Asia            28.8       82.6 77038722.   396
## 4 Europe          43.6       81.8 17169765.   360
## 5 Oceania         69.1       81.2  8874672.    24
# Sort the per-group summary by group size, smallest first.
c4_ordonat <- c4_sumar %>%
  arrange(n)
c4_ordonat
## # A tibble: 4 × 4
##   Var1  MediaVar2 MedianaVar2     n
##   <chr>     <dbl>       <dbl> <int>
## 1 A           1.5         1.5     2
## 2 D          10.5        10.5     2
## 3 B           4           4       3
## 4 C           7.5         7.5     4
# Same summary, now sorted by group size in descending order.
c4_ordonat <- c4_sumar %>%
  arrange(desc(n))
c4_ordonat
## # A tibble: 4 × 4
##   Var1  MediaVar2 MedianaVar2     n
##   <chr>     <dbl>       <dbl> <int>
## 1 C           7.5         7.5     4
## 2 B           4           4       3
## 3 A           1.5         1.5     2
## 4 D          10.5        10.5     2
# Order the continent summary from the highest to the lowest minimum
# life expectancy.
continente_ordonate <- continente_sumar %>%
  arrange(desc(minLifeExp))
continente_ordonate
## # A tibble: 5 × 5
##   continent minLifeExp maxLifeExp  MediaPop     n
##   <fct>          <dbl>      <dbl>     <dbl> <int>
## 1 Oceania         69.1       81.2  8874672.    24
## 2 Europe          43.6       81.8 17169765.   360
## 3 Americas        37.6       80.7 24504795.   300
## 4 Asia            28.8       82.6 77038722.   396
## 5 Africa          23.6       76.4  9916003.   624
# The whole workflow as a single pipeline, with no intermediate objects:
# group by continent, summarise each group, then sort by group size.
continente_ordonate <- gapminder %>%
  group_by(continent) %>%
  summarise(
    minLifeExp = min(lifeExp),
    maxLifeExp = max(lifeExp),
    MediaPop = mean(pop),
    n = n()
  ) %>%
  arrange(n)  # ascending: continent with the fewest observations first
continente_ordonate
## # A tibble: 5 × 5
##   continent minLifeExp maxLifeExp  MediaPop     n
##   <fct>          <dbl>      <dbl>     <dbl> <int>
## 1 Oceania         69.1       81.2  8874672.    24
## 2 Americas        37.6       80.7 24504795.   300
## 3 Europe          43.6       81.8 17169765.   360
## 4 Asia            28.8       82.6 77038722.   396
## 5 Africa          23.6       76.4  9916003.   624