16.3 Cutting
16.3.1 chop()
`chop()` comes from the santoku package; see its visual introduction:
https://hughjonesd.github.io/santoku/tutorials/00-visualintroduction.html
# devtools::install_github("hughjonesd/santoku")
# library(tidyverse) (load tidyverse before santoku to avoid conflicts)
library(santoku)
`cut()` in base R:
x <- rnorm(100)
cut(x, 5) %>% table() # 5 equal intervals
#> .
#> (-2.88,-1.62] (-1.62,-0.368] (-0.368,0.884] (0.884,2.14] (2.14,3.4]
#> 5 25 52 17 1
cut(x, -3:3) %>% table()
#> .
#> (-3,-2] (-2,-1] (-1,0] (0,1] (1,2] (2,3]
#> 3 15 27 38 16 0
`ntile()` in dplyr is another alternative.
`chop()` in santoku:
chopped <- chop(x, breaks = -5:5)
chopped %>% table()
#> .
#> [-3, -2) [-2, -1) [-1, 0) [0, 1) [1, 2) [3, 4)
#> 3 15 27 38 16 1
# chop() returns a factor
tibble(x, chopped)
#> # A tibble: 100 x 2
#> x chopped
#> <dbl> <fct>
#> 1 -0.422 [-1, 0)
#> 2 0.0569 [0, 1)
#> 3 0.711 [0, 1)
#> 4 -1.59 [-2, -1)
#> 5 0.597 [0, 1)
#> 6 1.22 [1, 2)
#> # ... with 94 more rows
If data lies beyond the limits of `breaks`, the breaks are extended automatically to cover it. With `extend = FALSE` they are not extended, and values beyond the bounds are converted to `NA`:
chopped <- chop(x, breaks = -1:1, extend = FALSE)
tibble(x, chopped)
#> # A tibble: 100 x 2
#> x chopped
#> <dbl> <fct>
#> 1 -0.422 [-1, 0)
#> 2 0.0569 [0, 1]
#> 3 0.711 [0, 1]
#> 4 -1.59 <NA>
#> 5 0.597 [0, 1]
#> 6 1.22 <NA>
#> # ... with 94 more rows
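To chop a single number into a separate category, put the number twice in `breaks` (note that the table below prints the original `x` next to the labels computed from `x_zeros`, whose first five values were set to 0):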
x_zeros <- x
x_zeros[1:5] <- 0
chopped <- chop(x_zeros, c(-1, 0, 0, 1))
tibble(x, chopped)
#> # A tibble: 100 x 2
#> x chopped
#> <dbl> <fct>
#> 1 -0.422 {0}
#> 2 0.0569 {0}
#> 3 0.711 {0}
#> 4 -1.59 {0}
#> 5 0.597 {0}
#> 6 1.22 (1, 3.39]
#> # ... with 94 more rows
To quickly produce a table of chopped data, use `tab()`: