## 16.3 Cutting

### 16.3.1 chop()

chop() is provided by the santoku package. A visual introduction is available at:

https://hughjonesd.github.io/santoku/tutorials/00-visualintroduction.html

# devtools::install_github("hughjonesd/santoku")
# library(tidyverse) (load tidyverse before santoku to avoid conflicts)
library(santoku)

cut() in base R:

# 100 random draws from a standard normal. No seed is set in this snippet,
# so re-running it will not reproduce the exact counts shown below.
x <- rnorm(100)
cut(x, 5) %>% table()  # 5 intervals of equal width (not equal counts)
#> .
#>  (-2.88,-1.62] (-1.62,-0.368] (-0.368,0.884]   (0.884,2.14]     (2.14,3.4]
#>              5             25             52             17              1
# Explicit breaks: cut() does not extend them. The counts below sum to 99,
# not 100, because the one value above 3 lies outside the breaks, becomes NA,
# and table() omits NA by default.
cut(x, -3:3) %>% table()
#> .
#> (-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]
#>       3      15      27      38      16       0

ntile() in dplyr:

# ntile() splits by rank into 5 groups of equal size (20 values each here),
# in contrast to the equal-width intervals of cut(x, 5).
ntile(x, 5) %>% table()
#> .
#>  1  2  3  4  5
#> 20 20 20 20 20

chop() in santoku:

# Breaks from -5 to 5 are wider than the data. As the output shows, intervals
# are closed on the left, [a, b), whereas cut() above produced (a, b].
chopped <- chop(x, breaks = -5:5)

# Only intervals that actually contain data appear in the table.
chopped %>% table()
#> .
#> [-3, -2) [-2, -1)  [-1, 0)   [0, 1)   [1, 2)   [3, 4)
#>        3       15       27       38       16        1

# chop() returns a factor
# Placing it next to x shows which interval each value was assigned to.
tibble(x, chopped)
#> # A tibble: 100 x 2
#>         x chopped
#>     <dbl> <fct>
#> 1 -0.422  [-1, 0)
#> 2  0.0569 [0, 1)
#> 3  0.711  [0, 1)
#> 4 -1.59   [-2, -1)
#> 5  0.597  [0, 1)
#> 6  1.22   [1, 2)
#> # ... with 94 more rows

If the data extend beyond the limits of breaks, the breaks are extended automatically to cover them. With extend = FALSE, the breaks are left as given, and values beyond their bounds are converted to NA:

# extend = FALSE keeps the breaks exactly as given (-1, 0, 1). Values outside
# that range get no interval and come back as NA (rows 4 and 6 below).
# Note that the last interval is shown closed on both sides: [0, 1].
chopped <- chop(x, breaks = -1:1, extend = FALSE)
tibble(x, chopped)
#> # A tibble: 100 x 2
#>         x chopped
#>     <dbl> <fct>
#> 1 -0.422  [-1, 0)
#> 2  0.0569 [0, 1]
#> 3  0.711  [0, 1]
#> 4 -1.59   <NA>
#> 5  0.597  [0, 1]
#> 6  1.22   <NA>
#> # ... with 94 more rows

To chop a single number into a separate category, put the number twice in breaks:

# Copy x and set the first five values to exactly 0, so that the singleton
# category {0} is guaranteed to contain some data.
x_zeros <- x
x_zeros[1:5] <- 0

# Repeating 0 in the breaks gives it a category of its own, labelled {0}.
chopped <- chop(x_zeros, c(-1, 0, 0, 1))
# Display the vector that was actually chopped (x_zeros, not the original x),
# so that each value is shown next to its own category.
tibble(x_zeros, chopped)
#> # A tibble: 100 x 2
#>   x_zeros chopped
#>     <dbl> <fct>
#> 1    0    {0}
#> 2    0    {0}
#> 3    0    {0}
#> 4    0    {0}
#> 5    0    {0}
#> 6    1.22 (1, 3.39]
#> # ... with 94 more rows

To quickly produce a table of chopped data, use tab():

# tab() chops and tabulates in one call. The breaks -3:3 do not cover the
# largest value, so a final interval reaching up to it (3.39) is added.
tab(x, breaks = -3:3)
#> x
#>  [-3, -2)  [-2, -1)   [-1, 0)    [0, 1)    [1, 2) (3, 3.39]
#>         3        15        27        38        16         1