Habitat-species associations

An example with data from Barro Colorado Island (census 7 released in 2012)

Kyle Harms, Sabrina Russo and Mauro Lepore (mantainer; leporem@si.edu)

2017-10-20

This vignette will show you how to determine habitat-species associations. It addresses this research question: Given a number of species and habitat types in a plot, is each species significantly aggregated within habitats?

We will use the packages dplyr, ggplot2 and try. The data comes from the bci package.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)

# Installation https://forestgeo.github.io/try/
library(try)
# Installation https://forestgeo.github.io/bci/ or download
# bci.full.Rdata31Aug2012 from https://repository.si.edu/handle/10088/20925
# and use bci.full7 instead of bci::bci12full7.
library(bci)

cns <- bci::bci12full7
# To use YOUR OWN DATA, you may run something like this:
# (Warning: large data sets may run slowly)

load("<PATH>/<CENSUS_DATA>.rdata")
census_data <- <CENSUS_DATA>

load("<PATH>/<HABITAT_DATA>.rdata")
habitat_data <- <HABITAT_DATA>

load("<PATH>/<ELEVATION_DATA>.rdata")
elevation_data <- <ELEVATION_DATA>$col

The Torus Translation Test is implemented via the function tttest(). tttest() ensures that both habitat data and species’ abundance data share the same spatial scale. The match is enforced by extracting plot dimensions from the habitat data. tttest() takes a vector with the names of the species you want to test.

For example, you may want to test all species which number of alive stems is at or over some treshold:

filtered_alive_sp <- filter_alive_sp(censdata = cns, n = 7000)

multi_sp_result <- tttest(
  species = filtered_alive_sp, 
  censdata = cns, 
  habitats = bci_habitat
)
# Traspose to display nicer
t(multi_sp_result)
#>                  alsebl   des2pa   faraoc   hybapr   mourmy   tri2tu
#> N.Hab.1        5.03e+02 4.87e+02 1.23e+03 1.22e+03  282.000 4.08e+02
#> Gr.Hab.1       8.58e+02 4.80e+01 3.81e+02 1.71e+02  202.000 2.18e+02
#> Ls.Hab.1       3.91e+02 1.20e+03 8.68e+02 1.08e+03 1047.000 1.03e+03
#> Eq.Hab.1       1.00e+00 1.00e+00 1.00e+00 1.00e+00    1.000 1.00e+00
#> Rep.Agg.Neut.1 0.00e+00 0.00e+00 0.00e+00 0.00e+00    0.000 0.00e+00
#> Obs.Quantile.1 6.86e-01 3.84e-02 3.05e-01 1.37e-01    0.162 1.74e-01
#> N.Hab.2        1.33e+03 1.98e+03 3.85e+03 5.25e+03 1132.000 2.30e+03
#> Gr.Hab.2       5.80e+01 2.91e+02 6.40e+01 4.95e+02  290.000 8.51e+02
#> Ls.Hab.2       1.19e+03 9.58e+02 1.18e+03 7.54e+02  959.000 3.98e+02
#> Eq.Hab.2       1.00e+00 1.00e+00 1.00e+00 1.00e+00    1.000 1.00e+00
#> Rep.Agg.Neut.2 0.00e+00 0.00e+00 0.00e+00 0.00e+00    0.000 0.00e+00
#> Obs.Quantile.2 4.64e-02 2.33e-01 5.12e-02 3.96e-01    0.232 6.81e-01
#> N.Hab.3        3.66e+03 5.30e+03 1.27e+04 1.45e+04 3790.000 3.88e+03
#> Gr.Hab.3       1.09e+02 4.57e+02 8.72e+02 1.16e+03 1121.000 3.20e+01
#> Ls.Hab.3       1.14e+03 7.92e+02 3.77e+02 8.70e+01  128.000 1.22e+03
#> Eq.Hab.3       1.00e+00 1.00e+00 1.00e+00 1.00e+00    1.000 1.00e+00
#> Rep.Agg.Neut.3 0.00e+00 0.00e+00 0.00e+00 0.00e+00    0.000 0.00e+00
#> Obs.Quantile.3 8.72e-02 3.66e-01 6.98e-01 9.30e-01    0.897 2.56e-02
#> N.Hab.4        2.43e+03 3.26e+03 7.23e+03 6.93e+03 1600.000 4.25e+03
#> Gr.Hab.4       1.25e+03 1.11e+03 9.95e+02 3.44e+02  462.000 1.22e+03
#> Ls.Hab.4       3.00e+00 1.37e+02 2.54e+02 9.05e+02  787.000 2.40e+01
#> Eq.Hab.4       1.00e+00 1.00e+00 1.00e+00 1.00e+00    1.000 1.00e+00
#> Rep.Agg.Neut.4 1.00e+00 0.00e+00 0.00e+00 0.00e+00    0.000 1.00e+00
#> Obs.Quantile.4 9.97e-01 8.90e-01 7.96e-01 2.75e-01    0.370 9.80e-01

Or use the name of one species to test that species only:

one_sp_result <- tttest(
  species = "hybapr",
  censdata = cns,
  habitats = bci_habitat
)

# traspose to see easier
t(one_sp_result)
#>                  hybapr
#> N.Hab.1        1.22e+03
#> Gr.Hab.1       1.71e+02
#> Ls.Hab.1       1.08e+03
#> Eq.Hab.1       1.00e+00
#> Rep.Agg.Neut.1 0.00e+00
#> Obs.Quantile.1 1.37e-01
#> N.Hab.2        5.25e+03
#> Gr.Hab.2       4.95e+02
#> Ls.Hab.2       7.54e+02
#> Eq.Hab.2       1.00e+00
#> Rep.Agg.Neut.2 0.00e+00
#> Obs.Quantile.2 3.96e-01
#> N.Hab.3        1.45e+04
#> Gr.Hab.3       1.16e+03
#> Ls.Hab.3       8.70e+01
#> Eq.Hab.3       1.00e+00
#> Rep.Agg.Neut.3 0.00e+00
#> Obs.Quantile.3 9.30e-01
#> N.Hab.4        6.93e+03
#> Gr.Hab.4       3.44e+02
#> Ls.Hab.4       9.05e+02
#> Eq.Hab.4       1.00e+00
#> Rep.Agg.Neut.4 0.00e+00
#> Obs.Quantile.4 2.75e-01

Your habitat data should have column names x and y; or you will get an error:

# Replace name x by xx to show error
no_x <- rename(bci_habitat, xx = x)
names(no_x)
#> [1] "xx"       "y"        "elev"     "habitats"

tttest(
  species = "hybapr",
  censdata = cns,
  habitats = no_x
)
#> Error in assert_are_names_matching(habitats, c("x", "y")): is_matching_regex : match does not match collapsed_names
#> There was 1 failure:
#>   Position Value                                       Cause
#> 1        1     x does not match '^xx$|^y$|^elev$|^habitats$'

But either name habitat or habitats is ok:

# Previus runs passed with variable `habitats`
names(bci_habitat)
#> [1] "x"        "y"        "elev"     "habitats"

# Now we use variable `habitats`
habitat_not_habitats <- rename(bci_habitat, habitat = habitats)
names(habitat_not_habitats)
#> [1] "x"       "y"       "elev"    "habitat"

tttest(
  species = "hybapr",
  censdata = cns,
  habitats = habitat_not_habitats
)
#>        N.Hab.1 Gr.Hab.1 Ls.Hab.1 Eq.Hab.1 Rep.Agg.Neut.1 Obs.Quantile.1
#> hybapr    1216      171     1078        1              0          0.137
#>        N.Hab.2 Gr.Hab.2 Ls.Hab.2 Eq.Hab.2 Rep.Agg.Neut.2 Obs.Quantile.2
#> hybapr    5246      495      754        1              0          0.396
#>        N.Hab.3 Gr.Hab.3 Ls.Hab.3 Eq.Hab.3 Rep.Agg.Neut.3 Obs.Quantile.3
#> hybapr   14454     1162       87        1              0           0.93
#>        N.Hab.4 Gr.Hab.4 Ls.Hab.4 Eq.Hab.4 Rep.Agg.Neut.4 Obs.Quantile.4
#> hybapr    6930      344      905        1              0          0.275

Instead of all stems, you may want to analyze a specific size class. For that, you need to filter the data by size before you pass the filtered data to tttest().

filtered_size <- filter(cns, between(dbh, 10, 50))

filtered_size_result <- tttest(
  species = "hybapr",
  censdata = filtered_size,
  habitats = bci_habitat
)
t(filtered_size_result)
#>                  hybapr
#> N.Hab.1        1.20e+03
#> Gr.Hab.1       1.71e+02
#> Ls.Hab.1       1.08e+03
#> Eq.Hab.1       1.00e+00
#> Rep.Agg.Neut.1 0.00e+00
#> Obs.Quantile.1 1.37e-01
#> N.Hab.2        5.19e+03
#> Gr.Hab.2       4.64e+02
#> Ls.Hab.2       7.85e+02
#> Eq.Hab.2       1.00e+00
#> Rep.Agg.Neut.2 0.00e+00
#> Obs.Quantile.2 3.71e-01
#> N.Hab.3        1.42e+04
#> Gr.Hab.3       1.11e+03
#> Ls.Hab.3       1.35e+02
#> Eq.Hab.3       1.00e+00
#> Rep.Agg.Neut.3 0.00e+00
#> Obs.Quantile.3 8.91e-01
#> N.Hab.4        6.86e+03
#> Gr.Hab.4       4.68e+02
#> Ls.Hab.4       7.81e+02
#> Eq.Hab.4       1.00e+00
#> Rep.Agg.Neut.4 0.00e+00
#> Obs.Quantile.4 3.74e-01

Finally, to may want to see the distribution of one species on the plot by habitat type. Next, we filter and map the species we want, and we overlay habitat and elevation data.

# NOTE: For some reason, habitat and species data don't match perfectly but 
# have an offset of half gridsize. That offset is corrected next, but if 
# you know why this offset exists in the first place, please let me konw
# (maurolepore@gmail.com).

# Correct offset between species data and habitat data
bci_habitat <- mutate(
  bci_habitat,
  x = x + extract_gridsize(bci_habitat) / 2,
  y = y + extract_gridsize(bci_habitat) / 2
)
filtered_census <- filter(cns, sp == "hybapr")

ggplot(filtered_census, aes(x = gx, y = gy)) +
  # Small points (".") reduce overplotting
  geom_point(shape = ".") +
  # Overlay semi-transparent habitat types
  geom_raster(
    data = bci_habitat, aes(x, y, fill = as.factor(habitats)), alpha = 1/2
  ) + 
  # Foreground also indicates elevation
  geom_contour(data = bci_elevation, aes(x, y, z = elev)) +
  # Force ratio between x and y units to be 1:1
  coord_fixed() +
  labs(fill = "Habitat")
Spatial distribution of 'hybapr' during census 7 from BCI (data released in 2012).

Spatial distribution of ‘hybapr’ during census 7 from BCI (data released in 2012).