library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
library(mvpart)
# source("C:/Users/dora/Downloads/mvpart_1.1-1/mvpart/R/mvpart.R")

Installation

The mvpart package is no longer active on CRAN but can be installed from the archives.

# install.packages("devtools")
devtools::install_github("cran/mvpart")

Or download a realease from https://github.com/cran/mvpart/releases and install it with something like:

install.packages(
  "C:/Users/dora/Desktop/mvpart-1.6-2.tar.gz", 
  repos = NULL, type = "source"
)

Some people reported installation issues (https://goo.gl/oDjjz8).

Data

Overview data

2 files attached:

  • KC3spp20 – is the species matrix of abundance in each of 600 quadrats
# See a few columns from the beginning, middle and end
KC3spp20 %>% dplyr::select(1:2, 250:252, 583:585)
#> # A tibble: 600 x 8
#>    ACMEAC ACTEJA GARCPA GARCRO GARCS1 ZIZYAN ZIZYCA ZIZYXX
#>     <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
#>  1      0      2      0      0      0      0      0      0
#>  2      0      2      0      0      0      0      0      0
#>  3      0      0      0      0      0      0      0      0
#>  4      0      0      0      0      0      0      0      0
#>  5      0      0      0      0      1      0      0      0
#>  6      0      0      0      0      0      0      0      0
#>  7      0      0      0      0      0      0      0      0
#>  8      0      3      0      0      0      0      0      0
#>  9      0      0      0      0      0      0      0      0
#> 10      0      0      0      0      0      0      0      0
#> # ... with 590 more rows

# For a cleaner output for interpretation, normalized tree spp data to a mean of
# 0 and standard deviation of 1 (https://goo.gl/zDLdMi).
  • kc.hab – a bunch of habitat variables for the same 600 quadrats
kc.hab %>% dplyr::glimpse()
#> Observations: 600
#> Variables: 17
#> $ RB_NO3   <dbl> 38.11836, 37.64459, 37.84317, 35.11749, 31.03317, 29....
#> $ RB_NH4   <dbl> 14.024729, 12.900971, 11.899093, 11.075753, 10.416937...
#> $ RB_PO4   <dbl> 0.2903664, 0.2953065, 0.3118399, 0.3349684, 0.3549544...
#> $ Al       <dbl> 0.2653886, 0.2901095, 0.3443849, 0.4109257, 0.4416475...
#> $ pH_water <dbl> 5.706885, 5.682961, 5.605031, 5.584619, 5.643615, 5.6...
#> $ Na       <dbl> 4.24959e-06, 3.92321e-06, 3.17810e-06, 1.05188e-06, 2...
#> $ Mn       <dbl> 0.02355666, 0.02396195, 0.02802107, 0.02861206, 0.023...
#> $ Mg       <dbl> 0.4802953, 0.4917929, 0.5061534, 0.5033838, 0.4756754...
#> $ K        <dbl> 0.1665143, 0.1708170, 0.1873153, 0.1976758, 0.1931228...
#> $ Fe       <dbl> 9.70257e-06, 1.16542e-05, 1.41739e-05, 1.74376e-05, 2...
#> $ Ca       <dbl> 1.0141365, 1.0520401, 1.0804347, 0.9086867, 0.6853189...
#> $ BS       <dbl> 0.8506632, 0.8397293, 0.8163130, 0.7863761, 0.7639926...
#> $ ECEC     <dbl> 2.048155, 2.053425, 2.160907, 2.129814, 1.944862, 1.7...
#> $ Bray_P   <dbl> 4.888940, 4.889003, 4.965516, 4.949387, 4.768834, 4.6...
#> $ meanelev <dbl> 152.6194, 149.7764, 147.2560, 145.9847, 142.9219, 137...
#> $ convex   <dbl> 0.3956250, -0.5323750, -1.9500000, -0.1047500, 0.6016...
#> $ slope    <dbl> 9.402307, 11.837283, 11.064807, 11.843923, 16.917556,...

Results

In this section, we first explore one result in detail; then we’ll re-run the exact same model twice more and we’ll compare the results.

Build and run the model

abundance <- data.matrix(KC3spp20)
environmental_variables <- kc.hab

formula <- abundance ~ RB_NO3 + RB_NH4 + RB_PO4 + Al + pH_water + Na + Mn + 
  Mg + K + Fe + Ca + BS + ECEC + Bray_P + meanelev + convex + slope

# Set a new seed for random numbers to ensure results are reproducible
set.seed(1221)

# See `?mvpart()` for argument details
mvpart_run1 <- mvpart(
  form = formula, 
  data = environmental_variables,
  all.leaves = TRUE,  # annotate all nodes
  rsq = TRUE,  # give "rsq" plot
  pca = TRUE,  # plot PCA of group means and add species and site information
  wgt.ave.pca = TRUE  # plot weighted averages acorss sites for species
)
#> rpart(formula = form, data = data)
#> 
#> Variables actually used in tree construction:
#> [1] Bray_P   BS       Fe       meanelev pH_water RB_PO4  
#> 
#> Root node error: 483776/600 = 806.29
#> 
#> n= 600 
#> 
#>         CP nsplit rel error  xerror     xstd
#> 1 0.083600      0   1.00000 1.00355 0.040850
#> 2 0.025010      2   0.83280 0.85445 0.035633
#> 3 0.021388      3   0.80779 0.85882 0.035499
#> 4 0.021148      4   0.78640 0.84342 0.035072
#> 5 0.021020      5   0.76525 0.84155 0.035058
#> 6 0.020172      6   0.74423 0.82880 0.034711
#> 7 0.014623      7   0.72406 0.80000 0.033189
#> May not be applicable for this method

Model details

Structure

str(mvpart_run1)
#> List of 13
#>  $ frame    :'data.frame':   15 obs. of  9 variables:
#>   ..$ var       : Factor w/ 18 levels "<leaf>","RB_NO3",..: 4 16 13 1 1 6 1 1 11 16 ...
#>   ..$ n         : int [1:15] 600 119 47 18 29 72 69 3 481 154 ...
#>   ..$ wt        : num [1:15] 600 119 47 18 29 72 69 3 481 154 ...
#>   ..$ dev       : num [1:15] 483776 128994 43170 12243 20580 ...
#>   ..$ yval      : num [1:15] 0.276 0.254 0.226 0.204 0.241 ...
#>   ..$ complexity: num [1:15] 0.0836 0.02501 0.02139 0.0063 0.00854 ...
#>   ..$ ncompete  : num [1:15] 4 4 4 0 0 4 0 0 4 4 ...
#>   ..$ nsurrogate: num [1:15] 0 0 0 0 0 0 0 0 0 0 ...
#>   ..$ yval2     : num [1:15, 1:585] 0.0567 0 0 0 0 ...
#>  $ where    : int [1:600] 14 14 11 11 11 11 11 11 11 11 ...
#>  $ call     : language mvpart(form = formula, data = environmental_variables, all.leaves = TRUE,      rsq = TRUE, pca = TRUE, wgt.ave.pca = TRUE)
#>  $ terms    :Classes 'terms', 'formula'  language abundance ~ RB_NO3 + RB_NH4 + RB_PO4 + Al + pH_water + Na + Mn + Mg +      K + Fe + Ca + BS + ECEC + Bray_P + meanelev + convex + slope
#>   .. ..- attr(*, "variables")= language list(abundance, RB_NO3, RB_NH4, RB_PO4, Al, pH_water, Na, Mn, Mg,      K, Fe, Ca, BS, ECEC, Bray_P, meanelev, convex, slope)
#>   .. ..- attr(*, "factors")= int [1:18, 1:17] 0 1 0 0 0 0 0 0 0 0 ...
#>   .. .. ..- attr(*, "dimnames")=List of 2
#>   .. .. .. ..$ : chr [1:18] "abundance" "RB_NO3" "RB_NH4" "RB_PO4" ...
#>   .. .. .. ..$ : chr [1:17] "RB_NO3" "RB_NH4" "RB_PO4" "Al" ...
#>   .. ..- attr(*, "term.labels")= chr [1:17] "RB_NO3" "RB_NH4" "RB_PO4" "Al" ...
#>   .. ..- attr(*, "order")= int [1:17] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..- attr(*, "intercept")= int 1
#>   .. ..- attr(*, "response")= int 1
#>   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
#>   .. ..- attr(*, "predvars")= language list(abundance, RB_NO3, RB_NH4, RB_PO4, Al, pH_water, Na, Mn, Mg,      K, Fe, Ca, BS, ECEC, Bray_P, meanelev, convex, slope)
#>   .. ..- attr(*, "dataClasses")= Named chr [1:18] "nmatrix.585" "numeric" "numeric" "numeric" ...
#>   .. .. ..- attr(*, "names")= chr [1:18] "abundance" "RB_NO3" "RB_NH4" "RB_PO4" ...
#>  $ cptable  : num [1:7, 1:5] 0.0836 0.025 0.0214 0.0211 0.021 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:7] "1" "2" "3" "4" ...
#>   .. ..$ : chr [1:5] "CP" "nsplit" "rel error" "xerror" ...
#>  $ splits   : num [1:35, 1:5] 600 600 600 600 600 119 119 119 119 119 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:35] "RB_PO4" "RB_NO3" "Fe" "Bray_P" ...
#>   .. ..$ : chr [1:5] "count" "ncat" "improve" "index" ...
#>  $ method   : chr "mrt"
#>  $ dissim   : chr "euclidean"
#>  $ parms    : num 0
#>  $ control  :List of 9
#>   ..$ minsplit      : num 5
#>   ..$ minbucket     : num 2
#>   ..$ cp            : num 0.01
#>   ..$ maxcompete    : num 4
#>   ..$ m