6.4 Application

6.4.1 Nonlinear Estimation Using Gauss-Newton Algorithm

This section demonstrates nonlinear parameter estimation using the Gauss-Newton algorithm and compares results with nls(). The model is given by:

yi=θ0+θ1xi1+θ2exp(0.4xi)+ϵi

where

  • i=1,,n
  • θ0, θ1, θ2 are the unknown parameters.
  • ϵi represents errors.
  1. Loading and Visualizing the Data
library(dplyr)
library(ggplot2)

# Load the dataset
my_data <- read.delim("images/S21hw1pr4.txt", header = FALSE, sep = "") %>%
  dplyr::rename(x = V1, y = V2)

# Plot data
ggplot(my_data, aes(x = x, y = y)) +
  geom_point(color = "blue") +
  labs(title = "Observed Data", x = "X", y = "Y") +
  theme_minimal()

  1. Deriving Starting Values for Parameters

Since nonlinear optimization is sensitive to starting values, we estimate reasonable initial values based on model interpretation.

Finding the Maximum Y Value

max(my_data$y)
#> [1] 2.6722
my_data$x[which.max(my_data$y)]
#> [1] 0.0094
  • When y=2.6722, the corresponding x=0.0094.

  • From the model equation: θ0+0.0094θ1=2.6722

Estimating θ2 from the Median y Value

  • The equation simplifies to: 1+θ2exp(0.4x)=2
# find mean y
mean(my_data$y) 
#> [1] -0.0747864

# find y closest to its mean
my_data$y[which.min(abs(my_data$y - (mean(my_data$y))))] 
#> [1] -0.0773


# find x closest to the mean y
my_data$x[which.min(abs(my_data$y - (mean(my_data$y))))] 
#> [1] 11.0648
  • This yields the equation: 83.58967θ2=1

Finding the Value of θ0 and θ1

# find value of x closet to 1
my_data$x[which.min(abs(my_data$x - 1))] 
#> [1] 0.9895

# find index of x closest to 1
match(my_data$x[which.min(abs(my_data$x - 1))], my_data$x) 
#> [1] 14

# find y value
my_data$y[match(my_data$x[which.min(abs(my_data$x - 1))], my_data$x)]
#> [1] 1.4577
  • This provides another equation: θ0+θ1×0.98952.164479θ2=1.457

Solving for θ0,θ1,θ2

library(matlib)

# Define coefficient matrix
A = matrix(
  c(0, 0.0094, 0, 0, 0, 83.58967, 1, 0.9895, -2.164479),
  nrow = 3,
  ncol = 3,
  byrow = T
)

# Define constant vector
b <- c(2.6722, 1, 1.457)

# Display system of equations
showEqn(A, b)
#> 0*x1 + 0.0094*x2        + 0*x3  =  2.6722 
#> 0*x1      + 0*x2 + 83.58967*x3  =       1 
#> 1*x1 + 0.9895*x2 - 2.164479*x3  =   1.457

# Solve for parameters
theta_start <- Solve(A, b, fractions = FALSE)
#> x1      =  -279.80879739 
#>   x2    =   284.27659574 
#>     x3  =      0.0119632
theta_start
#> [1] "x1      =  -279.80879739" "  x2    =   284.27659574"
#> [3] "    x3  =      0.0119632"
  1. Implementing the Gauss-Newton Algorithm

Using these estimates, we manually implement the Gauss-Newton optimization.

Defining the Model and Its Derivatives

# Starting values
theta_0_strt <- as.numeric(gsub(".*=\\s*", "", theta_start[1]))
theta_1_strt <- as.numeric(gsub(".*=\\s*", "", theta_start[2]))
theta_2_strt <- as.numeric(gsub(".*=\\s*", "", theta_start[3]))

# Model function
mod_4 <- function(theta_0, theta_1, theta_2, x) {
  (theta_0 + theta_1 * x) / (1 + theta_2 * exp(0.4 * x))
}

# Define function expression
f_4 = expression((theta_0 + theta_1 * x) / (1 + theta_2 * exp(0.4 * x)))

# First derivatives
df_4.d_theta_0 <- D(f_4, 'theta_0')
df_4.d_theta_1 <- D(f_4, 'theta_1')
df_4.d_theta_2 <- D(f_4, 'theta_2')

Iterative Gauss-Newton Optimization

# Initialize
theta_vec <- matrix(c(theta_0_strt, theta_1_strt, theta_2_strt))
delta <- matrix(NA, nrow = 3, ncol = 1)
i <- 1

# Evaluate function at initial estimates
f_theta <- as.matrix(eval(f_4, list(
  x = my_data$x,
  theta_0 = theta_vec[1, 1],
  theta_1 = theta_vec[2, 1],
  theta_2 = theta_vec[3, 1]
)))

repeat {
  # Compute Jacobian matrix
  F_theta_0 <- as.matrix(cbind(
    eval(df_4.d_theta_0, list(
      x = my_data$x,
      theta_0 = theta_vec[1, i],
      theta_1 = theta_vec[2, i],
      theta_2 = theta_vec[3, i]
    )),
    eval(df_4.d_theta_1, list(
      x = my_data$x,
      theta_0 = theta_vec[1, i],
      theta_1 = theta_vec[2, i],
      theta_2 = theta_vec[3, i]
    )),
    eval(df_4.d_theta_2, list(
      x = my_data$x,
      theta_0 = theta_vec[1, i],
      theta_1 = theta_vec[2, i],
      theta_2 = theta_vec[3, i]
    ))
  ))
  
  # Compute parameter updates
  delta[, i] = (solve(t(F_theta_0)%*%F_theta_0))%*%t(F_theta_0)%*%(my_data$y-f_theta[,i])
    
  
  # Update parameter estimates
  theta_vec <- cbind(theta_vec, theta_vec[, i] + delta[, i])
  theta_vec[, i + 1] = theta_vec[, i] + delta[, i]
  
  # Increment iteration counter
  i <- i + 1
  
  # Compute new function values
  f_theta <- cbind(f_theta, as.matrix(eval(f_4, list(
    x = my_data$x,
    theta_0 = theta_vec[1, i],
    theta_1 = theta_vec[2, i],
    theta_2 = theta_vec[3, i]
  ))))
  
  delta = cbind(delta, matrix(NA, nrow = 3, ncol = 1))
  
  # Convergence criteria based on SSE
  if (abs(sum((my_data$y - f_theta[, i])^2) - sum((my_data$y - f_theta[, i - 1])^2)) / 
      sum((my_data$y - f_theta[, i - 1])^2) < 0.001) {
    break
  }
}

# Final parameter estimates
theta_vec[, ncol(theta_vec)]
#> [1]  3.6335135 -1.3055166  0.5043502
  1. Checking Convergence and Variance
# Final objective function value (SSE)
sum((my_data$y - f_theta[, i])^2)
#> [1] 19.80165

sigma2 <- 1 / (nrow(my_data) - 3) * 
  (t(my_data$y - f_theta[, ncol(f_theta)]) %*% 
   (my_data$y - f_theta[, ncol(f_theta)]))  # p = 3

# Asymptotic variance-covariance matrix
as.numeric(sigma2)*as.matrix(solve(crossprod(F_theta_0)))
#>             [,1]        [,2]        [,3]
#> [1,]  0.11552571 -0.04817428  0.02685848
#> [2,] -0.04817428  0.02100861 -0.01158212
#> [3,]  0.02685848 -0.01158212  0.00703916
  1. Validating with nls()
nlin_4 <- nls(
  y ~ mod_4(theta_0, theta_1, theta_2, x),
  start = list(
    theta_0 = as.numeric(gsub(".*=\\s*", "", theta_start[1])),
    theta_1 = as.numeric(gsub(".*=\\s*", "", theta_start[2])),
    theta_2 = as.numeric(gsub(".*=\\s*", "", theta_start[3]))
  ),
  data = my_data
)
summary(nlin_4)
#> 
#> Formula: y ~ mod_4(theta_0, theta_1, theta_2, x)
#> 
#> Parameters:
#>         Estimate Std. Error t value Pr(>|t|)    
#> theta_0  3.63591    0.36528   9.954  < 2e-16 ***
#> theta_1 -1.30639    0.15561  -8.395 3.65e-15 ***
#> theta_2  0.50528    0.09215   5.483 1.03e-07 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.2831 on 247 degrees of freedom
#> 
#> Number of iterations to convergence: 9 
#> Achieved convergence tolerance: 2.294e-07

6.4.2 Logistic Growth Model

A classic logistic growth model follows the equation:

P=K1+exp(P0+rt)+ϵ

where:

  • P = population at time t

  • K = carrying capacity (maximum population)

  • r = population growth rate

  • P0 = initial population log-ratio

However, R’s built-in SSlogis function uses a slightly different parameterization:

P=asym1+exp(xmidtscal)

where:

  • asym = carrying capacity (K)

  • xmid = the x-value at the inflection point of the curve

  • scal = scaling parameter

This gives the parameter relationships:

  • K=asym

  • r=1/scal

  • P0=rxmid

# Simulated time-series data
time <- c(1, 2, 3, 5, 10, 15, 20, 25, 30, 35)
population <- c(2.8, 4.2, 3.5, 6.3, 15.7, 21.3, 23.7, 25.1, 25.8, 25.9)

# Plot data points
plot(time, population, las = 1, pch = 16, main = "Logistic Growth Model")


# Fit the logistic growth model using programmed starting values
logisticModelSS <- nls(population ~ SSlogis(time, Asym, xmid, scal))

# Model summary
summary(logisticModelSS)
#> 
#> Formula: population ~ SSlogis(time, Asym, xmid, scal)
#> 
#> Parameters:
#>      Estimate Std. Error t value Pr(>|t|)    
#> Asym  25.5029     0.3666   69.56 3.34e-11 ***
#> xmid   8.7347     0.3007   29.05 1.48e-08 ***
#> scal   3.6353     0.2186   16.63 6.96e-07 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.6528 on 7 degrees of freedom
#> 
#> Number of iterations to convergence: 1 
#> Achieved convergence tolerance: 1.908e-06

# Extract parameter estimates
coef(logisticModelSS)
#>      Asym      xmid      scal 
#> 25.502890  8.734698  3.635333

To fit the model using an alternative parameterization (K,r,P0), we convert the estimated coefficients:

# Convert parameter estimates to alternative logistic model parameters
Ks <- as.numeric(coef(logisticModelSS)[1])  # Carrying capacity (K)
rs <- -1 / as.numeric(coef(logisticModelSS)[3])  # Growth rate (r)
Pos <- -rs * as.numeric(coef(logisticModelSS)[2])  # P_0

# Fit the logistic model with the alternative parameterization
logisticModel <- nls(
    population ~ K / (1 + exp(Po + r * time)),
    start = list(Po = Pos, r = rs, K = Ks)
)

# Model summary
summary(logisticModel)
#> 
#> Formula: population ~ K/(1 + exp(Po + r * time))
#> 
#> Parameters:
#>    Estimate Std. Error t value Pr(>|t|)    
#> Po  2.40272    0.12702   18.92 2.87e-07 ***
#> r  -0.27508    0.01654  -16.63 6.96e-07 ***
#> K  25.50289    0.36665   69.56 3.34e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.6528 on 7 degrees of freedom
#> 
#> Number of iterations to convergence: 0 
#> Achieved convergence tolerance: 1.924e-06

Visualizing the Logistic Model Fit

# Plot original data
plot(time,
     population,
     las = 1,
     pch = 16,
     main = "Logistic Growth Model Fit")

# Overlay the fitted logistic curve
lines(time,
      predict(logisticModel),
      col = "red",
      lwd = 2)

6.4.3 Nonlinear Plateau Model

This example is based on (Schabenberger and Pierce 2001) and demonstrates the use of a plateau model to estimate the relationship between soil nitrate (NO3) concentration and relative yield percent (RYP) at two different depths (30 cm and 60 cm).

# Load data
dat <- read.table("images/dat.txt", header = TRUE)

# Plot NO3 concentration vs. relative yield percent, colored by depth
library(ggplot2)
dat.plot <- ggplot(dat) + 
  geom_point(aes(x = no3, y = ryp, color = as.factor(depth))) +
  labs(color = 'Depth (cm)') + 
  xlab('Soil NO3 Concentration') + 
  ylab('Relative Yield Percent') +
  theme_minimal()

# Display plot
dat.plot

The suggested nonlinear plateau model is given by:

E(Yij)=(β0j+β1jNij)INijαj+(β0j+β1jαj)INij>αj

where:

  • Nij represents the soil nitrate (NO3) concentration for observation i at depth j.

  • i indexes individual observations.

  • j=1,2 corresponds to depths 30 cm and 60 cm.

This model assumes a linear increase up to a threshold (αj), beyond which the response levels off (plateaus).

Defining the Plateau Model as a Function

# Define the nonlinear plateau model function
nonlinModel <- function(predictor, b0, b1, alpha) {
  ifelse(predictor <= alpha, 
         b0 + b1 * predictor,  # Linear growth below threshold
         b0 + b1 * alpha)      # Plateau beyond threshold
}

Creating a Self-Starting Function for nls

Since the model is piecewise linear, we can estimate starting values using:

  1. A linear regression on the first half of sorted predictor values to estimate b0 and b1.

  2. The last predictor value used in the regression as the plateau threshold (α)

# Define initialization function for self-starting plateau model
nonlinModelInit <- function(mCall, LHS, data) {
  # Sort data by increasing predictor value
  xy <- sortedXyData(mCall[['predictor']], LHS, data)
  n <- nrow(xy)
  
  # Fit a simple linear model using the first half of the sorted data
  lmFit <- lm(xy[1:(n / 2), 'y'] ~ xy[1:(n / 2), 'x'])
  
  # Extract initial estimates
  b0 <- coef(lmFit)[1]  # Intercept
  b1 <- coef(lmFit)[2]  # Slope
  alpha <- xy[(n / 2), 'x']  # Last x-value in the fitted linear range
  
  # Return initial parameter estimates
  value <- c(b0, b1, alpha)
  names(value) <- mCall[c('b0', 'b1', 'alpha')]
  value
}

Combining Model and Self-Start Function

# Define a self-starting nonlinear model for nls
SS_nonlinModel <- selfStart(nonlinModel,
                            nonlinModelInit,
                            c('b0', 'b1', 'alpha'))

The nls function is used to estimate parameters separately for each soil depth (30 cm and 60 cm).

# Fit the model for depth = 30 cm
sep30_nls <- nls(ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha),
                  data = dat[dat$depth == 30,])

# Fit the model for depth = 60 cm
sep60_nls <- nls(ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha),
                  data = dat[dat$depth == 60,])

We generate separate plots for 30 cm and 60 cm depths, showing both confidence and prediction intervals.

# Set plotting layout
par(mfrow = c(1, 2))

# Plot model fit for 30 cm depth
plotFit(
  sep30_nls,
  interval = "both",
  pch = 19,
  shade = TRUE,
  col.conf = "skyblue4",
  col.pred = "lightskyblue2",
  data = dat[dat$depth == 30,],
  main = "Results at 30 cm Depth",
  ylab = "Relative Yield Percent",
  xlab = "Soil NO3 Concentration",
  xlim = c(0, 120)
)

# Plot model fit for 60 cm depth
plotFit(
  sep60_nls,
  interval = "both",
  pch = 19,
  shade = TRUE,
  col.conf = "lightpink4",
  col.pred = "lightpink2",
  data = dat[dat$depth == 60,],
  main = "Results at 60 cm Depth",
  ylab = "Relative Yield Percent",
  xlab = "Soil NO3 Concentration",
  xlim = c(0, 120)
)

summary(sep30_nls)
#> 
#> Formula: ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha)
#> 
#> Parameters:
#>       Estimate Std. Error t value Pr(>|t|)    
#> b0     15.1943     2.9781   5.102 6.89e-07 ***
#> b1      3.5760     0.1853  19.297  < 2e-16 ***
#> alpha  23.1324     0.5098  45.373  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 8.258 on 237 degrees of freedom
#> 
#> Number of iterations to convergence: 6 
#> Achieved convergence tolerance: 3.608e-09
summary(sep60_nls)
#> 
#> Formula: ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha)
#> 
#> Parameters:
#>       Estimate Std. Error t value Pr(>|t|)    
#> b0      5.4519     2.9785    1.83   0.0684 .  
#> b1      5.6820     0.2529   22.46   <2e-16 ***
#> alpha  16.2863     0.2818   57.80   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 7.427 on 237 degrees of freedom
#> 
#> Number of iterations to convergence: 5 
#> Achieved convergence tolerance: 8.571e-09

Modeling Soil Depths Together and Comparing Models

Instead of fitting separate models for different soil depths, we first fit a combined model where all observations share a common slope, intercept, and plateau. We then test whether modeling the two depths separately provides a significantly better fit.

  1. Fitting a Reduced (Combined) Model

The reduced model assumes that all soil depths follow the same nonlinear relationship.

# Fit the combined model (common parameters across all depths)
red_nls <- nls(
  ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha), 
  data = dat
)

# Display model summary
summary(red_nls)
#> 
#> Formula: ryp ~ SS_nonlinModel(predictor = no3, b0, b1, alpha)
#> 
#> Parameters:
#>       Estimate Std. Error t value Pr(>|t|)    
#> b0      8.7901     2.7688   3.175   0.0016 ** 
#> b1      4.8995     0.2207  22.203   <2e-16 ***
#> alpha  18.0333     0.3242  55.630   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 9.13 on 477 degrees of freedom
#> 
#> Number of iterations to convergence: 7 
#> Achieved convergence tolerance: 7.126e-09

# Visualizing the combined model fit
par(mfrow = c(1, 1))
plotFit(
  red_nls,
  interval = "both",
  pch = 19,
  shade = TRUE,
  col.conf = "lightblue4",
  col.pred = "lightblue2",
  data = dat,
  main = 'Results for Combined Model',
  ylab = 'Relative Yield Percent',
  xlab = 'Soil NO3 Concentration'
)

  1. Examining Residuals for the Combined Model

Checking residuals helps diagnose potential lack of fit.

library(nlstools)

# Residual diagnostics using nlstools
resid <- nlsResiduals(red_nls)

# Plot residuals
plot(resid)

If there is a pattern in the residuals (e.g., systematic deviations based on soil depth), this suggests that a separate model for each depth may be necessary.

  1. Testing Whether Depths Require Separate Models

To formally test whether soil depth significantly affects the model parameters, we introduce a parameterization where depth-specific parameters are increments from a baseline model (30 cm depth):

β02=β01+d0β12=β11+d1α2=α1+da

where:

  • β01,β11,α1 are parameters for 30 cm depth.

  • d0,d1,da represent depth-specific differences for 60 cm depth.

  • If d0,d1,da are significantly different from 0, the two depths should be modeled separately.

  1. Defining the Full (Depth-Specific) Model
nonlinModelF <- function(predictor, soildep, b01, b11, a1, d0, d1, da) {
  
  # Define parameters for 60 cm depth as increments from 30 cm parameters
  b02 <- b01 + d0
  b12 <- b11 + d1
  a2 <- a1 + da
  
  # Compute model output for 30 cm depth
  y1 <- ifelse(
    predictor <= a1, 
    b01 + b11 * predictor, 
    b01 + b11 * a1
  )
  
  # Compute model output for 60 cm depth
  y2 <- ifelse(
    predictor <= a2, 
    b02 + b12 * predictor, 
    b02 + b12 * a2
  )
  
  # Assign correct model output based on depth
  y <- y1 * (soildep == 30) + y2 * (soildep == 60)
  
  return(y)
}
  1. Fitting the Full (Depth-Specific) Model

The starting values are taken from the separately fitted models for each depth.

Soil_full <- nls(
  ryp ~ nonlinModelF(
    predictor = no3,
    soildep = depth,
    b01,
    b11,
    a1,
    d0,
    d1,
    da
  ),
  data = dat,
  start = list(
    b01 = 15.2,   # Intercept for 30 cm depth
    b11 = 3.58,   # Slope for 30 cm depth
    a1 = 23.13,   # Plateau cutoff for 30 cm depth
    d0 = -9.74,   # Intercept difference (60 cm - 30 cm)
    d1 = 2.11,    # Slope difference (60 cm - 30 cm)
    da = -6.85    # Plateau cutoff difference (60 cm - 30 cm)
  )
)

# Display model summary
summary(Soil_full)
#> 
#> Formula: ryp ~ nonlinModelF(predictor = no3, soildep = depth, b01, b11, 
#>     a1, d0, d1, da)
#> 
#> Parameters:
#>     Estimate Std. Error t value Pr(>|t|)    
#> b01  15.1943     2.8322   5.365 1.27e-07 ***
#> b11   3.5760     0.1762  20.291  < 2e-16 ***
#> a1   23.1324     0.4848  47.711  < 2e-16 ***
#> d0   -9.7424     4.2357  -2.300   0.0219 *  
#> d1    2.1060     0.3203   6.575 1.29e-10 ***
#> da   -6.8461     0.5691 -12.030  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 7.854 on 474 degrees of freedom
#> 
#> Number of iterations to convergence: 1 
#> Achieved convergence tolerance: 3.742e-06
  1. Model Comparison: Does Depth Matter?
  • If d0,d1,da are significantly different from 0, the depths should be modeled separately.

  • The p-values for these parameters indicate whether depth-specific modeling is necessary.

References

Schabenberger, Oliver, and Francis J Pierce. 2001. Contemporary Statistical Models for the Plant and Soil Sciences. CRC press.