Week 7 Model evaluation II

There are multiple ways to evaluate models. What we have learned so far is Information Criteria (e.g., AIC and BIC). Instead of using NLL (which quantifies goodness-of-fit), we also punish model complexity given number of parameters.
Today, we will learn three more methods to evaluate models:
1. Ex post simulation
2. model recovery (using two or two more models)
3. parameter recovery (focusing on one model)

# Week 7: Model evaluation II
# First and Last name
# date
  • To start the new project, clear working environment with rm(list=ls()).
# clear working environment
rm(list=ls())
  • Load the package
# Open "rtdists" package
library(rtdists)
  • Load data
# Load data from the "rtdists" package
data(rr98)
# Double-click rr98 in your Environment panel (right-top window)
# In the dataset, there are 3 participants, who completed ~ 4000 trials in each condition (speed/accuracy).
  • Preprocess data to make sure the data and data structure are suitable for follow-up model fitting.
# Preprocess data
rr98 <- rr98[!rr98$outlier,]  # remove outliers, as in original paper
rr98[,'RT'] <- rr98$rt
rr98[,'accuracy'] = 0 # add a new column labeled as accuracy, where incorrect responses are coded as 0
rr98[rr98$correct, 'accuracy'] = 1 # and correct responses are coded as 1
rr98$choice = rr98$response
rr98$response = as.character(rr98$response)
rr98[rr98$correct == 1, "response"] = "upper"
rr98[rr98$correct == 0, "response"] = "lower"
head(rr98)

7.1 Ex Post Simulation

After the model comparison using AIC and/or BIC, we only recognize the relatively good model with relative balance of goodness-of-fit and complexity). To further verify if the model captures the important behavioral patterns, we use the estimated parameter values to run the simulation and ideally, reproduce the patterns of interest.

# Extract the data we need for simulating data using DDM1 (with varied drift rate)
# if drift rate is divided by instruction manipulation rather than strength and the order of manipulations, then the only information influencing model simulation is number of trials for each manipulation.
nTrial.speed <- sum(rr98['instruction'] == 'speed')
nTrial.accuracy <- sum(rr98['instruction'] == 'accuracy')
nTrial.speed
nTrial.accuracy

7.1.1 DDM1: Simulate data

First, we simulate data with estimated parameters using rdiffusion, which requires number of data and parameters.

# load the result 
load("Week6_DDMresults.Rdata")
# Simulate data with existing function
DDM1simu.speed <- rdiffusion(nTrial.speed, v = results.DDM1[1,2], a = results.DDM1[1,4], t0 = results.DDM1[1,5]/1000)
DDM1simu.accuracy <- rdiffusion(nTrial.accuracy, v = results.DDM1[1,3], a = results.DDM1[1,4], t0 = results.DDM1[1,5]/1000)

Second, summarize the accuracy rate.

#summarize accuracy rate for simulated data and real data
sum(DDM1simu.speed["response"] == "upper")/length(DDM1simu.speed[,1])
sum(DDM1simu.accuracy["response"] == "upper")/length(DDM1simu.speed[,1])
sum(rr98["instruction"] == "speed" & rr98["response"] == "upper")/sum(rr98["instruction"] == "speed")
sum(rr98["instruction"] == "accuracy" & rr98["response"] == "upper")/sum(rr98["instruction"] == "accuracy")

Third, we can then visualize the simulated and real data via either histogram or ttest or any function you want to compare the real and simulated data. Agian, ideally the simulated data from the model should be able to reproduce important behavioral patterns/effects.

# Visualize the simulated and real data (adapt from Jannnick's code)
#Plot RTs distributions
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic

###RTs in speed condition
## real data
ID.speed = rr98["instruction"] == "speed"
hist(rr98[ID.speed,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Speed condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.speed,"rt"]*1000),lwd=5, lty=2) #Add abline to visualize the mean of RT 
## simulated data
hist(DDM1simu.speed[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Speed condition (simulated data)"), col = c("blue"))
#Add abline to visualize the mean of RT 
abline(v=mean(DDM1simu.speed[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT 

###RTs in accuracy condition
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
ID.accuracy = rr98["instruction"] == "accuracy"
hist(rr98[ID.accuracy,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Accuracy condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.accuracy,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT 
## simulated data
hist(DDM1simu.accuracy[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Accuracy condition (simulated data)"), col = c("green"))
abline(v=mean(DDM1simu.accuracy[,"rt"]*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT 


###RTs in both conditions
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
hist(rr98[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Real data regardless of condition"), col = c("gray"))
abline(v=mean(rr98[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT 
## simulated data
hist(c(DDM1simu.speed[,"rt"], DDM1simu.accuracy[,"rt"])*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Simulated data regardless of condition"), col = c("yellow"))
abline(v=mean(c(DDM1simu.speed[,"rt"], DDM1simu.accuracy[,"rt"])*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT 

7.1.2 DDM2: Simulate data

Here, we repeat the same procedure to test DDM2.

# Simulate data with existing function
DDM2simu.speed <- rdiffusion(nTrial.speed, v = results.DDM2[1,2], a = results.DDM2[1,3], t0 = results.DDM2[1,5]/1000)
DDM2simu.accuracy <- rdiffusion(nTrial.accuracy, v = results.DDM2[1,2], a = results.DDM2[1,4], t0 = results.DDM2[1,5]/1000)
#summarize accuracy rate for simulated data and real data
sum(DDM2simu.speed["response"] == "upper")/length(DDM2simu.speed[,1])
sum(DDM2simu.accuracy["response"] == "upper")/length(DDM2simu.speed[,1])
sum(rr98["instruction"] == "speed" & rr98["response"] == "upper")/sum(rr98["instruction"] == "speed")
sum(rr98["instruction"] == "accuracy" & rr98["response"] == "upper")/sum(rr98["instruction"] == "accuracy")
# Visualize the simulated and real data (adapt from Jannnick's code)
#Plot RTs distributions
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic

###RTs in speed condition
## real data
ID.speed = rr98["instruction"] == "speed"
hist(rr98[ID.speed,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Speed condition (real data)"), col = c("gray"))
#Add abline to visualize the mean of RT 
abline(v=mean(rr98[ID.speed,"rt"]*1000),lwd=5, lty=2)
## simulated data
hist(DDM2simu.speed[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Speed condition (simulated data)"), col = c("blue"))
#Add abline to visualize the mean of RT 
abline(v=mean(DDM2simu.speed[,"rt"]*1000),lwd=5, lty=2)

###RTs in accuracy condition
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
ID.accuracy = rr98["instruction"] == "accuracy"
hist(rr98[ID.accuracy,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Accuracy condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.accuracy,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT 
## simulated data
hist(DDM2simu.accuracy[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Accuracy condition (simulated data)"), col = c("green"))
abline(v=mean(DDM2simu.accuracy[,"rt"]*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT 


###RTs in both conditions
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
hist(rr98[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2. Real data regardless of condition"), col = c("gray"))
abline(v=mean(rr98[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT 
## simulated data
hist(c(DDM2simu.speed[,"rt"], DDM2simu.accuracy[,"rt"])*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2. Simulated data regardless of condition"), col = c("yellow"))
abline(v=mean(c(DDM2simu.speed[,"rt"], DDM2simu.accuracy[,"rt"])*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT 

According to the visual inspection, the difference between real RT distribution and simulated RT distribution are similar if we didn’t split the RT distribution based on the instruction manipulation. This is the case for both DDM1 and DDM2. When we split the data into speed condition and accuracy condition, the difference is more obvious if data is simulated by DDM1, which is not worse model in the model space.

7.2 Model recovery

Model recovery/ identifiability assesses whether the model selection criteria (e.g., AIC, BIC) can correctly identify the “true” model used to generate the synthetic data. If each model in the model space generates and captures unique behavioral patterns, then it is not difficult to find out the “true” model given simulated data/patterns. But of course, the recovery ability also relies on the model selection criteria. For example,BIC prefers simpler models more than AIC (Wagenmakers and Farrell, 2004). In this section, we will walk through the model recovery:
- Choose and build two (or more) models in your model space
- Simulate data with each model and with proper range of parameter values
- Fit simulated dataset to all the model and compute model comaprison criteria (e.g., nLL/AIC/BIC …)
- Summarize model recovery with a confusion matrix, which quantifies the probability or frequency that each model is the best fit to data generated from the other models (Wilson, R. C., & Collins, A. G., 2019)

7.2.1 Build up models

In this section, we will only include two models in our model space. These two models are original DDM (oDDM) with only three free parameters and extended DDM (eDDM) with varied thresholds.

## First model: oDDM with only three free parameters: v, a, t0
oDDM <- function(pars, dat)
{
  log_lik <- sum(log(ddiffusion(rt=dat[dat["response"] == "upper", "rt"],
                                response="upper",  v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
  log_lik <- log_lik +
    sum(log(ddiffusion(rt=dat[dat["response"] == "lower", "rt"],
                       response="lower",  v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
  if (any((-2*log_lik == 0))) return(1e6)
  return(-2*log_lik)
}

Extend the model above by splitting threshold or a based on instruction manipulations.

## Second model: DDM2, which extends the oDDM by splitting threshold based on instruction manipulations
# parameters: v, a_speed, a_accuracy, t0
# Note: order of parameter matters!! always keep tracking the order of parameter.
eDDM <- function(pars, dat)
{
  log_lik <- sum(log(ddiffusion(rt=dat[dat["response"] == "upper" & dat["instruction"] == "speed", "rt"],
                                response="upper",  v=pars[1], a=pars[2], t0=pars[4])))
  log_lik <- log_lik +
    sum(log(ddiffusion(rt=dat[dat["response"] == "upper" & dat["instruction"] == "accuracy", "rt"],
                       response="upper",  v=pars[1], a=pars[3], t0=pars[4])))
  log_lik <- log_lik +
    sum(log(ddiffusion(rt=dat[dat["response"] == "lower" & dat["instruction"] == "speed", "rt"],
                       response="lower",  v=pars[1], a=pars[2], t0=pars[4])))
  log_lik <- log_lik +
    sum(log(ddiffusion(rt=dat[dat["response"] == "lower" & dat["instruction"] == "accuracy", "rt"],
                       response="lower",  v=pars[1], a=pars[3], t0=pars[4])))
  if (any((-2*log_lik == 0))) return(1e6)
  return(-2*log_lik)
}

7.2.2 Simulation

Imagine you are running an experiment with only one subject, who is instructed to make decision (e.g., monitor is bright or dark) as fast as possible for 500 trials and to make decision as accurate as possible for the rest of 500 trials.
Like Ex-Post simulation introduced in week7, we simulate data based on the models of interest.
Unlike Ex-Post simulation, we determine number of trials and sets of parameters by ourselves.
1. Determine number of trials

# number of trials in each experiment 
nTrial.speed <- 500
nTrial.accuracy <- 500
  1. Determine a set of parameter values
# Determine a set of parameter values
ParameterSet <-c(runif(n=1, min=0.5,  max=1),
                 runif(n=1, min=0.5,  max=2),
                 runif(n=1, min=0.1,  max=0.3))
  1. Simulate data given model and set of parameter values
# Simulate data given model and set of parameter values
# pre-allocation
DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
DDMsimu.accuracy <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))

# Simulation
DDMsimu.speed <- rdiffusion(nTrial.speed, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
DDMsimu.accuracy <- rdiffusion(nTrial.accuracy, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])

# Organize simulated data better by including the condition
DDMsimu.speed$instruction <- "speed"
DDMsimu.accuracy$instruction <- "accuracy"

7.2.3 Simulation and model fitting

What if we conduct the same experiment with 200 synthetic subjects and with two hypothetical models? Simple! We just need to repeat the same simulation procedure 200 times for each model. This repetition can be addressed by the for function. In order to mimic the individual difference on information processing, we use different sets of parameter values for each synthetic subject in each model. The simulation for each subject is then followed by a model fitting.
This exercise will take ~10 min. You will see the current progress on the Console window.

## Simulation and model fitting
# predetermine 
nObservation <-200 # number of samples
nParam.oDDM <-3; # number of parameters in oDDM
nParam.eDDM <-4; # number of parameters in eDDM

# pre-allocation
AIC.oDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
AIC.eDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
BIC.oDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
BIC.eDDM<- matrix(data=NA,nrow=nObservation,ncol=2)

# for loop to run the experiment N times. Each time we simulate data with both oDDM and eDDM.
for (kSim in 1:nObservation) {
  print(kSim/nObservation) # update current progress in the Console window
  # ----------------------
  # Simulate data with original DDM
  ParameterSet <-c(runif(n=1, min=0.5,  max=1),
                          runif(n=1, min=0.5,  max=2),
                          runif(n=1, min=0.1,  max=0.3))
  
  # pre-allocation
  DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
  DDMsimu.accuracy <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
  
  # simulation
  DDMsimu.speed <- rdiffusion(nTrial.speed, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
  DDMsimu.accuracy <- rdiffusion(nTrial.accuracy, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
  DDMsimu.speed$instruction <- "speed"
  DDMsimu.accuracy$instruction <- "accuracy"
  oDDMsimu.All <- rbind(DDMsimu.speed,DDMsimu.accuracy)
  
  
  
  # ----------------------
  # Simulate data with extended function
  eParameterSet <-c(runif(n=1, min=0.5,  max=1),
                           runif(n=1, min=0.5,  max=2),
                           runif(n=1, min=0.5,  max=2),
                           runif(n=1, min=0.1,  max=0.3))
  
  # pre-allocation
  DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
  DDMsimu.accuracy <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
  
  # simulation
  DDMsimu.speed <- rdiffusion(nTrial.speed, v = eParameterSet[1], a = eParameterSet[2], t0 = eParameterSet[4])
  DDMsimu.accuracy <- rdiffusion(nTrial.accuracy, v = eParameterSet[1], a = eParameterSet[3], t0 = eParameterSet[4])
  DDMsimu.speed$instruction <- "speed"
  DDMsimu.accuracy$instruction <- "accuracy"
  eDDMsimu.All <- rbind(DDMsimu.speed,DDMsimu.accuracy)
  
  
  
  # ----------------------
  # model fitting (original DDM)
  for (kDDMdata in 1:2) {
    if (kDDMdata ==1){
      data <- oDDMsimu.All
    } else {
      data<- eDDMsimu.All
    }
    
    # 1. set up lower and upper boundary for each parameter
    # Also, check if the same range is used for the same parameter
    lb <-c(-5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
    ub <-c(5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
    
    # 2. Create optimization function with an additional input: number of repetition.
    # (The code is adapted from Mikhail Spektor's).
    results.oDDM<-setNames(data.frame(matrix(ncol = 5, nrow = 0)), c("nRepetition","v", "a","t0 (ms)", "nll"))
    run <- 1
    optimization_runs <-3
    
    # Run the parameter estimation for several times (this will take few seconds/minutes to complete)
    while (run <= optimization_runs){ 
      starting_values <- c(runif(n=1, min=-5,  max=5), 
                           runif(n=1, min=0.5,  max=5),
                           runif(n=1, min=0,  max=0.1))
      while (oDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5,  max=5),
                                                                      runif(n=1, min=0.5,  max=5),
                                                                      runif(n=1, min=0,  max=0.1))}
      
      # parameter estimation procedure using nlminb
      fitDDM <- nlminb(start=starting_values, objective=oDDM,
                       lower = lb, 
                       upper= ub, 
                       dat=data)# the data is the one we simulated above
      deviance <- fitDDM[["objective"]]
      params <- fitDDM[["par"]]
      
      # store results for each iterate
      results.oDDM[run,] <- c(run,
                              round(params[1],2),
                              round(params[2],2),
                              round(params[3],2), 
                              deviance)
      run <- run + 1}
    
    
    
    # ----------------------
    # model fitting (extended DDM)
    # 1. set up lower and upper boundary for each parameter
    # Also, check if the same range is used for the same parameter
    lb <-c(-5,0.5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
    ub <-c(5,5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
    
    # 2. Create optimization function with an additional input: number of repetition.
    # (The code is adapted from Mikhail Spektor's).
    results.eDDM<-setNames(data.frame(matrix(ncol = 6, nrow = 0)), c("nRepetition","v","a_speed","a_accuracy", "t0 (ms)", "nll"))
    run <- 1
    optimization_runs <-3
    
    # Run the parameter estimation for several times (this will take few seconds/minutes to complete)
    while (run <= optimization_runs){ 
      starting_values <- c(runif(n=1, min=-5,  max=5),
                           runif(n=1, min=0.5,  max=5), # selected from the same range for the same parameter
                           runif(n=1, min=0.5,  max=5), # selected from the same range for the same parameter
                           runif(n=1, min=0,  max=0.1))
      while (eDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5,  max=5),
                                                                      runif(n=1, min=0.5,  max=5),
                                                                      runif(n=1, min=0.5,  max=5),
                                                                      runif(n=1, min=0,  max=0.1))}
      # parameter estimation procedure using nlminb
      fiteDDM <- nlminb(start=starting_values, objective=eDDM,
                        lower = lb, 
                        upper= ub, 
                        dat=data) 
      deviance <- fiteDDM[["objective"]]
      params <- fiteDDM[["par"]]
      
      # store results for each iterate
      results.eDDM[run,] <- c(run,
                              round(params[1],4),
                              round(params[2],4),
                              round(params[3],4),
                              round(params[4],4), 
                              deviance)
      run <- run + 1
      } 
    
    
    
    # identify the lowest nLL and extract the parameter and compute AIC and BIC
    nLLlowes.oDDM<- min(results.oDDM[,5])
    nLLlowes.eDDM<- min(results.eDDM[,6])
    AIC.oDDM[kSim, kDDMdata] <- nLLlowes.oDDM+2*nParam.oDDM # AIC values when true model is oDDM and fitting oDDM
    AIC.eDDM[kSim, kDDMdata] <- nLLlowes.eDDM+2*nParam.eDDM # AIC values when true model is oDDM and fitting eDDM
    BIC.oDDM[kSim, kDDMdata] <- nLLlowes.oDDM+nParam.oDDM*log(length(oDDMsimu.All[,1])) 
    BIC.eDDM[kSim, kDDMdata] <- nLLlowes.eDDM+nParam.eDDM*log(length(eDDMsimu.All[,1])) 
    
    # AIC.oDDM[, 1] are AIC values when true model is oDDM and fitting oDDM
    # AIC.eDDM[, 1] are AIC values when true model is oDDM and fitting eDDM
    # AIC.oDDM[, 2] are AIC values when true model is eDDM and fitting oDDM
    # AIC.eDDM[, 2] are AIC values when true model is eDDM and fitting eDDM
  }
}
## Summarize model recovery with the confusion matrix
# plot the confusion matrix
library(yardstick) # Install it if you cannot find the package in your R
library(ggplot2) # Install it if you cannot find the package in your R

# Create targets and predictions data frame
TP<- data.frame(
  "simulated" = c(rep(1,kSim),rep(2,kSim)), # suppose you have two models, 1= oDDM, 2=eDDM
  "estimated" = c(apply(cbind(AIC.oDDM[,1],AIC.eDDM[,1]),MARGIN =  1, FUN = which.min), #identify the minimum AIC for each synthetic subject when the true model is oDDM. 
                  apply(cbind(AIC.oDDM[,2],AIC.eDDM[,2]),MARGIN =  1, FUN = which.min)),#identify the minimum AIC for each synthetic subject when the true model is eDDM
  stringsAsFactors = FALSE
)

# transform values to factors
TP$simulated <- as.factor(TP$simulated)
TP$estimated <- as.factor(TP$estimated)

# The confusion matrix 
cm <- conf_mat(TP, simulated, estimated)
autoplot(cm, type = "heatmap")+ theme(legend.position = "right")+ labs(fill="Frequency")

7.3 Parameter recovery

To evaluate a single model and to test if there is an unique mapping between possible data pattern and a corresponding set of parameter estimates, the parameter recovery test is needed. Unlike model recovery, we focus on one model and correlation between true parameter values and estimated parameter values.

# predetermine
nObservation <-200
nParam.eDDM <-4;

# pre-allocation
eParameterSet<- setNames(data.frame(matrix(ncol = 4, nrow = 0)), c("v","a_speed","a_accuracy", "t0"))
Recovery.eDDM <-setNames(data.frame(matrix(ncol = 4, nrow = 0)), c("v","a_speed","a_accuracy", "t0"))

for (kSim in 1:nObservation){
  # ----------------------
  # Simulate data with extended function
  eParameterSet[kSim,] <-c(runif(n=1, min=0.5,  max=1),
                    runif(n=1, min=0.5,  max=2),
                    runif(n=1, min=0.5,  max=2),
                    runif(n=1, min=0.1,  max=0.3))
  
  # pre-allocation
  DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
  DDMsimu.accuracy <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
  
  # simulation
  DDMsimu.speed <- rdiffusion(nTrial.speed, v = eParameterSet[kSim,1], a = eParameterSet[kSim,2], t0 = eParameterSet[kSim,4])
  DDMsimu.accuracy <- rdiffusion(nTrial.accuracy, v = eParameterSet[kSim,1], a = eParameterSet[kSim,3], t0 = eParameterSet[kSim,4])
  DDMsimu.speed$instruction <- "speed"
  DDMsimu.accuracy$instruction <- "accuracy"
  eDDMsimu.All <- rbind(DDMsimu.speed,DDMsimu.accuracy)
  
  
  # ----------------------
  # model fitting (extended DDM)
  # 1. set up lower and upper boundary for each parameter
  # Also, check if the same range is used for the same parameter
  lb <-c(-5,0.5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
  ub <-c(5,5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
  
  # 2. Create optimization function with an additional input: number of repetition.
  # (The code is adapted from Mikhail Spektor's).
  results.eDDM<-setNames(data.frame(matrix(ncol = 6, nrow = 0)), c("nRepetition","v","a_speed","a_accuracy", "t0 (ms)", "nll"))
  run <- 1
  optimization_runs <-3
  
  # Run the parameter estimation for several times (this will take few seconds/minutes to complete)
  while (run <= optimization_runs){ 
    starting_values <- c(runif(n=1, min=-5,  max=5),
                         runif(n=1, min=0.5,  max=5), # selected from the same range for the same parameter
                         runif(n=1, min=0.5,  max=5), # selected from the same range for the same parameter
                         runif(n=1, min=0,  max=0.1))
    while (eDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5,  max=5),
                                                                    runif(n=1, min=0.5,  max=5),
                                                                    runif(n=1, min=0.5,  max=5),
                                                                    runif(n=1, min=0,  max=0.1))}
    # parameter estimation procedure using nlminb
    fiteDDM <- nlminb(start=starting_values, objective=eDDM,
                      lower = lb, 
                      upper= ub, 
                      dat=eDDMsimu.All) 
    deviance <- fiteDDM[["objective"]]
    params <- fiteDDM[["par"]]
    
    # save results for each iterate
    results.eDDM[run,] <- c(run,
                            round(params[1],4),
                            round(params[2],4),
                            round(params[3],4),
                            round(params[4]*1000), 
                            deviance)
    run <- run + 1
  } 
  
  # identify the lowest nLL and extract the parameter and compute AIC and BIC
  id<- which.min(results.eDDM[,5])
  Recovery.eDDM[kSim,] <- results.eDDM[id,2:5]
}

Next, we can run the correlation analysis

#the correlation between true and recovered/estimated parameters
round(cor(Recovery.eDDM,eParameterSet), 2)