Week 7 Model evaluation II
There are multiple ways to evaluate models. What we have learned so far is Information Criteria (e.g., AIC and BIC). Instead of using NLL (which quantifies goodness-of-fit), we also punish model complexity given number of parameters.
Today, we will learn three more methods to evaluate models:
1. Ex post simulation
2. model recovery (using two or two more models)
3. parameter recovery (focusing on one model)
# Week 7: Model evaluation II
# First and Last name
# date
- To start the new project, clear working environment with
rm(list=ls())
.
# clear working environment
rm(list=ls())
- Load the package
# Open "rtdists" package
library(rtdists)
- Load data
# Load data from the "rtdists" package
data(rr98)
# Double-click rr98 in your Environment panel (right-top window)
# In the dataset, there are 3 participants, who completed ~ 4000 trials in each condition (speed/accuracy).
- Preprocess data to make sure the data and data structure are suitable for follow-up model fitting.
# Preprocess data
<- rr98[!rr98$outlier,] # remove outliers, as in original paper
rr98 'RT'] <- rr98$rt
rr98[,'accuracy'] = 0 # add a new column labeled as accuracy, where incorrect responses are coded as 0
rr98[,$correct, 'accuracy'] = 1 # and correct responses are coded as 1
rr98[rr98$choice = rr98$response
rr98$response = as.character(rr98$response)
rr98$correct == 1, "response"] = "upper"
rr98[rr98$correct == 0, "response"] = "lower"
rr98[rr98head(rr98)
7.1 Ex Post Simulation
After the model comparison using AIC and/or BIC, we only recognize the relatively good model with relative balance of goodness-of-fit and complexity). To further verify if the model captures the important behavioral patterns, we use the estimated parameter values to run the simulation and ideally, reproduce the patterns of interest.
# Extract the data we need for simulating data using DDM1 (with varied drift rate)
# if drift rate is divided by instruction manipulation rather than strength and the order of manipulations, then the only information influencing model simulation is number of trials for each manipulation.
<- sum(rr98['instruction'] == 'speed')
nTrial.speed <- sum(rr98['instruction'] == 'accuracy')
nTrial.accuracy
nTrial.speed nTrial.accuracy
7.1.1 DDM1: Simulate data
First, we simulate data with estimated parameters using rdiffusion, which requires number of data and parameters.
# load the result
load("Week6_DDMresults.Rdata")
# Simulate data with existing function
<- rdiffusion(nTrial.speed, v = results.DDM1[1,2], a = results.DDM1[1,4], t0 = results.DDM1[1,5]/1000)
DDM1simu.speed <- rdiffusion(nTrial.accuracy, v = results.DDM1[1,3], a = results.DDM1[1,4], t0 = results.DDM1[1,5]/1000) DDM1simu.accuracy
Second, summarize the accuracy rate.
#summarize accuracy rate for simulated data and real data
sum(DDM1simu.speed["response"] == "upper")/length(DDM1simu.speed[,1])
sum(DDM1simu.accuracy["response"] == "upper")/length(DDM1simu.speed[,1])
sum(rr98["instruction"] == "speed" & rr98["response"] == "upper")/sum(rr98["instruction"] == "speed")
sum(rr98["instruction"] == "accuracy" & rr98["response"] == "upper")/sum(rr98["instruction"] == "accuracy")
Third, we can then visualize the simulated and real data via either histogram or ttest or any function you want to compare the real and simulated data. Agian, ideally the simulated data from the model should be able to reproduce important behavioral patterns/effects.
# Visualize the simulated and real data (adapt from Jannnick's code)
#Plot RTs distributions
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
###RTs in speed condition
## real data
= rr98["instruction"] == "speed"
ID.speed hist(rr98[ID.speed,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Speed condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.speed,"rt"]*1000),lwd=5, lty=2) #Add abline to visualize the mean of RT
## simulated data
hist(DDM1simu.speed[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Speed condition (simulated data)"), col = c("blue"))
#Add abline to visualize the mean of RT
abline(v=mean(DDM1simu.speed[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT
###RTs in accuracy condition
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
= rr98["instruction"] == "accuracy"
ID.accuracy hist(rr98[ID.accuracy,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Accuracy condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.accuracy,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT
## simulated data
hist(DDM1simu.accuracy[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Accuracy condition (simulated data)"), col = c("green"))
abline(v=mean(DDM1simu.accuracy[,"rt"]*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT
###RTs in both conditions
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
hist(rr98[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Real data regardless of condition"), col = c("gray"))
abline(v=mean(rr98[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT
## simulated data
hist(c(DDM1simu.speed[,"rt"], DDM1simu.accuracy[,"rt"])*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM1: varied drift. Simulated data regardless of condition"), col = c("yellow"))
abline(v=mean(c(DDM1simu.speed[,"rt"], DDM1simu.accuracy[,"rt"])*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT
7.1.2 DDM2: Simulate data
Here, we repeat the same procedure to test DDM2.
# Simulate data with existing function
<- rdiffusion(nTrial.speed, v = results.DDM2[1,2], a = results.DDM2[1,3], t0 = results.DDM2[1,5]/1000)
DDM2simu.speed <- rdiffusion(nTrial.accuracy, v = results.DDM2[1,2], a = results.DDM2[1,4], t0 = results.DDM2[1,5]/1000) DDM2simu.accuracy
#summarize accuracy rate for simulated data and real data
sum(DDM2simu.speed["response"] == "upper")/length(DDM2simu.speed[,1])
sum(DDM2simu.accuracy["response"] == "upper")/length(DDM2simu.speed[,1])
sum(rr98["instruction"] == "speed" & rr98["response"] == "upper")/sum(rr98["instruction"] == "speed")
sum(rr98["instruction"] == "accuracy" & rr98["response"] == "upper")/sum(rr98["instruction"] == "accuracy")
# Visualize the simulated and real data (adapt from Jannnick's code)
#Plot RTs distributions
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
###RTs in speed condition
## real data
= rr98["instruction"] == "speed"
ID.speed hist(rr98[ID.speed,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Speed condition (real data)"), col = c("gray"))
#Add abline to visualize the mean of RT
abline(v=mean(rr98[ID.speed,"rt"]*1000),lwd=5, lty=2)
## simulated data
hist(DDM2simu.speed[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Speed condition (simulated data)"), col = c("blue"))
#Add abline to visualize the mean of RT
abline(v=mean(DDM2simu.speed[,"rt"]*1000),lwd=5, lty=2)
###RTs in accuracy condition
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
= rr98["instruction"] == "accuracy"
ID.accuracy hist(rr98[ID.accuracy,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Accuracy condition (real data)"), col = c("gray"))
abline(v=mean(rr98[ID.accuracy,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT
## simulated data
hist(DDM2simu.accuracy[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2: varied threshold. Accuracy condition (simulated data)"), col = c("green"))
abline(v=mean(DDM2simu.accuracy[,"rt"]*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT
###RTs in both conditions
## real data
par(mfrow=c(2,1)) #show multiple plots in 1 Graphic
hist(rr98[,"rt"]*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2. Real data regardless of condition"), col = c("gray"))
abline(v=mean(rr98[,"rt"]*1000),lwd=5, lty=2)#Add abline to visualize the mean of RT
## simulated data
hist(c(DDM2simu.speed[,"rt"], DDM2simu.accuracy[,"rt"])*1000,xlab="RT (ms)", xlim=c(200,1000), breaks="FD", main=paste("DDM2. Simulated data regardless of condition"), col = c("yellow"))
abline(v=mean(c(DDM2simu.speed[,"rt"], DDM2simu.accuracy[,"rt"])*1000),lwd=5,lty=2)#Add abline to visualize the mean of RT
According to the visual inspection, the difference between real RT distribution and simulated RT distribution are similar if we didn’t split the RT distribution based on the instruction manipulation. This is the case for both DDM1 and DDM2. When we split the data into speed condition and accuracy condition, the difference is more obvious if data is simulated by DDM1, which is not worse model in the model space.
7.2 Model recovery
Model recovery/ identifiability assesses whether the model selection criteria (e.g., AIC, BIC) can correctly identify the “true” model used to generate the synthetic data. If each model in the model space generates and captures unique behavioral patterns, then it is not difficult to find out the “true” model given simulated data/patterns. But of course, the recovery ability also relies on the model selection criteria. For example,BIC prefers simpler models more than AIC (Wagenmakers and Farrell, 2004). In this section, we will walk through the model recovery:
- Choose and build two (or more) models in your model space
- Simulate data with each model and with proper range of parameter values
- Fit simulated dataset to all the model and compute model comaprison criteria (e.g., nLL/AIC/BIC …)
- Summarize model recovery with a confusion matrix, which quantifies the probability or frequency that each model is the best fit to data generated from the other models (Wilson, R. C., & Collins, A. G., 2019)
7.2.1 Build up models
In this section, we will only include two models in our model space. These two models are original DDM (oDDM) with only three free parameters and extended DDM (eDDM) with varied thresholds.
## First model: oDDM with only three free parameters: v, a, t0
<- function(pars, dat)
oDDM
{<- sum(log(ddiffusion(rt=dat[dat["response"] == "upper", "rt"],
log_lik response="upper", v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
<- log_lik +
log_lik sum(log(ddiffusion(rt=dat[dat["response"] == "lower", "rt"],
response="lower", v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
if (any((-2*log_lik == 0))) return(1e6)
return(-2*log_lik)
}
Extend the model above by splitting threshold or a based on instruction manipulations.
## Second model: DDM2, which extends the oDDM by splitting threshold based on instruction manipulations
# parameters: v, a_speed, a_accuracy, t0
# Note: order of parameter matters!! always keep tracking the order of parameter.
<- function(pars, dat)
eDDM
{<- sum(log(ddiffusion(rt=dat[dat["response"] == "upper" & dat["instruction"] == "speed", "rt"],
log_lik response="upper", v=pars[1], a=pars[2], t0=pars[4])))
<- log_lik +
log_lik sum(log(ddiffusion(rt=dat[dat["response"] == "upper" & dat["instruction"] == "accuracy", "rt"],
response="upper", v=pars[1], a=pars[3], t0=pars[4])))
<- log_lik +
log_lik sum(log(ddiffusion(rt=dat[dat["response"] == "lower" & dat["instruction"] == "speed", "rt"],
response="lower", v=pars[1], a=pars[2], t0=pars[4])))
<- log_lik +
log_lik sum(log(ddiffusion(rt=dat[dat["response"] == "lower" & dat["instruction"] == "accuracy", "rt"],
response="lower", v=pars[1], a=pars[3], t0=pars[4])))
if (any((-2*log_lik == 0))) return(1e6)
return(-2*log_lik)
}
7.2.2 Simulation
Imagine you are running an experiment with only one subject, who is instructed to make decision (e.g., monitor is bright or dark) as fast as possible for 500 trials and to make decision as accurate as possible for the rest of 500 trials.
Like Ex-Post simulation introduced in week7, we simulate data based on the models of interest.
Unlike Ex-Post simulation, we determine number of trials and sets of parameters by ourselves.
1. Determine number of trials
# number of trials in each experiment
<- 500
nTrial.speed <- 500 nTrial.accuracy
- Determine a set of parameter values
# Determine a set of parameter values
<-c(runif(n=1, min=0.5, max=1),
ParameterSet runif(n=1, min=0.5, max=2),
runif(n=1, min=0.1, max=0.3))
- Simulate data given model and set of parameter values
# Simulate data given model and set of parameter values
# pre-allocation
<-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
DDMsimu.accuracy
# Simulation
<- rdiffusion(nTrial.speed, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
DDMsimu.speed <- rdiffusion(nTrial.accuracy, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
DDMsimu.accuracy
# Organize simulated data better by including the condition
$instruction <- "speed"
DDMsimu.speed$instruction <- "accuracy" DDMsimu.accuracy
7.2.3 Simulation and model fitting
What if we conduct the same experiment with 200 synthetic subjects and with two hypothetical models? Simple! We just need to repeat the same simulation procedure 200 times for each model. This repetition can be addressed by the for function. In order to mimic the individual difference on information processing, we use different sets of parameter values for each synthetic subject in each model. The simulation for each subject is then followed by a model fitting.
This exercise will take ~10 min. You will see the current progress on the Console window.
## Simulation and model fitting
# predetermine
<-200 # number of samples
nObservation <-3; # number of parameters in oDDM
nParam.oDDM <-4; # number of parameters in eDDM
nParam.eDDM
# pre-allocation
<- matrix(data=NA,nrow=nObservation,ncol=2)
AIC.oDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
AIC.eDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
BIC.oDDM<- matrix(data=NA,nrow=nObservation,ncol=2)
BIC.eDDM
# for loop to run the experiment N times. Each time we simulate data with both oDDM and eDDM.
for (kSim in 1:nObservation) {
print(kSim/nObservation) # update current progress in the Console window
# ----------------------
# Simulate data with original DDM
<-c(runif(n=1, min=0.5, max=1),
ParameterSet runif(n=1, min=0.5, max=2),
runif(n=1, min=0.1, max=0.3))
# pre-allocation
<-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
DDMsimu.accuracy
# simulation
<- rdiffusion(nTrial.speed, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
DDMsimu.speed <- rdiffusion(nTrial.accuracy, v = ParameterSet[1], a = ParameterSet[2], t0 = ParameterSet[3])
DDMsimu.accuracy $instruction <- "speed"
DDMsimu.speed$instruction <- "accuracy"
DDMsimu.accuracy<- rbind(DDMsimu.speed,DDMsimu.accuracy)
oDDMsimu.All
# ----------------------
# Simulate data with extended function
<-c(runif(n=1, min=0.5, max=1),
eParameterSet runif(n=1, min=0.5, max=2),
runif(n=1, min=0.5, max=2),
runif(n=1, min=0.1, max=0.3))
# pre-allocation
<-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
DDMsimu.accuracy
# simulation
<- rdiffusion(nTrial.speed, v = eParameterSet[1], a = eParameterSet[2], t0 = eParameterSet[4])
DDMsimu.speed <- rdiffusion(nTrial.accuracy, v = eParameterSet[1], a = eParameterSet[3], t0 = eParameterSet[4])
DDMsimu.accuracy $instruction <- "speed"
DDMsimu.speed$instruction <- "accuracy"
DDMsimu.accuracy<- rbind(DDMsimu.speed,DDMsimu.accuracy)
eDDMsimu.All
# ----------------------
# model fitting (original DDM)
for (kDDMdata in 1:2) {
if (kDDMdata ==1){
<- oDDMsimu.All
data else {
} <- eDDMsimu.All
data
}
# 1. set up lower and upper boundary for each parameter
# Also, check if the same range is used for the same parameter
<-c(-5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
lb <-c(5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
ub
# 2. Create optimization function with an additional input: number of repetition.
# (The code is adapted from Mikhail Spektor's).
<-setNames(data.frame(matrix(ncol = 5, nrow = 0)), c("nRepetition","v", "a","t0 (ms)", "nll"))
results.oDDM<- 1
run <-3
optimization_runs
# Run the parameter estimation for several times (this will take few seconds/minutes to complete)
while (run <= optimization_runs){
<- c(runif(n=1, min=-5, max=5),
starting_values runif(n=1, min=0.5, max=5),
runif(n=1, min=0, max=0.1))
while (oDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5, max=5),
runif(n=1, min=0.5, max=5),
runif(n=1, min=0, max=0.1))}
# parameter estimation procedure using nlminb
<- nlminb(start=starting_values, objective=oDDM,
fitDDM lower = lb,
upper= ub,
dat=data)# the data is the one we simulated above
<- fitDDM[["objective"]]
deviance <- fitDDM[["par"]]
params
# store results for each iterate
<- c(run,
results.oDDM[run,] round(params[1],2),
round(params[2],2),
round(params[3],2),
deviance)<- run + 1}
run
# ----------------------
# model fitting (extended DDM)
# 1. set up lower and upper boundary for each parameter
# Also, check if the same range is used for the same parameter
<-c(-5,0.5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
lb <-c(5,5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
ub
# 2. Create optimization function with an additional input: number of repetition.
# (The code is adapted from Mikhail Spektor's).
<-setNames(data.frame(matrix(ncol = 6, nrow = 0)), c("nRepetition","v","a_speed","a_accuracy", "t0 (ms)", "nll"))
results.eDDM<- 1
run <-3
optimization_runs
# Run the parameter estimation for several times (this will take few seconds/minutes to complete)
while (run <= optimization_runs){
<- c(runif(n=1, min=-5, max=5),
starting_values runif(n=1, min=0.5, max=5), # selected from the same range for the same parameter
runif(n=1, min=0.5, max=5), # selected from the same range for the same parameter
runif(n=1, min=0, max=0.1))
while (eDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5, max=5),
runif(n=1, min=0.5, max=5),
runif(n=1, min=0.5, max=5),
runif(n=1, min=0, max=0.1))}
# parameter estimation procedure using nlminb
<- nlminb(start=starting_values, objective=eDDM,
fiteDDM lower = lb,
upper= ub,
dat=data)
<- fiteDDM[["objective"]]
deviance <- fiteDDM[["par"]]
params
# store results for each iterate
<- c(run,
results.eDDM[run,] round(params[1],4),
round(params[2],4),
round(params[3],4),
round(params[4],4),
deviance)<- run + 1
run
}
# identify the lowest nLL and extract the parameter and compute AIC and BIC
<- min(results.oDDM[,5])
nLLlowes.oDDM<- min(results.eDDM[,6])
nLLlowes.eDDM<- nLLlowes.oDDM+2*nParam.oDDM # AIC values when true model is oDDM and fitting oDDM
AIC.oDDM[kSim, kDDMdata] <- nLLlowes.eDDM+2*nParam.eDDM # AIC values when true model is oDDM and fitting eDDM
AIC.eDDM[kSim, kDDMdata] <- nLLlowes.oDDM+nParam.oDDM*log(length(oDDMsimu.All[,1]))
BIC.oDDM[kSim, kDDMdata] <- nLLlowes.eDDM+nParam.eDDM*log(length(eDDMsimu.All[,1]))
BIC.eDDM[kSim, kDDMdata]
# AIC.oDDM[, 1] are AIC values when true model is oDDM and fitting oDDM
# AIC.eDDM[, 1] are AIC values when true model is oDDM and fitting eDDM
# AIC.oDDM[, 2] are AIC values when true model is eDDM and fitting oDDM
# AIC.eDDM[, 2] are AIC values when true model is eDDM and fitting eDDM
} }
## Summarize model recovery with the confusion matrix
# plot the confusion matrix
library(yardstick) # Install it if you cannot find the package in your R
library(ggplot2) # Install it if you cannot find the package in your R
# Create targets and predictions data frame
<- data.frame(
TP"simulated" = c(rep(1,kSim),rep(2,kSim)), # suppose you have two models, 1= oDDM, 2=eDDM
"estimated" = c(apply(cbind(AIC.oDDM[,1],AIC.eDDM[,1]),MARGIN = 1, FUN = which.min), #identify the minimum AIC for each synthetic subject when the true model is oDDM.
apply(cbind(AIC.oDDM[,2],AIC.eDDM[,2]),MARGIN = 1, FUN = which.min)),#identify the minimum AIC for each synthetic subject when the true model is eDDM
stringsAsFactors = FALSE
)
# transform values to factors
$simulated <- as.factor(TP$simulated)
TP$estimated <- as.factor(TP$estimated)
TP
# The confusion matrix
<- conf_mat(TP, simulated, estimated)
cm autoplot(cm, type = "heatmap")+ theme(legend.position = "right")+ labs(fill="Frequency")
7.3 Parameter recovery
To evaluate a single model and to test if there is an unique mapping between possible data pattern and a corresponding set of parameter estimates, the parameter recovery test is needed. Unlike model recovery, we focus on one model and correlation between true parameter values and estimated parameter values.
# predetermine
<-200
nObservation <-4;
nParam.eDDM
# pre-allocation
<- setNames(data.frame(matrix(ncol = 4, nrow = 0)), c("v","a_speed","a_accuracy", "t0"))
eParameterSet<-setNames(data.frame(matrix(ncol = 4, nrow = 0)), c("v","a_speed","a_accuracy", "t0"))
Recovery.eDDM
for (kSim in 1:nObservation){
# ----------------------
# Simulate data with extended function
<-c(runif(n=1, min=0.5, max=1),
eParameterSet[kSim,] runif(n=1, min=0.5, max=2),
runif(n=1, min=0.5, max=2),
runif(n=1, min=0.1, max=0.3))
# pre-allocation
<-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.speed)), c("rt", "response"))
DDMsimu.speed <-setNames(data.frame(matrix(ncol = 2, nrow = nTrial.accuracy)), c("rt", "response"))
DDMsimu.accuracy
# simulation
<- rdiffusion(nTrial.speed, v = eParameterSet[kSim,1], a = eParameterSet[kSim,2], t0 = eParameterSet[kSim,4])
DDMsimu.speed <- rdiffusion(nTrial.accuracy, v = eParameterSet[kSim,1], a = eParameterSet[kSim,3], t0 = eParameterSet[kSim,4])
DDMsimu.accuracy $instruction <- "speed"
DDMsimu.speed$instruction <- "accuracy"
DDMsimu.accuracy<- rbind(DDMsimu.speed,DDMsimu.accuracy)
eDDMsimu.All
# ----------------------
# model fitting (extended DDM)
# 1. set up lower and upper boundary for each parameter
# Also, check if the same range is used for the same parameter
<-c(-5,0.5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
lb <-c(5,5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
ub
# 2. Create optimization function with an additional input: number of repetition.
# (The code is adapted from Mikhail Spektor's).
<-setNames(data.frame(matrix(ncol = 6, nrow = 0)), c("nRepetition","v","a_speed","a_accuracy", "t0 (ms)", "nll"))
results.eDDM<- 1
run <-3
optimization_runs
# Run the parameter estimation for several times (this will take few seconds/minutes to complete)
while (run <= optimization_runs){
<- c(runif(n=1, min=-5, max=5),
starting_values runif(n=1, min=0.5, max=5), # selected from the same range for the same parameter
runif(n=1, min=0.5, max=5), # selected from the same range for the same parameter
runif(n=1, min=0, max=0.1))
while (eDDM(starting_values,data) == Inf) {starting_values <- c(runif(n=1, min=-5, max=5),
runif(n=1, min=0.5, max=5),
runif(n=1, min=0.5, max=5),
runif(n=1, min=0, max=0.1))}
# parameter estimation procedure using nlminb
<- nlminb(start=starting_values, objective=eDDM,
fiteDDM lower = lb,
upper= ub,
dat=eDDMsimu.All)
<- fiteDDM[["objective"]]
deviance <- fiteDDM[["par"]]
params
# save results for each iterate
<- c(run,
results.eDDM[run,] round(params[1],4),
round(params[2],4),
round(params[3],4),
round(params[4]*1000),
deviance)<- run + 1
run
}
# identify the lowest nLL and extract the parameter and compute AIC and BIC
<- which.min(results.eDDM[,5])
id<- results.eDDM[id,2:5]
Recovery.eDDM[kSim,] }
Next, we can run the correlation analysis
#the correlation between true and recovered/estimated parameters
round(cor(Recovery.eDDM,eParameterSet), 2)