Week 5 Parameter Estimation2
We will run the model fitting again and improve the optimization method a bit. Briefly say, we will repeat the parameter estimation with a number of different starting values and check if the estimates are always the same.
This optimization procedure
# Week 5: Parameter estimation
# First and Last name
# date
- To start the new project, clear working environment with
rm(list=ls())
.
# clear working environment
rm(list=ls())
- Check if you have installed packages
# Open "rtdists" package
library(rtdists)
5.1 Maximum Likelihood (ML)
Toss a coin 20 times (N=20) and observe 10 Heads. What is the likelihood of observing the data? To compute it, we can compute likelihood for each hypothetical θ. Here we compute likelihood for θ = .5 and θ = .75.
<- .5^20
Likelihood.theta50<- .75^10 *.25^10
Likelihood.theta75<-Likelihood.theta50/Likelihood.theta75
Likelihood.Ratio Likelihood.Ratio
## [1] 17.75773
We can compute the Likelihood by our created likelihood function (see week2 for details).
# Create a LF function given binomial model
# inputs: theta, x and N
# output: Likelihood
<- function(LF.theta,x,N){ dbinom(x,N,LF.theta) }
LF <- LF(0.5, 10, 20)
LF50 <- LF(0.75, 10, 20)
LF75 LF50
## [1] 0.1761971
LF75
## [1] 0.009922275
<-LF50/LF75
LFratio LFratio
## [1] 17.75773
From the examples above, you might notice that the likelihood values are meaningful when you make comparison between them (e.g., likelihood ratio).
5.2 ML in DDM
# Load data from the "rtdists" package
data(rr98)
# Double-click rr98 in your Environment panel (right-top window)
# In the dataset, there are 3 participants, who completed ~ 4000 trials in each condition (speed/accuracy).
# Preprocess data
<- rr98[!rr98$outlier,] # remove outliers, as in original paper
rr98 'RT'] <- rr98$rt
rr98[,'accuracy'] = 0 # add a new column labeled as accuracy, where incorrect responses are coded as 0
rr98[,$correct, 'accuracy'] = 1 # and correct responses are coded as 1
rr98[rr98$choice = rr98$response
rr98$response = as.character(rr98$response)
rr98$correct == 1, "response"] = "upper"
rr98[rr98$correct == 0, "response"] = "lower"
rr98[rr98head(rr98)
# Create a deviance function to quantify the difference between predicted and actual data given model and parameter values
# That is, we compute deviance as log likelihood: how likely the set of parameter values is true given data and model?
# Note1: The log likelihood is computed separately for correct and incorrect trials.
# Note2: The ddiffusion model is the model, which requires rt, response and DDM-related parameters
<- function(pars, dat)
diffusion_deviance2
{<- sum(log(ddiffusion(rt=dat[dat["accuracy"] == 1, "rt"],
log_lik response="upper", v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
<- log_lik +
log_lik sum(log(ddiffusion(rt=dat[dat["accuracy"] == 0, "rt"],
response="lower", v=pars[1], a=pars[2], t0=pars[3], z=pars[2]/2, s= 1)))
if (any((-2*log_lik == 0))) return(1e6)
return(-2*log_lik)
}
Last week, we estimated parameters for each subject as followed:
# Initialization
<-0
n
# Pre-allocation (i.e., create an empty data frame and name each column)
<-setNames(data.frame(matrix(ncol = 7, nrow = 0)), c("name", "Mean RT (ms)","Mean Accuracy","v", "a","t0 (ms)", "nll"))
results
# Main for loop to estimate parameters for each subject
for (pp in unique(rr98$id)) {
print(pp) # display the current subject in the Console window, so that you know the current progress
= rr98[rr98$id == pp,]
data_pp
<- nlminb(start=starting_values,
fit objective=diffusion_deviance2,
lower=lb,# the lower boundary for each parameter.
upper=ub, # the upper boundary for each parameter.
dat=data_pp)
<- fit[["objective"]]
deviance <- fit[["par"]]
params
# store result for the first to the last subject
<- n+1 #update the current number of subject
n <- c(pp, #subject's name
results[n,] round(agg.id[n,2]*1000), #Mean RT (ms)
3], #Mean Accuracy
agg.id[n,round(params[1],2), #drift rate
round(params[2],2), #threshold
round(params[3]*1000), #non-decision time (ms)
#negative log likelihood
deviance)
} results
This week, we will repeat the optimization procedure more than once and check if the results are consistent.
# 1. set up lower and upper boundary for each parameter
<-c(-5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
lb <-c(5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
ub
# 2. Create optimization function with an additional input: number of repetition.
# (The code is adapted from Mikhail Spektor's).
<-setNames(data.frame(matrix(ncol = 5, nrow = 0)), c("nRepetition","v", "a","t0 (ms)", "nll"))
results<- 1
run <-10
optimization_runs
# Run the parameter estimation for several times (this will take few seconds/minutes to complete)
while (run <= optimization_runs){
<- c(runif(n=1, min=-5, max=5),
starting_values runif(n=1, min=0.5, max=5),
runif(n=1, min=0, max=0.1))
<- nlminb(start=starting_values, objective=diffusion_deviance2,
fit lower = lb,
upper= ub,
dat=rr98)
<- fit[["objective"]]
deviance <- fit[["par"]]
params
# save results (i.e., run#, a, v, t0, Discrepancy) for each iterate
<- c(run,
results[run,] round(params[1],2),
round(params[2],2),
round(params[3]*1000),
deviance)<- run + 1}
run
# display results in the Console window
results