# Week 5 Parameter Estimation2

We will run the model fitting again and improve the optimization method a bit. Briefly say, we will repeat the parameter estimation with a number of different starting values and check if the estimates are always the same.

# Week 5: Parameter estimation
# First and Last name
# date
• To start the new project, clear working environment with rm(list=ls()).
# clear working environment
rm(list=ls())
• Check if you have installed packages
# Open "rtdists" package
library(rtdists)

## 5.1 Maximum Likelihood (ML)

Toss a coin 20 times (N=20) and observe 10 Heads. What is the likelihood of observing the data? To compute it, we can compute likelihood for each hypothetical θ. Here we compute likelihood for θ = .5 and θ = .75.

Likelihood.theta50<- .5^20
Likelihood.theta75<- .75^10 *.25^10
Likelihood.Ratio<-Likelihood.theta50/Likelihood.theta75
Likelihood.Ratio
##  17.75773

We can compute the Likelihood by our created likelihood function (see week2 for details).

# Create a LF function given binomial model
# inputs: theta, x and N
# output: Likelihood
LF <- function(LF.theta,x,N){ dbinom(x,N,LF.theta) }
LF50 <- LF(0.5, 10, 20)
LF75 <- LF(0.75, 10, 20)
LF50
##  0.1761971
LF75
##  0.009922275
LFratio <-LF50/LF75
LFratio
##  17.75773

From the examples above, you might notice that the likelihood values are meaningful when you make comparison between them (e.g., likelihood ratio).

## 5.2 ML in DDM

# Load data from the "rtdists" package
data(rr98)
# Double-click rr98 in your Environment panel (right-top window)
# In the dataset, there are 3 participants, who completed ~ 4000 trials in each condition (speed/accuracy).
# Preprocess data
rr98 <- rr98[!rr98$outlier,] # remove outliers, as in original paper rr98[,'RT'] <- rr98$rt
rr98[,'accuracy'] = 0 # add a new column labeled as accuracy, where incorrect responses are coded as 0
rr98[rr98$correct, 'accuracy'] = 1 # and correct responses are coded as 1 rr98$choice = rr98$response rr98$response = as.character(rr98$response) rr98[rr98$correct == 1, "response"] = "upper"
rr98[rr98$correct == 0, "response"] = "lower" head(rr98) # Create a deviance function to quantify the difference between predicted and actual data given model and parameter values # That is, we compute deviance as log likelihood: how likely the set of parameter values is true given data and model? # Note1: The log likelihood is computed separately for correct and incorrect trials. # Note2: The ddiffusion model is the model, which requires rt, response and DDM-related parameters diffusion_deviance2 <- function(pars, dat) { log_lik <- sum(log(ddiffusion(rt=dat[dat["accuracy"] == 1, "rt"], response="upper", v=pars, a=pars, t0=pars, z=pars/2, s= 1))) log_lik <- log_lik + sum(log(ddiffusion(rt=dat[dat["accuracy"] == 0, "rt"], response="lower", v=pars, a=pars, t0=pars, z=pars/2, s= 1))) if (any((-2*log_lik == 0))) return(1e6) return(-2*log_lik) } Last week, we estimated parameters for each subject as followed: # Initialization n<-0 # Pre-allocation (i.e., create an empty data frame and name each column) results<-setNames(data.frame(matrix(ncol = 7, nrow = 0)), c("name", "Mean RT (ms)","Mean Accuracy","v", "a","t0 (ms)", "nll")) # Main for loop to estimate parameters for each subject for (pp in unique(rr98$id)) {
print(pp) # display the current subject in the Console window, so that you know the current progress

data_pp = rr98[rr98\$id == pp,]

fit <- nlminb(start=starting_values,
objective=diffusion_deviance2,
lower=lb,# the lower boundary for each parameter.
upper=ub, # the upper boundary for each parameter.
dat=data_pp)
deviance <- fit[["objective"]]
params <- fit[["par"]]

# store result for the first to the last subject
n <- n+1 #update the current number of subject
results[n,] <- c(pp,                      #subject's name
round(agg.id[n,2]*1000), #Mean RT (ms)
agg.id[n,3],             #Mean Accuracy
round(params,2),      #drift rate
round(params,2),      #threshold
round(params*1000),   #non-decision time (ms)
deviance)                #negative log likelihood
}
results

This week, we will repeat the optimization procedure more than once and check if the results are consistent.

# 1. set up lower and upper boundary for each parameter
lb <-c(-5,0.5,0.1) # Lower Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time
ub <-c(5,5,0.5) # Upper Boundary for each parameter. Order is important. Here, drift rate, threshold and non-decision time

# 2. Create optimization function with an additional input: number of repetition.
# (The code is adapted from Mikhail Spektor's).
results<-setNames(data.frame(matrix(ncol = 5, nrow = 0)), c("nRepetition","v", "a","t0 (ms)", "nll"))
run <- 1
optimization_runs <-10

# Run the parameter estimation for several times (this will take few seconds/minutes to complete)
while (run <= optimization_runs){
starting_values <- c(runif(n=1, min=-5,  max=5),
runif(n=1, min=0.5,  max=5),
runif(n=1, min=0,  max=0.1))
fit <- nlminb(start=starting_values, objective=diffusion_deviance2,
lower = lb,
upper= ub,
dat=rr98)
deviance <- fit[["objective"]]
params <- fit[["par"]]

# save results (i.e., run#, a, v, t0, Discrepancy) for each iterate
results[run,] <- c(run,
round(params,2),
round(params,2),
round(params*1000),
deviance)
run <- run + 1}

# display results in the Console window
results