Chapter 9 Advanced
9.1 Reinforcement Learning EAMs (RL-EAMs)
Data from experiment 1 of https://elifesciences.org/articles/63055 55 participants performing 52 trials on each of 4 pairs of stimuli with different levels of probabilistic reinforcement for or choosing one or other pair member.
rm(list=ls())
setwd("~/emc_advanced/")
source("emc/emc.R")
print(load("Data/mileticExp1.RData"))
## [1] "dat"
We need to first do a couple of operations on the data.
<- dat[!dat$excl,] # Excluded participants
data $pp <- factor(as.character(data$pp))
data<- data[!is.na(data$rt),]
data # Ignore presentation side, low/high reward pairs are
# 0.6(0.8-0.2) = tT, 0.4(0.7-0.3) = eD, 0.3(.65 - 0.35) = hJ 0.2(0.6 - 0.4) = kK
# Make S factor in order of increasing ease (.2,.3,.4,.6)
$S <- factor(paste(data$low_stim,data$high_stim,sep="_"),levels=c("k_K","h_J","e_D","t_T"))
data<- data[,c("pp","TrialNumber","S","reward","choiceIsHighP","rt")]
data $reward <- data$reward/100
datanames(data) <- c("subjects","trials","S","reward","R","rt")
$R <- factor(data$R,labels=c("low","high"))
datahead(data)
## subjects trials S reward R rt
## 3121 7 1 e_D 1 high 0.750092
## 3122 7 2 t_T 0 low 0.866725
## 3123 7 3 e_D 0 high 1.100030
## 3124 7 4 h_J 0 high 0.708232
## 3125 7 5 k_K 1 high 1.174540
## 3126 7 6 h_J 1 high 1.483390
Looking at the data, the two lowest probability pairs not much different;
round(tapply(data$R=="high",data$S,mean),2)
## k_K h_J e_D t_T
## 0.66 0.67 0.74 0.86
This example collapses across stimulus side, so just designates stimuli as high vs. low (reward probability).
9.1.1 Racing diffusion model with reinforcement learning
First we need to load in this particular RL model.
## [1] "dat"
source("models/RACE/RDM/rdmRL.R")
We now need to add a special component of design for adaptive models here, with only a component for adapting stimulus value, but architecture allows other components. Note that the adapt design requires reward Fcovariate
and Ffunctions
making one column for each stimulus (e.g., two columns for 2AFC) and corresponding columns names (p_ + stimulus name
) with the probability of getting a reward.
<- list(
adapt stimulus=list(
# 2AFC stimulus components whose value is to be adapted
targets=matrix(c("K","k","J","h","D","e","T","t"),nrow=2,ncol=4,
dimnames=list(c("high","low"),c("k_K","h_J","e_D","t_T"))),
# adaptive equation, creating "Q" values for each stimulus component
init_par=c("q0"),
adapt_par=c("alpha"),
feedback=c("reward"),
adapt_fun=function(Qlast,adapt_par,reward) # new Q value
+ adapt_par*(reward - Qlast),
Qlast # Output equation, uses Q values and parameters to update rates for stimulus components
output_par_names=c("v0","w"),
output_name = c("v"),
output_fun=function(output_pars,Q) # rate based on Q value
1] + output_pars[,2]*Q
output_pars[,
),fixed_pars=c("B","t0","A") # Non-adapted parameters
)
<- function(d){factor(unlist(lapply(strsplit(as.character(d$S),"_"),function(x){x[[1]]})))}
sLow <- function(d){factor(unlist(lapply(strsplit(as.character(d$S),"_"),function(x){x[[2]]})))}
sHigh <- function(d){levels(d$S) <- 1-c(.6,.65,.7,.8); as.numeric(as.character(d$S))}
pLow <- function(d){levels(d$S) <- c(.6,.65,.7,.8); as.numeric(as.character(d$S))}
pHigh
<- function(d){d$lR=="high"} # "correct" (higher expected reward) response
matchfun
<- make_design(
design Ffactors=list(subjects=levels(data$subjects),S=levels(data$S)),
Ffunctions=list(low=sLow,high=sHigh,p_low=pLow,p_high=pHigh),
Fcovariates = c("reward"),
Rlevels=levels(data$R),matchfun=matchfun,
Flist=list(v0~1,v~1,B~1,A~1,t0~1,alpha~1,w~1,q0~1),
constants = c(A=log(0),v=log(0)),
adapt=adapt, # Special adapt component
model=rdmRL)
## v0 B t0 alpha w q0
## 0 0 0 0 0 0
## attr(,"map")
## attr(,"map")$v0
## v0
## 551 1
##
## attr(,"map")$v
## v
## 551 1
##
## attr(,"map")$B
## B
## 551 1
##
## attr(,"map")$A
## A
## 551 1
##
## attr(,"map")$t0
## t0
## 551 1
##
## attr(,"map")$alpha
## alpha
## 551 1
##
## attr(,"map")$w
## w
## 551 1
##
## attr(,"map")$q0
## q0
## 551 1
You will note that this is quite a few complex functions that are needed for this part, but notice that above, we only adapt some parameters (v
) and keep others fixed. Further, note the alpha
, q0
and w
parameters that are used by the RL model.
Now we add covariates to the data (as it is used by adapt so we cannot wait until make_samplers
adds it).
<- add_Ffunctions(data,design)
data head(data)
## subjects trials S reward R rt low high p_low p_high
## 3121 7 1 e_D 1 high 0.750092 e D 0.30 0.70
## 3122 7 2 t_T 0 low 0.866725 t T 0.20 0.80
## 3123 7 3 e_D 0 high 1.100030 e D 0.30 0.70
## 3124 7 4 h_J 0 high 0.708232 h J 0.35 0.65
## 3125 7 5 k_K 1 high 1.174540 k K 0.40 0.60
## 3126 7 6 h_J 1 high 1.483390 h J 0.35 0.65
Let’s now check that the likelihood is correct.
<- sampled_p_vector(design,doMap = FALSE)
p_vector # Average parameter table from Militic for Exp 1
"v0"] <- log(1.92)
p_vector["B"] <- log(2.16)
p_vector["t0"] <- log(.1)
p_vector["w"] <- log(3.09)
p_vector["alpha"] <- log(0.12)
p_vector["q0"] <- log(.1) # was fixed to zero in estimated model
p_vector[
# All identical subjects
<- make_data(p_vector,design=design,trials=52)
simdata
plot_defective_density(simdata,layout=c(2,2),factors=c("S"))
#This is slow to compute so view pdf vignettes/ReinforcementLearning/samples/profile1.pdf
Let’s now create the model and check that the likelihood recovers using profile plots.
<- design_model(simdata,design)
dadmRL # log_likelihood_race(p_vector,dadm=dadmRL)
par(mfrow=c(2,3))
profile_pmwg(pname="v0",p=p_vector,p_min=.3,p_max=1,dadm=dadmRL)
profile_pmwg(pname="B",p=p_vector,p_min=.5,p_max=1,dadm=dadmRL)
profile_pmwg(pname="t0",p=p_vector,p_min=-3,p_max=-2,dadm=dadmRL)
profile_pmwg(pname="alpha",p=p_vector,p_min=-2.5,p_max=-1.5,dadm=dadmRL)
profile_pmwg(pname="w",p=p_vector,p_min=.75,p_max=1.5,dadm=dadmRL)
profile_pmwg(pname="q0",p=p_vector,p_min=-2.75,p_max=-2,dadm=dadmRL)
If you run these plots, you’ll see that q0
is worst estimated, in some runs down biased, in other runs up biased.
Let’s now try a larger value of q0
"q0"] <- log(1) # was fixed to zero in estimated model
p_vector[<- make_data(p_vector,design=design,trials=52)
simdata1
plot_defective_density(simdata1,layout=c(2,2),factors=c("S"))
<- design_model(simdata1,design)
dadmRL # head(log_likelihood_race(p_vector,dadm=dadmRL))
par(mfrow=c(2,3))
profile_pmwg(pname="v0",p=p_vector,p_min=.3,p_max=1,dadm=dadmRL)
profile_pmwg(pname="B",p=p_vector,p_min=.5,p_max=1,dadm=dadmRL)
profile_pmwg(pname="t0",p=p_vector,p_min=-3,p_max=-2,dadm=dadmRL)
profile_pmwg(pname="alpha",p=p_vector,p_min=-2.5,p_max=-1.5,dadm=dadmRL)
profile_pmwg(pname="w",p=p_vector,p_min=.75,p_max=1.5,dadm=dadmRL)
profile_pmwg(pname="q0",p=p_vector,p_min=-.5,p_max=.5,dadm=dadmRL)
q0
is now well estimated.
Now we can fit the RL-RDM to real data. Here we use ms resolution as no compression is possible (i.e., trial by trial variables and parameter functions means each trial is updated, so there is little to no compression).
<- make_samplers(data,design,type="standard",rt_resolution=.001)
miletic1_rdm # save(miletic1_rdm,file="miletic1_rdm.RData")
# run by run_miletic1_rdm.R
First, let’s load in the samples;
print(load("~/Desktop/emc_advanced/vignettes/ReinforcementLearning/samples/miletic1_rdm.RData"))
## [1] "adapted" "miletic1_rdm" "ppmiletic1_rdm"
Let’s now check the sampling
# Sampling looks good after 500, giving 4500 good samples
check_run(miletic1_rdm,subfilter = 500)
We can also plot the fit of the data;
# Fit aggregated over participants and trials fairly good, although some misfit for t_T
plot_fit(data,ppmiletic1_rdm,layout=c(2,2),factors="S")
# Priors clearly dominated
<- plot_density(miletic1_rdm,layout=c(2,3),selection="mu",subfilter=500)
ciMiletic1_rdm # Estimates fairly similar to Miletic et al.'s estimates with q0 fixed at zero.
# Plot observed rt and response probability averaged over subjects
plot_adapt(data = data,design=design,do_plot="R",ylim=c(0,1))
plot_adapt(data = data,design=design,do_plot="rt",ylim=c(0.3,1.05))
# Get adaptive estimates and fits to allow plotting of 95% CIs
# Base estimates on posterior means for each participant
<- parameters_data_frame(miletic1_rdm,selection="alpha",subfilter=500,stat=mean)
pars # # This is slow so use pre-computed. Also watch out for RAM use!
# adapted <- plot_adapt(data = attr(miletic1_rdm,"data_list")[[1]],design=design,
# do_plot=NULL,reps=500,n_cores=11,p_vector=pars)
# Plot adaptive estimates
plot_adapt(adapted=adapted,do_plot="learn",ylim=c(0.1,.85))
plot_adapt(adapted=adapted,do_plot="par",ylim=c(2,5))
# Plot trial fits to rt and response probability averaged over subjects
plot_adapt(adapted=adapted,do_plot="R",ylim=c(0,1))
plot_adapt(adapted=adapted,do_plot="rt",ylim=c(0.45,1.15))
# There is some misfit early in learning
# save(adapted,miletic1_rdm,ppmiletic1_rdm,
# file="vignettes/ReinforcementLearning/samples/miletic1_rdm.RData")
#### Parameter recovery study ----
# Simulate data from the posterior means
<- post_predict(miletic1_rdm,use_par="mean",n_post=1,subfilter=500)
miletic1_rdm_simdat # Looks like the real thing.
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="R",ylim=c(0,1))
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="rt",ylim=c(0.3,1.05))
# If we provide parameters we can see the average learning and rates
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="learn",ylim=c(0,1),
p_vector=attributes(miletic1_rdm_simdat)$pars)
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="par",ylim=c(2,5),
p_vector=attributes(miletic1_rdm_simdat)$pars)
# can we recover these?
<- make_samplers(miletic1_rdm_simdat,design,type="standard")
miletic1_rdm_sim # save(miletic1_rdm_sim,file="miletic1_rdm_sim.RData")
# run in run_miletic1_rdm_sim.R
# Looks fine over last 1500
print(load("vignettes/ReinforcementLearning/samples/miletic1_rdm_sim.RData"))
check_run(miletic1_rdm_sim,subfilter=500)
# Excellent stationary fit
plot_fit(miletic1_rdm_simdat,ppmiletic1_rdm_sim,layout=c(2,2),factors="S")
# Some updating but likely shrinkage influential
<- plot_density(miletic1_rdm_sim,selection="alpha",layout=c(2,3),
tabs pars=attributes(attr(miletic1_rdm_sim,"data_list")[[1]])$pars)
# Note mapped=TRUE does not work for adaptive models.
# OK recovery, shrinkage understandable given small sample size per participant
plot_alpha_recovery(tabs,layout=c(2,3))
# Coverage is fairly decent , maybe a bit under
plot_alpha_recovery(tabs,layout=c(2,3),do_rmse=TRUE,do_coverage=TRUE)
# Plot observed rt and response probability averaged over subjects
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="R",ylim=c(0,1))
plot_adapt(data = miletic1_rdm_simdat,design=design,do_plot="rt",ylim=c(0.45,1.15))
# Get adaptive estimates and fits, which can be done by providing the samples
# object. We can provided the posterior mean parameters as before by and
# appropriate setting of p_vector (other statistics such as "median" can be
# used) along with a subfilter if needed. Again stored to avoid slow compute
# adapted_sim <- plot_adapt(samples = miletic1_rdm_sim,do_plot=NULL,reps=500,n_cores=11,
# p_vector="mean",subfilter=500)
# Plot adaptive estimates
plot_adapt(adapted=adapted_sim,do_plot="learn",ylim=c(0.1,.85))
plot_adapt(adapted=adapted_sim,do_plot="par",ylim=c(2,5))
# Plot trial fits to rt and response probability averaged over subjects
plot_adapt(adapted=adapted_sim,do_plot="R",ylim=c(0,1))
plot_adapt(adapted=adapted_sim,do_plot="rt",ylim=c(0.45,1.15))
# As expected when fitting the true model the misfit has disappeared.
# A better approach to fully represent uncertainty is to sample parameters
# from the posterior (one per rep) as below.
# adapted_sim_sample <- plot_adapt(samples = miletic1_rdm_sim,do_plot=NULL,reps=500,n_cores=11,
# p_vector="sample",subfilter=500)
# Plot adaptive estimates
plot_adapt(adapted=adapted_sim_sample,do_plot="learn",ylim=c(0.1,.85))
plot_adapt(adapted=adapted_sim_sample,do_plot="par",ylim=c(2,5))
# Plot trial fits to rt and response probability averaged over subjects
plot_adapt(adapted=adapted_sim_sample,do_plot="R",ylim=c(0,1))
plot_adapt(adapted=adapted_sim_sample,do_plot="rt",ylim=c(0.45,1.15))
# In this case there is very little change to the credible intervals.
# save(adapted_sim,adapted_sim_sample,miletic1_rdm_sim,ppmiletic1_rdm_sim,
# file="vignettes/ReinforcementLearning/samples/miletic1_rdm_sim.RData")