Week 8 Reinforcement learning

In this chapter, you will learn another well-known cognitive model: the reinforcement learning (RL) model. As in Week 3, we will start with simulation. Simulation is a good way to quantify and visualize model predictions: we generate data given a model and an experimental design, and then observe what behavioral patterns the model predicts.

# Week 8: Reinforcement learning model (RL model)
# First and Last name
# date
  • To start the new project, clear the working environment with rm(list=ls()).
# clear working environment
rm(list=ls())

8.1 Model: Q-learning

The RL model has two main parameters: the learning rate (alpha) and the temperature parameter (beta). Having these two parameters is not sufficient for R to generate data: you also need to tell R (or any other programming language) the value of each parameter and the rule for using them, given the experimental design.
In terms of the task, a learning task usually consists of two steps, valuation and selection (both rules are sketched in code right after this list):
- Valuation: individuals evaluate the outcome and use it to update the value of the chosen option/action.

- Selection: individuals select one of the options/actions.
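To make these two steps concrete, here is a minimal sketch of the two rules used by the Q-learning model in this chapter. The function names softmax_choice and update_value are only illustrative (the full simulation below performs the same computations inline):

# Selection: softmax (logistic) choice rule.
# The probability of choosing A depends on the value difference Q(A) - Q(B),
# scaled by the temperature parameter beta.
softmax_choice <- function(QA, QB, beta) {
  1/(1 + exp(-beta*(QA - QB)))
}

# Valuation: delta rule.
# The chosen option's value moves toward the obtained reward,
# with step size given by the learning rate alpha.
update_value <- function(Q_chosen, reward, alpha) {
  Q_chosen + alpha*(reward - Q_chosen)
}

# Example: equal values give P(choose A) = 0.5;
# a rewarded choice (reward = 1) moves the chosen value from 0.5 up to 0.7.
softmax_choice(QA = 0.5, QB = 0.5, beta = 3)
update_value(Q_chosen = 0.5, reward = 1, alpha = 0.4)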

8.2 Simulation

Suppose the task contains a fixed pair of two neutral options, A and B. Each option is associated with a fixed outcome probability. In particular,
- A is associated with an 80% chance of winning 1€ and a 20% chance of getting nothing;
- B is associated with a 20% chance of winning 1€ and an 80% chance of getting nothing.
On each trial, participants are asked to select one of the two options. After the selection and before the end of the trial, the outcome of the chosen option is displayed on the screen (see Figure 8.1). Note that A is objectively the better option: its expected reward is 0.8€ per trial, versus 0.2€ for B.

Figure 8.1: Example of one trial of the reinforcement learning task

8.2.1 One trial

# Inputs
# ----------------
# PARAMETERS
alpha      <- 0.4;
beta       <- 3;

# NUMBER OF TRIALS
ntrials <- 24;


# Task related information
# ----------------
# INITIAL VALUE
Q0  <- c(0.5, 0.5); # initial value for option B and option A

# REWARD HISTORY
RA <- runif(ntrials,0,1)<0.8; # reward history for option A given its P(reward) = 0.8
RB <- runif(ntrials,0,1)<0.2; # reward history for option B given its P(reward) = 0.2
rew  <- data.frame(RB,RA);      # Outcome history of two options
Rmag <- 1;                    # reward magnitude 

# Pre-allocation
# ----------------
Qt  = matrix(data=NA,nrow=ntrials+1,ncol=2);# store the Q-values for option B (col 1) and option A (col 2)
PE  = matrix(data=NA,nrow=ntrials,ncol=1);# store the Prediction error
ch  = matrix(data=NA,nrow=ntrials,ncol=1);# store the choice : 1 = B; 2 = A
PA  = matrix(data=NA,nrow=ntrials,ncol=1);# store the modelled proba. of choosing A
r   = matrix(data=NA,nrow=ntrials,ncol=1);# store the obtained reward

# initialize Q values
# ----------------
Qt[1,]  = Q0; # for the first trial, Q values are the initialized values

# value simulation
# ----------------
for (t in 1:ntrials) {
    
    # Selection
    # - the modelled proba. of choosing A
    PA[t]   = 1./(1+exp(-beta*(Qt[t,2]-Qt[t,1])));  
    # -  if PA is larger than a random number, then choose A
    choice  = runif(1)<PA[t];
    ch[t]   = 1 + 1*choice ; # the choice. 1 = B; 2 = A
    
    # valuation: outcome
    # -  if the reward history for the chosen option is "True", then receive reward. 0 Otherwise.
    r[t] = Rmag*rew[t,ch[t]]; 
    
    # valuation: compute prediction error
    PE[t] = r[t] - Qt[t,ch[t]];
    
    # valuation: update value for the next trial
    Qt[t+1,ch[t]] = Qt[t,ch[t]]+ alpha*PE[t];     # column ch(t) = chosen (1 or 2; B or A)
    
    # valuation: keep track of unchosen option value for the next trial
    Qt[t+1,3-ch[t]] = Qt[t,3-ch[t]];              # column 3-ch(t) = unchosen (2 or 1; A or B)
}
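# (Illustrative check, not part of the original script:) after the loop, the
# value of option A (column 2) should have drifted toward its true expected
# reward of 0.8, while option B (column 1) stays closer to its starting value.
Qt[ntrials+1, ]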

Plot the simulation results for predicted values

# Plot the simulation results for predicted values
matplot(1:ntrials, Qt[1:ntrials,], pch = c(2,9), type="b", col=c("blue", "red") , lty=c(1,1), xlab="trials", ylab="Q-values", main="simulation results for predicted value", xaxt="n")

# Add a legend
legend("topleft", c("OptionB", "OptionA"), pch = c(2,9), lty=c(1,1), col=c("blue", "red"), cex=0.8)

# improve the quality of figure
xtick<-seq(0, ntrials+1, by=1)
axis(side=1, at=xtick, labels = FALSE)
text(x=xtick,  par("usr")[3], 
     labels = xtick, srt = 0, pos = 1, xpd = TRUE)

Plot the simulation results for the predicted probability of choosing option A

# Plot the actual choices and the predicted probability of choosing option A
ntrials = 24
plot(1:ntrials, ch[1:ntrials]-1, pch=16,col="red",xlab="trials", ylab="Prob(choose A)", main="simulation results for P(A) and actual choices")
points(1:ntrials, PA[1:ntrials], col="black",pch=4)
lines(1:ntrials, PA[1:ntrials], col="black", lty=3)

# Add a legend
legend("center", c("Choice","prob(A)"), lty=c(0,3), pch=c(16,4), col=c("red","black"), cex=0.8)

# improve the quality of figure
xtick<-seq(0, ntrials+1, by=1)
axis(side=1, at=xtick, labels = FALSE)
text(x=xtick,  par("usr")[3], 
     labels = xtick, srt = 0, pos = 1, xpd = TRUE)
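Before extending the simulation to several sessions, note that the whole single-session simulation above can be wrapped into one function, which makes it easy to rerun with different parameter values. The sketch below is only illustrative (the name simulate_session and its argument list are not part of the original script); it repeats exactly the same selection and valuation steps as the loop above and returns the simulated values, choice probabilities, choices, and rewards:

simulate_session <- function(alpha, beta, ntrials = 24, Q0 = c(0.5, 0.5), Rmag = 1) {
  # reward history: option A pays off with P = 0.8, option B with P = 0.2
  rew <- data.frame(RB = runif(ntrials) < 0.2,
                    RA = runif(ntrials) < 0.8)
  Qt <- matrix(NA, nrow = ntrials + 1, ncol = 2)   # col 1 = B, col 2 = A
  PA <- ch <- r <- PE <- rep(NA, ntrials)
  Qt[1, ] <- Q0
  for (t in 1:ntrials) {
    # selection: softmax probability of choosing A, then sample the choice
    PA[t] <- 1/(1 + exp(-beta*(Qt[t,2] - Qt[t,1])))
    ch[t] <- 1 + 1*(runif(1) < PA[t])              # 1 = B; 2 = A
    # valuation: outcome, prediction error, value update
    r[t]  <- Rmag*rew[t, ch[t]]
    PE[t] <- r[t] - Qt[t, ch[t]]
    Qt[t+1, ch[t]]   <- Qt[t, ch[t]] + alpha*PE[t]
    Qt[t+1, 3-ch[t]] <- Qt[t, 3-ch[t]]
  }
  list(Qt = Qt, PA = PA, ch = ch, r = r, PE = PE)
}

# Example: compare a fast learner with a slow learner
sim_fast <- simulate_session(alpha = 0.8, beta = 3)
sim_slow <- simulate_session(alpha = 0.1, beta = 3)

The next subsection reaches the same goal with an explicit loop over sessions; that is the version whose results are saved and plotted below.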

8.2.2 Multiple sessions

# Inputs
# ----------------
# PARAMETERS
alpha      <- 0.4;
beta       <- 3;

# NUMBER OF TRIALS
ntrials <- 24;
nSess   <- 12;


# Pre-allocation
# ----------------
Qt  = array(rep(NA,(ntrials+1)*2*nSess), dim=c(ntrials+1,2,nSess)) # store the Q-values for option B (col 1) and option A (col 2), for each session
PE  = matrix(data=NA,nrow=ntrials,ncol=nSess);# store the Prediction error
ch  = matrix(data=NA,nrow=ntrials,ncol=nSess);# store the choice : 1 = B; 2 = A
PA  = matrix(data=NA,nrow=ntrials,ncol=nSess);# store the modelled proba. of choosing A
r   = matrix(data=NA,nrow=ntrials,ncol=nSess);# store the obtained reward

# value simulation
# ----------------
for (kSess in 1:nSess) {
  # Task related information for each session
  # ----------------
  # INITIAL VALUE
  Q0  <- c(0.5, 0.5); # initial value for option B and option A
  
  # REWARD HISTORY
  RA <- runif(ntrials,0,1)<0.8; # reward history for option A given its P(reward) = 0.8
  RB <- runif(ntrials,0,1)<0.2; # reward history for option B given its P(reward) = 0.2
  rew  <- data.frame(RB,RA);      # Outcome history of two options
  Rmag <- 1;                    # reward magnitude 
  
  # initialize Q values
  # ----------------
  Qt[1,,kSess]  = Q0; # for the first trial, Q values are the initialized values
  
  
  
  for (t in 1:ntrials) {
    # Selection
    # - the modelled proba. of choosing A
    PA[t,kSess]   = 1./(1+exp(-beta*(Qt[t,2,kSess]-Qt[t,1,kSess])));  
    # -  if PA is larger than a random number, then choose A
    choice  = runif(1)<PA[t,kSess];
    ch[t,kSess]   = 1 + 1*choice ; # the choice. 1 = B; 2 = A
    
    # valuation: outcome
    # -  if the reward history for the chosen option is "True", then receive reward. 0 Otherwise.
    r[t,kSess] = Rmag*rew[t,ch[t,kSess]]; 
    
    # valuation: compute prediction error
    PE[t,kSess] = r[t,kSess] - Qt[t,ch[t,kSess],kSess];
    
    # valuation: update value for the next trial
    Qt[t+1,ch[t,kSess],kSess] = Qt[t,ch[t,kSess],kSess]+ alpha*PE[t,kSess]; # column ch(t,kSess) = chosen (1 or 2; B or A)
    
    # valuation: keep track of unchosen option value for the next trial
    Qt[t+1,3-ch[t,kSess],kSess] = Qt[t,3-ch[t,kSess],kSess]; # column 3-ch(t) = unchosen (2 or 1; A or B)
  }
}
save(ch,r,nSess,ntrials,alpha,beta,Qt,PE,file="Week8_RL_sim_data_sess.Rdata")

Plot the simulation results for predicted values

 load("Week8_RL_sim_data_sess.Rdata")
# Plot the simulation results for predicted values
matplot(1:ntrials, data.frame(rowMeans(Qt[1:ntrials,1,]),rowMeans(Qt[1:ntrials,2,])), pch = c(2,9), type="b", col=c("blue", "red") , lty=c(1,1), xlab="trials", ylab="Q-values", main="simulation results for predicted value", xaxt="n")

# Add a legend
legend("topleft", c("OptionB", "OptionA"), pch = c(16,4), lty=c(1,1), col=c("blue", "red"), cex=0.8)

# improve the quality of figure
xtick<-seq(0, ntrials+1, by=1)
axis(side=1, at=xtick, labels = FALSE)
text(x=xtick,  par("usr")[3], 
     labels = xtick, srt = 0, pos = 1, xpd = TRUE)

Plot the simulation results for the predicted probability of choosing option A

 load("Week8_RL_sim_data_sess.Rdata")
ntrials = 24
# Plot actual choices 
plot(1:ntrials, rowMeans(ch[1:ntrials,]-1), pch=16,col="red",xlab="trials", ylab="Prob(choose A)", main="simulation results for P(A) and actual choices")

# Plot predicted Probability of choosing option A
points(1:ntrials, rowMeans(PA[1:ntrials,]), col="black",pch=4)
lines(1:ntrials, rowMeans(PA[1:ntrials,]), col="black", lty=3)

# Add a legend
legend("center", c("Choice","prob(A)"), lty=c(0,3), pch=c(16,4), col=c("red","black"), cex=0.8)

# improve the quality of figure
xtick<-seq(0, ntrials+1, by=1)
axis(side=1, at=xtick, labels = FALSE)
text(x=xtick,  par("usr")[3], 
     labels = xtick, srt = 0, pos = 1, xpd = TRUE)
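Finally, because choices are coded as 1 = B and 2 = A, the saved choice matrix can also be summarized numerically. For example (using the objects saved above), subtracting 1 turns the choices into 0/1 indicators of choosing A:

# overall proportion of choosing A, across all trials and sessions
mean(ch - 1)

# trial-by-trial proportion of choosing A, averaged over the 12 sessions
# (this is the red series plotted above)
rowMeans(ch - 1)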