18 Day 18 (March 30)
18.1 Announcements
Statistics seminar today (4-5pm; link)
Project work day on Tuesday (April 4)
Observation about your project work flow
- Programming before writing out model
- Personal habit seems to be to start/focus with what you are familiar with
Writing tips (or ways to fake it till you make it)
- Punctuate equations
- Use a professional typesetting language
- Don’t start your description of your statistical analysis/methods with what you are most familiar with
18.2 Time series
- Read Ch. 16 and Ch. 28
What is a time series?
- One damned thing after another (maybe Fisher)
- Spatio-temporal data collected at a single point in space over multiple time points
- Spatio-temporal data aggregated over space
- Any data set/model that has dependence in residual errors over time
- Phenomenological vs. mechanistic models
- Recall my retirement example ? (see Day 1 and Day 2 notes)
- Population growth example of whooping cranes
<-"https://www.dropbox.com/s/sr2411umm053vcq/Butler%20et%20al.%20Table%201.csv?dl=1"
url<- read.csv(url)
df1 plot(df1$Winter, df1$N, xlab = "Year", ylab = "Population Size",
ylim = c(0, 300), typ = "b", lwd = 1.5, pch = "*")
- Example: population growth in continuous time
- Ordinary differential equations
- The expected number of whooping cranes at any given time can be calculated by\[\lambda(t+\Delta t)=\lambda(t)+b(t)-d(t)\:.\]At time t, let the births equal \(b(t)=\beta\Delta t\lambda(t)\) and deaths equal \(d(t)=\alpha\Delta t\lambda(t)\). Then write the equation above as \[\lambda(t+\Delta t)=\lambda(t)+\beta\Delta t\lambda(t)-\alpha\Delta t\lambda(t)\:.\] Now define the growth rate as \(\gamma=\beta-\alpha\) and rewrite as \[\lambda(t+\Delta t)=\lambda(t)+\gamma\Delta t\lambda(t)\:.\] Next write the equation as\[\frac{\lambda(t+\Delta t)-\lambda(t)}{\Delta t}=\gamma\lambda(t)\] and let \[\Delta t\rightarrow0\lim_{\Delta t\rightarrow0}\frac{\lambda(t+\Delta t)-\lambda(t)}{\Delta t}=\gamma\lambda(t)\:.\] Finally replace \(\lim_{\Delta t\rightarrow0}\frac{\lambda(t+\Delta t)-\lambda(t)}{\Delta t}\) with the differential operator\[\frac{d\lambda(t)}{dt}=\gamma\lambda(t)\:.\]
- Solving requires an ODE and initial conditions
- The expected number of whooping cranes in 1949?
- Analytical solution to ODEs \[\lambda(t)=\lambda_{0}e^{\gamma t}\]
- Numerical solution to ODEs
- Finite difference method (replace \(\frac{d\lambda(t)}{dt}\) with \(\frac{\lambda_{t+\Delta t}-\lambda_{t}}{\Delta t}\)) \[\frac{d\lambda(t)}{dt}=\gamma\lambda\] \[\frac{\lambda_{t+\Delta t}-\lambda_{t}}{\Delta t}=\gamma\lambda\] \[\lambda_{t+\Delta t}=\lambda_{t}+\gamma\Delta t\lambda_{t}\]
- Write out Bayesian statistical model for whooping cranes
<- df1$N
y <- 10000 #Number of MCMC samples to draw
m.draws <- as.data.frame(matrix(,m.draws+1,2))
samples colnames(samples) <- c("gamma","lambda0")
1,] <- c(0,10) #Starting values for gamma & lambda0
samples[
#Monitor acceptance rate for Metropolis-Hastings algorithm
<- as.data.frame(matrix(0,m.draws+1,2))
accept colnames(accept) <- c("gamma","lambda0")
<- -10 # uniform prior for gamma
a <- 2
b
<- 0 # uniform prior for lambda0
c <- 100
d
<- 0.0007
gamma.tune <- 0.7
lambda0.tune
<- function(lambda0,gamma,t.max,dt,t.keep){
calc.lambda <- matrix(,t.max/dt+1,1)
lambda 1,] <- lambda0
lambda[for(i in 1:(t.max/dt)){
+1,] <- lambda[i,]*(1+dt*gamma)
lambda[i
}-1,])[t.keep]
(lambda[
}
<- length(y)
t.max <- 1/12
dt <- seq(1,t.max/dt,by=1/dt)
t.keep
set.seed(4403)
<- proc.time()
ptm for(k in 1:m.draws){
<- samples[k,1]
gamma <- samples[k,2]
lambda0 <- calc.lambda(lambda0,gamma,t.max,dt,t.keep)
lambda
#Sample gamma
<- rnorm(1,gamma,gamma.tune)
gamma.star if(gamma.star > a & gamma.star < b){
<- calc.lambda(lambda0,gamma.star,t.max,dt,t.keep)
lambda.star <- sum(dpois(y,lambda.star,log=TRUE)) + dunif(gamma.star,a,b,log=TRUE)
mh1 <- sum(dpois(y,lambda,log=TRUE)) + dunif(gamma,a,b,log=TRUE)
mh2 <- min(1,exp(mh1 - mh2))
R if (R > runif(1)){gamma <- gamma.star;lambda <- lambda.star; accept[k+1,1] <- 1}}
#Sample lambda0
<- rnorm(1,lambda0,lambda0.tune)
lambda0.star if(lambda0.star > c & lambda0.star < d){
<- calc.lambda(lambda0.star,gamma,t.max,dt,t.keep)
lambda.star <- sum(dpois(y,lambda.star,log=TRUE)) + dunif(lambda0.star,c,d,log=TRUE)
mh1 <- sum(dpois(y,lambda,log=TRUE)) + dunif(lambda0,c,d,log=TRUE)
mh2 <- min(1,exp(mh1 - mh2))
R if (R > runif(1)){lambda0 <- lambda0.star; accept[k+1,2] <- 1}}
+1,1] <- gamma
samples[k+1,2] <- lambda0
samples[kif(k%%1000==0){print(k)}
}
## [1] 1000
## [1] 2000
## [1] 3000
## [1] 4000
## [1] 5000
## [1] 6000
## [1] 7000
## [1] 8000
## [1] 9000
## [1] 10000
proc.time() - ptm
## user system elapsed
## 14.927 0.125 15.087
colMeans(accept[-1,])
## gamma lambda0
## 0.4272 0.4332
#Make sure to check trace plots
plot(samples[,1],typ="l",xlab="k",ylab=expression(gamma),xaxt='n')
plot(samples[,2],typ="l",xlab="k",ylab=expression(lambda[0]))
<- 1000
burn.in # E() of gamma & lambda0
colMeans(samples[-c(1:burn.in),])
## gamma lambda0
## 0.04261365 22.25786503
# 95% Equal-tailed CIs
apply(samples[-c(1:burn.in),],2,FUN=quantile,prob=c(0.025,0.975))
## gamma lambda0
## 2.5% 0.04100095 20.56473
## 97.5% 0.04430795 23.89963
<- length(y) + 20
t.max <- 1/12
dt <- which(seq(1950,2030,by=dt)>=2010)
t.keep
<- matrix(,m.draws+1,length(t.keep))
samples.pred
for(k in 1:(m.draws+1)){
<- samples[k,1]
gamma <- samples[k,2]
lambda0 <- calc.lambda(lambda0,gamma,t.max,dt,t.keep)
lambda <- rpois(length(t.keep),lambda)
y.new <- y.new
samples.pred[k,]
}
par(mfrow=c(1,1))
plot(seq(2010,2030,by=dt),colMeans(samples.pred[-c(1:burn.in),]),typ="l",xlab = "Year", ylab = "Population Size",xlim=c(1950,2030),ylim=c(0,700))
<- apply(samples.pred[-c(1:burn.in),],2,FUN=quantile,prob=c(0.025))
lwr.CI <- apply(samples.pred[-c(1:burn.in),],2,FUN=quantile,prob=c(0.975))
upper.CI polygon(c(seq(2010,2030,by=dt),rev(seq(2010,2030,by=dt))),c(lwr.CI,rev(upper.CI)),col=rgb(0.5,0.5,0.5,0.3),border=NA)
points(df1$Winter, df1$N, typ = "p", lwd = 1.5, pch = "*")
legend(x = 1960, y = 600, cex = 1.3, legend = c(expression("E("*y[new]*"|"*y[obs]*")"),"95% CI"), bty = "n",
lty = 1, lwd = 2, col = c("black",rgb(0.5,0.5,0.5,0.5)))
# Posterior distribution of when population size is >500
<- seq(2010,2030,by=dt)[apply(samples.pred[-c(1:burn.in),],1,FUN=function(x){which(x>=500)[1]})]
date.deriv hist(date.deriv,col="grey",freq=FALSE,main="",xlab=expression("Date when "* y[pred] *">500|"*y[obs]),
ylab=expression("[Date when "* y[pred] *">500|"*y[obs]*"]"))