Chapter 4 Predictive Model

4.1 Pred. Data

rm(list=ls())
library(tidyverse)
library(purrr)
library(Metrics)
library(stringr)
library(MatrixModels)
#library(rlang)
load("./_dat/NL_VEN_dat2.RData")

dt.pred <- dt_final %>%
  nest(data = everything()) %>%
  crossing(yy=2013:2015) %>%
  mutate(dat_subset = purrr::map2(.x = data, .y = yy, .f = ~mutate(.x, PV = ifelse(Year>.y,NA,PV),
                                                                   PF = ifelse(Year>.y,NA,PF))))

# Scaling
dt.pred <- dt_final %>%
  group_by(index) %>%
  mutate(viirs = scale_this(viirs),
         mei = scale_this(mei),
         tmmx = scale_this(tmmx),
         pr = scale_this(pr),
         yy = Year,
         Year = Year-2012) %>% #start year on 2012
  ungroup() %>%
  nest(data = everything()) %>%
  crossing(yy=2013:2015) %>%
  mutate(dat_subset = purrr::map2(.x = data, .y = yy, 
                                  .f = ~mutate(.x, PV = ifelse(yy>.y,NA,PV),
                                               PF = ifelse(yy>.y,NA,PF))))

#a <- dt.pred$dat_subset[[1]]

4.2 Pred. Functions

library(INLA)
library(spdep)
library(purrr)
devtools::source_gist("https://gist.github.com/gcarrascoe/89e018d99bad7d3365ec4ac18e3817bd")

inla.batch <- function(formula, dat1 = dt_final) {
  result = inla(formula, data=dat1, family="nbinomial", offset=log(pop), verbose = F, 
                #control.inla=list(strategy="gaussian"),
                control.compute=list(config=T, dic=T, cpo=T, waic=T),
                control.fixed = list(correlation.matrix=T),
                control.predictor=list(link=1,compute=TRUE),
                control.family = list(control.link=list(model="log"))
                )
  return(result)
}

inla.batch.safe <- possibly(inla.batch, otherwise = NA_real_)

4.3 Pred. P. vivax

# MODEL
pv_2.p<-c(formula = PV ~ 1,
        formula = PV ~ 1 + Year,
        formula = PV ~ 1 + Year + viirs,
        formula = PV ~ 1 + Year + viirs + mei,
        formula = PV ~ 1 + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + tmmx + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + tmmx + pr + f(index, model = "bym", graph = "./map.graph")
  )

names(pv_2.p)<-pv_2.p

# ------------------------------------------
# PV estimation
# ------------------------------------------
INLA:::inla.dynload.workaround()
dt.pred.pv <- dt.pred %>%
  crossing(formula=pv_2.p) %>%
  mutate(inla = purrr::map2(.x = dat_subset, .y = formula, .f = ~inla.batch.safe(dat1 = .x, formula = .y)))

#summary(dt.pred.pv$inla[[1]])
dt.pred.pv2 <- dt.pred.pv %>%
  group_by(yy) %>%
  mutate(id = 1:n()) %>%
  ungroup() %>%
  filter(!is.na(inla)) %>%
  mutate(pred = purrr::map(.x = inla, .f = ~.$summary.fitted.values$`0.5quant`),
         data_preds = purrr::map2(.x = data, .y = pred, .f = ~mutate(.x, pred = .y)),
         data_preds2 = purrr::map2(.x = data_preds, .y = dat_subset, .f = ~mutate(.x, input = .y$PV)),
         data_preds3 = purrr::map(.x = data_preds2, .f = ~filter(.x, is.na(input))),
         rmse = purrr::map_dbl(.x = data_preds3, .f = ~rmse(actual = .$PV, predicted = .$pred)),
         mae = purrr::map_dbl(.x = data_preds3, .f = ~mae(actual = .$PV, predicted = .$pred)),
         msle = purrr::map_dbl(.x = data_preds3, .f = ~msle(actual = .$PV, predicted = .$pred))
         ) %>%
  group_by(yy) %>%
  mutate(#id = 1:n(),
         ff = paste(id,as.character(formula), sep = "_"),
         #ff = ifelse(id==3, "5_PV ~ 1 + Year + f(viirs)", ff),
         #ff = ifelse(id==4, "5_PV ~ 1 + Year + f(viirs) + f(mei)", ff),
         ff = ifelse(id==1, "01_I", ff),
         ff = ifelse(id==2, "02_I + Y", ff),
         ff = ifelse(id==3, "03_I + Y + NL", ff),
         ff = ifelse(id==4, "04_I + Y + NL + MEI", ff),
         ff = ifelse(id==5, "05_I + S", ff),
         ff = ifelse(id==6, "06_I + S + Y", ff),
         ff = ifelse(id==7, "07_I + S + Y + NL", ff),
         ff = ifelse(id==8, "08_I + S + Y + NL + MEI", ff),
         ff = ifelse(id==9, "09_I + S + Y + NL + MEI + TM", ff),
         ff = ifelse(id==10, "10_I + S + Y + NL + MEI + TM + PR", ff)) %>%
  dplyr::select(yy, ff, rmse:msle)

#a <- dt.pred.pv2$data_preds3[[1]]

library(colorspace)
p.a <- dt.pred.pv2 %>%
  pred_plot(metric = rmse)

p.b <- dt.pred.pv2 %>%
  pred_plot(metric = mae, trim = T)

p.c <- dt.pred.pv2 %>%
  pred_plot(metric = msle, trim = T)

library(cowplot)
plot_grid(p.a, p.b, p.c, ncol = 3, rel_widths = c(4.5,2,2))

4.4 Pred. P. falciparum

# MODEL
pf_2.p<-c(formula = PF ~ 1,
        formula = PF ~ 1 + Year,
        formula = PF ~ 1 + Year + viirs,
        formula = PF ~ 1 + Year + viirs + mei,
        formula = PF ~ 1 + f(index, model = "bym", graph = "./map.graph"),
        formula = PF ~ 1 + Year + f(index, model = "bym", graph = "./map.graph"),
        formula = PF ~ 1 + Year + viirs + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + tmmx + f(index, model = "bym", graph = "./map.graph"),
        formula = PV ~ 1 + Year + viirs + mei + tmmx + pr + f(index, model = "bym", graph = "./map.graph")
  )

names(pf_2.p)<-pf_2.p

# ------------------------------------------
# PF estimation
# ------------------------------------------
INLA:::inla.dynload.workaround()
dt.pred.pf <- dt.pred %>%
  crossing(formula=pf_2.p) %>%
  mutate(inla = purrr::map2(.x = dat_subset, .y = formula, .f = ~inla.batch.safe(dat1 = .x, formula = .y)))

#summary(dt.pred.pf$inla[[1]])
dt.pred.pf2 <- dt.pred.pf %>%
  group_by(yy) %>%
  mutate(id = 1:n()) %>%
  ungroup() %>%
  filter(!is.na(inla)) %>%
  mutate(pred = purrr::map(.x = inla, .f = ~.$summary.fitted.values$`0.5quant`),
         data_preds = purrr::map2(.x = data, .y = pred, .f = ~mutate(.x, pred = .y)),
         data_preds2 = purrr::map2(.x = data_preds, .y = dat_subset, .f = ~mutate(.x, input = .y$PF)),
         data_preds3 = purrr::map(.x = data_preds2, .f = ~filter(.x, is.na(input))),
         rmse = purrr::map_dbl(.x = data_preds3, .f = ~rmse(actual = .$PF, predicted = .$pred)),
         mae = purrr::map_dbl(.x = data_preds3, .f = ~mae(actual = .$PF, predicted = .$pred)),
         msle = purrr::map_dbl(.x = data_preds3, .f = ~msle(actual = .$PF, predicted = .$pred))
         ) %>%
  group_by(yy) %>%
  mutate(#id = 1:n(),
         ff = paste(id,as.character(formula), sep = "_"),
         ff = ifelse(id==1, "01_I", ff),
         ff = ifelse(id==2, "02_I + Y", ff),
         ff = ifelse(id==3, "03_I + Y + NL", ff),
         ff = ifelse(id==4, "04_I + Y + NL + MEI", ff),
         ff = ifelse(id==5, "05_I + S", ff),
         ff = ifelse(id==6, "06_I + S + Y", ff),
         ff = ifelse(id==7, "07_I + S + Y + NL", ff),
         ff = ifelse(id==8, "08_I + S + Y + NL + MEI", ff),
         ff = ifelse(id==9, "09_I + S + Y + NL + MEI + TM", ff),
         ff = ifelse(id==10, "10_I + S + Y + NL + MEI + TM + PR", ff)) %>%
  dplyr::select(yy, ff, rmse:msle)

#a <- dt.pred.pv2$data_preds3[[1]]

library(colorspace)
f.a<-dt.pred.pf2 %>%
  pred_plot(metric = rmse)

f.b<-dt.pred.pf2 %>%
  pred_plot(metric = mae, trim = T)

f.c<-dt.pred.pf2 %>%
  pred_plot(metric = msle, trim = T)

library(cowplot)
plot_grid(f.a, f.b, f.c, ncol = 3, rel_widths = c(4.5,2,2))