Chapter 2 Aim1: PREDICTION CAPACITY [Random Effects model]

2.1 Data

``````rm(list=ls())

library(raster)
library(sf)
library(rasterVis)
library(tidyverse)
library(haven)
library(stringr)
library(scales)
library(cowplot)
library(ggpubr)``````
``````dat <- readRDS(file.path(wd, "/dat_NL_MAL.RData")) %>%
filter(nrohab>20) %>%
mutate(fal_rate = 100*fal/nrohab,
viv_rate = 100*viv/nrohab)

dat_year <- dat %>%
ungroup() %>%
group_by(year, id_loc) %>%
filter(nrohab>20) %>%
summarise(provinc = first(province.x),
fal = sum(fal),
viv = sum(viv),
nrohab = mean(nrohab),
MEI_ike_udel = mean(MEI_ike_udel),
nl_viirs_2 = mean(nl_viirs_2, na.rm=T),
nl_viirs_2_max = max(nl_viirs_2, na.rm=T)) %>%
mutate(fal_rate = 1000*fal/nrohab,
viv_rate = 1000*viv/nrohab,
bin_viv = ifelse(viv>0, 1,0),
bin_fal = ifelse(fal>0, 1,0))

dat_year_2 <- dat_year %>%
filter(nl_viirs_2_max < quantile(dat_year\$nl_viirs_2_max, 0.99),
!is.na(nl_viirs_2)) %>%
mutate(nl_viirs_2_max_sq = nl_viirs_2_max^2,
drop = ifelse(viv>nrohab,1,0),
drop = ifelse(fal>nrohab,1,drop)) %>%
ungroup() %>%
group_by(id_loc) %>%
mutate(drop2 = max(drop, na.rm = T)) %>%
filter(drop==0)

db <- readRDS("~/Dropbox/Work/SESYNC/Projects/Malaria/Village Level [Javier]/Analysis/output/data_sf.RData") %>%
mutate(id2 = row_number())

villages <- db %>%
dplyr::select(id_loc, id2) %>%
inner_join(st_set_geometry(db, NULL) %>%
dplyr::select(id_loc, nrohab, village.x, commune, province.x, uid, id2) %>%
distinct(id_loc, .keep_all = T), by=c("id_loc", "id2")) %>%
filter(id_loc!="")

filter(i==1) %>%
st_transform("+proj=longlat +datum=WGS84 +no_defs")

d <- read_csv("~/Dropbox/Work/SESYNC/Projects/Malaria/Village Level [Javier]/Analysis/output/dist_amazon.csv") %>%
dplyr::select(id_loc, distance) %>%
mutate(distance = distance*111111)

villages <- villages %>%
inner_join(d, by = "id_loc")

# NOT RUN
a <- dat_year_2 %>%
distinct(id_loc, .keep_all = T)
rm(a)``````

2.2 Model

``````library(lme4)
library(skimr)
library(broom.mixed)

fit1 <- glmer.nb(viv ~ nl_viirs_2_max + (1 + nl_viirs_2_max | id_loc) + offset(log(nrohab)), dat_year_2)

coeff1 <- ranef(fit1) %>%
augment() %>%
mutate(sig = ifelse(lb<0 & ub>0,0,1),
nl_viirs_2_max = estimate,
id_loc = as.character(level))

coeff1_s <- coeff1 %>%
filter(sig==1 & variable == "nl_viirs_2_max") %>%
select(id_loc, nl_viirs_2_max)

coeff1 <- coeff1 %>%
filter(variable == "nl_viirs_2_max") %>%
select(id_loc, nl_viirs_2_max)

skim(coeff1, nl_viirs_2_max)

fit3 <- glmer.nb(viv ~ nl_viirs_2_max + provinc + (1 + nl_viirs_2_max | id_loc) + offset(log(nrohab)), dat_year_2)

coeff3 <- ranef(fit3) %>%
augment() %>%
mutate(sig = ifelse(lb<0 & ub>0,0,1),
nl_viirs_2_max2 = estimate,
id_loc = as.character(level))

coeff3_s <- coeff3 %>%
filter(sig==1 & variable == "nl_viirs_2_max") %>%
select(id_loc, nl_viirs_2_max2)

coeff3 <- coeff3 %>%
filter(variable == "nl_viirs_2_max") %>%
select(id_loc, nl_viirs_2_max2)

skim(coeff3, nl_viirs_2_max2)``````

2.3 Model sumamry

``````library(jtools)
summ(fit1)``````
 Observations 10629 Dependent variable viv Type Mixed effects generalized linear model Family Negative Binomial(0.6838) Link log
 AIC 51608 BIC 51651.6
Fixed Effects
Est. S.E. z val. p
(Intercept) -4.05 0.06 -73.40 0.00
nl_viirs_2_max -0.21 0.11 -1.88 0.06
Random Effects
Group Parameter Std. Dev.
id_loc (Intercept) 1.99
id_loc nl_viirs_2_max 0.17
Grouping Variables
Group # groups ICC
id_loc 1555 0.38
``summ(fit3)``
 Observations 10629 Dependent variable viv Type Mixed effects generalized linear model Family Negative Binomial(0.6485) Link log
 AIC 51330.6 BIC 51425.1
Fixed Effects
Est. S.E. z val. p
(Intercept) -5.22 0.14 -36.01 0.00
nl_viirs_2_max -0.34 0.14 -2.47 0.01
provincDATEM DEL MARA_ON 1.60 0.19 8.38 0.00
provincLORETO 1.95 0.19 10.14 0.00
provincMARISCAL RAMON CASTILLA 1.66 0.21 7.92 0.00
provincMAYNAS 1.55 0.17 9.13 0.00
provincPUTUMAYO 0.50 0.31 1.58 0.11
provincREQUENA -0.39 0.23 -1.69 0.09
provincUCAYALI -3.10 0.48 -6.44 0.00
Random Effects
Group Parameter Std. Dev.
id_loc (Intercept) 1.77
id_loc nl_viirs_2_max 0.46
Grouping Variables
Group # groups ICC
id_loc 1555 0.23

2.4 Output

``````ggplot() +
geom_density(data = coeff1, aes(nl_viirs_2_max, fill = "viirs"), col = NA, alpha = .7) +
geom_density(data = coeff3, aes(nl_viirs_2_max2, fill = "viirs + province"), col = NA, alpha = .7) +
labs(fill = "Model", x = "viirs beta coefficients", y = "Density") +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = c("viirs" = "#9AC0CD", "viirs + province" = "#8B475D")) +
theme_bw()``````

``````ggplot() +
geom_density(data = coeff1_s, aes(nl_viirs_2_max, fill = "viirs"), col = NA, alpha = .7) +
geom_density(data = coeff3_s, aes(nl_viirs_2_max2, fill = "viirs + province"), col = NA, alpha = .7) +
labs(fill = "Model", x = "viirs beta coefficients", y = "Density") +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = c("viirs" = "#9AC0CD", "viirs + province" = "#8B475D")) +
theme_bw()``````

2.4.1 Spatial distribution

``````coeff3.sf <- villages %>%
inner_join(coeff3, by="id_loc") %>%
rename(NOMBPROV = province.x,
NOMBDIST = commune)

coeff3_s.sf <- villages %>%
inner_join(coeff3_s, by="id_loc") %>%
rename(NOMBPROV = province.x,
NOMBDIST = commune)

``````## Reading layer `MapLOR_DIST' from data source `/Users/gcarrasco/Dropbox/Work/Climate and Malaria/Analysis/Data/MapLOR_DIST/MapLOR_DIST.shp' using driver `ESRI Shapefile'
## Simple feature collection with 49 features and 19 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -77.82596 ymin: -8.716139 xmax: -69.94904 ymax: -0.03860597
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs``````
``````l <- st_read("~/Dropbox/Resources/GIS/per_admbnda_adm1_2018/per_admbnda_adm1_2018.shp") %>%
``````## Reading layer `per_admbnda_adm1_2018' from data source `/Users/gcarrasco/Dropbox/Resources/GIS/per_admbnda_adm1_2018/per_admbnda_adm1_2018.shp' using driver `ESRI Shapefile'
## Simple feature collection with 25 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -81.32823 ymin: -18.35093 xmax: -68.65228 ymax: -0.03860597
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs``````
``````#river
bb <- st_bbox(l)
bb[1] <- -77.6
river.l <- st_crop(river,bb)

area.sf_coeff_s <- area.sf %>%
inner_join(st_set_geometry(coeff3_s.sf, NULL) %>%
group_by(NOMBDIST) %>%
summarise(coeff = mean(nl_viirs_2_max2)),
by = "NOMBDIST")

area.sf_coeff <- area.sf %>%
inner_join(st_set_geometry(coeff3.sf, NULL) %>%
group_by(NOMBDIST) %>%
summarise(coeff = mean(nl_viirs_2_max2)),
by = "NOMBDIST")``````

2.5 Figures

2.5.1 Figure post1

``````ggplot() +
geom_sf(data = l, size = 0.5, col = "black") +
geom_sf(data = river.l, col="#8B1C62", size = 0.3) +
geom_sf(data = coeff3_s.sf, aes(col=nl_viirs_2_max2, size = nrohab), alpha=.3) +
scale_color_gradient2(low = "#00688B", mid = "white", high = "#CD3700") +
labs(color = "viirs RE slope") +
theme_void() +
theme(axis.text = element_text(size = 15))``````

2.5.2 Figure post2

``````area.sf_top = coeff3_s.sf %>%
mutate(low = ifelse(ntile(nl_viirs_2_max2,100)<3,"low","a"),
high = ifelse(ntile(nl_viirs_2_max2,100)>98,"high","a"),
beta = ifelse(low=="a",high,low)) %>%
filter(beta!="a")

ggplot() +
geom_sf(data = l, fill=NA, size = 0.5, col = "black") +
geom_sf(data = river.l, col="#8B1C62", size = 0.3) +
geom_sf(data = area.sf_top,
aes(col=beta), size = 2, alpha=.5) +
labs(color = "viirs RE slope") +
theme_void() +
theme(axis.text = element_text(size = 15))``````

2.5.3 Figure post3 [skip]

``````# area.sf_top %>%
#   filter(beta=="low") %>%
#   st_set_geometry(NULL) %>%
#   dplyr::select(id_loc) %>%
#   left_join(dat_year_2, by = "id_loc") %>%
#   ggplot(aes(x = year, group = id_loc)) +
#   geom_line(aes(y = viv_rate, col = "viv rate")) +
#   geom_line(aes(y = nl_viirs_2_max*10, col = "viirs")) +
#   scale_color_manual(values = c("viv rate" = "black", "viirs" = "red")) +
#   labs(col = "var", x = "year", y = "", title = "villages with most negative betas") +
#   scale_y_continuous("viv rate", sec.axis = sec_axis(~ . / 10, name = "viirs")) +
#   facet_wrap(.~id_loc, scales = "free_y")
#
# area.sf_top %>%
#   filter(beta=="high") %>%
#   st_set_geometry(NULL) %>%
#   dplyr::select(id_loc) %>%
#   left_join(dat_year_2, by = "id_loc") %>%
#   ggplot(aes(x = year, group = id_loc)) +
#   geom_line(aes(y = viv_rate, col = "viv rate")) +
#   geom_line(aes(y = nl_viirs_2_max*3000, col = "viirs")) +
#   scale_color_manual(values = c("viv rate" = "black", "viirs" = "red")) +
#   labs(col = "var", x = "year", y = "", title = "villages with most positive betas") +
#   scale_y_continuous("viv rate", sec.axis = sec_axis(~ . / 3000, name = "viirs")) +
#   facet_wrap(.~id_loc, scales = "free_y")``````

2.5.4 Figure post4 [Figure 2]

``````(map <- area.sf_coeff_s %>% #mutate(NOMBDIST2 = factor(NOMBDIST, levels=unique(NOMBDIST[order(NOMBPROV)]), ordered=TRUE)) %>%
ggplot() +
geom_sf(aes(fill=coeff), size = 0.5, col = NA) +
geom_sf(data = l, fill=NA, size = 0.5, col = "black") +
geom_sf(data = river.l, aes(color="Amazon River"), size = 0.3, show.legend = "line") +
scale_fill_gradient2(low = "#00688B", mid = "white", high = "#CD3700") +
scale_color_manual(values = "#8B1C62", name = "") +
labs(fill = "Mean Coeff") +
theme_bw() +
theme(axis.text = element_text(size = 10),
legend.position = "bottom"))``````

``````library(cowplot)
f2 <- plot_grid(map, labels = c("A"))

#ggsave(f2, filename = "./_out/f2.png", width = 5, height = 6, dpi = 300)``````
``````library(ggExtra)

a <- dat %>%
filter(!is.na(nl_viirs_2)) %>%
st_set_geometry(NULL) %>%
group_by(id_loc) %>%
summarise(nl_viirs_2 = first(nl_viirs_2)) %>%
mutate(nl_viirs_2 = ifelse(nl_viirs_2<0,0,nl_viirs_2))

b <- coeff3.sf %>% #cambiar
inner_join(a, by= "id_loc") %>%
inner_join(area.sf_coeff %>% # cambiar
st_set_geometry(NULL) %>%
mutate(dist = ifelse(coeff<0, "main river", "small river")) %>%
dplyr::select(NOMBDIST, dist),
by= "NOMBDIST")``````

2.5.5 VIIRS baseline vs. beta

``````b %>%
ggplot(aes(x=nl_viirs_2, y=nl_viirs_2_max2)) +
geom_point(alpha=.3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "viirs baseline", y = "viirs beta") +
geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="blue", fill="blue") +
theme_bw()``````

2.5.6 Distance to Amazon vs. beta [Fig 3a]

``````f3a.p <- b %>%
ggplot(aes(x=distance, y=nl_viirs_2_max2)) +
geom_point(alpha=.3, shape = 3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "Distance to Amazon River (m)", y = "VIIRS beta") +
#geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="#FDE725FF", fill="#FDE725FF") +
theme_bw()

f3a <- ggMarginal(f3a.p, xparams = list(fill = "#481568FF", col = "#440154FF", alpha=.3), #margins = 'x',
yparams = list(fill = "#29AF7FFF", col = "#20A386FF", alpha=.3))

f3a``````

2.5.7 Distance to Amazon vs. VIIRS baseline

``````b %>%
ggplot(aes(x=distance, y=nl_viirs_2)) +
geom_point(alpha=.3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
scale_y_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "distance to Amazon river", y = "viirs baseline") +
geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="blue", fill="blue") +
theme_bw()``````

2.5.8 Popualtion vs. beta [Fig 3b]

``````f3b.p <- b %>%
ggplot(aes(x=nrohab, y=nl_viirs_2_max2)) +
geom_point(alpha=.3, shape = 3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "Population (hab.)", y = "VIIRS beta") +
#geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="#FDE725FF", fill="#FDE725FF") +
theme_bw()

f3b <- ggMarginal(f3b.p, xparams = list(fill = "#481568FF", col = "#440154FF", alpha=.3),
yparams = list(fill = "#29AF7FFF", col = "#20A386FF", alpha=.3))

library(cowplot)
f3 <- plot_grid(f3a, f3b, labels = c("A","B"))
f3``````

``#ggsave2(f3, filename = "./_out/f3.png", width = 12, height = 7, dpi = 300)``

2.5.9 Popualtion vs. VIIRS baseline

``````b %>%
ggplot(aes(x=nrohab, y=nl_viirs_2)) +
geom_point(alpha=.3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
scale_y_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "Population", y = "viirs baseline") +
geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="blue", fill="blue") +
theme_bw()``````

2.5.10 Popualtion vs. Distance to Amazon

``````b %>%
ggplot(aes(x=nrohab, y=distance)) +
geom_point(alpha=.3) +
scale_x_continuous(trans="log10", labels = scales::comma_format()) +
scale_y_continuous(trans="log10", labels = scales::comma_format()) +
labs(x = "Population", y = "distance to Amazon river") +
geom_smooth(method = "loess", col="red", fill="red") +
stat_smooth(method = "lm", formula = y ~ poly(x, 2), col="blue", fill="blue") +
theme_bw()``````