Chapter 2 DATA
2.1 Libraries
# NOTE: the former `rm(list = ls())` call was removed on purpose. It only
# deletes objects in the calling environment; attached packages, options()
# and the working directory are left as they were, so it never produced a
# truly clean session, while it silently wiped the workspace of anyone who
# source()d this script. Run the script in a fresh R session instead
# (e.g. RStudio: Session > Restart R). Every object used below is created
# by this script before it is read.
library(raster)
library(sf)
library(rasterVis)
library(tidyverse)
library(haven)
library(stringr)
library(lubridate)
library(naniar)
2.2 Villages data
# Load the case-level sf object saved by the analysis pipeline and tag every
# row with its position (id2). id2 is used further down to pick exactly one
# geometry per village.
db <- mutate(
  readRDS("~/Dropbox/Work/SESYNC/Projects/Malaria/Village Level [Javier]/Analysis/output/data_sf.RData"),
  id2 = row_number()
)
# One row (and one geometry) per village id (id_loc).
#   * left side : id_loc + id2 from `db`, geometry still attached
#   * right side: attribute table without geometry, reduced to the first row
#                 seen for each id_loc
# Joining on both id_loc and id2 keeps, for every village, only the geometry
# of that first row. Rows with an empty id_loc are dropped at the end.
villages <- inner_join(
  dplyr::select(db, id_loc, id2),
  distinct(
    dplyr::select(
      st_set_geometry(db, NULL),
      id_loc, nrohab, village.x, commune, province.x, uid, id2
    ),
    id_loc,
    .keep_all = TRUE
  ),
  by = c("id_loc", "id2")
) %>%
  filter(id_loc != "")
2.3 Malaria Ecology Index (MEI) data
# Monthly Malaria Ecology Index (MEI) per village, 2009-2018.
# For every (year, month) slice the MEI grid points are turned into a raster
# and the raster is sampled at the village locations.
# NOTE(review): st_as_sf() is called without a CRS, so the points carry none;
# confirm that `villages` is in plain lon/lat so the overlay lines up.
mei <- read_dta("~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/MEI_Peru_mnthlygrid_1900_2017.dta") %>%
  zap_formats() %>%
  filter(year > 2008, year < 2019) %>%
  st_as_sf(coords = c("lon", "lat")) %>%
  # one nested sf table (list-column `data`) per year/month
  nest(-year, -month) %>%
  mutate(
    # sf -> sp, because raster::rasterize() works on Spatial* objects
    sp = map(data, function(pts) as(pts, "Spatial")),
    # change extent or resolution
    # template grid: extent(-81, -65, -20, 2) with 0.5-unit cells; points
    # falling in the same cell are summed
    raster = map(sp, function(pts_sp) {
      rasterize(
        pts_sp,
        raster(extent(-81, -65, -20, 2), res = .5),
        field = "MEI_ike_udel",
        fun = "sum"
      )
    }),
    # sample each monthly raster at the villages; sp = TRUE returns the
    # villages with the extracted value appended to their attribute table
    extracted = map(raster, function(grid) {
      raster::extract(grid, villages, fun = max, df = TRUE, sp = TRUE)
    }),
    # keep only the attribute table of each result
    data.extracted = map(extracted, function(spdf) spdf@data)
  ) %>%
  dplyr::select(year, month, data.extracted) %>%
  unnest(cols = c(data.extracted)) %>%
  # `layer` is the column holding the extracted raster value
  dplyr::select(year, month, id_loc, MEI_ike_udel = layer) %>%
  filter(id_loc != "")
2.4 Malaria Cases data
# Monthly malaria case counts per village (id_loc), one column per species
# group: `fal` (P. falciparum) and `viv` (every other retained diagnosis).
# Each row of `db` is counted as one case.
#   month <- month of `fecha_not` (presumably the notification date - confirm)
#   year  <- `ano`
# P. malariae records and rows with an empty id_loc are excluded; only years
# 2009-2018 are kept.
malaria <- st_set_geometry(db, NULL) %>%
  mutate(month = month(fecha_not), year = ano) %>%
  filter(
    year > 2008, year < 2019,
    diagno != "MALARIA POR P. MALARIAE",
    id_loc != ""
  ) %>%
  # Classify BEFORE aggregating. Counting per raw `diagno` and recoding
  # afterwards yields duplicate (year, month, id_loc, "viv") keys as soon as
  # two different non-falciparum labels occur in the same village-month, and
  # spread() then aborts. Grouping on `dx` adds those cases together instead.
  mutate(dx = ifelse(diagno == "MALARIA P. FALCIPARUM", "fal", "viv")) %>%
  group_by(year, month, id_loc, dx) %>%
  summarise(cases = n()) %>%
  # long -> wide; village-months without cases of a species get 0
  spread(dx, cases, fill = 0) %>%
  ungroup() %>%
  # The result stays grouped by village-month, exactly as before, so that
  # downstream code sees the same object. Rows are already unique per key
  # after spread(); distinct() is kept as a cheap guard.
  group_by(id_loc, year, month) %>%
  distinct(.keep_all = TRUE)
2.5 Nighttime Lights (NL) data
2.5.1 VIIRS (stray-light corrected):
Monthly 2014-2019 (avg_rad)
# VIIRS nighttime-lights export, first series: one row per point in the CSV,
# one column per month named like "X2014.01.01T00.00.00".
# Reshaped to long format with numeric `year` / `month` keys so it can be
# joined to the other village-month tables.
nl_viirs_1 <- read.csv(
  "~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/export_Points.csv",
  stringsAsFactors = FALSE
) %>%
  dplyr::select(-system.index) %>%
  # wide -> long: the column name goes to `date`, the cell value to `nl_viirs_1`
  gather(
    key = "date", value = "nl_viirs_1",
    X2014.01.01T00.00.00:X2019.08.01T00.00.00
  ) %>%
  mutate(
    nl_viirs_1 = as.numeric(nl_viirs_1),
    # "XYYYY.MM.DD...": characters 2-5 are the year, 7-8 the month
    year = as.numeric(substr(date, 2, 5)),
    month = as.numeric(substr(date, 7, 8))
  ) %>%
  dplyr::select(-c(date, .geo, avg_API, avg_cases, nrohab)) %>%
  filter(year > 2008, year < 2019, id_loc != "")
2.5.2 VIIRS (time series):
Monthly 2012-2019 (avg_rad)
# VIIRS nighttime-lights export, second series: one row per point in the CSV,
# one column per month named like "X2012.04.01T00.00.00".
# Reshaped to long format with numeric `year` / `month` keys so it can be
# joined to the other village-month tables.
nl_viirs_2 <- read.csv(
  "~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/export_Points_2.csv",
  stringsAsFactors = FALSE
) %>%
  dplyr::select(-system.index) %>%
  # wide -> long: the column name goes to `date`, the cell value to `nl_viirs_2`
  gather(
    key = "date", value = "nl_viirs_2",
    X2012.04.01T00.00.00:X2019.08.01T00.00.00
  ) %>%
  mutate(
    nl_viirs_2 = as.numeric(nl_viirs_2),
    # "XYYYY.MM.DD...": characters 2-5 are the year, 7-8 the month
    year = as.numeric(substr(date, 2, 5)),
    month = as.numeric(substr(date, 7, 8))
  ) %>%
  dplyr::select(-c(date, .geo, avg_API, avg_cases, nrohab)) %>%
  filter(year > 2008, year < 2019, id_loc != "")
2.6 Final Dataset
# Balanced village x month skeleton: every id_loc in `villages` crossed with
# every month of 2009-2018 (120 rows per village), so that village-months
# with no observation in any source still appear in the final panel.
# expand_grid() builds the cross product explicitly instead of relying on
# data.frame() recycling vectors of length 120 and 12 against a hard-coded
# `each = 120`. It keeps id_loc as character on every R version and returns
# zero rows (rather than an error) when there are no villages.
# Row order matches the previous construction: id_loc varies slowest, then
# year, then month.
placeholder <- expand_grid(
  id_loc = st_set_geometry(villages, NULL)$id_loc,
  year = 2009:2018,
  month = 1:12
)
# Final village-month panel.
# Start from the village geometries, expand to the full 2009-2018 monthly
# skeleton, then attach MEI, both VIIRS series and the malaria counts.
# Full joins are used throughout, so keys present in only one source are
# kept with NA in the other columns.
dat <- villages %>%
  full_join(placeholder, by = "id_loc") %>%
  full_join(mei, by = c("id_loc", "year", "month")) %>%
  full_join(nl_viirs_1, by = c("id_loc", "year", "month")) %>%
  full_join(nl_viirs_2, by = c("id_loc", "year", "month")) %>%
  full_join(malaria, by = c("id_loc", "year", "month")) %>%
  mutate(
    # a village-month without a malaria record means zero reported cases
    fal = ifelse(is.na(fal), 0, fal),
    viv = ifelse(is.na(viv), 0, viv),
    # 0/1 indicators: any case of the species in that village-month
    bin_viv = ifelse(viv > 0, 1, 0),
    bin_fal = ifelse(fal > 0, 1, 0)
  ) %>%
  arrange(year)
# Missing-data overview of the final panel; the large-data warning is
# switched off explicitly.
naniar::vis_miss(dat, warn_large_data = FALSE)