Chapter 2 DATA

2.1 Libraries

# Remove every (non-hidden) object from the current environment so the code
# below starts from an empty workspace.
# NOTE(review): this also deletes anything created earlier in the same R
# session; confirm that is intended before sourcing this interactively.
rm(list=ls())

library(raster)
library(sf)
library(rasterVis)
library(tidyverse)
library(haven)
library(stringr)
library(lubridate)
library(naniar)

2.2 Villages data

# Load the case-level table and tag every record with its row position (id2).
# `db` is handled as an sf object further down (its geometry is dropped with
# st_set_geometry()).
db <- readRDS("~/Dropbox/Work/SESYNC/Projects/Malaria/Village Level [Javier]/Analysis/output/data_sf.RData")
db <- mutate(db, id2 = row_number())

# One row per village id: the first record of each id_loc in `db`, matched back
# to `db` through (id_loc, id2) and carrying the village attributes listed
# below. Records with an empty id_loc are discarded.
villages <- db %>%
  dplyr::select(id_loc, id2) %>%
  inner_join(
    db %>%
      st_set_geometry(NULL) %>%
      # keep only the first record seen for each id_loc
      distinct(id_loc, .keep_all = TRUE) %>%
      dplyr::select(id_loc, nrohab, village.x, commune, province.x, uid, id2),
    by = c("id_loc", "id2")
  ) %>%
  filter(id_loc != "")

2.3 Malaria Ecology Index (MEI) data

# Monthly Malaria Ecology Index (MEI) value for every village, 2009-2018.
#
# Steps:
#   1. read the gridded monthly MEI table and keep years 2009-2018;
#   2. turn the lon/lat columns into point geometries;
#   3. for each year-month, rasterise the points onto a 0.5-degree grid over
#      extent(-81, -65, -20, 2), summing MEI_ike_udel within a cell;
#   4. read the raster value at each village location;
#   5. stack the per-month results into one long table
#      (year, month, id_loc, MEI_ike_udel).
mei <- read_dta("~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/MEI_Peru_mnthlygrid_1900_2017.dta") %>%
  zap_formats() %>%
  filter(year>2008, year<2019) %>%
  st_as_sf(coords = c("lon", "lat")) %>%
  # tidyr >= 1.0 (already required by `unnest(cols = ...)` below) wants the
  # nested column named explicitly; the bare `nest(-year, -month)` form only
  # runs through a deprecation warning.
  nest(data = c(-year, -month)) %>%
  mutate(sp = map(data, ~as(., "Spatial")),
         #change extent or resolution
         raster = map(sp, ~rasterize(., raster(extent(-81, -65, -20, 2), res = .5), field = "MEI_ike_udel", fun="sum")),
         # NOTE(review): for point geometries raster::extract() only applies
         # `fun` when a buffer is supplied -- confirm `villages` are points and
         # that `fun=max` is not expected to aggregate anything here.
         extracted = map(raster, ~raster::extract(., villages, fun=max, df=TRUE, sp=TRUE)),
         # sp=TRUE returns a Spatial object; its attribute table holds the
         # village columns plus the extracted value in `layer`.
         data.extracted = map(extracted, ~.@data)) %>%
  dplyr::select(year, month, data.extracted) %>%
  unnest(cols = c(data.extracted)) %>%
  dplyr::select(year, month, id_loc, layer) %>%
  rename(MEI_ike_udel=layer) %>%
  filter(id_loc!="")

2.4 Malaria Cases data

# Monthly malaria case counts per village, 2009-2018, one row per
# (year, month, id_loc) with two count columns:
#   fal - records diagnosed "MALARIA P. FALCIPARUM"
#   viv - every other retained diagnosis
# Records diagnosed "MALARIA POR P. MALARIAE" and records with an empty id_loc
# are excluded. The result is an ungrouped table.
malaria <- st_set_geometry(db, NULL) %>%
  mutate(month = month(fecha_not), year = ano) %>%
  filter(year > 2008, year < 2019,
         diagno != "MALARIA POR P. MALARIAE",
         id_loc != "") %>%
  # Recode the diagnosis BEFORE counting: if more than one non-falciparum label
  # occurs in the same village-month, counting per raw label and recoding
  # afterwards yields duplicate (year, month, id_loc, dx) keys and spread()
  # aborts. Counting per recoded label sums them instead.
  mutate(dx = ifelse(diagno == "MALARIA P. FALCIPARUM", "fal", "viv")) %>%
  count(year, month, id_loc, dx, name = "cases") %>%
  # village-months with no record for a species get 0 rather than NA
  spread(dx, cases, fill = 0)

2.5 Nighttime Lights (NL) data

2.5.1 VIIRS (stray-light corrected):

Monthly 2014-2019 (avg_rad)

# VIIRS night-lights export in long form: one row per point and month.
# The CSV has one column per month named like X2014.01.01T00.00.00; those
# columns are stacked into `nl_viirs_1`, and year / month are parsed from the
# column name (characters 2-5 and 7-8). Only 2009-2018 rows with a non-empty
# id_loc are kept, so the 2019 months in the export are dropped.
nl_viirs_1 <- read.csv(
  "~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/export_Points.csv",
  stringsAsFactors = FALSE
) %>%
  dplyr::select(-system.index) %>%
  gather(
    key = "date", value = "nl_viirs_1",
    X2014.01.01T00.00.00:X2019.08.01T00.00.00
  ) %>%
  mutate(
    nl_viirs_1 = as.numeric(nl_viirs_1),
    year = as.numeric(substr(date, 2, 5)),
    month = as.numeric(substr(date, 7, 8))
  ) %>%
  # drop the helper column and the export's extra attributes
  dplyr::select(-c(date, .geo, avg_API, avg_cases, nrohab)) %>%
  filter(year > 2008 & year < 2019 & id_loc != "")

2.5.2 VIIRS (time series):

Monthly 2012-2019 (avg_rad)

# Second VIIRS night-lights export, reshaped to one row per point and month.
# Monthly columns (X2012.04.01T00.00.00 ... X2019.08.01T00.00.00) are stacked
# into `nl_viirs_2`; year and month come from characters 2-5 and 7-8 of the
# original column name. Rows outside 2009-2018 or with an empty id_loc are
# removed.
nl_viirs_2 <- read.csv(
  "~/Dropbox/Work/Colabs UCSD/Malaria & Develop [McCord]/Data/export_Points_2.csv",
  stringsAsFactors = FALSE
) %>%
  dplyr::select(-system.index) %>%
  gather(
    key = "date", value = "nl_viirs_2",
    X2012.04.01T00.00.00:X2019.08.01T00.00.00
  ) %>%
  mutate(
    nl_viirs_2 = as.numeric(nl_viirs_2),
    year = as.numeric(substr(date, 2, 5)),
    month = as.numeric(substr(date, 7, 8))
  ) %>%
  # drop the helper column and the export's extra attributes
  dplyr::select(-c(date, .geo, avg_API, avg_cases, nrohab)) %>%
  filter(year > 2008 & year < 2019 & id_loc != "")

2.6 Final Dataset

# Complete village x year x month skeleton: every id_loc in `villages` crossed
# with the 120 months of 2009-2018 (10 years x 12 months).
# Built as an explicit cross product instead of relying on data.frame()
# recycling three vectors of different lengths. expand.grid() varies its first
# argument fastest, so rows come out as month within year within village -- the
# same order as before. stringsAsFactors = FALSE keeps id_loc from becoming a
# factor on R < 4.0, and an empty `villages` now yields a zero-row table rather
# than an error.
placeholder <- expand.grid(
  month = 1:12,
  year = 2009:2018,
  id_loc = st_set_geometry(villages, NULL)$id_loc,
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)[, c("id_loc", "year", "month")]

# Final analysis table. Start from the villages crossed with the month
# skeleton, then full-join each monthly source (MEI, both VIIRS series, malaria
# counts) on (id_loc, year, month) in that order. Missing case counts become 0,
# and bin_viv / bin_fal flag village-months with at least one case.
dat <- list(mei, nl_viirs_1, nl_viirs_2, malaria) %>%
  purrr::reduce(
    function(acc, tbl) full_join(acc, tbl, by = c("id_loc", "year", "month")),
    .init = full_join(villages, placeholder, by = "id_loc")
  ) %>%
  # a village-month absent from the malaria table had no recorded cases
  mutate_at(c("fal", "viv"), function(x) ifelse(is.na(x), 0, x)) %>%
  mutate(
    bin_viv = ifelse(viv > 0, 1, 0),
    bin_fal = ifelse(fal > 0, 1, 0)
  ) %>%
  arrange(year)

# Visual overview of missingness per column of the merged table.
vis_miss(dat, warn_large_data = FALSE)