Chapter 4 DENGUE DATA
4.1 Init
library(tidyverse)
library(lubridate)
library(rEDM)
library(ISOweek)
library(stringr)
library(cowplot)
library(purrr)
library(naniar)
library(ggsci)
4.2 Data
# Start from an empty workspace. This is load-bearing, not just tidying: the
# climate-assembly step further down collects every object returned by ls(),
# so anything left over from an earlier session would be swept into that list.
rm(list = ls())

# Restore the objects stored in Data.RData into the workspace.
# NOTE(review): `nl` and `temp` are used below but never created in this
# section, so they presumably come from this file -- confirm.
load("./_data/climate_vars/Data.RData")

# Dengue table; later steps read the columns ubigeo, yy, mm, tot_casos,
# departam, provincia and distrito from it.
den <- read_csv("_data/LOR_dengue.csv")

# Drop the ntl2014 column from `nl`.
nl <- nl %>%
  select(-ntl2014)

# Second night-time-light table, read from its own RDS file.
nl2 <- readRDS("./_data/climate_vars/NightTimeLight.rds")

# Distinct district codes on each side (climate codes coerced to numeric so
# the two vectors can be compared).
ubi_den <- unique(den$ubigeo)
ubi_clim <- unique(temp$UBIGEO) %>% as.numeric()

# Codes present in the climate data but absent from the dengue data.
unique(ubi_clim[!ubi_clim %in% ubi_den]) # Rosa Panduro, created in 2014
## [1] 160802
4.3 Assembly Climate
# Build one long-then-wide climate table from every data frame currently in
# the workspace.
#
# Steps:
#   1. Collect all workspace objects into a named list and turn it into a
#      two-column tibble (name, value).
#   2. Drop the objects that are not climate tables (`den`, the two UBIGEO
#      vectors, and `data_frames` itself in case the chunk is re-run).
#   3. For each remaining table, stack columns 5..last into (var_date, value),
#      then split the column name into a variable name and a date part.
#   4. Bind everything, spread the variables back into columns, keep years
#      before 2021 and make UBIGEO numeric.
#
# NOTE(review): step 3 assumes the first four columns of every table are
# identifiers and that the remaining column names look like
# <letters><_ or digit>...<year>[-<month>] -- confirm against Data.RData.
data_frames <- do.call("list", mget(ls())) %>%
  enframe() %>%
  # filter(name == "nl2") %>%
  filter(!name %in% c("den", "ubi_clim", "ubi_den", "data_frames")) %>%
  mutate(dat_edit = map(
    .x = value,
    .f = ~ gather(.x, var_date, value, 5:last_col()) %>%
      # Split where a letter is immediately followed by "_" or a digit.
      separate(var_date,
        into = c("var", "date"),
        sep = "(?<=[A-Za-z])(?=[_0-9])"
      ) %>%
      # Keep at most the last 7 characters of the date part.
      mutate(date = str_sub(date, -7, -1)) %>%
      separate(date,
        into = c("year", "month"),
        sep = "-", convert = TRUE
      ) %>%
      # Names without a "-<month>" part yield NA here; assign them month 1.
      mutate(month = ifelse(is.na(month), 1, month))
  )) %>%
  select(name, dat_edit) %>%
  unnest(cols = "dat_edit") %>%
  select(-name) %>%
  spread(var, value) %>%
  filter(year < 2021) %>%
  mutate(UBIGEO = as.numeric(UBIGEO))
# 53 districts * 12 months * 21 years = 13,356

# Single-table version of the reshape above, kept for debugging:
# a <- nl2 %>%
#   gather(var_date, value, 5:last_col()) %>%
#   separate(var_date,
#     into = c("var", "date"),
#     sep = "(?<=[A-Za-z])(?=[_0-9])"
#   ) %>%
#   mutate(date = str_sub(date, -7,-1)) %>%
#   separate(date,
#     into = c("year", "month"),
#     sep = "-",
#     convert = T) %>%
#   mutate(month = ifelse(is.na(month), 1, month))

# Missingness overview of the assembled climate table, in time order.
vis_miss(data_frames %>%
  arrange(year, month))
4.4 Assembly Cases
# Build the complete district x year x month grid of dengue counts.
#
# The grid is generated from the fully observed rows before 2021, the
# reported counts are joined onto it, and every combination without a report
# gets 0 cases. District labels are attached afterwards and the columns are
# renamed to the names used by the climate table.
den_proto <- den %>%
  filter(
    complete.cases(.),
    yy < 2021
  ) %>%
  expand(ubigeo, yy, mm) %>% # 52 districts * 12 months * 21 years = 13,104
  full_join(
    den %>%
      select(ubigeo, yy, mm, tot_casos) %>%
      filter(yy < 2021),
    by = c("ubigeo", "yy", "mm")
  ) %>%
  # Combinations that exist only in the grid had no reported cases.
  mutate(tot_casos = replace_na(tot_casos, 0)) %>%
  full_join(
    den %>%
      distinct(ubigeo, departam, provincia, distrito),
    by = "ubigeo"
  ) %>%
  rename(
    UBIGEO = ubigeo,
    year = yy,
    month = mm,
    department = departam,
    province = provincia,
    district = distrito,
    den_cases = tot_casos
  )

# Keep only district-months present in both cases and climate data, and add a
# Date column set to the first day of each month.
dat <- den_proto %>%
  inner_join(data_frames, by = c("UBIGEO", "year", "month")) %>%
  mutate(date = ymd(paste(year, month, "01", sep = "-")))

# Missingness overview of the merged table, in time order.
vis_miss(dat %>%
  arrange(year, month))

# Persist the merged table; missing values are written as empty fields.
write_csv(dat, "_data/LOR_dengue_clim.csv", na = "")
4.5 EDA
# The eight districts with the highest total number of dengue cases, as a
# character vector ordered from most to fewest cases.
top8 <- dat %>%
  group_by(district) %>%
  summarise(cases = sum(den_cases)) %>%
  arrange(-cases) %>%
  slice(1:8) %>%
  select(district) %>%
  unlist()

# Alternative, hand-picked set of eight districts. It is only referenced by
# the commented-out filter in the plotting code below.
capitals <- c(
  "IQUITOS", "YURIMAGUAS", "NAUTA", "RAMON CASTILLA",
  "REQUENA", "CONTAMANA", "BARRANCA", "PUTUMAYO"
)
# Long-format table for the time-series figure.
#
# Columns are addressed by position: 4 is den_cases and 7 is district (per
# the column order built for den_proto).
# NOTE(review): 21 is assumed to be `date` and 11:20 the climate variables --
# confirm against names(dat).
# NOTE(review): scale_this() is not defined in this section; it is presumably
# a standardising helper defined elsewhere -- confirm.
plot <- dat %>%
  mutate_at(
    .vars = c(4, 11:20),
    .funs = ~ scale_this(.)
  ) %>%
  select(7, 21, 4, 11:20) %>%
  gather(var, val, 3:13) %>%
  filter(district %in% top8) %>%
  # filter(district %in% capitals) %>%
  filter(var %in% c("pdsi", "pr", "soil", "tmmx", "den_cases"))

# Pair every climate row with the dengue value of the same district and date.
# After the join, val.x holds the climate value and val.y the dengue value.
plot_cases <- plot %>%
  filter(var != "den_cases") %>%
  inner_join(
    plot %>%
      filter(var == "den_cases") %>%
      select(-var),
    by = c("district", "date")
  )

# One panel per district x climate variable: the climate series in colour
# with the dengue series overlaid in black.
plot %>%
  filter(var != "den_cases") %>%
  ggplot(aes(x = date, col = var, group = var)) +
  geom_line(aes(y = val), size = .3) +
  # `data` is spelled out; the original relied on partial matching of `dat`.
  geom_line(data = plot_cases, aes(y = val.y), col = "black", size = .3) +
  facet_wrap(factor(district, levels = top8) ~ var,
    scales = "free_y", ncol = 4
  ) +
  scale_x_date(date_breaks = "5 year", date_labels = "%Y") +
  theme_bw() +
  scale_color_npg() +
  theme(legend.position = "bottom")

# Saves the most recently created plot.
ggsave("./_out/time_series.png", width = 7, height = 10)