Chapter 1 Data Wrangling
1.1 Socio-demographic data
# Clear every (non-hidden) object from the current environment before the
# chapter runs. Note: this does not detach packages or reset options().
rm(list = ls())
library(tidyverse)
library(skimr)
library(sf)
# Read the ICEMR CSV, drop rows without a sample id (`id_muestra`), keep the
# columns used later in this chapter, and coerce `id_study` and `long` to
# numeric (entries that cannot be parsed become NA with a warning).
icemr <- read.csv(
  "./_data/JASON/ICEMR2.0_P1_longJul_v1_20190406.csv",
  stringsAsFactors = FALSE
) %>%
  filter(!is.na(id_muestra)) %>%
  dplyr::select(
    id_muestra, id_house, id_study, edad, nm_sex,
    nm_level_study, viaje_ult_mes, lat, long, resultado_micro,
    especie_micro, temp_axilar, hist_fever, main_act_ec
  ) %>% # TODO: date_fever is still missing from this selection
  mutate(
    id_study = as.numeric(id_study),
    long = as.numeric(long)
  )
# Read the second CSV ("Master 20180905_ON") and bring it to the same
# 14-column layout as `icemr` so both tables can be stacked row-wise.
# Columns are renamed explicitly (new = old) inside select() instead of
# overwriting colnames() by position, so a reordered selection can no longer
# silently mislabel a column.
cam <- read.csv(
  "./_data/JASON/Master 20180905_ON.csv",
  stringsAsFactors = FALSE
) %>%
  dplyr::select(
    id_muestra, id_house, id_study,
    edad = nm_age_int,
    nm_sex, nm_level_study,
    viaje_ult_mes = ce_travel,
    lat = latitud,
    long = longitud,
    resultado_micro, especie_micro,
    temp_axilar = ce_temp_ax,
    # NOTE(review): the original positional rename maps `ce_temp` onto
    # `hist_fever`; confirm these two variables really are equivalent.
    hist_fever = ce_temp,
    main_act_ec = ce_economic_act
  ) %>%
  # Entries that cannot be parsed as numbers become NA (with a warning).
  mutate(lat = as.numeric(lat))
1.2 Lab data
1.3 Assembling
# Stack `cam` on top of `icemr`, then keep only the rows whose `id_muestra`
# also appears in `sero` (`sero` is created outside this excerpt, presumably
# in the lab-data section).
d1 <- bind_rows(cam, icemr) %>%
  inner_join(sero, by = "id_muestra")

# Quick per-column summary of the assembled table.
skim(d1)
# Read the household shapefile.
coord <- st_read("./_data/ser_data.shp")

# Attach the individual-level records to the spatial layer and derive the
# analysis variables.
d2 <- coord %>%
  # Drop fully duplicated rows first (presumably so the join below does not
  # multiply records per house; verify against the shapefile contents).
  distinct(.keep_all = TRUE) %>%
  inner_join(d1, by = "id_house") %>%
  mutate(
    # 1 when SEROPOSITIVE is "Positive", 0 otherwise (NA stays NA).
    sero = ifelse(SEROPOSITIVE == "Positive", 1, 0),
    # 0 when axillary temperature is below 37.5, 1 otherwise (NA stays NA).
    fever = ifelse(temp_axilar < 37.5, 0, 1),
    # `comm` values below 600 are labelled periurban, the rest rural.
    area = ifelse(
      as.numeric(as.character(comm)) < 600, "periurban", "rural"
    ),
    # Right-closed age bands: (-Inf,5], (5,15], (15,30], (30,50], (50,Inf].
    age_cat = cut(edad, breaks = c(-Inf, 5, 15, 30, 50, Inf)),
    # Values below 1 are labelled Female, everything else Male.
    nm_sex = ifelse(
      as.numeric(as.character(nm_sex)) < 1, "Female", "Male"
    )
  ) %>%
  # Single place where the categorical columns (including the freshly built
  # `area`, `nm_sex` and `fever`) are converted to factors.
  mutate_at(
    c(
      "nm_sex", "nm_level_study", "viaje_ult_mes", "resultado_micro",
      "especie_micro", "fever", "area"
    ),
    as.factor
  )

# Quick per-column summary of the final table.
skim(d2)