Chapter 2 Exploratory Analysis

2.1 Proportions

library(tidyverse)
library(viridis)
library(colorspace)

# Proportion of each combined outcome (`out`) within every age category,
# drawn as bars normalised to 1, with one panel per sex.
#
# NOTE(review): the opening of the `mutate()` call was missing here -- only
# its arguments and closing parenthesis were present, which does not parse.
# The call is restored below. `mm` is not defined anywhere in the visible
# code; presumably it is an existing column of `t1`, or its definition was
# lost together with the `mutate(` line -- TODO confirm and restore it.
t1 %>%
  mutate(
    # Short label for the serology result.
    ss = ifelse(SEROPOSITIVE == "Positive", "sero+", "sero-"),
    # Combined outcome label of the form "<mm> | <ss>".
    out = paste0(mm, " | ", ss)
  ) %>%
  ggplot(aes(x = age_cat, fill = out)) +
  # position = "fill" stacks the bars and rescales each one to height 1.
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "ag_Sunset") +
  labs(y = "proportion", x = "Age category", fill = "Outcome") +
  theme_bw() +
  facet_grid(
    ~nm_sex,
    labeller = labeller(nm_sex = c("0_female" = "Female", "1_male" = "Male"))
  )

# Proportion of each combined outcome (`out`) within every age category,
# drawn as bars normalised to 1, with one panel per fever status.
#
# NOTE(review): the opening of the `mutate()` call was missing here -- only
# its arguments and closing parenthesis were present, which does not parse.
# The call is restored below. `mm` is not defined anywhere in the visible
# code; presumably it is an existing column of `t1`, or its definition was
# lost together with the `mutate(` line -- TODO confirm and restore it.
t1 %>%
  mutate(
    # Short label for the serology result.
    ss = ifelse(SEROPOSITIVE == "Positive", "sero+", "sero-"),
    # Combined outcome label of the form "<mm> | <ss>".
    out = paste0(mm, " | ", ss)
  ) %>%
  ggplot(aes(x = age_cat, fill = out)) +
  # position = "fill" stacks the bars and rescales each one to height 1.
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "ag_Sunset") +
  labs(y = "proportion", x = "Age category", fill = "Outcome") +
  theme_bw() +
  facet_grid(
    ~fever,
    labeller = labeller(fever = c("0" = "No Fever", "1" = "Fever"))
  )

# Proportion of each combined outcome (`out`) within every age category,
# drawn as bars normalised to 1, with one panel per area.
#
# NOTE(review): the opening of the `mutate()` call was missing here -- only
# its arguments and closing parenthesis were present, which does not parse.
# The call is restored below. `mm` is not defined anywhere in the visible
# code; presumably it is an existing column of `t1`, or its definition was
# lost together with the `mutate(` line -- TODO confirm and restore it.
t1 %>%
  mutate(
    # Short label for the serology result.
    ss = ifelse(SEROPOSITIVE == "Positive", "sero+", "sero-"),
    # Combined outcome label of the form "<mm> | <ss>".
    out = paste0(mm, " | ", ss)
  ) %>%
  ggplot(aes(x = age_cat, fill = out)) +
  # position = "fill" stacks the bars and rescales each one to height 1.
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "ag_Sunset") +
  labs(y = "proportion", x = "Age category", fill = "Outcome") +
  theme_bw() +
  facet_grid(
    ~area,
    labeller = labeller(area = c("0_periurban" = "periurban", "1_rural" = "rural"))
  )

2.2 Maps

library(sf)
library(mapview)

# House-level summary used for mapping: `p` is the mean of `sero` over the
# records of each (area, id_house) pair, with the feature coordinates added
# as plain `lat` / `long` columns.
d_map <- local({
  house_prev <- d2 %>%
    group_by(area, id_house) %>%
    summarise(p = mean(sero)) %>%
    ungroup()

  # Coordinate matrix of the summarised features: column 1 is taken as
  # longitude and column 2 as latitude.
  xy <- sf::st_coordinates(house_prev)

  dplyr::mutate(house_prev, lat = xy[, 2], long = xy[, 1])
})

# Interactive map of the houses in the periurban area ("0_periurban"),
# coloured by the house-level value `p`, with a legend.
d_map %>%
filter(area == "0_periurban") %>%
mapview(zcol = "p", legend = TRUE)
# Same map for the houses in the rural area ("1_rural").
d_map %>%
filter(area == "1_rural") %>%
mapview(zcol = "p", legend = TRUE)

2.3 Distance plot

2.3.1 Reference Febrile case

library(magrittr)
library(stringr)

# Reference set: one record per house that has at least one record with
# current fever (`fever == 1`) or a history of fever (`hist_fever == 1`).
d3 <- d2 %>%
  filter(fever == 1 | hist_fever == 1) %>%
  distinct(id_house, .keep_all = TRUE)

# For every record in d2, the distance to the nearest reference house whose
# id shares the same first three characters as the record's `id_study`
# (presumably an area prefix -- TODO confirm).
#
# NOTE(review): d3 is a subset of d2, so each reference record is at distance
# 0 from itself and gets min_dist_m == 0 (as will any record sharing its
# coordinates). Confirm that this self-distance is intended.
m <- st_distance(d2, d3) %>% # rows follow d2, columns follow d3
  as.data.frame() %>%
  set_colnames(d3$id_house) %>%
  mutate(
    id_study = d2$id_study,
    id_muestra = d2$id_muestra
  ) %>%
  relocate(id_study, id_muestra) %>%
  # Long format: one row per (record, reference house) pair.
  gather(target, distance_m, -id_study, -id_muestra) %>%
  mutate(
    distance_m = as.numeric(distance_m), # plain numbers for min() / cut()
    area_o = str_sub(id_study, 1, 3), # prefix of the origin record
    area_t = str_sub(target, 1, 3) # prefix of the reference house
  ) %>%
  # Keep only same-prefix pairs before aggregating; this yields the same rows
  # as filtering afterwards but summarises far fewer groups.
  filter(area_o == area_t) %>%
  group_by(id_study, id_muestra, area_o, area_t) %>%
  summarise(min_dist_m = min(distance_m, na.rm = TRUE), .groups = "drop")

# Attach the nearest-reference distance to every record.
# NOTE(review): both tables carry `id_study`, so joining on `id_muestra` alone
# produces `id_study.x` / `id_study.y`; left unchanged in case later code
# relies on those names.
d4 <- d2 %>%
  inner_join(m, by = "id_muestra")

# Share of each SEROPOSITIVE level within bands of `min_dist_m`
# (<=10, 10-20, 20-30, 30-40, >40), one panel per area.
ggplot(
  data = mutate(
    d4,
    dist_cat = cut(min_dist_m, breaks = c(-Inf, seq(10, 40, by = 10), Inf))
  ),
  mapping = aes(x = dist_cat, fill = SEROPOSITIVE)
) +
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "BluGrn") +
  labs(y = "proportion", x = "Distance category (m)") +
  theme_bw() +
  facet_grid(
    cols = vars(area),
    labeller = labeller(area = c("0_periurban" = "Periurban", "1_rural" = "Rural"))
  )

2.3.2 Reference Seropositive case

library(magrittr)
library(stringr)

# Reference set: one record per house that has at least one record with
# `sero == 1`.
d3 <- d2 %>%
  filter(sero == 1) %>%
  distinct(id_house, .keep_all = TRUE)

# For every record in d2, the distance to the nearest reference house whose
# id shares the same first three characters as the record's `id_study`
# (presumably an area prefix -- TODO confirm).
#
# NOTE(review): d3 is a subset of d2, so each reference record is at distance
# 0 from itself and gets min_dist_m == 0 (as will any record sharing its
# coordinates). If `sero == 1` corresponds to SEROPOSITIVE == "Positive",
# this pushes seropositive records into the closest distance band -- confirm
# that this self-distance is intended.
m <- st_distance(d2, d3) %>% # rows follow d2, columns follow d3
  as.data.frame() %>%
  set_colnames(d3$id_house) %>%
  mutate(
    id_study = d2$id_study,
    id_muestra = d2$id_muestra
  ) %>%
  relocate(id_study, id_muestra) %>%
  # Long format: one row per (record, reference house) pair.
  gather(target, distance_m, -id_study, -id_muestra) %>%
  mutate(
    distance_m = as.numeric(distance_m), # plain numbers for min() / cut()
    area_o = str_sub(id_study, 1, 3), # prefix of the origin record
    area_t = str_sub(target, 1, 3) # prefix of the reference house
  ) %>%
  # Keep only same-prefix pairs before aggregating; this yields the same rows
  # as filtering afterwards but summarises far fewer groups.
  filter(area_o == area_t) %>%
  group_by(id_study, id_muestra, area_o, area_t) %>%
  summarise(min_dist_m = min(distance_m, na.rm = TRUE), .groups = "drop")

# Attach the nearest-reference distance to every record.
# NOTE(review): both tables carry `id_study`, so joining on `id_muestra` alone
# produces `id_study.x` / `id_study.y`; left unchanged in case later code
# relies on those names.
d4 <- d2 %>%
  inner_join(m, by = "id_muestra")

# Share of each SEROPOSITIVE level within bands of `min_dist_m`
# (<=10, 10-20, 20-30, 30-40, >40), one panel per area.
ggplot(
  data = mutate(
    d4,
    dist_cat = cut(min_dist_m, breaks = c(-Inf, seq(10, 40, by = 10), Inf))
  ),
  mapping = aes(x = dist_cat, fill = SEROPOSITIVE)
) +
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "BluGrn") +
  labs(y = "proportion", x = "Distance category (m)") +
  theme_bw() +
  facet_grid(
    cols = vars(area),
    labeller = labeller(area = c("0_periurban" = "Periurban", "1_rural" = "Rural"))
  )

2.3.3 Reference PCR Positive

library(magrittr)
library(stringr)

# Reference set: one record per house that has at least one record with
# `pcr_pos == 1`.
d3 <- d2 %>%
  filter(pcr_pos == 1) %>%
  distinct(id_house, .keep_all = TRUE)

# For every record in d2, the distance to the nearest reference house whose
# id shares the same first three characters as the record's `id_study`
# (presumably an area prefix -- TODO confirm).
#
# NOTE(review): d3 is a subset of d2, so each reference record is at distance
# 0 from itself and gets min_dist_m == 0 (as will any record sharing its
# coordinates). Confirm that this self-distance is intended.
m <- st_distance(d2, d3) %>% # rows follow d2, columns follow d3
  as.data.frame() %>%
  set_colnames(d3$id_house) %>%
  mutate(
    id_study = d2$id_study,
    id_muestra = d2$id_muestra
  ) %>%
  relocate(id_study, id_muestra) %>%
  # Long format: one row per (record, reference house) pair.
  gather(target, distance_m, -id_study, -id_muestra) %>%
  mutate(
    distance_m = as.numeric(distance_m), # plain numbers for min() / cut()
    area_o = str_sub(id_study, 1, 3), # prefix of the origin record
    area_t = str_sub(target, 1, 3) # prefix of the reference house
  ) %>%
  # Keep only same-prefix pairs before aggregating; this yields the same rows
  # as filtering afterwards but summarises far fewer groups.
  filter(area_o == area_t) %>%
  group_by(id_study, id_muestra, area_o, area_t) %>%
  summarise(min_dist_m = min(distance_m, na.rm = TRUE), .groups = "drop")

# Attach the nearest-reference distance to every record.
# NOTE(review): both tables carry `id_study`, so joining on `id_muestra` alone
# produces `id_study.x` / `id_study.y`; left unchanged in case later code
# relies on those names.
d4 <- d2 %>%
  inner_join(m, by = "id_muestra")

# Share of each SEROPOSITIVE level within bands of `min_dist_m`
# (<=10, 10-20, 20-30, 30-40, >40), one panel per area.
ggplot(
  data = mutate(
    d4,
    dist_cat = cut(min_dist_m, breaks = c(-Inf, seq(10, 40, by = 10), Inf))
  ),
  mapping = aes(x = dist_cat, fill = SEROPOSITIVE)
) +
  geom_bar(position = "fill") +
  scale_fill_discrete_sequential(palette = "BluGrn") +
  labs(y = "proportion", x = "Distance category (m)") +
  theme_bw() +
  facet_grid(
    cols = vars(area),
    labeller = labeller(area = c("0_periurban" = "Periurban", "1_rural" = "Rural"))
  )