Chapter 1 Descriptive Analysis

1.1 Table 1

t1 <- dat %>%
  dplyr::select(area, comm, edad, age_cat, nm_sex, edu_cat,
                work_out, travel_1m,TREATMENT,
                resultado_micro, fever, temp_axilar,
                hist_fever,SEROPOSITIVE)

library(furniture)

t1 %>%
  table1(splitby = ~SEROPOSITIVE, na.rm = F, 
         NAkeep = T, total = T,
         output = "html")
Total Negative Positive
n = 1790 n = 917 n = 873
area
0_periurban 785 (43.9%) 481 (52.5%) 304 (34.8%)
1_rural 1005 (56.1%) 436 (47.5%) 569 (65.2%)
NA 0 (0%) 0 (0%) 0 (0%)
comm
501_Rumococha 250 (14%) 165 (18%) 85 (9.7%)
502_Santo Tomás 273 (15.3%) 132 (14.4%) 141 (16.2%)
503_Quistococha 262 (14.6%) 184 (20.1%) 78 (8.9%)
901_Gamitanacocha 47 (2.6%) 6 (0.7%) 41 (4.7%)
902_Libertad 179 (10%) 45 (4.9%) 134 (15.3%)
903_Primero de Enero 58 (3.2%) 18 (2%) 40 (4.6%)
904_Salvador 270 (15.1%) 144 (15.7%) 126 (14.4%)
905_Lago Yuracyacu 97 (5.4%) 67 (7.3%) 30 (3.4%)
906_Puerto Alegre 166 (9.3%) 84 (9.2%) 82 (9.4%)
907_Urco Miraño 188 (10.5%) 72 (7.9%) 116 (13.3%)
NA 0 (0%) 0 (0%) 0 (0%)
edad
29.0 (22.0) 20.6 (18.8) 37.8 (21.8)
age_cat
[0,5] 178 (9.9%) 147 (16%) 31 (3.6%)
(5,15] 568 (31.7%) 405 (44.2%) 163 (18.7%)
(15,30] 299 (16.7%) 148 (16.1%) 151 (17.3%)
(30,50] 388 (21.7%) 126 (13.7%) 262 (30%)
(50,117] 357 (19.9%) 91 (9.9%) 266 (30.5%)
NA 0 (0%) 0 (0%) 0 (0%)
nm_sex
0_female 973 (54.4%) 536 (58.5%) 437 (50.1%)
1_male 817 (45.6%) 381 (41.5%) 436 (49.9%)
NA 0 (0%) 0 (0%) 0 (0%)
edu_cat
Primary or less 1183 (66.1%) 582 (63.5%) 601 (68.8%)
Secondary or Superior 607 (33.9%) 335 (36.5%) 272 (31.2%)
NA 0 (0%) 0 (0%) 0 (0%)
work_out
Inside 1278 (71.4%) 798 (87%) 480 (55%)
Outside 512 (28.6%) 119 (13%) 393 (45%)
NA 0 (0%) 0 (0%) 0 (0%)
travel_1m
No 1436 (80.2%) 794 (86.6%) 642 (73.5%)
Yes 349 (19.5%) 120 (13.1%) 229 (26.2%)
NA 5 (0.3%) 3 (0.3%) 2 (0.2%)
TREATMENT
No treatment 917 (51.2%) 917 (100%) 0 (0%)
Treatment 873 (48.8%) 0 (0%) 873 (100%)
NA 0 (0%) 0 (0%) 0 (0%)
resultado_micro
Negative 1729 (96.6%) 894 (97.5%) 835 (95.6%)
Positive 38 (2.1%) 12 (1.3%) 26 (3%)
NA 23 (1.3%) 11 (1.2%) 12 (1.4%)
fever
No 1764 (98.5%) 900 (98.1%) 864 (99%)
Yes 26 (1.5%) 17 (1.9%) 9 (1%)
NA 0 (0%) 0 (0%) 0 (0%)
temp_axilar
36.2 (0.5) 36.2 (0.5) 36.2 (0.5)
hist_fever
No 1572 (87.8%) 821 (89.5%) 751 (86%)
Yes 218 (12.2%) 96 (10.5%) 122 (14%)
NA 0 (0%) 0 (0%) 0 (0%)
SEROPOSITIVE
Negative 917 (51.2%) 917 (100%) 0 (0%)
Positive 873 (48.8%) 0 (0%) 873 (100%)
NA 0 (0%) 0 (0%) 0 (0%)
t1 %>%
  table1(splitby = ~area, na.rm = F, 
         NAkeep = T, total = T,
         output = "html")
Total 0_periurban 1_rural
n = 1790 n = 785 n = 1005
area
0_periurban 785 (43.9%) 785 (100%) 0 (0%)
1_rural 1005 (56.1%) 0 (0%) 1005 (100%)
NA 0 (0%) 0 (0%) 0 (0%)
comm
501_Rumococha 250 (14%) 250 (31.8%) 0 (0%)
502_Santo Tomás 273 (15.3%) 273 (34.8%) 0 (0%)
503_Quistococha 262 (14.6%) 262 (33.4%) 0 (0%)
901_Gamitanacocha 47 (2.6%) 0 (0%) 47 (4.7%)
902_Libertad 179 (10%) 0 (0%) 179 (17.8%)
903_Primero de Enero 58 (3.2%) 0 (0%) 58 (5.8%)
904_Salvador 270 (15.1%) 0 (0%) 270 (26.9%)
905_Lago Yuracyacu 97 (5.4%) 0 (0%) 97 (9.7%)
906_Puerto Alegre 166 (9.3%) 0 (0%) 166 (16.5%)
907_Urco Miraño 188 (10.5%) 0 (0%) 188 (18.7%)
NA 0 (0%) 0 (0%) 0 (0%)
edad
29.0 (22.0) 30.6 (21.8) 27.7 (22.2)
age_cat
[0,5] 178 (9.9%) 52 (6.6%) 126 (12.5%)
(5,15] 568 (31.7%) 227 (28.9%) 341 (33.9%)
(15,30] 299 (16.7%) 169 (21.5%) 130 (12.9%)
(30,50] 388 (21.7%) 176 (22.4%) 212 (21.1%)
(50,117] 357 (19.9%) 161 (20.5%) 196 (19.5%)
NA 0 (0%) 0 (0%) 0 (0%)
nm_sex
0_female 973 (54.4%) 461 (58.7%) 512 (50.9%)
1_male 817 (45.6%) 324 (41.3%) 493 (49.1%)
NA 0 (0%) 0 (0%) 0 (0%)
edu_cat
Primary or less 1183 (66.1%) 427 (54.4%) 756 (75.2%)
Secondary or Superior 607 (33.9%) 358 (45.6%) 249 (24.8%)
NA 0 (0%) 0 (0%) 0 (0%)
work_out
Inside 1278 (71.4%) 688 (87.6%) 590 (58.7%)
Outside 512 (28.6%) 97 (12.4%) 415 (41.3%)
NA 0 (0%) 0 (0%) 0 (0%)
travel_1m
No 1436 (80.2%) 768 (97.8%) 668 (66.5%)
Yes 349 (19.5%) 17 (2.2%) 332 (33%)
NA 5 (0.3%) 0 (0%) 5 (0.5%)
TREATMENT
No treatment 917 (51.2%) 481 (61.3%) 436 (43.4%)
Treatment 873 (48.8%) 304 (38.7%) 569 (56.6%)
NA 0 (0%) 0 (0%) 0 (0%)
resultado_micro
Negative 1729 (96.6%) 742 (94.5%) 987 (98.2%)
Positive 38 (2.1%) 20 (2.5%) 18 (1.8%)
NA 23 (1.3%) 23 (2.9%) 0 (0%)
fever
No 1764 (98.5%) 771 (98.2%) 993 (98.8%)
Yes 26 (1.5%) 14 (1.8%) 12 (1.2%)
NA 0 (0%) 0 (0%) 0 (0%)
temp_axilar
36.2 (0.5) 36.2 (0.5) 36.1 (0.5)
hist_fever
No 1572 (87.8%) 719 (91.6%) 853 (84.9%)
Yes 218 (12.2%) 66 (8.4%) 152 (15.1%)
NA 0 (0%) 0 (0%) 0 (0%)
SEROPOSITIVE
Negative 917 (51.2%) 481 (61.3%) 436 (43.4%)
Positive 873 (48.8%) 304 (38.7%) 569 (56.6%)
NA 0 (0%) 0 (0%) 0 (0%)

1.2 Descriptives

1.3 Proportions

library(tidyverse)
library(ggmosaic)
library(innovar)

prop_bar <- function(cat_var, title = NULL, l.pos = "none", labx = "") {
  
  title1 <- if (is.null(title)) {expression(cat_var)} else {title}
  
  var1 <- enquo(cat_var)
  
  t1 %>%
  filter(!is.na(resultado_micro)) %>%
  mutate(area = ifelse(area == "0_periurban", "Periurban", "Rural"),
         nm_sex = ifelse(nm_sex == "0_female", "Female", "Male"),
         var2 = !!var1,
         mm = ifelse(resultado_micro=="Positive", "micro+", "micro-"),
         ss = ifelse(SEROPOSITIVE=="Positive", "sero+", "sero-"),
         out = paste0(mm, " | ",ss),
         age_cat = factor(age_cat),
         out = factor(out),
         out = fct_rev(out)) %>%
    filter(!is.na(var2)) %>%
  
  ggplot() +
  geom_mosaic(aes(x = product(out, age_cat), fill = out)#, 
                  #offset = 0.1
                  ) +
    scale_fill_innova(palette = "mlobo") +
  # scale_fill_discrete_sequential(palette="Heat 2",
  #                                rev = F) +
    # scale_fill_manual(values=c("#1b4332", "#b7e4c7", 
    #                            "#006d77", "#83c5be")) +
  labs(y = "proportion", fill = "Outcome", x = labx) +
  theme_minimal() +
  facet_grid(~var2) +
  ggtitle(title1) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = l.pos)

  
}

f1a <- prop_bar(area, "Area")
f1b <- prop_bar(nm_sex, "Sex")
f1c <- prop_bar(work_out, "Activities outside village")
f1d <- prop_bar(travel_1m, "Travel in the last month")
f1e <- prop_bar(edu_cat, "Education", labx = "Age category")
f1f <- prop_bar(hist_fever, "Fever in last week", labx = "Age category")

library(cowplot)

legend <- get_legend(
  prop_bar(hist_fever, "Fever in last week", "bottom") +
    theme(legend.box.margin = margin(0, 0, 0, 12))
)

lplot <- plot_grid(legend)

plots <- plot_grid(f1a, f1b, f1c, f1d, f1e, f1f, ncol = 2)

p2 <- plot_grid(plots, lplot, ncol = 1, rel_heights = c(9,1))
p2

1.4 Maps

1.4.1 Static

library(maptools)
library(ggspatial)

# library(ggmap)
# map <- get_map(location = st_bbox(d2) %>% as.numeric(), maptype = "watercolor", source = "osm", zoom = 10)
# 
# ggmap(map) + 
#   coord_sf(crs = st_crs(3857)) +
#   geom_sf(data = d2 %>%
#             group_by(id_house) %>%
#             summarise(p = mean(sero)), 
#           aes(col = p), inherit.aes = FALSE, alpha=.4) +
#   scale_color_viridis(option = "magma") +
#   labs(col = "Household \nSeropositivity Rate")


library(basemaps)

# get_maptypes()

set_defaults(map_service = "carto",
             map_type = "light_no_labels")

# basemap_ggplot(d2)

data1 = d2 %>%
            group_by(id_house) %>%
            summarise(p = mean(sero)) %>%
  st_transform(crs = 3857)


buffer <- st_bbox(d2) %>%
  st_as_sfc() %>%
  st_buffer(10000)

p1 <- ggplot() +
  basemap_gglayer(buffer) +
  coord_sf(expand = F) +
  scale_fill_identity() +
  geom_sf(data = data1, aes(col = p)) +
  coord_sf(expand = F) +
  labs(col = "Household \nSeropositivity rate") +
  scale_color_viridis_c(option = "magma") + 
  annotation_scale(location = "br") +
  coord_sf(expand = F) +
  annotation_north_arrow(location = "tl",
                         style = north_arrow_nautical) +
  coord_sf(expand = F) +
  #theme_void() + 
  theme(panel.background = element_rect(fill = "white", 
                                        colour = "black"),
        legend.position = "bottom") +
  guides(colour = guide_colourbar(title.position="top", 
                                  title.hjust = 0.5),
         size = guide_legend(title.position="top", 
                             title.hjust = 0.5))
## Loading basemap 'light_no_labels' from map service 'carto'...
plot_grid(p1, p2, nrow = 1, labels = c("A)", "B)"),
          rel_widths = c(4,5))

ggsave("./_out/fig1.png", width = 14, height = 8,
       dpi = "retina", bg = "white")

1.4.2 Dynamic

library(leaflet)

dat_ci_map <- d2 %>%
  group_by(id_house) %>%
  summarise(p = mean(sero, na.rm = T), area = first(area), age = mean(edad, na.rm = T)) %>%
  filter(!is.na(p)) %>%
  filter(!is.infinite(p)) %>%
  dplyr::mutate(lat = sf::st_coordinates(.)[,2],
                long = sf::st_coordinates(.)[,1])

pal <- colorBin("inferno", bins = seq(0,1,.1), reverse=T) 

dat_ci_map %>%
  leaflet() %>%
  addTiles() %>%
  addCircles(lng = ~long, lat = ~lat, color = ~ pal(p),
             popup = paste("ID Household", dat_ci_map$id_house, "<br>",
                           "Area:", dat_ci_map$area, "<br>",
                           "Mean age:", dat_ci_map$age, "<br>",
                           "Prevalence:", dat_ci_map$p)) %>%
  addLegend("bottomright",
            pal = pal, values = ~p,
            title = "Prevalence") %>%
  addScaleBar(position = c("bottomleft"))