Chapter 3 Analysis Wealth

3.1 Processing - Wealth

rm(list = ls())
library(IC2)
library(codebook)
library(tidyverse)

dat <- read.csv("./_dat/dat_mal_ineq_v3.csv")
source("./mal_ineq_fun.R")

# epiDisplay::tab1(dat$rapid_test)

#codebook_browser(dat)


# ==== NOT RUN =======
# ao <- dat %>%
#   filter(country == "SN") %>%
#   dplyr::select(rapid_test, wealth, w) %>%
#   filter(complete.cases(.))
# 
# ao_ci <- IC2::calcSConc(x = ao$rapid_test, 
#                         y = ao$wealth, 
#                         w = ao$w)
# ==== NOT RUN =======

dat.n <- dat %>%
  group_by(country, REGNAME, cluster_number) %>%
  nest() %>%
  mutate(sample.size.N = map(.x = data, .f = ~dplyr::select(.x, rapid_test, wealth, w) %>%
                               count(name = "N") %>% unlist()),
         sample.size.m = map(.x = data, .f = ~dplyr::select(.x, rapid_test, wealth, w) %>%
                               filter(!is.na(rapid_test)) %>%
                               count(name = "N_m") %>% unlist()),
         dat_s = map(.x = data, .f = ~dplyr::select(.x, rapid_test, wealth, w) %>%
                       filter(complete.cases(.))),
         sample.size = map(.x = dat_s, .f = ~count(.x) %>% unlist()),
         prev = map(.x = dat_s, .f = ~mean(.x$rapid_test, na.rm=T)),
         wealth_cats = map(.x = dat_s, .f = ~distinct(.x, wealth) %>% nrow())) %>%
  # FILTERS =======================
  filter(prev > 0 & prev < 1) %>%
  filter(sample.size>=10) %>%
  filter(wealth_cats>1)

dat_ci.n <- dat.n  %>%
  mutate(ci = map(.x = dat_s, .f = ~IC2::calcSConc(x = .x$rapid_test,
                        y = .x$wealth,
                        w = .x$w)),
         ci_val = map(.x = ci, .f = ~.x$ineq$index),
         h_calc = map(.x = dat_s, .f = ~h_ineq(dat = .x, var_soc = wealth, var_outcome = rapid_test))
         )
  
dat_ci <- dat_ci.n %>%
  dplyr::select(country, cluster_number, sample.size.N, sample.size.m, sample.size, prev, wealth_cats, ci_val, h_calc) %>%
  unnest() %>%
  ungroup() 

# Checks ========
dat_ci %>%
  ggplot(aes(x=c_index, y = ci_val)) +
  geom_point(alpha = .4) +
  labs(x = "hand calculation", y = "package calculation")

# ==== NOT RUN =======
# unique(dat.n$country)
# unique(dat_ci$country)
# 
# range(dat$w)
# range(dat_ci$N_m)
# range(dat_ci$n)
# range(dat_ci$ci_val, na.rm = T)
# 
# hist(dat_ci$N_m)
# hist(dat_ci$n)
# hist(dat_ci$ci_val)
# ==== NOT RUN =======

#saveRDS(dat_ci, "./_dat/dat_ci.rds")

3.2 Spatial data - Wealth

## Reading layer `DHS_adm' from data source `/Users/gabrielcarrasco/Dropbox/Work/Tarik LAB/Malaria Ineq/mal_ineq/_dat/SHP/union/DHS_adm.shp' using driver `ESRI Shapefile'
## Simple feature collection with 147 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -13.30198 ymin: -26.86819 xmax: 50.49459 ymax: 15.7047
## Geodetic CRS:  WGS 84

3.3 Distributions - Wealth

Supplementary Figure 01

library(cowplot)

a <- hist_ineq(dat_ci, sample.size, "Sample Size")
b <- hist_ineq(dat_ci, prev, "Prevalence")
c <- bi_hist_ineq(dat_ci, var_x = prev, var_y = sample.size, lab_x = "Prevalence", lab_y = "Sample Size")

(dist <- plot_grid(a,b,c, labels = c("A)", "B)", "C)"), nrow = 1))

3.4 CI - Wealth

3.4.1 Plots CI - Wealth

Supplementary Figure 10

library(cowplot)

ci_box_10 <- dat_ci %>% filter(sample.size>=10) %>% ci_box(var = ci_val)
ci_box_20 <- dat_ci %>% filter(sample.size>=20) %>% ci_box(var = ci_val)
ci_box_30 <- dat_ci %>% filter(sample.size>=30) %>% ci_box(var = ci_val)

ci_box1 <- plot_grid(ci_box_10, ci_box_20, ci_box_30, labels = c("A)", "B)", "C)"), ncol = 1)

ci_prev_20 <- dat_ci %>% filter(sample.size>=20) %>% ci_prev(var = ci_val)
ci_prev_10 <- dat_ci %>% filter(sample.size>=10) %>% ci_prev(var = ci_val)
ci_prev_30 <- dat_ci %>% filter(sample.size>=30) %>% ci_prev(var = ci_val)

ci_prev1 <- plot_grid(ci_prev_10, ci_prev_20, ci_prev_30, labels = c("A)", "B)", "C)"), ncol = 1)

(sf10 <- plot_grid(ci_box1, ci_prev1, ncol = 2))

3.4.2 Maps CI - Wealth

Supplementary Figure 09

# library(mapview)
# 
# m1 <- dat_ci_map %>%
#   filter(!is.na(ci_val)) %>%
#   mapview(zcol = "ci_val", legend = TRUE, layer.name = "CI (psu)")
# 
# m2 <- dat_ci_adm %>%
#   mapview(zcol = "ci_val", legend = TRUE, layer.name = "CI (Adm)")
# 
# m1 + m2

library(colorspace)

sf9_a <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_map %>%
            filter(!is.na(ci_val)),
          aes(col = ci_val), size = .5, alpha =.6) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_color_continuous_diverging(palette = "Tropic") +
  labs(color = "CI") +
  theme_bw()

sf9_b <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_adm, aes(fill = ci_val),
          size = 0) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_fill_continuous_diverging(palette = "Tropic") +
  labs(fill = "CI") +
  theme_bw()

(sf9 <- plot_grid(sf9_a, sf9_b, ncol = 2, 
                  labels = c("A)","B)")))

3.5 SII - Wealth

3.5.1 Plots SII - Wealth

Supplementary Figure 02

library(cowplot)

sii_box_10 <- dat_ci %>% filter(sample.size>=10) %>% ci_box(var = sii, y_lab = "SII")
sii_box_20 <- dat_ci %>% filter(sample.size>=20) %>% ci_box(var = sii, y_lab = "SII")
sii_box_30 <- dat_ci %>% filter(sample.size>=30) %>% ci_box(var = sii, y_lab = "SII")

sii_box <- plot_grid(sii_box_10, sii_box_20, sii_box_30, labels = c("A)", "B)", "C)"), ncol = 1)

sii_prev_10 <- dat_ci %>% filter(sample.size>=10) %>% ci_prev(var = sii, y_lab = "SII")
sii_prev_20 <- dat_ci %>% filter(sample.size>=20) %>% ci_prev(var = sii, y_lab = "SII")
sii_prev_30 <- dat_ci %>% filter(sample.size>=30) %>% ci_prev(var = sii, y_lab = "SII")

sii_prev <- plot_grid(sii_prev_10, sii_prev_20, sii_prev_30, labels = c("A)", "B)", "C)"), ncol = 1)

(sf2 <- plot_grid(sii_box, sii_prev, ncol = 2))

3.5.2 Maps SII - Wealth

Figure 01_a

Supplementary Figure 07_a

# library(mapview)
# 
# m1 <- dat_ci_map %>%
#   filter(!is.na(sii)) %>%
#   mapview(zcol = "sii", legend = TRUE, layer.name = "SII (psu)")
# 
# m2 <- dat_ci_adm %>%
#   mapview(zcol = "sii", legend = TRUE, layer.name = "SII (Adm)")
# 
# m1 + m2

library(colorspace)

(sf7_a <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_map %>%
            filter(!is.na(sii)),
          aes(col = sii), size = .5, alpha =.6) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_color_continuous_diverging(palette = "Tropic") +
  labs(tag = "A)", color = "SII") +
  theme_bw())

(f1_a <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_adm, aes(fill = sii),
          size = 0) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_fill_continuous_diverging(palette = "Tropic") +
  labs(tag = "A)", fill = "SII") +
  theme_bw())

3.6 RII - Wealth

3.6.1 Plots RII - Wealth

Supplementary Figure 03

library(cowplot)
rii_box_10 <- dat_ci_map %>% filter(sample.size>=10) %>% filter(rii>0) %>% ci_box(var = rii, y_lab = "log RII", r = T) + scale_y_log10()
rii_box_20 <- dat_ci_map %>% filter(sample.size>=20) %>% filter(rii>0) %>% ci_box(var = rii, y_lab = "log RII", r = T) + scale_y_log10()
rii_box_30 <- dat_ci_map %>% filter(sample.size>=30) %>% filter(rii>0) %>% ci_box(var = rii, y_lab = "log RII", r = T) + scale_y_log10()

rii_box <- plot_grid(rii_box_10, rii_box_20, rii_box_30, labels = c("A)", "B)", "C)"), ncol = 1)

rii_prev_10 <- dat_ci_map %>% filter(sample.size>=10) %>% filter(rii>0) %>% ci_prev(var = rii, y_lab = "log RII", r = T) + scale_y_log10()
rii_prev_20 <- dat_ci_map %>% filter(sample.size>=20) %>% filter(rii>0) %>% ci_prev(var = rii, y_lab = "log RII", r = T) + scale_y_log10()
rii_prev_30 <- dat_ci_map %>% filter(sample.size>=30) %>% filter(rii>0) %>% ci_prev(var = rii, y_lab = "log RII", r = T) + scale_y_log10()

rii_prev <- plot_grid(rii_prev_10, rii_prev_20, rii_prev_30, labels = c("A)", "B)", "C)"), ncol = 1)

(sf3 <- plot_grid(rii_box, rii_prev, ncol = 2))

3.6.2 Maps RII - Wealth

Figure 02_a

Supplementary Figure 08_a

# library(mapview)
# 
# m1 <- dat_ci_map %>%
#   filter(!is.na(rii)) %>%
#   mutate(log_rii = log(rii)) %>%
#   filter(log_rii != Inf & log_rii != -Inf) %>%
#   # filter(quantile(rii, .1, na.rm = T)<=rii) %>%
#   # filter(quantile(rii, .9, nna.rm = T)>=rii) %>%
#   mapview(zcol = "log_rii", legend = TRUE, layer.name = "log RII (psu)")
# 
# m2 <- dat_ci_adm %>%
#   filter(!is.na(rii)) %>%
#   mutate(log_rii = log(rii)) %>%
#   filter(log_rii != Inf & log_rii != -Inf) %>%
#   # filter(quantile(rii, .1, na.rm = T)<=rii) %>%
#   # filter(quantile(rii, .9, nna.rm = T)>=rii) %>%
#   mapview(zcol = "log_rii", legend = TRUE, layer.name = "log RII (Adm)")
# 
# m1 + m2

library(colorspace)

(sf8_a <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_map %>%
            filter(!is.na(rii)) %>%
            mutate(log_rii = log(rii)) %>%
            filter(log_rii != Inf & log_rii != -Inf),
          aes(col = log_rii), size = .5, alpha =.6) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_color_continuous_diverging(palette = "Tropic") +
  labs(tag = "A)", color = "log RII") +
  theme_bw())

(f2_a <- ggplot() +
  geom_sf(data = sPDF, fill = "grey") + 
  geom_sf(data = dat_ci_adm %>%
            filter(!is.na(rii)) %>%
            mutate(log_rii = log(rii)) %>%
            filter(log_rii != Inf & log_rii != -Inf), 
          aes(fill = log_rii),
          size = 0) +
  geom_sf(data = sPDF, fill = NA) + 
  scale_fill_continuous_diverging(palette = "Tropic") +
  labs(tag = "A)", fill = "log RII") +
  theme_bw())

3.7 Summary - Wealth

Supplementary Figure 06_a

library(cowplot)
a <- dat_ci_map %>%
  filter(!is.na(rii)) %>%
  mutate(log_rii = log(rii)) %>%
  filter(log_rii != Inf & log_rii != -Inf) %>%
  bi_hist_ineq(var_x = ci_val, var_y = log_rii, lab_x = "Concentration Index", lab_y = "log Relative Index of Inequality")

b <- dat_ci_map %>%
  filter(!is.na(rii)) %>%
  mutate(log_rii = log(rii)) %>%
  filter(log_rii != Inf & log_rii != -Inf) %>%
  bi_hist_ineq(var_x = sii, var_y = log_rii, lab_x = "Slope Index of Inequality", lab_y = "log Relative Index of Inequality (p10th-90th)")

c <- dat_ci_map %>%
  bi_hist_ineq(var_x = ci_val, var_y = sii, lab_x = "Concentration Index", lab_y = "Slope Index of Inequality")

(indexes <- plot_grid(a,b,c, labels = c("A)", "B)","C)"), nrow = 1))

3.7.1 Text - Wealth

# Variability SII (wealth)
dat_ci %>% 
  group_by(country) %>%
  summarise(sd = sd(sii)) %>%
  View()

# Variability RII (wealth)
dat_ci %>% 
  group_by(country) %>%
  summarise(sd = sd(rii)) %>%
  View()

# Variability SII (edu)
dat_edu %>% 
  group_by(country) %>%
  summarise(sd = sd(sii)) %>%
  View()

# Variability RII (edu)
dat_edu %>% 
  group_by(country) %>%
  summarise(sd = sd(rii)) %>%
  View()

# Correlation SII (entre SES)
m <- dat_ci %>%
  rename(sii_w = sii,
         rii_w = rii) %>%
  inner_join(readRDS("./_dat/dat_edu.rds") %>%
               select(REGNAME, country, cluster_number, sii, rii),
             by = c("REGNAME", "country", "cluster_number"))

library(ggpubr)

m %>%
  ggscatter(x = "sii_w", y = "sii",
          add = "reg.line",                         
          conf.int = TRUE,                          
          palette = "jco")+
  stat_cor(method = "spearman") 

formatC(3.4e-06, format = "f", digits = 10)

# Correlation RII  (entre SES)

m %>%
  ggscatter(x = "rii_w", y = "rii",
          add = "reg.line",                         
          conf.int = TRUE,                          
          palette = "jco")+
  stat_cor(method = "spearman") 

formatC(7.6e-11, format = "f", digits = 10)

# Correlation SII por country (entre SES)

corr <- m %>%
  group_by(country) %>%
  nest() %>%
  mutate(cor_sii = map(.x = data, .f = ~cor.test(.x$sii_w, .x$sii,
                                                 method = "spearman")$estimate),
         cor_rii = map(.x = data, .f = ~cor.test(.x$rii_w, .x$rii,
                                                 method = "spearman")$estimate)) %>%
  select(-data) %>%
  unnest()

# Correlation SII (entre vars)

m %>%
  ggscatter(x = "sii_w", y = "rii_w",
          add = "reg.line",                         
          conf.int = TRUE,                          
          palette = "jco")+
  stat_cor(method = "spearman") 

formatC(3.4e-06, format = "f", digits = 10)

# Correlation RII  (entre vars)

m %>%
  ggscatter(x = "sii", y = "rii",
          add = "reg.line",                         
          conf.int = TRUE,                          
          palette = "jco")+
  stat_cor(method = "spearman") 

formatC(7.6e-11, format = "f", digits = 10)

# Correlation SII por country (entre vars)

corr_scales <- m %>%
  group_by(country) %>%
  nest() %>%
  mutate(cor_w = map(.x = data, .f = ~cor.test(.x$sii_w, .x$rii_w,
                                                 method = "spearman")$estimate),
         cor_e = map(.x = data, .f = ~cor.test(.x$sii, .x$rii,
                                                 method = "spearman")$estimate)) %>%
  dplyr::select(-data) %>%
  unnest()