Chapter 4 Analysis Education
4.1 Processing - Education
# Remove every (non-hidden) object from the current environment before the
# analysis starts, so nothing defined earlier in the session is reused.
rm(list = ls())
library(IC2) # used below through IC2::calcSConc()
library(codebook) # in the visible code only the commented-out codebook_browser() call refers to it
library(tidyverse)
# Input data; the code below reads the columns country, REGNAME,
# cluster_number, rapid_test, edu and w from it.
dat <- read.csv("./_dat/dat_mal_ineq_v3.csv")
# NOTE(review): h_ineq(), ci_box() and ci_prev() are called later but are not
# defined in this section; presumably they come from this helper script -- confirm.
source("./mal_ineq_fun.R")
# epiDisplay::tab1(dat$rapid_test)
# epiDisplay::tab1(dat$mothers_highest_educational_level)
# epiDisplay::tab1(dat$education)
# epiDisplay::tab1(dat$edu)
#codebook_browser(dat)
# ==== NOT RUN =======
# ao <- dat %>%
# filter(country == "SN") %>%
# dplyr::select(rapid_test, edu, w) %>%
# filter(complete.cases(.))
#
# ao_ci <- IC2::calcSConc(x = ao$rapid_test,
# y = ao$edu,
# w = ao$w)
# ==== NOT RUN =======
# Build one row per (country, REGNAME, cluster_number) with its sample sizes,
# the mean of rapid_test and the number of distinct education values, then
# keep only the clusters that pass the filters at the bottom.
#
# Scalar summaries are computed with typed mappers (map_int / map_dbl) so they
# are plain integer/double columns. With map() they were list-columns and the
# filters below only worked through R's implicit list-vs-number comparison.
dat.n <- dat %>%
  group_by(country, REGNAME, cluster_number) %>%
  nest() %>%
  mutate(
    # Number of records in the cluster, before any exclusion.
    sample.size.N = map_int(data, nrow),
    # Number of records with a non-missing rapid_test.
    sample.size.m = map_int(data, ~ sum(!is.na(.x$rapid_test))),
    # Analysis set: rows complete on rapid_test, edu and the weight w.
    dat_s = map(data, ~ dplyr::select(.x, rapid_test, edu, w) %>%
      filter(complete.cases(.))),
    # Size of the analysis set.
    sample.size = map_int(dat_s, nrow),
    # Mean of rapid_test in the analysis set.
    # NOTE(review): this is a prevalence only if rapid_test is coded 0/1 -- confirm.
    prev = map_dbl(dat_s, ~ mean(.x$rapid_test, na.rm = TRUE)),
    # Number of distinct edu values in the analysis set.
    edu_cats = map_int(dat_s, ~ n_distinct(.x$edu))
  ) %>%
  # FILTERS =======================
  # Drop clusters where rapid_test is constant (mean exactly 0 or 1).
  filter(prev > 0 & prev < 1) %>%
  # Require at least 10 complete records.
  filter(sample.size >= 10) %>%
  # Require at least two distinct edu values.
  filter(edu_cats > 1)
# Add the inequality measures for every cluster kept in dat.n:
#   ci     - full object returned by IC2::calcSConc() (rapid_test against edu,
#            weighted by w)
#   ci_val - the $ineq$index element extracted from that object
#   h_calc - result of the project helper h_ineq() on the same analysis set
# NOTE(review): h_ineq() is not defined in this section; later code reads
# c_index, sii and rii after unnesting, so it presumably returns those -- confirm.
dat_edu.n <- dat.n %>%
  mutate(
    ci = map(dat_s, function(d) {
      IC2::calcSConc(x = d$rapid_test, y = d$edu, w = d$w)
    }),
    ci_val = map(ci, function(res) res$ineq$index),
    h_calc = map(dat_s, function(d) {
      h_ineq(dat = d, var_soc = edu, var_outcome = rapid_test)
    })
  )
# Flatten the per-cluster results into one ungrouped table.
#
# - ungroup() comes first so that select() keeps exactly the columns listed.
#   REGNAME is needed by the regional summary further down; it used to survive
#   only because dplyr silently re-adds grouping variables (as the first
#   column), so it is now selected explicitly in that same position.
# - unnest() gets an explicit `cols`; calling it with no columns is deprecated
#   in tidyr >= 1.0 and only works with a warning.
dat_edu <- dat_edu.n %>%
  ungroup() %>%
  dplyr::select(
    REGNAME, country, cluster_number,
    sample.size.N, sample.size.m, sample.size,
    prev, edu_cats, ci_val, h_calc
  ) %>%
  unnest(cols = c(
    sample.size.N, sample.size.m, sample.size,
    prev, edu_cats, ci_val, h_calc
  ))
# Checks ========
# Scatter of the c_index column (x axis) against the IC2 package value
# ci_val (y axis), one point per cluster, to compare the two calculations.
ggplot(data = dat_edu, mapping = aes(x = c_index, y = ci_val)) +
  geom_point(alpha = 0.4) +
  labs(x = "hand calculation", y = "package calculation")
# ==== NOT RUN =======
# unique(dat.n$country)
# unique(dat_ci$country)
#
# range(dat$w)
# range(dat_ci$N_m)
# range(dat_ci$n)
# range(dat_ci$ci_val, na.rm = T)
#
# hist(dat_ci$N_m)
# hist(dat_ci$n)
# hist(dat_ci$ci_val)
# ==== NOT RUN =======
#saveRDS(dat_edu, "./_dat/dat_edu.rds")
4.2 Spatial data - Education
library(sf)
library(leaflet)
library(stringr)
# Attach the cluster coordinates and turn the table into an sf point layer.
# Only clusters present in both tables are kept (inner join on country and
# cluster_number). lat/long are copied back out of the geometry as plain
# columns.
dat_edu_map <- inner_join(
  dat_edu,
  read.csv("./_dat/dat_gps_flat.csv"),
  by = c("country", "cluster_number")
) %>%
  st_as_sf(coords = c("LONGNUM", "LATNUM"), crs = 4326) %>%
  dplyr::mutate(
    lat = sf::st_coordinates(.)[, 2],
    long = sf::st_coordinates(.)[, 1]
  )
#saveRDS(dat_edu_map, "./_dat/dat_edu_map_v2.rds")
# Summarise the cluster-level results by country and REGNAME: sample sizes
# are summed, while the mean of rapid_test (prev) and the inequality measures
# are summarised by their median across clusters.
# NOTE(review): sii and rii are not created in the visible code; presumably
# they come from the unnested h_ineq() output -- confirm.
dat_adm <- dat_edu %>%
  group_by(country, REGNAME) %>%
  summarise(
    N = sum(sample.size.N, na.rm = TRUE),
    N_m = sum(sample.size.m, na.rm = TRUE),
    n = sum(sample.size, na.rm = TRUE),
    prev = median(prev, na.rm = TRUE),
    ci_val = median(ci_val, na.rm = TRUE),
    sii = median(sii, na.rm = TRUE),
    rii = median(rii, na.rm = TRUE)
  ) %>%
  # summarise() only peels off the last grouping level, so the result would
  # otherwise stay grouped by country; drop it, as is done for dat_edu.
  ungroup()
# Read the polygon layer and keep only the features that have a matching
# (country, REGNAME) row in dat_adm, carrying over its summary columns.
dat_edu_adm <- inner_join(
  st_read("./_dat/SHP/union/DHS_adm.shp"),
  dat_adm,
  by = c("country", "REGNAME")
)
## Reading layer `DHS_adm' from data source `/Users/gabrielcarrasco/Dropbox/Work/Tarik LAB/Malaria Ineq/mal_ineq/_dat/SHP/union/DHS_adm.shp' using driver `ESRI Shapefile'
## Simple feature collection with 147 features and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -13.30198 ymin: -26.86819 xmax: 50.49459 ymax: 15.7047
## geographic CRS: WGS 84
4.3 CI - Education
4.3.1 Plots CI - Education
library(cowplot)
# ci_box() panels of ci_val at three minimum cluster sample sizes.
# The >= 10 threshold is already applied when dat.n is built, so panel A
# uses every cluster in dat_edu_map.
ci_box_10 <- dat_edu_map %>%
  filter(sample.size >= 10) %>%
  ci_box(var = ci_val)
ci_box_20 <- dat_edu_map %>%
  filter(sample.size >= 20) %>%
  ci_box(var = ci_val)
ci_box_30 <- dat_edu_map %>%
  filter(sample.size >= 30) %>%
  ci_box(var = ci_val)
# Stack the three panels in one column, labelled A) to C), and display them.
ci_box1 <- plot_grid(
  ci_box_10, ci_box_20, ci_box_30,
  labels = c("A)", "B)", "C)"),
  ncol = 1
)
ci_box1
# ci_prev() panels of ci_val at three minimum cluster sample sizes.
# The >= 10 threshold is already applied when dat.n is built, so panel A
# uses every cluster in dat_edu_map.
ci_prev_10 <- dat_edu_map %>%
  filter(sample.size >= 10) %>%
  ci_prev(var = ci_val)
ci_prev_20 <- dat_edu_map %>%
  filter(sample.size >= 20) %>%
  ci_prev(var = ci_val)
ci_prev_30 <- dat_edu_map %>%
  filter(sample.size >= 30) %>%
  ci_prev(var = ci_val)
# Stack the three panels in one column, labelled A) to C), and display them.
ci_prev1 <- plot_grid(
  ci_prev_10, ci_prev_20, ci_prev_30,
  labels = c("A)", "B)", "C)"),
  ncol = 1
)
ci_prev1