Chapter 3 Statistical Model

library(tidyverse)
library(sf)
dat <- d2 %>%
  dplyr::select(id_house, comm, id_study:viaje_ult_mes, main_act_ec:hour_sleep, SEROPOSITIVE, area, 
                age_cat, fever, TREATMENT) %>%
  st_set_geometry(NULL) %>%
  separate(hour_sleep, into = c("hh","mm","ss"), sep = ":") %>%
  mutate(viaje_ult_mes = ifelse(viaje_ult_mes=="9999", NA, viaje_ult_mes),
         viaje_ult_mes = factor(ifelse(viaje_ult_mes=="2", "1_yes", "0_no")),
         SEROPOSITIVE = ifelse(SEROPOSITIVE=="Positive",1,0),
         work_out = factor(ifelse(as.numeric(main_act_ec)>0 & as.numeric(main_act_ec)<6, "1_outside",
                                  "0_inside")),
         study = as.numeric(as.character(nm_level_study)),
         study = ifelse(study==9999,NA,study),
         study_cat = factor(ifelse(study<5,"0_primary_or_lower", "1_secondary_or_higher")),
         sleep_clean = as.numeric(hh),
         sleep_clean = ifelse(sleep_clean<18,NA,sleep_clean),
         sleep_cat = factor(ifelse(sleep_clean<19,"0_before7pm","1_after7pm")),
         tipo_casa = factor(tipo_casa, labels = c("no walls", "2-3 walls", "one room","full house"))
         ) %>% 
  filter(complete.cases(.))

#epiDisplay::tab1(dat$sleep_cat)

3.1 Multilevel model

library(tidyverse)
library(jtools)
library(lme4)

m1 <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + viaje_ult_mes + fever + work_out + tipo_casa + 
              sleep_cat + study_cat + fumigacion + animales_casa + (1|comm/id_house), 
            data = dat, family = binomial)

summ(m1, exp = T, confint = T)
Observations 1612
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 1754.28
BIC 1851.21
Pseudo-R² (fixed effects) 0.27
Pseudo-R² (total) 0.49
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.22 0.05 0.98 -1.99 0.05
age_cat(5,15] 3.32 1.89 5.84 4.17 0.00
age_cat(15,30] 11.92 6.17 23.04 7.38 0.00
age_cat(30,50] 22.77 11.55 44.91 9.02 0.00
age_cat(50, Inf] 28.23 14.28 55.80 9.61 0.00
nm_sex1_male 1.50 1.14 1.96 2.94 0.00
viaje_ult_mes1_yes 1.29 0.87 1.93 1.26 0.21
fever1 0.71 0.25 2.01 -0.65 0.51
work_out1_outside 1.78 1.19 2.66 2.81 0.01
tipo_casa2-3 walls 0.65 0.23 1.78 -0.84 0.40
tipo_casaone room 0.91 0.32 2.59 -0.18 0.86
tipo_casafull house 0.72 0.25 2.03 -0.63 0.53
sleep_cat1_after7pm 0.62 0.27 1.42 -1.13 0.26
study_cat1_secondary_or_higher 0.67 0.49 0.92 -2.49 0.01
fumigacion 1.00 0.71 1.41 -0.01 0.99
animales_casa 0.83 0.58 1.20 -0.97 0.33
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.71
comm (Intercept) 0.96
Grouping Variables
Group # groups ICC
id_house:comm 558 0.11
comm 10 0.20
m2 <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + viaje_ult_mes + work_out + area + 
               study_cat + (1|comm/id_house), 
            data = dat, family = binomial)

summ(m2, exp = T, confint = T)
Observations 1612
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 1744.67
BIC 1809.29
Pseudo-R² (fixed effects) 0.31
Pseudo-R² (total) 0.51
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.05 0.02 0.17 -4.91 0.00
age_cat(5,15] 3.40 1.92 6.02 4.21 0.00
age_cat(15,30] 12.42 6.39 24.16 7.42 0.00
age_cat(30,50] 24.02 12.09 47.71 9.08 0.00
age_cat(50, Inf] 29.83 14.97 59.43 9.65 0.00
nm_sex1_male 1.52 1.16 1.99 3.01 0.00
viaje_ult_mes1_yes 1.31 0.87 1.96 1.29 0.20
work_out1_outside 1.77 1.18 2.66 2.75 0.01
area1_rural 2.73 0.77 9.71 1.55 0.12
study_cat1_secondary_or_higher 0.67 0.49 0.92 -2.48 0.01
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.75
comm (Intercept) 0.89
Grouping Variables
Group # groups ICC
id_house:comm 558 0.12
comm 10 0.17

3.2 Model for rural areas (ICEMR)

dat_r <- dat %>%
  filter(area == "1_rural")

m_rural <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + viaje_ult_mes + fever + work_out + tipo_casa +
                   sleep_cat + study_cat + fumigacion + animales_casa + (1|comm/id_house), 
                 data = dat_r, family = binomial)

summ(m_rural, exp = T, confint = T)
Observations 849
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 855.70
BIC 941.10
Pseudo-R² (fixed effects) 0.33
Pseudo-R² (total) 0.55
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.17 0.01 3.70 -1.13 0.26
age_cat(5,15] 2.94 1.53 5.67 3.23 0.00
age_cat(15,30] 12.35 5.21 29.24 5.71 0.00
age_cat(30,50] 18.92 7.52 47.63 6.24 0.00
age_cat(50, Inf] 27.16 10.52 70.13 6.82 0.00
nm_sex1_male 1.67 1.14 2.45 2.62 0.01
viaje_ult_mes1_yes 1.12 0.73 1.73 0.53 0.59
fever1 0.21 0.03 1.38 -1.62 0.10
work_out1_outside 2.50 1.41 4.43 3.15 0.00
tipo_casa2-3 walls 2.45 0.13 44.97 0.60 0.55
tipo_casaone room 1.30 0.08 21.08 0.18 0.86
tipo_casafull house 1.10 0.07 17.21 0.07 0.95
sleep_cat1_after7pm 0.75 0.29 1.94 -0.60 0.55
study_cat1_secondary_or_higher 0.88 0.52 1.49 -0.46 0.64
fumigacion 0.84 0.49 1.42 -0.66 0.51
animales_casa 0.62 0.35 1.11 -1.60 0.11
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.62
comm (Intercept) 1.11
Grouping Variables
Group # groups ICC
id_house:comm 245 0.08
comm 7 0.25
m_rural_2 <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + fever + work_out + tipo_casa +
                     animales_casa + (1|comm/id_house), 
                 data = dat_r, family = binomial)

summ(m_rural_2, exp = T, confint = T)
Observations 849
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 848.84
BIC 915.26
Pseudo-R² (fixed effects) 0.33
Pseudo-R² (total) 0.56
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.13 0.01 2.66 -1.32 0.19
age_cat(5,15] 2.85 1.48 5.46 3.15 0.00
age_cat(15,30] 11.91 5.15 27.54 5.79 0.00
age_cat(30,50] 18.36 7.39 45.61 6.27 0.00
age_cat(50, Inf] 26.41 10.24 68.15 6.77 0.00
nm_sex1_male 1.67 1.14 2.44 2.63 0.01
fever1 0.21 0.03 1.37 -1.63 0.10
work_out1_outside 2.56 1.45 4.52 3.25 0.00
tipo_casa2-3 walls 2.44 0.13 46.07 0.59 0.55
tipo_casaone room 1.27 0.08 21.29 0.17 0.87
tipo_casafull house 1.06 0.07 17.31 0.04 0.97
animales_casa 0.57 0.32 0.99 -1.99 0.05
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.64
comm (Intercept) 1.15
Grouping Variables
Group # groups ICC
id_house:comm 245 0.08
comm 7 0.26

3.3 Model for peri-urban areas (CAM)

dat_p <- dat %>%
  filter(area == "0_periurban")

m_peri <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + viaje_ult_mes + fever + work_out + tipo_casa + 
                  sleep_cat + study_cat + fumigacion + animales_casa + (1|comm/id_house), 
                data = dat_p, family = binomial)

summ(m_peri, exp = T, confint = T)
Observations 763
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 897.72
BIC 981.19
Pseudo-R² (fixed effects) 0.25
Pseudo-R² (total) 0.35
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.14 0.02 1.25 -1.76 0.08
age_cat(5,15] 4.05 1.25 13.15 2.33 0.02
age_cat(15,30] 12.23 3.56 42.02 3.98 0.00
age_cat(30,50] 27.12 7.89 93.16 5.24 0.00
age_cat(50, Inf] 32.64 9.53 111.81 5.55 0.00
nm_sex1_male 1.47 0.99 2.18 1.91 0.06
viaje_ult_mes1_yes 2.14 0.54 8.55 1.08 0.28
fever1 1.38 0.36 5.37 0.47 0.64
work_out1_outside 0.86 0.44 1.68 -0.43 0.66
tipo_casa2-3 walls 0.50 0.17 1.49 -1.24 0.21
tipo_casaone room 1.34 0.47 3.82 0.55 0.58
tipo_casafull house 0.51 0.18 1.47 -1.24 0.21
sleep_cat1_after7pm 0.38 0.07 2.02 -1.13 0.26
study_cat1_secondary_or_higher 0.65 0.43 0.98 -2.05 0.04
fumigacion 1.31 0.83 2.06 1.16 0.25
animales_casa 1.01 0.65 1.59 0.06 0.95
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.71
comm (Intercept) 0.00
Grouping Variables
Group # groups ICC
id_house:comm 313 0.13
comm 3 0.00
m_peri_2 <- glmer(SEROPOSITIVE ~ age_cat + nm_sex + 
                   study_cat + (1|comm/id_house), 
                data = dat_p, family = binomial)

summ(m_peri_2, exp = T, confint = T)
Observations 763
Dependent variable SEROPOSITIVE
Type Mixed effects generalized linear model
Family binomial
Link logit
AIC 896.10
BIC 937.83
Pseudo-R² (fixed effects) 0.20
Pseudo-R² (total) 0.36
Fixed Effects
exp(Est.) 2.5% 97.5% z val. p
(Intercept) 0.06 0.02 0.19 -4.70 0.00
age_cat(5,15] 3.76 1.18 11.97 2.24 0.02
age_cat(15,30] 11.00 3.25 37.20 3.86 0.00
age_cat(30,50] 25.21 7.51 84.57 5.23 0.00
age_cat(50, Inf] 29.69 8.93 98.74 5.53 0.00
nm_sex1_male 1.44 0.99 2.09 1.92 0.06
study_cat1_secondary_or_higher 0.61 0.40 0.93 -2.31 0.02
Random Effects
Group Parameter Std. Dev.
id_house:comm (Intercept) 0.86
comm (Intercept) 0.31
Grouping Variables
Group # groups ICC
id_house:comm 313 0.18
comm 3 0.02

3.4 Summary

plot_summs(m1,m_rural,m_peri, model.names = c("full model", "rural only", "periurban only"))