MetaRE

Author

Sarah Urbut

Published

March 7, 2024

Meta Analysis

Now we want to make a meta analysis

Code

library(dplyr)
library(ggplot2)
library(ggsci)
library(tidyverse)
library(data.table)

UKB=readRDS("results_UKB.rds")
UKB$study="ukb"
MGB=readRDS("results_mgb.rds")
MGB$study="mgb"
AOU=readRDS("results_aou.rds")
AOU$study="aou"

AOU2=readRDS("~/Downloads/results_aou_mid.rds")
AOU2$Population="mid"
AOU2$study="aou"

SAUDI=readRDS("results_saudi.rds")
SAUDI$study="saudi"
SAUDI$Population="mid"

mideast=bind_rows(AOU2,SAUDI)

###BBJ
BBJ_ldl=fread("BBJLDLPRSandCAD.ldlprs.tsv")
BBJ_covs=fread("BBJLDLPRSandCAD.summary.tsv")
BBJ_ss=fread("BBJLDLPRSandCAD.sumstats.tsv")

### match org
BBJ=BBJ_ss
BBJ$Population="EAS"
BBJ=BBJ[,-c(1:3)]
BBJ$PRS_Mean=NA
BBJ$PRS_SD=NA
BBJ$Percent_male=(1-BBJ_covs$Female)
BBJ$LDL_Coef_CI_Lower=BBJ_ldl$beta-1.96*BBJ_ldl$se
BBJ$LDL_Coef_CI_Upper=BBJ_ldl$beta+1.96*BBJ_ldl$se
BBJ$Population_Scale_Factor=BBJ_ldl$prs_scale_factor
BBJ$LDL_Scaled_Coef_CI_Lower=NA
BBJ$LDL_Scaled_Coef_CI_Upper=NA
BBJ$HR=NA
BBJ$HR_CI_Lower=NA
BBJ$HR_CI_Upper=NA
BBJ$OR=BBJ_ss$OR
BBJ$OR_LCI=BBJ_ss$L95
BBJ$OR_UCI=BBJ_ss$U95
BBJ$study="bbj"

BBJ=BBJ


df_r=bind_rows(mideast,BBJ,UKB,MGB,AOU)

df_r$log_or <- log(df_r$OR)
df_r$log_or_lower <- log(df_r$OR_LCI)
df_r$log_or_upper <- log(df_r$OR_UCI)
df_r$se_log_or <- (df_r$log_or_upper - df_r$log_or_lower) / (2 * 1.96)  # 1.96 is the Z-value for 95% CI

df_r$Population[df_r$Population=="afr"]="AFR"
df_r$Population[df_r$Population=="amr"]="AMR"
df_r$Population[df_r$Population=="eas"]="EAS"
df_r$Population[df_r$Population=="eur"]="EUR"
df_r$Population[df_r$Population=="sas"]="SAS"
df_r$Population[df_r$Population=="mid"]="MID"
df_r$Population=factor(df_r$Population)
df_r$study=factor(df_r$study)

df_r=df_r%>%arrange(Population,study)
meta_data=df_r[,c("Population","study","OR","OR_LCI","OR_UCI","log_or","se_log_or","HR", "HR_CI_Lower","HR_CI_Upper")]
rownames(meta_data)=interaction(meta_data$study,meta_data$Population)

#meta_data[,-c(1,2)]=round(meta_data[,-c(1,2)],2)
#write.csv(meta_data[,-c(6,7)],quote = F,row.names = F,"meta_table.csv")

To calculate

τ^{2}

when we are working with summary stats:

$τ^{2}$ = $\frac{m a x (0, \sum (w t) \cdot (l o g_{o} r - p o o l e d_{l} o g_{o} r^{2}) - (k - 1) )}{\sum (w t)}$

Code

library(metafor)

Loading required package: Matrix


Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loading required package: metadat

Loading required package: numDeriv


Loading the 'metafor' package (version 4.6-0). For an
introduction to the package please type: help(metafor)

Code

library(dplyr)
meta_data$wt = 1 / (meta_data$se_log_or^2)
results <- meta_data %>%
  group_by(Population) %>%
  summarize(
    pooled_log_or = sum(wt * log_or) / sum(wt),  # Weighted sum of log odds ratios
    pooled_log_or_se = sqrt(1 / sum(wt)),        # Standard error of the pooled log OR
    #tau2 = max(0, sum(wt * (log_or - pooled_log_or)^2) - (sum(wt) - sum(wt^2) / sum(wt))),
    tau2 = max(0, (sum(wt * (log_or - pooled_log_or)^2) - (length(log_or) - 1)) / sum(wt)),
    #I2 = ifelse(tau2 > 0, (tau2 - pooled_log_or_se^2) / (tau2 + pooled_log_or_se^2) * 100, 0)
    I2 = max(0, (tau2 / (tau2 + pooled_log_or_se^2)) * 100)
  ) %>%
  mutate(
    pooled_OR = exp(pooled_log_or),
    pooled_OR_LCI = exp(pooled_log_or - 1.96 * pooled_log_or_se),
    pooled_OR_UCI = exp(pooled_log_or + 1.96 * pooled_log_or_se),
    I2_label = paste0("I² = ", round(I2, 2), "%")  # Create a label for I2
  ) %>%
  ungroup()

# Print the heterogeneity statistics
print(results[, c("Population", "I2")])

# A tibble: 6 × 2
  Population    I2
  <fct>      <dbl>
1 AFR         64.9
2 AMR          0  
3 EAS          0  
4 EUR         98.4
5 MID          0  
6 SAS         86.0

Code

# Create a prettier forest plot using ggplot2
ggplot(results, aes(x = pooled_OR, y = Population,color=Population)) +
  geom_point() +
  geom_errorbarh(aes(xmin =  pooled_OR_LCI, xmax = pooled_OR_UCI), height = 0.2) +
  labs(x = "Pooled Odds Ratio, FE", y = "Population") +
  scale_color_nejm()+
    geom_text(aes(label = I2_label, x = 2.5), hjust = 1, color = "blue") +  # Adjust 'x' for label position
  theme_classic() +
  theme(axis.text.y = element_text(size = 9), axis.title = element_text(size = 10))

REML vs FE

Let’s also try with metafor. The default here is a random effect model in which the weights are a combination of tau and inverse variance weighting.

Code

library(metafor)
library(dplyr)


df_r$log_or <- log(df_r$HR)
df_r$log_or_lower <- log(df_r$HR_CI_Lower)
df_r$log_or_upper <- log(df_r$HR_CI_Upper)
df_r$se_log_or <- (df_r$log_or_upper - df_r$log_or_lower) / (2 * 1.96)  # 1.96 is the Z-value for 95% CI

results_by_population <- meta_data %>%
  group_by(Population) %>%
  do({
    mod <- rma(yi = log_or, sei = se_log_or, data = ., method = "REML")
    data.frame(
      Population = unique(.$Population),
      Pooled_OR = exp(mod$b),
      Lower_CI = exp(mod$ci.lb),
      Upper_CI = exp(mod$ci.ub),
      Tau2 = mod$tau2,
      I2 = mod$I2
    )
  }) %>%
  ungroup()
  
knitr::kable(results_by_population, digits = 3, caption = "Meta-Analysis Results by Population")

Meta-Analysis Results by Population
Population	Pooled_OR	Lower_CI	Upper_CI	Tau2	I2
AFR	1.234	1.015	1.501	0.015	48.004
AMR	1.335	1.133	1.574	0.000	0.000
EAS	1.644	1.354	1.997	0.010	8.640
EUR	1.434	1.129	1.821	0.043	96.669
MID	1.688	1.315	2.167	0.000	0.000
SAS	2.932	0.710	12.101	1.294	85.936

Code

  # Adding annotations for I2 and tau2
results_by_population %>%
  mutate(I2_label = paste0("I² = ", round(I2, 2), "%")) %>%
  ggplot(aes(y = Population, x = Pooled_OR, xmin = Lower_CI, xmax = Upper_CI,color=Population)) +
  geom_point() +
  scale_color_nejm()+
  geom_errorbarh(height = 0.2) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  geom_text(aes(label = I2_label,x=4), hjust = 1, color = "blue", size = 3) +
  
  xlab("Odds Ratio (95% CI), RE") +
  ylab("") +
  theme_classic()

We can also use metaFor for FE analysis: Here the estimated SE will inherently be smaller because we assume intra sutdy heterogeneity negligible.

Code

library(metafor)
library(dplyr)

results_by_population <- meta_data %>%
  group_by(Population) %>%
  do({
    mod <- rma(yi = log_or, sei = se_log_or, data = ., method = "FE")
    data.frame(
      Population = unique(.$Population),
      Pooled_OR = exp(mod$b),
      Lower_CI = exp(mod$ci.lb),
      Upper_CI = exp(mod$ci.ub),
      Tau2 = mod$tau2,
      I2 = mod$I2
    )
  }) %>%
  ungroup()
  
knitr::kable(results_by_population, digits = 3, caption = "Meta-Analysis Results by Population")

Meta-Analysis Results by Population
Population	Pooled_OR	Lower_CI	Upper_CI	I2
AFR	1.269	1.130	1.425	48.086
AMR	1.335	1.133	1.574	0.000
EAS	1.710	1.617	1.809	0.000
EUR	1.626	1.571	1.682	96.915
MID	1.688	1.315	2.167	0.000
SAS	1.995	1.496	2.661	75.418

Code

  # Adding annotations for I2 and tau2
results_by_population %>%
  mutate(I2_label = paste0("I² = ", round(I2, 2), "%")) %>%
  ggplot(aes(y = Population, x = Pooled_OR, xmin = Lower_CI, xmax = Upper_CI,color=Population)) +
  geom_point() +
  scale_color_nejm()+
  geom_errorbarh(height = 0.2) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  geom_text(aes(label = I2_label,x=4), hjust = 1, color = "blue", size = 3) +
  
  xlab("Odds Ratio (95% CI), FE") +
  ylab("") +
  theme_classic()

And now pool all

Code

library(dplyr)

# Assuming your results are stored in a dataframe named 'results'
global_effect <- results %>%
  summarise(
    global_log_or = sum(pooled_log_or / (pooled_log_or_se^2)) / sum(1 / (pooled_log_or_se^2)),
    se_global_log_or = sqrt(1 / sum(1 / (pooled_log_or_se^2)))
  ) %>%
  mutate(
    global_or = exp(global_log_or),
    global_or_lci = exp(global_log_or - 1.96 * se_global_log_or),
    global_or_uci = exp(global_log_or + 1.96 * se_global_log_or)
  )

print(global_effect)

# A tibble: 1 × 5
  global_log_or se_global_log_or global_or global_or_lci global_or_uci
          <dbl>            <dbl>     <dbl>         <dbl>         <dbl>
1         0.481           0.0140      1.62          1.57          1.66

Code

library(ggplot2)

# Prepare data for plotting
plot_data <- results %>%
  select(Population, pooled_OR, pooled_OR_LCI, pooled_OR_UCI) %>%
  mutate(Type = "Population-Specific")

global_plot_data <- data.frame(
  Population = "Global",
  pooled_OR = global_effect$global_or,
  pooled_OR_LCI = global_effect$global_or_lci,
  pooled_OR_UCI = global_effect$global_or_uci,
  Type = "Global"
)

plot_data <- rbind(plot_data, global_plot_data)

# Create the plot
ggplot(plot_data, aes(y = Population, x = pooled_OR, xmin = pooled_OR_LCI, xmax = pooled_OR_UCI, color = Population)) +scale_color_nejm()+
  geom_pointrange(position = position_dodge(width = 0.25), size = 0.5) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
  scale_x_log10() + # Log scale for better visualization
  theme_minimal() +
  labs(x = "Odds Ratio (95% CI)", y = "", title = "Global and Population-Specific Effects") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right")

Code

Can we estiamte heterogeneity?

Code

data=results
# Calculate weights as the inverse of the variance
data$weight <- 1 / (data$pooled_log_or_se^2)

# Calculate the weighted mean of the effect sizes
weighted_mean_effect <- sum(data$weight * data$pooled_log_or) / sum(data$weight)

# Calculate the Q statistic
data$squared_differences <- data$weight * (data$pooled_log_or - weighted_mean_effect)^2
Q <- sum(data$squared_differences)

# Degrees of freedom for the Q statistic is k - 1
df <- nrow(data) - 1

# Calculate the p-value for the Q statistic under a chi-square distribution
p_value <- pchisq(Q, df, lower.tail = FALSE)

cat("Weighted Mean Effect Size:", weighted_mean_effect, "\n")

Weighted Mean Effect Size: 0.4810095

Code

cat("Q statistic:", Q, "\n")

Q statistic: 28.05673

Code

cat("Degrees of freedom:", df, "\n")

Degrees of freedom: 5

Code

cat("P-value for Q statistic:", p_value, "\n")

P-value for Q statistic: 3.5481e-05

Code

### omit EAS
data=results[results$Population%in%c("AMR","AFR"),]
data$weight <- 1 / (data$pooled_log_or_se^2)
# Calculate the weighted mean of the effect sizes
weighted_mean_effect <- sum(data$weight * data$pooled_log_or) / sum(data$weight)

data$squared_differences <- data$weight * (data$pooled_log_or - weighted_mean_effect)^2
Q <- sum(data$squared_differences)

# Degrees of freedom for the Q statistic is k - 1
df <- nrow(data) - 1

# Calculate the p-value for the Q statistic under a chi-square distribution
p_value <- pchisq(Q, df, lower.tail = FALSE)

cat("Weighted Mean Effect Size:", weighted_mean_effect, "\n")

Weighted Mean Effect Size: 0.255167

Code

cat("Q statistic:", Q, "\n")

Q statistic: 0.2469343

Code

cat("Degrees of freedom:", df, "\n")

Degrees of freedom: 1

Code

cat("P-value for Q statistic:", p_value, "\n")

P-value for Q statistic: 0.6192421

Code

#Bayes analysis:

#First conjugate normal approimation using weighted global mean as prior:
library(brms)
global_log_or_value=global_effect$global_log_or

se_global_log_or_value=global_effect$se_global_log_or

prior_mean=global_log_or_value
prior_variance=se_global_log_or_value^2

# Calculate variance and weights
results <- results %>%
  mutate(
    Variance = pooled_log_or_se^2,
    Weight = 1 / Variance
  )


# Perform Bayesian updating for each population
results <- results %>%
  mutate(
    Posterior_Variance = 1 / (1 / prior_variance + 1 / Variance),
    Posterior_Mean = Posterior_Variance * (prior_mean / prior_variance + pooled_log_or / Variance)
  )


print(results[,c("Population","pooled_log_or","pooled_log_or_se","Posterior_Mean","Posterior_Variance")])

Code

# And now with MCMC sampling

#To get an idea of heterogeneity

meta_model <- brm(
    bf(pooled_log_or | se(pooled_log_or_se) ~ 1 + (1 | Population)),
    data = results,
    family = gaussian(),
    prior = c(set_prior(sprintf("normal(%f, %f)", global_log_or_value, se_global_log_or_value), class = "Intercept"),
        prior(cauchy(0, 1), class = "sd")
    )
)


bayes_model=list()
for(pop in unique(results$Population)) {
  
  pop_data <- filter(results, Population == pop)
  
  bayes_model[[pop]] <- brm(
    bf(pooled_log_or ~ 1),
    data = pop_data,
    family = gaussian(),

      prior = c(set_prior(sprintf("normal(%f, %f)", global_log_or_value, se_global_log_or_value), class = "Intercept")),
     
    chains = 4, iter = 2000, warmup = 1000,silent = 2,
  )
  
  # Print or plot the results for each population as needed
}






meta_model <- brm(
  bf(pooled_log_or | se(pooled_log_or_se) ~ 1 + (1 | Population)),
  data = results,
  family = gaussian(),
  prior = c(
    prior(normal(0, 1), class = "Intercept"),
    prior(cauchy(0, 1), class = "sd")  # Prior for between-population variability
  )
)


library(dplyr)
library(tidyr)

posterior_summary <- lapply(seq(1:length(unique(results$Population))), function(x) {
  cbind(as.data.frame(summary(bayes_model[[x]])$fixed),
        Population = unique(results$Population)[x])
}) %>%
  bind_rows() %>%
  select(Population, Estimate = Estimate, CI.Lower = `l-95% CI`, CI.Upper = `u-95% CI`) %>%
  mutate(Type = "Posterior")

# Ensure Population is a factor for consistent ordering across plots
posterior_summary$Population <- factor(posterior_summary$Population, levels = unique(results$Population))



ps_df=posterior_summary[,c("Population","Estimate","CI.Lower","CI.Upper","Type")]
re_df=results[,c("Population","pooled_log_or","pooled_log_or_se")]
re_df$lower_ci_lo_or=re_df$pooled_log_or-1.96*re_df$pooled_log_or_se
re_df$upper_ci_lo_or=re_df$pooled_log_or+1.96*re_df$pooled_log_or_se

re_df=re_df[,c("Population","pooled_log_or","lower_ci_lo_or","upper_ci_lo_or")]
re_df$Type="Original"

colnames(ps_df)=colnames(re_df)

r=rbind(re_df,ps_df)

ggplot(r, aes(y = Population, x = exp(pooled_log_or), color = Population,shape=Type)) +
  geom_point(position = position_dodge(width = 0.25)) +
  geom_errorbar(aes(xmin = exp(lower_ci_lo_or), xmax = exp(upper_ci_lo_or)),
                width = 0.2, position = position_dodge(width = 0.25)) +
  theme_classic() +
  labs(x = "Bayesian Posterior on Pooled OR (95% CI)", y = "Population",shape="Estimate Type")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


### draw from population
posterior_distributions <- lapply(names(bayes_model), function(pop) {
    as_draws_df(bayes_model[[pop]], variable = "b_Intercept") %>%
        mutate(Population = pop)
}) %>% bind_rows()


ggplot(posterior_distributions, aes(x = exp(b_Intercept), fill = Population)) +
    geom_density(alpha = 0.7) +
    facet_wrap(~Population, scales = "free") +
    labs(x = "Posterior distribution of Intercept (OR)", y = "Density") +
    theme_classic()+scale_fill_nejm()

Fixed-Effect Model (FE)

Assumptions:

Homogeneity: Assumes that all studies are estimating the same underlying effect size. Any observed variability across study estimates is due to within-study sampling error.
Weighting: Studies are weighted by the inverse of their variance. Larger studies (lower variance) have more influence on the pooled estimate.

Mathematical Formulation:

Pooled Estimate: The weighted average of the effect sizes, where weights are wi=1/σi2wi=1/σi2 (σi2σi2 is the variance of the effect size estimate for study ii).
Confidence Interval: Based on the standard error of the pooled estimate, which is determined by the inverse of the square root of the sum of the weights.

Tau-squared (τ²) and I-squared (I²):

τ² (Tau-squared): Not applicable or assumed to be 0 because between-study variability is not modeled in a fixed-effect framework.
I² (I-squared): Although not directly calculated in a traditional FE model, it can be interpreted in the context of how much variability is observed versus what would be expected under the assumption of homogeneity.

Random-Effects Model (REML)

Assumptions:

Heterogeneity: Acknowledges that the true effect sizes may vary across studies due to differences in study populations, interventions, or outcomes.
Weighting: Studies are weighted by the inverse of the sum of their variance and an estimate of between-study variance (τ²). This accounts for both within-study variance and between-study variability.

Mathematical Formulation:

Pooled Estimate: Similar to the FE model but includes between-study variability in the weighting scheme. Weights become wi=1/(σi2+τ2)wi=1/(σi2+τ2).
Confidence Interval: Also considers the between-study variability in its calculation, which can lead to wider confidence intervals than the FE model.

Tau-squared (τ²) and I-squared (I²):

τ² (Tau-squared): Estimated from the data, reflecting the variance of the true effect sizes across studies.
I² (I-squared): Quantifies the proportion of total variation in study estimates due to heterogeneity rather than chance. Calculated using τ² and the within-study variances.

Why Results Differ:

Incorporation of Between-Study Variability: The key difference comes from how each model treats between-study variability. The REML model directly estimates and incorporates τ², leading to different weighting of studies compared to the FE model and potentially wider confidence intervals.
Estimation of Heterogeneity: I² values can differ because they’re calculated based on τ² in the REML model. In the FE model, any observed variability beyond sampling error is not explicitly modeled, so calculating I² in this context reflects how poorly the homogeneity assumption holds.
Pooled Effect Estimate: Differences in weighting schemes can lead to different pooled effect estimates. The REML model can give more weight to smaller studies when there’s significant between-study variability, potentially leading to a different overall effect size than the FE model.

Overall

We’d also like to plot a meta analysis by study:

Code

library(metafor)
library(meta)

Loading 'meta' package (version 7.0-0).
Type 'help(meta)' for a brief overview.
Readers of 'Meta-Analysis with R (Use R!)' should install
older version of 'meta' package: https://tinyurl.com/dt4y5drs

Code

study_data=df_r

study_data$identifier <- paste(study_data$study, study_data$Population, sep = "_")

# Perform the meta-analysis
meta_analysis <- metagen(
  TE = study_data$log_or,
  seTE = study_data$se_log_or,
  studlab = study_data$identifier,
  data = study_data,
  sm = "OR"  # Specify that the summary measure is Odds Ratio
)

# Summary of the meta-analysis
summary(meta_analysis)

              OR            95%-CI %W(common) %W(random)
aou_AFR   1.3351 [1.1718;  1.5212]        5.0       11.9
mgb_AFR   1.0268 [0.8316;  1.2679]        1.9        9.3
ukb_AFR   1.3705 [0.9646;  1.9471]        0.7        5.7
aou_AMR   1.3329 [1.0984;  1.6174]        2.3        9.8
mgb_AMR   1.3017 [1.0285;  1.6474]        1.5        8.5
ukb_AMR   3.3970 [0.8092; 14.2602]        0.0        0.5
aou_EAS   1.1541 [0.5587;  2.3840]        0.2        1.9
bbj_EAS       NA                          0.0        0.0
mgb_EAS   1.8358 [0.6448;  5.2271]        0.1        1.0
ukb_EAS   0.9218 [0.3597;  2.3619]        0.1        1.2
aou_EUR   1.4414 [1.3240;  1.5693]       11.8       13.2
mgb_EUR   1.1717 [1.0750;  1.2772]       11.5       13.2
ukb_EUR   1.6972 [1.6363;  1.7604]       63.6       14.1
aou_MID   1.5448 [0.3006;  7.9384]        0.0        0.4
saudi_MID     NA                          0.0        0.0
aou_SAS   1.0137 [0.3901;  2.6338]        0.1        1.2
mgb_SAS   5.1003 [1.3807; 18.8408]        0.0        0.6
ukb_SAS   1.8027 [1.3748;  2.3637]        1.2        7.5

Number of studies: k = 16

                         OR           95%-CI     z  p-value
Common effect model  1.5442 [1.4998; 1.5899] 29.20 < 0.0001
Random effects model 1.3776 [1.2369; 1.5343]  5.83 < 0.0001

Quantifying heterogeneity:
 tau^2 = 0.0210 [0.0051; 0.1630]; tau = 0.1451 [0.0712; 0.4037]
 I^2 = 85.0% [77.0%; 90.2%]; H = 2.58 [2.08; 3.19]

Test of heterogeneity:
     Q d.f.  p-value
 99.69   15 < 0.0001

Details on meta-analytical method:
- Inverse variance method
- Restricted maximum-likelihood estimator for tau^2
- Q-Profile method for confidence interval of tau^2 and tau

Code

# Forest plot to visualize the results
forest(meta_analysis)

We can also use a fixed and random analysis, but this is completely agnostic to shared populations. The random effects assumes each study:opulation effectively arises from it’s own distritbuion

Code

library(metafor)
fixed_effect_model <- rma(yi = log_or, sei = se_log_or, method = "FE", data = df_r)

Warning: 2 studies with NAs omitted from model fitting.

Code

summary(fixed_effect_model)


Fixed-Effects Model (k = 16)

  logLik  deviance       AIC       BIC      AICc   
-37.2595   99.6887   76.5191   77.2917   76.8048   

I^2 (total heterogeneity / total variability):   84.95%
H^2 (total variability / sampling variability):  6.65

Test for Heterogeneity:
Q(df = 15) = 99.6887, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  0.4345  0.0149  29.2007  <.0001  0.4054  0.4637  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Code

# Random-effects model
random_effects_model <- rma(yi = log_or, sei = se_log_or, method = "REML", data = df_r)

Warning: 2 studies with NAs omitted from model fitting.

Code

summary(random_effects_model)


Random-Effects Model (k = 16; tau^2 estimator: REML)

  logLik  deviance       AIC       BIC      AICc   
 -3.1063    6.2126   10.2126   11.6287   11.2126   

tau^2 (estimated amount of total heterogeneity): 0.0210 (SE = 0.0142)
tau (square root of estimated tau^2 value):      0.1451
I^2 (total heterogeneity / total variability):   78.13%
H^2 (total variability / sampling variability):  4.57

Test for Heterogeneity:
Q(df = 15) = 99.6887, p-val < .0001

Model Results:

estimate      se    zval    pval   ci.lb   ci.ub      
  0.3203  0.0550  5.8281  <.0001  0.2126  0.4281  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Code

# Forest plot to visualize the results
par(mfrow=c(1,2))  # Arrange plots side by side
forest(fixed_effect_model, main="Fixed Effect Model")
forest(random_effects_model, main="Random Effects Model")

Code

#Let's try with a nested Hierarchical structure:
library(metafor)

# Fitting the multi-level random effects model
multi_level_model <- rma.mv(yi = log_or, V = se_log_or^2, random = ~ 1 | study/Population, data = df_r, method = "REML")

# Display the summary of the model
summary(multi_level_model)

library(tidyverse)

random_effects <- ranef(multi_level_model)
# Convert the list to a data frame for plotting
# Note: Adjust the code below if the structure of your list differs

# For study level
df_study <- as.data.frame(random_effects$study) %>%
  rownames_to_column(var = "Study")

# For study/population level
df_population <- as.data.frame(random_effects$`study/Population`) %>%
  rownames_to_column(var = "StudyPopulation") %>%
  separate(StudyPopulation, into = c("Study", "Population"), sep = "/",remove = FALSE)

# Combine the data frames for a unified plot (optional)
df_combined <- bind_rows(
  df_study %>% mutate(Level = "Study"),
  df_population %>% mutate(Level = "Population")
)

ggplot(df_combined, aes(x = intrcpt, y = StudyPopulation, xmin = pi.lb, xmax = pi.ub, color = Level)) +
  geom_point() +
  geom_errorbar() +
  labs(x = "Random Effects Estimate (logOR)", y = "Study / Population", color = "Level") +
  theme_minimal() +
  theme(legend.position = "bottom")

Let’s run a Bayesian meta analysis:

Code


library(brms)
bayes_model <- brm(
bf(log_or ~ 0 + Population + (1|study), sigma ~ 0 + Population),
data = meta_data,
family = gaussian(),
prior = c(
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
chains = 4, iter = 8000, warmup = 2000,
control = list(adapt_delta = 0.99)
)

Code

# Meta-analysis for all populations (fixed effects)
res_all <- rma(yi = log_or, sei = se_log_or, data = data.frame(df_r), method = "FE")

# Meta-analysis for each ethnicity (fixed effects)
meta_analysis_population <- function(pop) {
  pop_data <- subset(data, Population == pop)
  rma(yi = log_or, sei = se_log_or, data = pop_data, method = "FE")
}

# Get results for each population
results <- lapply(unique(data$Population), meta_analysis_population)
names(results) <- unique(data$Population)

# Display summaries
cat("\nMeta-analysis for all populations:\n")
print(summary(res_all))

for (pop in names(results)) {
  cat("\nMeta-analysis for population:", pop, "\n")
  print(summary(results[[pop]]))
}

Code

# Function to transform log OR to OR for forest plot
transform_to_or <- function(x) {
  exp(x)
}

# Forest plot for all populations and individual studies in OR scale
forest(res_all, 
       transf = transform_to_or,
       #atransf = exp,
       xlab="Odds Ratio", 
       slab=paste(data$Population, data$study, sep=": "), 
       main="Forest Plot of All Studies and Pooled Effect (Fixed Effects)")

# Adding individual ethnicity pooled effects to the plot
for (pop in names(results)) {
  addpoly(results[[pop]], 
          transf = transform_to_or,
          row = 17 - which(names(results) == pop) * 0.5, 
          mlab = paste("Pooled Effect:", pop))
}

# Add an overall pooled effect
addpoly(res_all, 
        transf = transform_to_or,
        row = 0, 
        mlab = "Overall Pooled Effect")

Without SAS

Code

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(metafor)

# Create data frame excluding the problematic study (SAS:mgb)
data <- meta_data

# Add log(OR) and SE(log(OR)) to the data frame
data <- data %>%
  mutate(log_or = log(OR),
         se_log_or = (log(OR_UCI) - log(OR_LCI)) / (2 * 1.96))

# Meta-analysis for each ethnicity (fixed effects)
meta_analysis_population <- function(pop) {
  pop_data <- subset(data, Population == pop)
  res <- rma(yi = log_or, sei = se_log_or, data = pop_data, method = "FE")
  pooled_OR <- exp(coef(res))
  pooled_LCI <- exp(coef(res) - 1.96 * sqrt(vcov(res)))
  pooled_UCI <- exp(coef(res) + 1.96 * sqrt(vcov(res)))
  return(data.frame(Population = pop, study = "Pooled Effect", OR = pooled_OR, OR_LCI = pooled_LCI, OR_UCI = pooled_UCI))
}

# Apply meta-analysis for each population and combine results
pooled_results <- unique(data$Population) %>%
  lapply(meta_analysis_population) %>%
  bind_rows()

# Combine original data with pooled results
colnames(pooled_results)=c("Population","study","OR","OR_LCI","OR_UCI")
combined_data <- bind_rows(data, pooled_results)

# Perform global meta-analysis (fixed effects)
res_global <- rma(yi = log_or, sei = se_log_or, data = data, method = "FE")
global_OR <- exp(coef(res_global))
global_LCI <- exp(coef(res_global) - 1.96 * sqrt(vcov(res_global)))
global_UCI <- exp(coef(res_global) + 1.96 * sqrt(vcov(res_global)))
global_effect <- data.frame(Population = "Global", study = "Overall Pooled Effect", OR = global_OR, OR_LCI = global_LCI, OR_UCI = global_UCI)


colnames(global_effect)=c("Population","study","OR","OR_LCI","OR_UCI")

# Combine global effect with combined data
final_data <- bind_rows(combined_data, global_effect)

# Order data for plotting
final_data <- final_data %>%
  arrange(Population, desc(study == "Pooled Effect"))

# Display the final data
print(final_data)

             Population                 study        OR    OR_LCI    OR_UCI
intrcpt...1         AFR         Pooled Effect  1.269009 1.1300978  1.424994
aou.AFR             AFR                   aou  1.330000 1.1590580  1.523098
mgb.AFR             AFR                   mgb  0.990000 0.7516421  1.304601
ukb.AFR             AFR                   ukb  1.400000 0.9793945  2.020129
intrcpt...5         AMR         Pooled Effect  1.335442 1.1328604  1.574250
aou.AMR             AMR                   aou  1.340000 1.0966035  1.650002
mgb.AMR             AMR                   mgb  1.300000 0.9814195  1.725766
ukb.AMR             AMR                   ukb  2.430000 0.5411120 11.516530
intrcpt...9         EAS         Pooled Effect  1.710216 1.6167753  1.809057
aou.EAS             EAS                   aou  1.220000 0.5764083  2.588120
bbj.EAS             EAS                   bbj  1.716756 1.6225191  1.816642
mgb.EAS             EAS                   mgb  1.870000 0.6206754  6.188360
ukb.EAS             EAS                   ukb  0.900000 0.3414912  2.440573
intrcpt...14        EUR         Pooled Effect  1.625801 1.5714867  1.681993
aou.EUR             EUR                   aou  1.450000 1.3237854  1.589785
mgb.EUR             EUR                   mgb  1.150000 1.0409548  1.273449
ukb.EUR             EUR                   ukb  1.750000 1.6809276  1.818275
intrcpt...18     Global Overall Pooled Effect  1.617707 1.5738266  1.662810
intrcpt...19        MID         Pooled Effect  1.688036 1.3150236  2.166856
aou.MID             MID                   aou  1.600000 0.2934386  9.028988
saudi.MID           MID                 saudi  1.690000 1.3159595  2.180119
intrcpt...22        SAS         Pooled Effect  1.994821 1.4956704  2.660555
aou.SAS             SAS                   aou  1.170000 0.4213035  3.289799
mgb.SAS             SAS                   mgb 16.920000 3.6238440 85.004439
ukb.SAS             SAS                   ukb  1.930000 1.4216083  2.619395
                  log_or  se_log_or        HR HR_CI_Lower HR_CI_Upper
intrcpt...1           NA         NA        NA          NA          NA
aou.AFR       0.28517894 0.06967824 1.3351119   1.1718067    1.521176
mgb.AFR      -0.01005034 0.14066131 1.0268351   0.8315990    1.267907
ukb.AFR       0.33647224 0.18468938 1.3704630   0.9646008    1.947095
intrcpt...5           NA         NA        NA          NA          NA
aou.AMR       0.29266961 0.10422419 1.3329000   1.0984158    1.617441
mgb.AMR       0.26236426 0.14398624 1.3016570   1.0284707    1.647408
ukb.AMR       0.88789126 0.78007969 3.3969733   0.8091850   14.260555
intrcpt...9           NA         NA        NA          NA          NA
aou.EAS       0.19885086 0.38313032 1.1541269   0.5587327    2.383982
bbj.EAS       0.54043626 0.02882902        NA          NA          NA
mgb.EAS       0.62593843 0.58663704 1.8358386   0.6447584    5.227234
ukb.EAS      -0.10536052 0.50170057 0.9217767   0.3597363    2.361931
intrcpt...14          NA         NA        NA          NA          NA
aou.EUR       0.37156356 0.04671015 1.4414426   1.3240202    1.569279
mgb.EUR       0.13976194 0.05142613 1.1717108   1.0749651    1.277164
ukb.EUR       0.55961579 0.02003637 1.6971991   1.6362720    1.760395
intrcpt...18          NA         NA        NA          NA          NA
intrcpt...19          NA         NA        NA          NA          NA
aou.MID       0.47000363 0.87411408 1.5447768   0.3005972    7.938647
saudi.MID     0.52472853 0.12877896        NA          NA          NA
intrcpt...22          NA         NA        NA          NA          NA
aou.SAS       0.15700375 0.52429291 1.0136595   0.3901100    2.633887
mgb.SAS       2.82849635 0.80488983 5.1003172   1.3806525   18.841261
ukb.SAS       0.65752000 0.15590681 1.8026748   1.3748214    2.363679
                      wt
intrcpt...1           NA
aou.AFR       205.970807
mgb.AFR        50.541798
ukb.AFR        29.316774
intrcpt...5           NA
aou.AMR        92.058298
mgb.AMR        48.234525
ukb.AMR         1.643320
intrcpt...9           NA
aou.EAS         6.812507
bbj.EAS      1203.206359
mgb.EAS         2.905769
ukb.EAS         3.972929
intrcpt...14          NA
aou.EUR       458.329240
mgb.EUR       378.122284
ukb.EUR      2490.932910
intrcpt...18          NA
intrcpt...19          NA
aou.MID         1.308771
saudi.MID      60.299010
intrcpt...22          NA
aou.SAS         3.637911
mgb.SAS         1.543573
ukb.SAS        41.140523

Code

forest_plot <- ggplot(final_data, aes(x = OR, y = paste(Population,study))) +
  geom_point() +
  geom_errorbarh(aes(xmin = OR_LCI, xmax = OR_UCI), height = 0.2) +
  scale_x_continuous(trans = 'log10', breaks = scales::log_breaks(base = 10)) +
  labs(x = "Odds Ratio", y = "Study", title = "Forest Plot of All Studies and Pooled Effect (Fixed Effects)") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  theme_minimal()

print(forest_plot)

Code

#df_sum=df_r[,c("Population","study","log_or","se_log_or")]

#saveRDS(df_sum,file = "df_summary_forbayes.rds")