mcmc_nononNeg

Author

Sarah Urbut

Published

September 1, 2024

Quarto

Code

library(MASS)
library(mvtnorm)
library(ggplot2)
library(pracma)
library(rTensor)


Attaching package: 'rTensor'

The following object is masked from 'package:pracma':

    fnorm

Code

# Function for Warm Start Using Simulated Data and Tucker Decomposition
warm_gp_initialization <- function(Y, g_i, n_topics, length_scales_lambda_sim, var_scales_lambda_sim, length_scales_phi_sim, var_scales_phi_sim) {
  N <- dim(Y)[1]  # Number of individuals
  D <- dim(Y)[2]  # Number of diseases
  T <- dim(Y)[3]  # Number of time points
  P <- ncol(g_i)  # Number of genetic covariates

  # Compute mu_d from Y as the exp(colMeans) at each time point
  mu_d <- array(0, dim = c(D, T))
  for (d in 1:D) {
    mu_d[d, ] <- exp(colMeans(Y[, d, ]))  # exp(colMeans of Y for each disease)
  }

  # Convert Y to logit scale, with smoothing to avoid infinite values
  Y_smooth <- (Y * (N * D * T - 1) + 0.5) / (N * D * T)
  Y_logit <- log(Y_smooth / (1 - Y_smooth))

  # Center Y_logit by subtracting mu_d (already log-transformed prevalence)
  Y_centered <- array(0, dim = c(N, D, T))
  for (d in 1:D) {
    Y_centered[, d, ] <- Y_logit[, d, ] - matrix(mu_d[d, ], nrow = N, ncol = T, byrow = TRUE)
  }

  # Perform Tucker decomposition on Y_centered
  Y_tensor <- as.tensor(Y_centered)
  tucker_result <- tucker(Y_tensor, ranks = c(n_topics, D, T))

  # Initialize Lambda (N x K x T)
  Lambda_init <- array(0, dim = c(N, n_topics, T))
  for (k in 1:n_topics) {
    Lambda_init[, k, ] <- tucker_result$U[[1]][, k] %o% tucker_result$U[[3]][, k]
  }

  # Initialize Phi (K x D x T)
  Phi_init <- array(0, dim = c(n_topics, D, T))
  for (k in 1:n_topics) {
    Phi_init[k, , ] <- tucker_result$U[[2]][, k] %o% tucker_result$U[[3]][, k]
  }

  # Initialize Gamma using a random normal prior
  Gamma_init <- matrix(rnorm(n_topics * P, mean = 0, sd = 0.1), nrow = n_topics, ncol = P)

  # Use the length scales and variance scales from the simulation for Lambda and Phi
  length_scales_lambda <- length_scales_lambda_sim
  var_scales_lambda <- var_scales_lambda_sim
  length_scales_phi <- length_scales_phi_sim
  var_scales_phi <- var_scales_phi_sim

  # Return the initialized values
  return(list(
    Lambda = Lambda_init,
    Phi = Phi_init,
    Gamma = Gamma_init,
    mu_d = mu_d,  # Return mu_d as well since it's used
    length_scales_lambda = length_scales_lambda,
    var_scales_lambda = var_scales_lambda,
    length_scales_phi = length_scales_phi,
    var_scales_phi = var_scales_phi
  ))
}

####
# Assuming length_scales_lambda_sim, var_scales_lambda_sim, etc., come from sim_noulli.R
source("sim_noulli.R")


Attaching package: 'Matrix'

The following objects are masked from 'package:pracma':

    expm, lu, tril, triu

Code

# Simulate data
length_scales_lambda_sim <- length_scales_lambda
var_scales_lambda_sim <- var_scales_lambda
length_scales_phi_sim <- length_scales_phi
var_scales_phi_sim <- var_scales_phi

# Call warm initialization function
initial_values <- warm_gp_initialization(y, g_i, n_topics = 3,
                                         length_scales_lambda_sim, var_scales_lambda_sim,
                                         length_scales_phi_sim, var_scales_phi_sim)


  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======================================================================| 100%

Code

###

# GP prior function for a single topic k
log_gp_prior_vec <- function(eta, mean, K_inv, log_det_K) {
  T <- length(eta)
  centered_eta <- eta - mean  # Center eta by the mean

  # Compute quadratic form for GP prior
  quad_form <- sum(centered_eta * (K_inv %*% centered_eta))

  # Calculate the log prior (Gaussian distribution with covariance matrix K)
  log_prior <- -0.5 * (log_det_K + quad_form + T * log(2 * pi))
  return(log_prior)
}

###

# Fast logistic function for numerical stability
fast_logistic <- function(x) {
  1 / (1 + exp(-x))
}

# Log-likelihood function for Y, Lambda, Phi, and mu_d
log_likelihood_vec <- function(y, Lambda, Phi, mu_d) {
  n_individuals <- dim(Lambda)[1]
  n_topics <- dim(Lambda)[2]
  T <- dim(Lambda)[3]
  n_diseases <- dim(Phi)[2]

  # Initialize logit probability array
  logit_pi <- array(0, dim = c(n_individuals, n_diseases, T))

  # Calculate logit(pi) based on Lambda, Phi, and mu_d
  for (k in 1:n_topics) {
    for (t in 1:T) {
      logit_pi[, , t] <- logit_pi[, , t] + Lambda[, k, t] %*% t(Phi[k, , t])
    }
  }

  # Add mu_d (logit prevalence) to the logit_pi values
  for (d in 1:n_diseases) {
    logit_pi[, d, ] <- logit_pi[, d, ] + matrix(mu_d[d, ], nrow = n_individuals, ncol = T, byrow = TRUE)
  }

  # Apply the logistic function to get probabilities (pi)
  pi <- array(fast_logistic(as.vector(logit_pi)), dim = dim(logit_pi))

  # Calculate log-likelihood
  log_lik <- 0
  for (i in 1:n_individuals) {
    for (d in 1:n_diseases) {
      # Identify time points where the disease hasn't been diagnosed yet (at risk)
      at_risk <- which(cumsum(y[i, d, ]) == 0)

      if (length(at_risk) > 0) {
        event_time <- max(at_risk) + 1  # The first diagnosed time point (if any)
        if (event_time <= T) {
          log_lik <- log_lik + log(pi[i, d, event_time])
        }
        log_lik <- log_lik + sum(log(1 - pi[i, d, at_risk]))
      } else {
        log_lik <- log_lik + log(pi[i, d, 1])  # If disease is diagnosed at time 1
      }
    }
  }

  return(log_lik)
}
##



precompute_K_inv <- function(T, length_scale, var_scale) {
  time_diff_matrix <- outer(1:T, 1:T, "-") ^ 2
  K <- var_scale * exp(-0.5 * time_diff_matrix / length_scale ^ 2)
  K <- K + diag(1e-6, T)
  K_inv <- solve(K)
  log_det_K <- determinant(K, logarithm = TRUE)$modulus
  return(list(K_inv = K_inv, log_det_K = log_det_K))
}

###
mcmc_sampler_softplus <- function(y, mu_d_logit, g_i, n_iterations, initial_values) {

  current_state <- initial_values
  n_individuals <- dim(current_state$Lambda)[1]
  n_topics <- dim(current_state$Lambda)[2]
  T <- dim(current_state$Lambda)[3]
  n_diseases <- dim(current_state$Phi)[2]

  # Initialize proposal standard deviations
  adapt_sd <- list(
    Lambda = array(0.01, dim = dim(current_state$Lambda)),
    Phi = array(0.01, dim = dim(current_state$Phi)),
    Gamma = matrix(0.01, nrow = nrow(current_state$Gamma), ncol = ncol(current_state$Gamma))
  )

  # Initialize storage for samples
  samples <- list(
    Lambda = array(0, dim = c(n_iterations, dim(current_state$Lambda))),
    Phi = array(0, dim = c(n_iterations, dim(current_state$Phi))),
    Gamma = array(0, dim = c(n_iterations, dim(current_state$Gamma)))
  )

  # Precompute inverse covariance matrices for Lambda and Phi
  K_inv_lambda <- lapply(1:n_topics, function(k)
    precompute_K_inv(T, current_state$length_scales_lambda[k], current_state$var_scales_lambda[k]))

  K_inv_phi <- lapply(1:n_topics, function(k)
    precompute_K_inv(T, current_state$length_scales_phi[k], current_state$var_scales_phi[k]))

  for (iter in 1:n_iterations) {
    # ------ Update Lambda ------
    proposed_Lambda <- current_state$Lambda +
      array(rnorm(prod(dim(current_state$Lambda)), 0, adapt_sd$Lambda),
            dim = dim(current_state$Lambda))

    # Likelihood calculation
    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, proposed_Lambda, current_state$Phi, mu_d_logit)

    # Compute log prior for Lambda (GP prior)
    current_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(
          current_state$Lambda[i, k, ],
          rep(g_i[i, ] %*% current_state$Gamma[k, ], T),
          K_inv_lambda[[k]]$K_inv,
          K_inv_lambda[[k]]$log_det_K
        )
      })
    }))

    proposed_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(
          proposed_Lambda[i, k, ],
          rep(g_i[i, ] %*% current_state$Gamma[k, ], T),
          K_inv_lambda[[k]]$K_inv,
          K_inv_lambda[[k]]$log_det_K
        )
      })
    }))

    # Compute acceptance ratio for Lambda
    log_accept_ratio <- (proposed_log_lik + proposed_log_prior) - (current_log_lik + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Lambda <- proposed_Lambda
      adapt_sd$Lambda <- adapt_sd$Lambda * 2
    } else {
      adapt_sd$Lambda <- adapt_sd$Lambda * 0.90
    }

    # ------ Update Phi (with log-prior for GP) ------
    proposed_Phi <- current_state$Phi +
      array(rnorm(prod(dim(current_state$Phi)), 0, adapt_sd$Phi), dim = dim(current_state$Phi))

    # Likelihood calculation for proposed Phi
    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, current_state$Lambda, proposed_Phi, mu_d_logit)

    # Compute log prior for Phi (using GP prior)
    current_log_prior_phi <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        log_gp_prior_vec(current_state$Phi[k, d, ], 0, K_inv_phi[[k]]$K_inv, K_inv_phi[[k]]$log_det_K)
      })
    }))

    proposed_log_prior_phi <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        log_gp_prior_vec(proposed_Phi[k, d, ], 0, K_inv_phi[[k]]$K_inv, K_inv_phi[[k]]$log_det_K)
      })
    }))

    # Compute acceptance ratio for Phi
    log_accept_ratio <- (proposed_log_lik + proposed_log_prior_phi) - (current_log_lik + current_log_prior_phi)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Phi <- proposed_Phi
      adapt_sd$Phi <- adapt_sd$Phi * 2
    } else {
      adapt_sd$Phi <- adapt_sd$Phi * 0.90
    }

    # ------ Update Gamma ------
    proposed_Gamma <- current_state$Gamma +
      matrix(rnorm(prod(dim(current_state$Gamma)), 0, adapt_sd$Gamma), nrow = nrow(current_state$Gamma))

    current_log_prior <- sum(dnorm(current_state$Gamma, 0, 1, log = TRUE))
    proposed_log_prior <- sum(dnorm(proposed_Gamma, 0, 1, log = TRUE))

    log_accept_ratio <- proposed_log_prior - current_log_prior

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Gamma <- proposed_Gamma
      adapt_sd$Gamma <- adapt_sd$Gamma * 2
    } else {
      adapt_sd$Gamma <- adapt_sd$Gamma * 0.90
    }

    # Store samples
    samples$Lambda[iter, , , ] <- current_state$Lambda
    samples$Phi[iter, , , ] <- current_state$Phi
    samples$Gamma[iter, , ] <- current_state$Gamma

    # Print progress
    if (iter %% 100 == 0) cat("Iteration", iter, "\n")
  }

  return(samples)
}


# Run the MCMC with softplus constraint
n_iterations <- 5000
samples <- mcmc_sampler_softplus(y, initial_values$mu_d, g_i, n_iterations, initial_values)

Iteration 100 
Iteration 200 
Iteration 300 
Iteration 400 
Iteration 500 
Iteration 600 
Iteration 700 
Iteration 800 
Iteration 900 
Iteration 1000 
Iteration 1100 
Iteration 1200 
Iteration 1300 
Iteration 1400 
Iteration 1500 
Iteration 1600 
Iteration 1700 
Iteration 1800 
Iteration 1900 
Iteration 2000 
Iteration 2100 
Iteration 2200 
Iteration 2300 
Iteration 2400 
Iteration 2500 
Iteration 2600 
Iteration 2700 
Iteration 2800 
Iteration 2900 
Iteration 3000 
Iteration 3100 
Iteration 3200 
Iteration 3300 
Iteration 3400 
Iteration 3500 
Iteration 3600 
Iteration 3700 
Iteration 3800 
Iteration 3900 
Iteration 4000 
Iteration 4100 
Iteration 4200 
Iteration 4300 
Iteration 4400 
Iteration 4500 
Iteration 4600 
Iteration 4700 
Iteration 4800 
Iteration 4900 
Iteration 5000

Code

library(ggplot2)

# Flatten the Lambda array for plotting (choose an individual i, topic k, and time point t)
lambda_samples <- samples$Lambda[, 1, 1, 1]  # Example: Lambda[1,1,1] over iterations

# Create a data frame for ggplot
lambda_df <- data.frame(iteration = 1:length(lambda_samples), value = lambda_samples)

# Plot trace of Lambda
ggplot(lambda_df, aes(x = iteration, y = value)) +
  geom_line() +
  labs(title = "Trace Plot for Lambda[1,1,1]", x = "Iteration", y = "Lambda value")

Code

phi_samples <- samples$Phi[, 1, 1, 1]  # Example: Phi[1,1,1] over iterations

phi_df <- data.frame(iteration = 1:length(phi_samples), value = phi_samples)

ggplot(phi_df, aes(x = iteration, y = value)) +
  geom_line() +
  labs(title = "Trace Plot for Phi[1,1,1]", x = "Iteration", y = "Phi value")

Code

# Autocorrelation plot for Lambda[1,1,1]
acf(lambda_samples, main = "Autocorrelation for Lambda[1,1,1]")


library(coda)

# Calculate ESS for Lambda[1,1,1]
ess_lambda <- effectiveSize(lambda_samples)
print(paste("Effective Sample Size for Lambda[1,1,1]:", ess_lambda))

[1] "Effective Sample Size for Lambda[1,1,1]: 1.0940118445441"

Code

# Extract the final sampled Lambda and Phi values (or the posterior mean)
final_Lambda <- samples$Lambda[n_iterations, , , ]
final_Phi <- samples$Phi[n_iterations, , , ]

# Initialize the logit probabilities array
logit_pi_hat <- array(0, dim = c(100, 5, T))

# Calculate logit(pi_hat) based on final Lambda and Phi
for (k in 1:3) {
  for (t in 1:T) {
    logit_pi_hat[, , t] <- logit_pi_hat[, , t] + final_Lambda[, k, t] %*% t(final_Phi[k, , t])
  }
}

n_diseases=5
# Add the baseline prevalence (mu_d) back to logit_pi_hat
for (d in 1:n_diseases) {
  logit_pi_hat[, d, ] <- logit_pi_hat[, d, ] + matrix(initial_values$mu_d[d, ], nrow = 100, ncol = T, byrow = TRUE)
}

# Transform logit(pi_hat) to probabilities using the logistic function
pi_hat <- 1 / (1 + exp(-logit_pi_hat))