library(Rcpp)
library(rTensor)
warm_gp_initialization <- function(Y, mu_d_logit, g_i, n_topics) {
  N <- dim(Y)[1]
  D <- dim(Y)[2]
  T <- dim(Y)[3]
  P <- ncol(g_i)

  # Convert Y to logit scale, with smoothing to avoid infinite values
  Y_smooth <- (Y * (N * D * T - 1) + 0.5) / (N * D * T)
  Y_logit <- log(Y_smooth / (1 - Y_smooth))

  # Center Y_logit by subtracting mu_d_logit
  Y_centered <- array(0, dim = c(N, D, T))
  for (d in 1:D) {
    Y_centered[, d, ] <- Y_logit[, d, ] -
      matrix(mu_d_logit[[d]], nrow = N, ncol = T, byrow = TRUE)
  }

  # Create a tensor object
  Y_tensor <- as.tensor(Y_centered)

  # Perform Tucker decomposition
  tucker_result <- tucker(Y_tensor, ranks = c(n_topics, D, T))

  # Initialize Lambda (N x K x T)
  Lambda <- array(0, dim = c(N, n_topics, T))
  for (k in 1:n_topics) {
    Lambda[, k, ] <- tucker_result$U[[1]][, k] %o% tucker_result$U[[3]][, k]
  }

  # Initialize Phi (K x D x T)
  Phi <- array(0, dim = c(n_topics, D, T))
  for (k in 1:n_topics) {
    Phi[k, , ] <- tucker_result$U[[2]][, k] %o% tucker_result$U[[3]][, k]
  }

  # Initialize Gamma using a conservative approach
  Gamma <- matrix(rnorm(n_topics * P, mean = 0, sd = 0.1), nrow = n_topics, ncol = P)

  # Fixed hyperparameters for length and variance scales
  length_scales_lambda <- rep(20, n_topics)
  var_scales_lambda <- rep(1, n_topics)
  length_scales_phi <- rep(20, n_topics * D)
  var_scales_phi <- rep(1, n_topics * D)

  return(list(
    Lambda = Lambda,
    Phi = Phi,
    Gamma = Gamma,
    length_scales_lambda = length_scales_lambda,
    var_scales_lambda = var_scales_lambda,
    length_scales_phi = length_scales_phi,
    var_scales_phi = var_scales_phi
  ))
}
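A quick smoke test of the initializer on simulated inputs can catch dimension mistakes early; the toy objects below (Y_toy, mu_d_logit_toy, g_i_toy) are illustrative stand-ins, not the real data:

set.seed(1)
# N = 5 individuals, D = 4 diseases, T = 6 time points (all illustrative)
Y_toy <- array(runif(5 * 4 * 6, 0.01, 0.99), dim = c(5, 4, 6))
mu_d_logit_toy <- lapply(1:4, function(d) rep(0, 6))  # flat logit baseline per disease
g_i_toy <- matrix(rnorm(5 * 3), nrow = 5, ncol = 3)   # P = 3 genetic factors
init_toy <- warm_gp_initialization(Y_toy, mu_d_logit_toy, g_i_toy, n_topics = 2)
dim(init_toy$Lambda)  # expect 5 2 6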
cppFunction('
NumericVector fast_logistic(NumericVector x) {
int n = x.size();
NumericVector result(n);
for(int i = 0; i < n; ++i) {
result[i] = 1.0 / (1.0 + exp(-x[i]));
}
return result;
}
')
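A one-line check, with illustrative test values, that the compiled logistic agrees with base R's plogis:

x_test <- c(-5, 0, 5)
all.equal(fast_logistic(x_test), plogis(x_test))  # expect TRUE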
precompute_K_inv <- function(T, length_scale, var_scale) {
  time_diff_matrix <- outer(1:T, 1:T, "-") ^ 2
  K <- var_scale * exp(-0.5 * time_diff_matrix / length_scale ^ 2)
  K <- K + diag(1e-6, T)
  K_inv <- solve(K)
  log_det_K <- determinant(K, logarithm = TRUE)$modulus
  return(list(K_inv = K_inv, log_det_K = log_det_K))
}
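Because K is a squared-exponential kernel plus a small jitter, one quick validation (with illustrative scales) is to rebuild K by hand and confirm that the returned K_inv actually inverts it:

pc_chk <- precompute_K_inv(10, length_scale = 20, var_scale = 1)
K_chk <- 1 * exp(-0.5 * outer(1:10, 1:10, "-")^2 / 20^2) + diag(1e-6, 10)
max(abs(pc_chk$K_inv %*% K_chk - diag(10)))  # should be near zero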
mcmc_sampler_optimized <- function(y,
                                   mu_d_logit,
                                   g_i,
                                   n_iterations,
                                   initial_values,
                                   alpha_lambda,
                                   beta_lambda,
                                   alpha_sigma,
                                   beta_sigma,
                                   alpha_phi,
                                   beta_phi,
                                   alpha_sigma_phi,
                                   beta_sigma_phi,
                                   alpha_Gamma,
                                   beta_Gamma) {
  current_state <- initial_values
  n_individuals <- dim(current_state$Lambda)[1]
  n_topics <- dim(current_state$Lambda)[2]
  T <- dim(current_state$Lambda)[3]
  n_diseases <- dim(current_state$Phi)[2]

  # Initialize adaptive proposal standard deviations
  adapt_sd <- list(
    Lambda = array(0.01, dim = dim(current_state$Lambda)),
    Phi = array(0.01, dim = dim(current_state$Phi)),
    Gamma = matrix(0.01,
                   nrow = nrow(current_state$Gamma),
                   ncol = ncol(current_state$Gamma))
  )

  # Initialize storage for samples
  samples <- list(
    Lambda = array(0, dim = c(n_iterations, dim(current_state$Lambda))),
    Phi = array(0, dim = c(n_iterations, dim(current_state$Phi))),
    Gamma = array(0, dim = c(n_iterations, dim(current_state$Gamma)))
  )

  # Precompute the constant covariance inverses once, before the MCMC loop
  K_inv_lambda <- lapply(1:n_topics, function(k)
    precompute_K_inv(T,
                     current_state$length_scales_lambda[k],
                     current_state$var_scales_lambda[k]))
  K_inv_phi <- lapply(1:(n_topics * n_diseases), function(idx)
    precompute_K_inv(T,
                     current_state$length_scales_phi[idx],
                     current_state$var_scales_phi[idx]))

  for (iter in 1:n_iterations) {
    # Update Lambda
    proposed_Lambda <- current_state$Lambda +
      array(rnorm(prod(dim(current_state$Lambda)), 0, adapt_sd$Lambda),
            dim = dim(current_state$Lambda))

    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, proposed_Lambda, current_state$Phi, mu_d_logit)

    lambda_mean <- g_i %*% t(current_state$Gamma)
    current_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean[i, k],
                         K_inv_lambda[[k]]$K_inv,
                         K_inv_lambda[[k]]$log_det_K)
      })
    }))
    proposed_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(proposed_Lambda[i, k, ],
                         lambda_mean[i, k],
                         K_inv_lambda[[k]]$K_inv,
                         K_inv_lambda[[k]]$log_det_K)
      })
    }))

    log_accept_ratio <- (proposed_log_lik + proposed_log_prior) -
      (current_log_lik + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Lambda <- proposed_Lambda
      adapt_sd$Lambda <- adapt_sd$Lambda * 1.01
    } else {
      adapt_sd$Lambda <- adapt_sd$Lambda * 0.99
    }

    # Update Phi (same structure as the Lambda update)
    proposed_Phi <- current_state$Phi +
      array(rnorm(prod(dim(current_state$Phi)), 0, adapt_sd$Phi),
            dim = dim(current_state$Phi))

    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, current_state$Lambda, proposed_Phi, mu_d_logit)

    # Note: the Phi prior uses the precomputed K_inv_phi entries indexed by idx
    current_log_prior <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        idx <- (k - 1) * n_diseases + d
        log_gp_prior_vec(current_state$Phi[k, d, ],
                         0,
                         K_inv_phi[[idx]]$K_inv,
                         K_inv_phi[[idx]]$log_det_K)
      })
    }))
    proposed_log_prior <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        idx <- (k - 1) * n_diseases + d
        log_gp_prior_vec(proposed_Phi[k, d, ],
                         0,
                         K_inv_phi[[idx]]$K_inv,
                         K_inv_phi[[idx]]$log_det_K)
      })
    }))

    log_accept_ratio <- (proposed_log_lik + proposed_log_prior) -
      (current_log_lik + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Phi <- proposed_Phi
      adapt_sd$Phi <- adapt_sd$Phi * 1.01
    } else {
      adapt_sd$Phi <- adapt_sd$Phi * 0.99
    }

    # Update Gamma
    proposed_Gamma <- current_state$Gamma +
      matrix(rnorm(prod(dim(current_state$Gamma)), 0, adapt_sd$Gamma),
             nrow = nrow(current_state$Gamma))

    current_log_prior <- sum(dnorm(current_state$Gamma, 0,
                                   sqrt(alpha_Gamma / beta_Gamma), log = TRUE))
    proposed_log_prior <- sum(dnorm(proposed_Gamma, 0,
                                    sqrt(alpha_Gamma / beta_Gamma), log = TRUE))

    lambda_mean_current <- g_i %*% t(current_state$Gamma)
    lambda_mean_proposed <- g_i %*% t(proposed_Gamma)

    current_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean_current[i, k],
                         K_inv_lambda[[k]]$K_inv,
                         K_inv_lambda[[k]]$log_det_K)
      })
    }))
    proposed_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean_proposed[i, k],
                         K_inv_lambda[[k]]$K_inv,
                         K_inv_lambda[[k]]$log_det_K)
      })
    }))

    log_accept_ratio <- (proposed_log_likelihood + proposed_log_prior) -
      (current_log_likelihood + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Gamma <- proposed_Gamma
      adapt_sd$Gamma <- adapt_sd$Gamma * 1.01
    } else {
      adapt_sd$Gamma <- adapt_sd$Gamma * 0.99
    }

    # Store samples
    samples$Lambda[iter, , , ] <- current_state$Lambda
    samples$Phi[iter, , , ] <- current_state$Phi
    samples$Gamma[iter, , ] <- current_state$Gamma

    if (iter %% 100 == 0)
      cat("Iteration", iter, "\n")
  }
  return(samples)
}
# Updated log_likelihood_vec function
log_likelihood_vec <- function(y, Lambda, Phi, mu_d_logit) {
  n_individuals <- dim(Lambda)[1]
  n_topics <- dim(Lambda)[2]
  T <- dim(Lambda)[3]
  n_diseases <- dim(Phi)[2]

  logit_pi <- array(0, dim = c(n_individuals, n_diseases, T))

  for (k in 1:n_topics) {
    for (t in 1:T) {
      logit_pi[, , t] <- logit_pi[, , t] + Lambda[, k, t] %*% t(Phi[k, , t])
    }
  }

  for (d in 1:n_diseases) {
    logit_pi[, d, ] <- logit_pi[, d, ] + matrix(mu_d_logit[[d]],
                                                nrow = n_individuals,
                                                ncol = T,
                                                byrow = TRUE)
  }

  # Apply logistic function element-wise
  pi <- array(fast_logistic(as.vector(logit_pi)), dim = dim(logit_pi))

  log_lik <- 0
  for (i in 1:n_individuals) {
    for (d in 1:n_diseases) {
      at_risk <- which(cumsum(y[i, d, ]) == 0)
      if (length(at_risk) > 0) {
        event_time <- max(at_risk) + 1
        if (event_time <= T) {
          log_lik <- log_lik + log(pi[i, d, event_time])
        }
        log_lik <- log_lik + sum(log(1 - pi[i, d, at_risk]))
      } else {
        log_lik <- log_lik + log(pi[i, d, 1])
      }
    }
  }
  return(log_lik)
}
log_gp_prior_vec <- function(x, mean, K_inv, log_det_K) {
  T <- length(x)
  centered_x <- x - mean
  quad_form <- sum(centered_x * (K_inv %*% centered_x))
  return(-0.5 * (log_det_K + quad_form + T * log(2 * pi)))
}
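This is just a multivariate normal log density, so it can be cross-checked against mvtnorm::dmvnorm (an extra assumption: the mvtnorm package, used nowhere else here, is installed):

library(mvtnorm)
T_chk <- 8
K_chk <- exp(-0.5 * outer(1:T_chk, 1:T_chk, "-")^2 / 20^2) + diag(1e-6, T_chk)
pc <- precompute_K_inv(T_chk, length_scale = 20, var_scale = 1)
x_chk <- rnorm(T_chk)
# as.numeric() drops the attribute carried by determinant()$modulus
all.equal(as.numeric(log_gp_prior_vec(x_chk, 0, pc$K_inv, pc$log_det_K)),
          dmvnorm(x_chk, sigma = K_chk, log = TRUE))  # expect TRUE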
Quarto
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto, see https://quarto.org.
You can add options to executable code like this.
source("~/Dropbox (Personal)/bern_sim.R")
Attaching package: 'dplyr'
The following object is masked from 'package:MASS':
select
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Shape of mu_d_matrix: 12 56
Shape of mean_lambda: 10 2
Shape of mu_i: 10 56
initial_values <- warm_gp_initialization(y, mu_d_logit, g_i, n_topics)
print(dim(y))
[1] 10 12 56
print(dim(initial_values$Lambda))
[1] 10 2 56
print(dim(initial_values$Phi))
[1] 2 12 56
print(length(mu_d_logit))
[1] 12
print(length(mu_d_logit[[1]]))
[1] 56
# Define hyperparameters
alpha_lambda <- 2
beta_lambda <- 0.1
alpha_sigma <- 2
beta_sigma <- 2
alpha_phi <- 2
beta_phi <- 0.1
alpha_sigma_phi <- 2
beta_sigma_phi <- 2
alpha_Gamma <- 2
beta_Gamma <- 2
# Run MCMC
start = proc.time()
mcmc_results <- mcmc_sampler_optimized(y, mu_d_logit, g_i, n_iterations = 5000, initial_values,
                                       alpha_lambda, beta_lambda, alpha_sigma, beta_sigma,
                                       alpha_phi, beta_phi, alpha_sigma_phi, beta_sigma_phi,
                                       alpha_Gamma, beta_Gamma)
Iteration 100
Iteration 200
...
Iteration 4900
Iteration 5000
stop = proc.time()
stop - start
user system elapsed
25.858 0.899 27.073
a = readRDS("~/Dropbox (Personal)/UKB_topic_app/disease_array_incidence.rds")
prs_subset = na.omit(readRDS("~/Dropbox (Personal)/pheno_dir/prs_subset.rds"))
mu_d_list = readRDS("~/Dropbox (Personal)/massivemudlist.rds")
names = readRDS("~/Desktop/namesofphenos.rds")
names(mu_d_list) = names$phenotype
colnames(a) = names$phenotype
top = names(sapply(mu_d_list, sum)[order(sapply(mu_d_list, sum), decreasing = T)][1:20])
## make sure everything matches
mu_d_l = mu_d_list[top]
f = sample(intersect(rownames(a[, 1, ]), rownames(prs_subset)), 1000)
y2 = a[f, top, ]
g_i = prs_subset[f, -c(10, 16, 18, 36, 37)]
all.equal(colnames(y2),names(mu_d_l))
[1] TRUE
# Ensure dimensions are correct
n_individuals <- length(f)
n_diseases <- length(mu_d_l)
T <- dim(y2)[3]
n_topics <- 3 # You can adjust this as needed
n_genetic_factors <- ncol(g_i)

# Ensure y2 is in the correct format (should be binary)
y <- as.array(y2)
Y = y
N <- dim(Y)[1]
D <- dim(Y)[2]
T <- dim(Y)[3]
P <- ncol(g_i)

# Convert Y to logit scale, with smoothing to avoid infinite values
Y_smooth <- (Y * (N * D * T - 1) + 0.5) / (N * D * T)
Y_logit <- log(Y_smooth / (1 - Y_smooth))

# Use the mean logit incidence per disease and time point as the baseline mu_d
m = apply(Y_logit, c(2, 3), mean)
mu_d_list = list()
for (i in 1:nrow(m)) {
  mu_d_list[[i]] = m[i, ]
}
mu_d_logit <- mu_d_list

# Ensure g_i is a matrix
g_i <- as.matrix(g_i)
print(dim(y))
[1] 1000 20 51
print(dim(g_i))
[1] 1000 32
print(length(mu_d_logit))
[1] 20
initial_values <- warm_gp_initialization(y, mu_d_logit, g_i, n_topics)
alpha_lambda <- beta_lambda <- 1
alpha_sigma <- beta_sigma <- 1
alpha_phi <- beta_phi <- 1
alpha_sigma_phi <- beta_sigma_phi <- 1
alpha_Gamma <- beta_Gamma <- 1
# Run MCMC
start = proc.time()
mcmc_results <- mcmc_sampler_optimized(y, mu_d_logit, g_i, n_iterations = 5000, initial_values,
                                       alpha_lambda, beta_lambda, alpha_sigma, beta_sigma,
                                       alpha_phi, beta_phi, alpha_sigma_phi, beta_sigma_phi,
                                       alpha_Gamma, beta_Gamma)
Iteration 100
Iteration 200
...
Iteration 4900
Iteration 5000
stop = proc.time()
stop - start
user system elapsed
3004.475 192.465 3207.969
saveRDS(mcmc_results,"~/Dropbox (Personal)/mcmc_results_827_fast.rds")
library(coda)
analyze_results <- function(mcmc_results, parameter) {
  # Extract the parameter samples
  samples <- mcmc_results[[parameter]]

  # Get dimensions
  dim_samples <- dim(samples)
  n_iterations <- dim_samples[1]

  if (length(dim_samples) == 4) { # For 3D parameters like Phi and Lambda
    n_topics <- dim_samples[2]
    n_diseases_or_individuals <- dim_samples[3]
    T <- dim_samples[4]

    # Choose a few representative slices to plot
    slices <- list(
      c(1, 1, T %/% 2), # First topic, first disease/individual, middle time point
      c(n_topics, n_diseases_or_individuals, T %/% 2), # Last topic, last disease/individual, middle time point
      c(n_topics %/% 2, n_diseases_or_individuals %/% 2, T) # Middle topic, middle disease/individual, last time point
    )
    for (slice in slices) {
      chain <- mcmc(samples[, slice[1], slice[2], slice[3]])

      # Plot trace
      plot(chain, main = paste(parameter, "- Topic", slice[1],
                               "Disease/Individual", slice[2], "Time", slice[3]))
      # Print summary
      print(summary(chain))
      # Effective sample size
      print(effectiveSize(chain))
    }
  } else if (length(dim_samples) == 3) { # For 2D parameters like Gamma
    n_topics <- dim_samples[2]
    n_columns <- dim_samples[3]

    # Choose a few representative slices to plot
    slices <- list(
      c(1, 1), # First topic, first column
      c(n_topics, n_columns), # Last topic, last column
      c(n_topics %/% 2, n_columns %/% 2) # Middle topic, middle column
    )
    for (slice in slices) {
      chain <- mcmc(samples[, slice[1], slice[2]])

      # Plot trace
      plot(chain, main = paste(parameter, "- Topic", slice[1], "Column", slice[2]))
      # Print summary
      print(summary(chain))
      # Effective sample size
      print(effectiveSize(chain))
    }
  } else { # For 1D parameters like length scales and var scales
    chain <- mcmc(samples)

    # Plot trace
    plot(chain, main = parameter)
    # Print summary
    print(summary(chain))
    # Effective sample size
    print(effectiveSize(chain))
  }

  # Calculate overall acceptance rates
  acceptance_rates <- mean(diff(as.vector(samples)) != 0)
  print(paste("Overall acceptance rate:", round(acceptance_rates, 3)))
}
# Usage
analyze_results(mcmc_results, "Phi")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
0.0078556 0.0024820 0.0000351 0.0013699
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
0.0005296 0.0074772 0.0087052 0.0091564 0.0104729
var1
3.282832
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-6.064e-03 1.906e-03 2.696e-05 6.926e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.008570 -0.007524 -0.006969 -0.004265 -0.002042
var1
7.576806
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-2.430e-03 2.085e-03 2.949e-05 9.388e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.005029 -0.003687 -0.002847 -0.001695 0.002967
var1
4.934092
[1] "Overall acceptance rate: 0.446"
analyze_results(mcmc_results, "Lambda")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-2.753e-04 1.439e-04 2.035e-06 1.096e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-4.435e-04 -4.048e-04 -2.801e-04 -2.103e-04 -5.875e-06
var1
1.723856
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-2.516e-04 4.756e-05 6.726e-07 1.639e-05
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.0003860 -0.0002726 -0.0002396 -0.0002162 -0.0001992
var1
8.422534
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-4.144e-04 1.629e-04 2.304e-06 1.206e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-6.115e-04 -5.740e-04 -4.003e-04 -3.549e-04 -7.879e-05
var1
1.825832
[1] "Overall acceptance rate: 0.423"
analyze_results(mcmc_results, "Gamma")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
0.0139171 0.0235509 0.0003331 0.0067270
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.035195 -0.004133 0.015798 0.031889 0.054173
var1
12.25678
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
0.0237617 0.0372435 0.0005267 0.0170315
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.022648 -0.002359 0.011943 0.039964 0.107077
var1
4.781855
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-0.0053648 0.0260095 0.0003678 0.0098597
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.060991 -0.023716 -0.006285 0.009739 0.051094
var1
6.958786
[1] "Overall acceptance rate: 0.49"
If we want to update the variance scales as well, we replace log_gp_prior_vec with a version that rebuilds the kernel from the length and variance scales on each call, and add Metropolis updates for those scales:
# Updated log_gp_prior_vec function
log_gp_prior_vec <- function(x, mean, length_scale, var_scale, time_diff_matrix) {
  T <- length(x)
  K <- var_scale * exp(-0.5 * time_diff_matrix / length_scale^2)
  K <- K + diag(1e-6, T)

  centered_x <- x - mean

  log_det_K <- sum(log(eigen(K, symmetric = TRUE, only.values = TRUE)$values))
  L <- chol(K)
  quad_form <- sum(backsolve(L, centered_x, transpose = TRUE)^2)

  return(-0.5 * (log_det_K + quad_form + T * log(2 * base::pi)))
}
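A sketch of an agreement check: the Cholesky version should match, up to numerical tolerance, the precomputed-inverse density used earlier (re-derived inline as old_val, since log_gp_prior_vec has just been redefined):

T_chk <- 8
tdm_chk <- outer(1:T_chk, 1:T_chk, "-")^2
pc <- precompute_K_inv(T_chk, length_scale = 20, var_scale = 1)
x_chk <- rnorm(T_chk)
old_val <- -0.5 * (as.numeric(pc$log_det_K) +
                     sum(x_chk * (pc$K_inv %*% x_chk)) +
                     T_chk * log(2 * pi))
new_val <- log_gp_prior_vec(x_chk, 0, length_scale = 20, var_scale = 1,
                            time_diff_matrix = tdm_chk)
all.equal(old_val, new_val)  # expect TRUE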
mcmc_sampler_optimized <- function(y,
                                   mu_d_logit,
                                   g_i,
                                   n_iterations,
                                   initial_values,
                                   alpha_lambda,
                                   beta_lambda,
                                   alpha_sigma,
                                   beta_sigma,
                                   alpha_phi,
                                   beta_phi,
                                   alpha_sigma_phi,
                                   beta_sigma_phi,
                                   alpha_Gamma,
                                   beta_Gamma) {
  current_state <- initial_values
  n_individuals <- dim(current_state$Lambda)[1]
  n_topics <- dim(current_state$Lambda)[2]
  T <- dim(current_state$Lambda)[3]
  n_diseases <- dim(current_state$Phi)[2]

  # Initialize adaptive proposal standard deviations
  adapt_sd <- list(
    Lambda = array(0.01, dim = dim(current_state$Lambda)),
    Phi = array(0.01, dim = dim(current_state$Phi)),
    Gamma = matrix(0.01,
                   nrow = nrow(current_state$Gamma),
                   ncol = ncol(current_state$Gamma)),
    var_scales_lambda = rep(0.1, n_topics),
    var_scales_phi = rep(0.1, n_topics * n_diseases)
  )

  # Initialize storage for samples
  samples <- list(
    Lambda = array(0, dim = c(n_iterations, dim(current_state$Lambda))),
    Phi = array(0, dim = c(n_iterations, dim(current_state$Phi))),
    Gamma = array(0, dim = c(n_iterations, dim(current_state$Gamma))),
    var_scales_lambda = matrix(0, nrow = n_iterations, ncol = n_topics),
    var_scales_phi = matrix(0, nrow = n_iterations, ncol = n_topics * n_diseases)
  )

  # Precompute the squared time-difference matrix; because the variance scales
  # now change between iterations, the kernel is rebuilt inside log_gp_prior_vec
  # rather than inverted once up front.
  time_diff_matrix <- outer(1:T, 1:T, "-") ^ 2

  for (iter in 1:n_iterations) {
    # Update Lambda
    proposed_Lambda <- current_state$Lambda +
      array(rnorm(prod(dim(current_state$Lambda)), 0, adapt_sd$Lambda),
            dim = dim(current_state$Lambda))

    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, proposed_Lambda, current_state$Phi, mu_d_logit)

    lambda_mean <- g_i %*% t(current_state$Gamma)
    current_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean[i, k],
                         current_state$length_scales_lambda[k],
                         current_state$var_scales_lambda[k],
                         time_diff_matrix)
      })
    }))
    proposed_log_prior <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(proposed_Lambda[i, k, ],
                         lambda_mean[i, k],
                         current_state$length_scales_lambda[k],
                         current_state$var_scales_lambda[k],
                         time_diff_matrix)
      })
    }))

    log_accept_ratio <- (proposed_log_lik + proposed_log_prior) -
      (current_log_lik + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Lambda <- proposed_Lambda
      adapt_sd$Lambda <- adapt_sd$Lambda * 1.01
    } else {
      adapt_sd$Lambda <- adapt_sd$Lambda * 0.99
    }

    # Update Phi (same structure as the Lambda update)
    proposed_Phi <- current_state$Phi +
      array(rnorm(prod(dim(current_state$Phi)), 0, adapt_sd$Phi),
            dim = dim(current_state$Phi))

    current_log_lik <- log_likelihood_vec(y, current_state$Lambda, current_state$Phi, mu_d_logit)
    proposed_log_lik <- log_likelihood_vec(y, current_state$Lambda, proposed_Phi, mu_d_logit)

    current_log_prior <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        idx <- (k - 1) * n_diseases + d
        log_gp_prior_vec(current_state$Phi[k, d, ],
                         0,
                         current_state$length_scales_phi[idx],
                         current_state$var_scales_phi[idx],
                         time_diff_matrix)
      })
    }))
    proposed_log_prior <- sum(sapply(1:n_topics, function(k) {
      sapply(1:n_diseases, function(d) {
        idx <- (k - 1) * n_diseases + d
        log_gp_prior_vec(proposed_Phi[k, d, ],
                         0,
                         current_state$length_scales_phi[idx],
                         current_state$var_scales_phi[idx],
                         time_diff_matrix)
      })
    }))

    log_accept_ratio <- (proposed_log_lik + proposed_log_prior) -
      (current_log_lik + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Phi <- proposed_Phi
      adapt_sd$Phi <- adapt_sd$Phi * 1.01
    } else {
      adapt_sd$Phi <- adapt_sd$Phi * 0.99
    }

    # Update Gamma
    proposed_Gamma <- current_state$Gamma +
      matrix(rnorm(prod(dim(current_state$Gamma)), 0, adapt_sd$Gamma),
             nrow = nrow(current_state$Gamma))

    current_log_prior <- sum(dnorm(current_state$Gamma, 0,
                                   sqrt(alpha_Gamma / beta_Gamma), log = TRUE))
    proposed_log_prior <- sum(dnorm(proposed_Gamma, 0,
                                    sqrt(alpha_Gamma / beta_Gamma), log = TRUE))

    lambda_mean_current <- g_i %*% t(current_state$Gamma)
    lambda_mean_proposed <- g_i %*% t(proposed_Gamma)

    current_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean_current[i, k],
                         current_state$length_scales_lambda[k],
                         current_state$var_scales_lambda[k],
                         time_diff_matrix)
      })
    }))
    proposed_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
      sapply(1:n_topics, function(k) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean_proposed[i, k],
                         current_state$length_scales_lambda[k],
                         current_state$var_scales_lambda[k],
                         time_diff_matrix)
      })
    }))

    log_accept_ratio <- (proposed_log_likelihood + proposed_log_prior) -
      (current_log_likelihood + current_log_prior)

    if (log(runif(1)) < log_accept_ratio) {
      current_state$Gamma <- proposed_Gamma
      adapt_sd$Gamma <- adapt_sd$Gamma * 1.01
    } else {
      adapt_sd$Gamma <- adapt_sd$Gamma * 0.99
    }

    # Update var_scales_lambda
    lambda_mean <- g_i %*% t(current_state$Gamma)  # refresh after the Gamma update
    for (k in 1:n_topics) {
      proposed_var_scale <- exp(
        log(current_state$var_scales_lambda[k]) +
          rnorm(1, 0, adapt_sd$var_scales_lambda[k])
      )

      current_log_prior <- dgamma(current_state$var_scales_lambda[k],
                                  shape = alpha_sigma, rate = beta_sigma, log = TRUE)
      proposed_log_prior <- dgamma(proposed_var_scale,
                                   shape = alpha_sigma, rate = beta_sigma, log = TRUE)

      current_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean[i, k],
                         current_state$length_scales_lambda[k],
                         current_state$var_scales_lambda[k],
                         time_diff_matrix)
      }))
      proposed_log_likelihood <- sum(sapply(1:n_individuals, function(i) {
        log_gp_prior_vec(current_state$Lambda[i, k, ],
                         lambda_mean[i, k],
                         current_state$length_scales_lambda[k],
                         proposed_var_scale,
                         time_diff_matrix)
      }))

      # The log-scale random walk is asymmetric on the original scale, so the
      # acceptance ratio includes the Jacobian term log(proposed) - log(current).
      log_accept_ratio <- (proposed_log_likelihood + proposed_log_prior) -
        (current_log_likelihood + current_log_prior) +
        log(proposed_var_scale) - log(current_state$var_scales_lambda[k])

      if (log(runif(1)) < log_accept_ratio) {
        current_state$var_scales_lambda[k] <- proposed_var_scale
        adapt_sd$var_scales_lambda[k] <- adapt_sd$var_scales_lambda[k] * 1.01
      } else {
        adapt_sd$var_scales_lambda[k] <- adapt_sd$var_scales_lambda[k] * 0.99
      }
    }

    # Update var_scales_phi
    for (idx in 1:(n_topics * n_diseases)) {
      proposed_var_scale <- exp(
        log(current_state$var_scales_phi[idx]) +
          rnorm(1, 0, adapt_sd$var_scales_phi[idx])
      )

      current_log_prior <- dgamma(current_state$var_scales_phi[idx],
                                  shape = alpha_sigma_phi, rate = beta_sigma_phi, log = TRUE)
      proposed_log_prior <- dgamma(proposed_var_scale,
                                   shape = alpha_sigma_phi, rate = beta_sigma_phi, log = TRUE)

      k <- (idx - 1) %/% n_diseases + 1
      d <- (idx - 1) %% n_diseases + 1

      current_log_likelihood <- log_gp_prior_vec(current_state$Phi[k, d, ],
                                                 0,
                                                 current_state$length_scales_phi[idx],
                                                 current_state$var_scales_phi[idx],
                                                 time_diff_matrix)
      proposed_log_likelihood <- log_gp_prior_vec(current_state$Phi[k, d, ],
                                                  0,
                                                  current_state$length_scales_phi[idx],
                                                  proposed_var_scale,
                                                  time_diff_matrix)

      log_accept_ratio <- (proposed_log_likelihood + proposed_log_prior) -
        (current_log_likelihood + current_log_prior) +
        log(proposed_var_scale) - log(current_state$var_scales_phi[idx])

      if (log(runif(1)) < log_accept_ratio) {
        current_state$var_scales_phi[idx] <- proposed_var_scale
        adapt_sd$var_scales_phi[idx] <- adapt_sd$var_scales_phi[idx] * 1.01
      } else {
        adapt_sd$var_scales_phi[idx] <- adapt_sd$var_scales_phi[idx] * 0.99
      }
    }

    # Store samples
    samples$Lambda[iter, , , ] <- current_state$Lambda
    samples$Phi[iter, , , ] <- current_state$Phi
    samples$Gamma[iter, , ] <- current_state$Gamma
    samples$var_scales_lambda[iter, ] <- current_state$var_scales_lambda
    samples$var_scales_phi[iter, ] <- current_state$var_scales_phi

    if (iter %% 100 == 0)
      cat("Iteration", iter, "\n")
  }
  return(samples)
}
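A minimal usage sketch for this variant (mcmc_results_vs is a hypothetical name; the arguments mirror the earlier runs), including a look at the new variance-scale chains:

mcmc_results_vs <- mcmc_sampler_optimized(y, mu_d_logit, g_i, n_iterations = 1000,
                                          initial_values,
                                          alpha_lambda, beta_lambda, alpha_sigma, beta_sigma,
                                          alpha_phi, beta_phi, alpha_sigma_phi, beta_sigma_phi,
                                          alpha_Gamma, beta_Gamma)
analyze_results(mcmc_results_vs, "var_scales_lambda")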