The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Attaching package: 'reshape'
The following object is masked from 'package:dplyr':
rename
Attaching package: 'MASS'
The following object is masked from 'package:dplyr':
select
Attaching package: 'tidyr'
The following objects are masked from 'package:reshape':
expand, smiths
Shape of mu_d_matrix: 12 56
Shape of mean_lambda: 10 2
Shape of mu_i: 10 56
Now we initialize, here’s an adaptvie way to initialize length and var:
Code
library(rTensor)library(MASS)# Warm initialization functionwarm_gp_initialization <-function(Y, mu_d_logit, g_i, n_topics) { N <-dim(Y)[1] D <-dim(Y)[2] T <-dim(Y)[3] P <-ncol(g_i) mu_i_mean <-matrix(colMeans(do.call(rbind, mu_d_logit)), nrow =1) deviations <-matrix(rnorm(N * T, mean =0, sd =0.0001), nrow = N) mu_i <-t(apply(deviations, 1, function(x) { mu_i_mean + x # No need for pmax here as we're working on logit scale }))# Convert Y to logit scale, with smoothing to avoid infinite values Y_smooth <- (Y * (N * D * T -1) +0.5) / (N * D * T) Y_logit <-log(Y_smooth / (1- Y_smooth))# Center Y_logit by subtracting mu_d_logit Y_centered <-array(0, dim =c(N, D, T))for (d in1:D) { Y_centered[, d, ] <- Y_logit[, d, ] -matrix(mu_d_logit[[d]], nrow = N, ncol = T, byrow =TRUE) }# Create a tensor object Y_tensor <-as.tensor(Y_centered)# Perform Tucker decomposition tucker_result <-tucker(Y_tensor, ranks =c(n_topics, D, T))# Initialize Lambda (N x K x T) Lambda <-array(0, dim =c(N, n_topics, T))for (k in1:n_topics) { Lambda[, k, ] <- tucker_result$U[[1]][, k] %o% tucker_result$U[[3]][, k] }# Initialize Phi (K x D x T) Phi <-array(0, dim =c(n_topics, D, T))for (k in1:n_topics) { Phi[k, , ] <- tucker_result$U[[2]][, k] %o% tucker_result$U[[3]][, k] }# Initialize Gamma using a conservative approach Gamma <-matrix(rnorm(n_topics * P, mean =0, sd =0.1), nrow = n_topics, ncol = P)# Fixed hyperparameters for length and variance scales length_scales_lambda <-rep(20, n_topics) var_scales_lambda <-rep(1, n_topics) length_scales_phi <-rep(20, n_topics * D) var_scales_phi <-rep(1, n_topics * D)return(list(Lambda = Lambda,Phi = Phi,Gamma = Gamma,mu_i = mu_i,length_scales_lambda = length_scales_lambda,var_scales_lambda = var_scales_lambda,length_scales_phi = length_scales_phi,var_scales_phi = var_scales_phi ))}
Code
# GP prior functionlog_gp_prior_vec <-function(x, mean, length_scale, var_scale) { T <-length(x) time_points <-1:T K <- var_scale *exp(-0.5*outer(time_points, time_points, "-")^2/ length_scale^2) K <- K +diag(1e-6, T)# Ensure mean is the same length as xif (length(mean) ==1) { mean <-rep(mean, T) } elseif (length(mean) != T) {stop(paste("Length of mean must be 1 or equal to length of x. Length of x:", T, "Length of mean:", length(mean))) } centered_x <- x - mean# Use more stable computation for log determinant log_det_K <-sum(log(eigen(K, symmetric =TRUE, only.values =TRUE)$values))# Use Cholesky decomposition for more stable matrix inversion L <-chol(K) quad_form <-sum(backsolve(L, centered_x, transpose =TRUE)^2)return(-0.5* (log_det_K + quad_form + T *log(2* base::pi)))}
# Define a custom print method for the progress objectknit_print.progress <-function(x, ...) { output <-sprintf("Iteration %d / %d (%.2f%%) completed\n", x$iteration, x$total_iterations, x$percent_complete) output <-paste0(output, sprintf("Time elapsed: %.2f minutes\n", x$elapsed_time)) output <-paste0(output, sprintf("Estimated time remaining: %.2f minutes\n\n", x$remaining_time)) knitr::asis_output(output)}
Code
# Load necessary librarieslibrary(rTensor)library(MASS)# Assuming you have your data (y, mu_d_logit, g_i) ready# Step 1: Set up parametersn_topics <-3# Number of topics (adjust as needed)n_iterations <-5000# Number of MCMC iterationsuse_mu_i <-TRUEuse_survival <-TRUEestimate_scales <-FALSE# We're using fixed scales# Fixed scalesfixed_length_scale <-15fixed_var_scale <-1# Step 2: Initialize the modelinitial_values <-warm_gp_initialization(y, mu_d_logit, g_i, n_topics)
library(coda)analyze_results <-function(mcmc_results, parameter) {# Extract the parameter samples samples <- mcmc_results[[parameter]]# Get dimensions dim_samples <-dim(samples) n_iterations <- dim_samples[1]if (length(dim_samples) ==4) { # For 3D parameters like Phi and Lambda n_topics <- dim_samples[2] n_diseases_or_individuals <- dim_samples[3] T <- dim_samples[4]# Choose a few representative slices to plot slices <-list(c(1, 1, T %/%2), # First topic, first disease/individual, middle time pointc(n_topics, n_diseases_or_individuals, T %/%2), # Last topic, last disease/individual, middle time pointc(n_topics %/%2, n_diseases_or_individuals %/%2, T) # Middle topic, middle disease/individual, last time point )for (slice in slices) { chain <-mcmc(samples[, slice[1], slice[2], slice[3]])# Plot traceplot(chain, main=paste(parameter, "- Topic", slice[1], "Disease/Individual", slice[2], "Time", slice[3]))# Print summaryprint(summary(chain))# Effective sample sizeprint(effectiveSize(chain)) } } elseif (length(dim_samples) ==3) { # For 2D parameters like Gamma n_topics <- dim_samples[2] n_columns <- dim_samples[3]# Choose a few representative slices to plot slices <-list(c(1, 1), # First topic, first columnc(n_topics, n_columns), # Last topic, last columnc(n_topics %/%2, n_columns %/%2) # Middle topic, middle column )for (slice in slices) { chain <-mcmc(samples[, slice[1], slice[2]])# Plot traceplot(chain, main=paste(parameter, "- Topic", slice[1], "Column", slice[2]))# Print summaryprint(summary(chain))# Effective sample sizeprint(effectiveSize(chain)) } } else { # For 1D parameters like length scales and var scales chain <-mcmc(samples)# Plot traceplot(chain, main=parameter)# Print summaryprint(summary(chain))# Effective sample sizeprint(effectiveSize(chain)) }# Calculate overall acceptance rates acceptance_rates <-mean(diff(as.vector(samples)) !=0)print(paste("Overall acceptance rate:", round(acceptance_rates, 3)))}# Usageanalyze_results(mcmc_results, "Phi")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-3.421e-02 1.703e-03 2.408e-05 5.358e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.03777 -0.03496 -0.03383 -0.03313 -0.03128
var1
10.0985
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
1.014e-02 5.199e-03 7.353e-05 2.095e-03
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
0.006098 0.007559 0.009092 0.010114 0.028899
var1
6.161467
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
6.065e-02 4.191e-03 5.928e-05 2.227e-03
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
0.05496 0.05782 0.05986 0.06177 0.07024
var1
3.542008
[1] "Overall acceptance rate: 0.441"
Code
analyze_results(mcmc_results, "Lambda")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-4.494e-02 2.558e-03 3.617e-05 9.859e-04
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.05083 -0.04572 -0.04429 -0.04317 -0.04164
var1
6.730941
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-7.885e-03 4.562e-03 6.452e-05 1.539e-03
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-0.024232 -0.008404 -0.007075 -0.005689 -0.003354
var1
8.793405
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
8.519e-02 2.913e-03 4.119e-05 1.272e-03
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
0.07779 0.08363 0.08566 0.08722 0.08897
var1
5.244309
[1] "Overall acceptance rate: 0.441"
Code
analyze_results(mcmc_results, "Gamma")
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-0.304587 0.479222 0.006777 0.123175
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-1.22503 -0.65061 -0.29321 0.04967 0.63304
var1
15.13664
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-0.186976 0.498230 0.007046 0.120126
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-1.1841 -0.5214 -0.1647 0.1775 0.7788
var1
17.20226
Iterations = 1:5000
Thinning interval = 1
Number of chains = 1
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
-0.24699 0.46739 0.00661 0.10484
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
-1.0480 -0.5924 -0.2710 0.0593 0.6903
var1
19.87526
[1] "Overall acceptance rate: 0.501"