# Chapter 4 Simulation of Disease Loadings

For the disease loadings, we use different mean functions to simulate the progression of disease probabilities over time. We assume that diseases tend to be topic-specific and heavily loaded on a few topics.

cov_function <- function(t1, t2, lengthscale, variance) {
exp(-0.5 * (t1 - t2)^2 / lengthscale^2) * variance
}

# Store disease-specific covariances
Sigma <- array(dim = c(K, V, T, T)) # K topics, V diseases, T x T covariance matrix each

eta <- array(dim = c(K, V, T))

# Bias factor (adjust as needed)

# Generate covariances (adjust as needed)
for (k in 1:K) {
for (v in 1:V) {
# Sample a single lengthscale and variance for each topic-disease pair
cov_matrix_kv <- matrix(0, nrow = T, ncol = T)
#lengthscale_kv <- runif(1,min=5,max=20) ## choose random lengthscale for each topic and disease
#variance_kv <- runif(1,0.1,0.5)## choose random variance for each topic and disease
variance_kv=0.01 ## smaller variance means it changes less from mu funciotn

lengthscale_kv=10
for (i in 1:T) {
for (j in 1:T) {
cov_matrix_kv[i, j] <- cov_function(i, j, lengthscale_kv, variance_kv)
}
}

Sigma[k, v, , ] <- cov_matrix_kv
}
}

## 4.1 Sample from the Gaussian Process

The probability of a disease within a topic over time has some covariance structure. We also might imagine that the disease probabilities also have some shape over time:

linear_trend <- function(time,
slope = 0.1,
intercept = 0) {
return(slope * time + intercept)
}

logistic_growth <-
function(t, carrying_capacity, growth_rate, t_mid) {
carrying_capacity / (1 + exp(-growth_rate * (t - t_mid)))
}

exponential_decay <- function(t, initial_value, decay_rate) {
initial_value * exp(-decay_rate * t)
}

gaussian_peak <- function(t, mean, variance) {
exp(-(t - mean) ^ 2 / (2 * variance))
}

polynomial_trend <- function(t, coefficients) {
result <- sapply(t, function(time_point) {
sum(sapply(1:length(coefficients), function(i) coefficients[i] * time_point^(i - 1)))
})
return(result)
}

sinusoidal_pattern <- function(t, amplitude, period, phase) {
amplitude * sin((2 * pi / period) * t + phase)
}

cov_function <- function(t1, t2, lengthscale, variance) {
return(variance * exp(-0.5 * (t1 - t2) ^ 2 / lengthscale ^ 2))
}

We might wish that each disease within a topic ihas it’s unique mean function:

# Modify eta sampling to use the correct covariance
# Covariance matrix Sigma (you should define this according to your model specifics)
# Parameters

D <- 20  # Number of individuals
K <- 5   # Number of topics
T <- 100  # Number of time points
time_points <- 1:T

# Initialize the array
topic_word_mean <- array(NA, dim = c(K, V))

mean_functions <- list(
linear_trend = linear_trend,
logistic_growth = logistic_growth,
#exponential_decay = exponential_decay,
gaussian_peak = gaussian_peak
#polynomial_trend = polynomial_trend
#sinusoidal_pattern = sinusoidal_pattern
)

# Define a list of names for the mean functions
mean_function_names <- names(mean_functions)

# Simulate mean functions for each disease
mu_vectors <- list()
mean_functions_called = list()

for (k in 1:K) {
mu_vectors[[k]] <- list()
mean_functions_called[[k]] = list()
for (v in 1:V) {
# Choose a mean function randomly
# Choose a mean function by name randomly
chosen_function_name <- sample(mean_function_names, 1)
# Retrieve the function from the list using the name
mean_function <- mean_functions[[chosen_function_name]]
# Sample parameter ranges
params <- list(
slope = runif(1, -0.05, 0.05),
intercept = runif(1, -1, 1),
carrying_capacity = runif(1, 0.02, 0.05),
growth_rate = runif(1, 0.01, 0.05),
t_mid = sample(time_points, 1),
mean = sample(time_points, 1),
variance = runif(1, 10, 50),
coefficients = runif(3, -0.1, 0.1)
)

# Use the appropriate parameters based on the chosen mean function
if (chosen_function_name  == "logistic_growth") {
function_specific_params <- list(
t = time_points,
carrying_capacity = params$carrying_capacity, growth_rate = params$growth_rate,
t_mid = params$t_mid ) } else if (chosen_function_name == "linear_trend") { function_specific_params <- list( t = time_points, slope = params$slope,
intercept = params$intercept ) } else if (chosen_function_name == "exponential_decay") { function_specific_params <- list( t = time_points, initial_value = params$initial_value,
decay_rate = params$decay_rate ) } else if (chosen_function_name == "gaussian_peak") { function_specific_params <- list(t = time_points, mean = params$mean,
variance = params$variance) } else if (chosen_function_name == "polynomial_trend") { function_specific_params <- list(t = time_points, coefficients = params$coefficients)
} else if (chosen_function_name  == "sinusoidal_pattern") {
function_specific_params = list(
t = time_points,
amplitude = params$amplitude, period = params$period,
phase = params\$phase
)
}

# Generate the time series for the mean function using do.call
mu_vectors[[k]][[v]] <-
do.call(mean_function, function_specific_params)
mean_functions_called[[k]][[v]] = list("function_name" = chosen_function_name, "params" = function_specific_params)

eta[k, v, ] <-
mvrnorm(1, mu = mu_vectors[[k]][[v]], Sigma = Sigma[k, v, , ])
}
}

rho = matrix(runif(K*D,min = 0.5,max = 2),nrow=D,ncol=K)
#rho=matrix(1,nrow=D,ncol=K)
# Normalize using logistic function and a scaling factor
adjusted_logistic_function <- function(x, scale) {
return(scale / (1 + exp(-x)))
}

inv_time_warping <- function(t, rho, T) {
return(min(T, max(1, floor((t / T) ^ (1 / rho) * T))))
}

# Initialize inverse warped time array
inv_warped_t_array <- array(0, dim = c(D, K, T))

for (d in 1:D) {
for (k in 1:K) {
for (t in 1:T) {
inv_warped_t_array[d, k, t] <- inv_time_warping(t, rho[d, k], T)
}
}
}

## 4.2 Topic specificity

We simiulate so that disease tend to be somewhat topic specific, that is, heavilty loaded on a few topics

# Determine the number of topics in which each disease will be primarily active
#num_topics_per_disease <- sample(1:2, V, replace = TRUE)  # Each disease is active in 1 or 2 topics
num_topics_per_disease = rep(1,V)
# Initialize the list to store the primary topics for each disease
disease_primary_topics <- vector("list", length = V)

# Initialize the disease-topic matrix with zeros indicating inactivity
disease_topic_matrix <- matrix(0, nrow = V, ncol = K)

# Assign primary topics to each disease
for (disease in 1:V) {
primary_topics <- sample(1:K, num_topics_per_disease[disease])
disease_primary_topics[[disease]] <- primary_topics

# Set the bias value for the disease in its primary topics, can vary
#bias_values <- runif(num_topics_per_disease[disease], 0, 1)  # Variable bias between 0 and 1
bias_values = rep(1,num_topics_per_disease[disease])
# Fill in the disease-topic matrix with bias values for the primary topics
disease_topic_matrix[disease, primary_topics] <- bias_values
}

image(t(disease_topic_matrix),xlab="Topic",ylab="Disease")

image((disease_topic_matrix),ylab="Topic",xlab="Disease")

topic_specific_diseases=list()
for(i in 1:K){
topic_specific_diseases[[i]]=which(disease_topic_matrix[,i]==1)}

## 4.3 Now do on population for different warping

beta <- array(0, dim(eta))
# Normalize using logistic function and a scaling factor
adjusted_logistic_function <- function(x, scale) {
return(scale / (1 + exp(-x)))
}

scaling_factor <- 0.02  #

bias_value=max(eta)
for (topic_idx in 1:K) {
# Retrieve the diseases and their risk periods for this topic
#specific_diseases_periods <- topic_disease_risk_periods[[topic_idx]]
bias_increase <- disease_topic_matrix[,topic_idx]*bias_value
for (time_idx in 1:T) {

disease_values <- eta[topic_idx, , time_idx]

# Increase the values for active diseases at this time before normalization
dv <- disease_values+bias_increase

# Apply softmax normalization
normalized_disease <- adjusted_logistic_function(dv,scale = scaling_factor)
#normalized_disease <- softmax_normalize(dv)

beta[topic_idx, , time_idx] <- normalized_disease
}
}

Show that disease probabilities for two people of different $$\rho$$:

v=sample(topic_specific_diseases[[topic_idx]],1)
plot(beta[topic_idx,v,],pch=19,col="grey",ylab="Disease Probability over Time conditional on topic activity",xlab="Time")
t_high=inv_warped_t_array[which.max(rho[,topic_idx]),topic_idx,]
points(seq(1:T),beta[topic_idx,v,t_high],col="red",pch=19)
t_low=inv_warped_t_array[which.min(rho[,topic_idx]),topic_idx,]
points(seq(1:T),beta[topic_idx,v,t_low],col="blue",pch=19)

Expand beta_prime array so that it stores the right value for each individual:

beta_prime=array(NA,dim=c(D,K,V,T))
for(d in 1:D){
for(k in 1:K){
warped_times = inv_warped_t_array[d,k,]
beta_prime[d,k,,]=beta[k,,warped_times]
}
}

betaold=beta