Chapter 7 Simulating Data
According to our model, we will simulate diagnoses:
Simulate diagnoses based on individuals topic distribution:
diags <- vector("list", D)
topics <- vector("list", D)
disease_in_topic <- vector("list", D)
for (d in 1:D) {
diags[[d]] <-
vector("list", T) # Each individual has a list for each time point
topics[[d]] <-
vector("list", T) # Each individual has a list for each time point
for (t in 2:T) {
lambda_dt = (t*1.5) / T
Nd = max(1, rpois(1, lambda = lambda_dt)) # Allow for more variance in number of diagnoses
# Initialize an array with the correct length
diagnoses_dt = rep(NA, Nd)
topics_dt = rep(NA, Nd)
truetest_dt = rep(NA, Nd)
for (n in 1:Nd) {
z = sample(K, size = 1, prob = Theta[d, , t]) # Sample topic
topics_dt[n] <- z
## pull out disease vector
beta_tk = beta[d, z, , t]
sampled_disease = sample(V, size = 1, prob = beta_tk)
#truetest_dt[n] = sampled_disease %in% topic_specific_diseases[[z]]
diagnoses_dt[n] <- sampled_disease
# Store in the temporary array
}
# Assuming diags is a three-dimensional array, you might need to adjust this
# part depending on how you want to handle varying sizes of Nd. Here it's assumed
# diags[d,,t] can dynamically adjust its size to accommodate diagnoses_dt,
# but this could depend on the data structure you're working with.
if (length(diagnoses_dt) > 0) {
diags[[d]][[t]] <- diagnoses_dt
topics[[d]][[t]] <- topics_dt
disease_in_topic[[d]][[t]] <- truetest_dt
}
}}