Chapter 7 Simulating Data

According to our model, we will simulate diagnoses:

Simulate diagnoses based on individuals topic distribution:

diags <- vector("list", D)
topics <- vector("list", D)
disease_in_topic <- vector("list", D)

for (d in 1:D) {
  diags[[d]] <-
    vector("list", T) # Each individual has a list for each time point
  topics[[d]] <-
    vector("list", T) # Each individual has a list for each time point
  
  for (t in 2:T) {
    lambda_dt = (t*1.5) / T
    
    Nd = max(1, rpois(1, lambda = lambda_dt)) # Allow for more variance in number of diagnoses
    
    # Initialize an array with the correct length
    diagnoses_dt = rep(NA, Nd)
    topics_dt = rep(NA, Nd)
    truetest_dt = rep(NA, Nd)
    
    for (n in 1:Nd) {
      z = sample(K, size = 1, prob = Theta[d, , t]) # Sample topic
      topics_dt[n] <- z
      ## pull out disease vector
      beta_tk = beta[d, z, , t]
      
      sampled_disease = sample(V, size = 1, prob = beta_tk)
      #truetest_dt[n] = sampled_disease %in% topic_specific_diseases[[z]]
      diagnoses_dt[n] <- sampled_disease
      # Store in the temporary array
      
    }
    
    # Assuming diags is a three-dimensional array, you might need to adjust this
    # part depending on how you want to handle varying sizes of Nd. Here it's assumed
    # diags[d,,t] can dynamically adjust its size to accommodate diagnoses_dt,
    # but this could depend on the data structure you're working with.
    if (length(diagnoses_dt) > 0) {
      diags[[d]][[t]] <-  diagnoses_dt
      topics[[d]][[t]] <-  topics_dt
      disease_in_topic[[d]][[t]] <-  truetest_dt
    }
  }}