Latent Trajectory Analysis
Sampling 1,000 people from the PCP 2016 Cohort
Trajectory Analysis:
For this analysis we use a latent class mixed model (LCMM) to examine the effect that different weight-cleaning algorithms have on overall weight trajectories. Two things to consider. First, since this is a linear method it makes the most sense to analyze individuals whose trajectories are assumed to be decreasing or increasing, so this analysis involves only the MOVE 2016 cohort (the 2008 cohort could also be used). Second, based on experience with this data, I am assuming only three latent groups or classes, on the presumption that people lose, maintain, or gain weight over time; this may not represent reality, and further analysis is warranted to find a better-fitting model.
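The sampling step itself isn't shown here. A minimal sketch of what it might look like, assuming a cohort table cohort_2016 and a per-algorithm list of cleaned weight data weights_by_algorithm (both hypothetical names, as is the seed):
set.seed(2016)  # hypothetical seed; not given in the source
# draw 1,000 patients from the cohort
sampled_icns <- sample(unique(cohort_2016$PatientICN), 1000)
# subset each algorithm's cleaned weights to the sampled patients
me <- lapply(weights_by_algorithm, function(d) subset(d, PatientICN %in% sampled_icns))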
library(lcmm)
# lcmm() requires the 'subject' identifier to be numeric
for (i in seq_along(me)) {
  if (is.factor(me[[i]]$PatientICN)) {
    me[[i]]$PatientICN <- as.integer(as.character(me[[i]]$PatientICN))
  }
}
# one-class model, used to supply initial values (B) for the mixture models below
B <- lcmm(
Weight ~ time,
random = ~time,
subject = "PatientICN",
ng = 1,
idiag = TRUE,
data = me[[1]],
verbose = TRUE
)
# fit a 3-class latent class mixed model to each algorithm's cleaned data
lca.mods <- lapply(
me,
FUN = function(x) {
lcmm(
Weight ~ time,
random = ~time,
subject = "PatientICN",
mixture = ~time,
ng = 3,
idiag = TRUE,
data = x,
B = B
)
}
)
names(lca.mods) <- names(me)
# re-attach each dataset to its model (lcmm keeps only the name of the data object)
for (i in seq_along(lca.mods)) lca.mods[[i]]$data <- me[[i]]
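As a first pass at the model-fit caveat above, one could refit a single algorithm's data with a few different class counts and compare fit criteria via lcmm's summarytable(). A minimal sketch; the 2-4 class range is an assumption:
# refit me[[1]] with 2-4 latent classes, reusing B for initial values
cand <- lapply(2:4, function(k) {
  lcmm(
    Weight ~ time,
    random = ~time,
    subject = "PatientICN",
    mixture = ~time,
    ng = k,
    idiag = TRUE,
    data = me[[1]],
    B = B
  )
})
# number of classes, log-likelihood, BIC, and class proportions side by side
summarytable(B, cand[[1]], cand[[2]], cand[[3]])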
Predicted classes
So, there is no guarantee that a given class in one algorithm's LCMM is the same class in another algorithm's model. We can recover the correspondence from the signs of the coefficients on “time” in each model.
concept_classes <- c("Loss", "Maintain", "Gain")
# map each model's classes to a concept via the sign of its class-specific time slope
traj_edies <- sapply(
lca.mods,
function(x) {
time_coefs <- coef(x)
which_coef <- grepl("time", names(time_coefs))
time_coefs <- time_coefs[which_coef]
traj_class <- sign(round(time_coefs, 3))
traj_class
}
) %>%
reshape2::melt() %>%
mutate(
concept_class = case_when(
value == -1 ~ concept_classes[1],
value == 0 ~ concept_classes[2],
value == 1 ~ concept_classes[3]
),
concept_class = factor(
concept_class,
levels = concept_classes,
labels = concept_classes
),
orig_class = case_when(
Var1 == "time class1" ~ "Class 1",
Var1 == "time class2" ~ "Class 2",
Var1 == "time class3" ~ "Class 3"
),
orig_class = factor(orig_class)
) %>%
select(-value, -Var1) %>%
rename(Algorithm = Var2)
# prediction grid: every 5 days over one year
newdata <- data.frame(time = seq(0, 365, 5))
# class-specific predicted mean trajectories from each algorithm's model
Ypred_lcmm <- lapply(
lca.mods,
function(x) {
y <- predictY(x, newdata, var.time = "time")
cbind(y$pred, y$time)
}
) %>%
dplyr::bind_rows(.id = "Algorithm") %>%
reshape2::melt(id.vars = c("Algorithm", "time")) %>%
mutate(
Algorithm = factor(Algorithm,
levels = names(me),
labels = names(me)),
orig_class = factor(variable,
labels = c("Class 1", "Class 2", "Class 3"))
) %>%
select(-variable) %>%
rename(Ypred = value) %>%
left_join(
traj_edies,
by = c("Algorithm", "orig_class")
)
# add() is magrittr's alias for `+`, so ggplot layers can ride the pipe
Ypred_lcmm %>%
ggplot(
aes(
x = time, y = Ypred,
group = interaction(Algorithm, concept_class),
color = interaction(Algorithm, concept_class)
)
) %>%
add(geom_line(size = 1)) %>%
add(theme_fivethirtyeight(16)) %>%
add(theme(axis.title = element_text(), legend.position = "none")) %>%
add(xlim(0, 500)) %>%
add(directlabels::geom_dl(aes(label = interaction(Algorithm, concept_class)),
method = "last.polygons")) %>%
add(labs(
x = "Time (days)",
y = "Weight (Lbs.)",
title = "Class Specific Mean-Predicted Trajectory",
subtitle = "2016 Cohort - Latent Class Mixed Model",
color = ""
))
Hah! What a mess. Let’s look at things a little differently.
This next one certainly won't help anyone.
An Illustration:
library(gganimate)
Ypred_lcmm %>%
ggplot(
aes(
x = time,
y = Ypred,
color = Algorithm,
group = interaction(Algorithm, concept_class)
)
) %>%
add(geom_line(size = 1)) %>%
add(theme_minimal(20)) %>%
add(theme(legend.position = "none")) %>%
add(labs(
title = 'Algorithm: {closest_state}',
x = "Time (days)",
y = "Weight (lbs.)"
)) %>%
add(transition_states(
states = Algorithm,
transition_length = 1,
state_length = 1
))
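In a knitr chunk the animation renders when printed; interactively, you could assign the pipeline above to an object (p is a hypothetical name) and render with explicit settings:
# hypothetical: p holds the ggplot + transition_states() object built above
gganimate::animate(p, nframes = 120, fps = 10)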
Let’s break it down a bit.
Highlight a few algorithms of interest:
# define the highlighted algorithms before first use (also reused for the slopes plot below)
algos <- c("Janney 2016", "Goodrich 2016", "Rosenberger 2011", "Kazerooni 2016")
Ypred_lcmm %>%
ggplot(
aes(
x = time,
y = Ypred,
group = interaction(Algorithm, concept_class),
color = Algorithm
)
) %>%
add(geom_line(size = 1)) %>%
add(theme_minimal(16)) %>%
add(theme(axis.title = element_text(), legend.position = "none")) %>%
add(xlim(0, 450)) %>%
add(gghighlight::gghighlight(
Algorithm %in% algos,
label_key = Algorithm
)) %>%
add(scale_color_manual(values = COLS[c(1, 3, 4)])) %>%
add(labs(
x = "Time (days)",
y = "Weight (Lbs.)"
))
By Class
Looking only at Class 1: Loss
Looking only at Class 2: Maintenance
Looking only at Class 3: Gain
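The code behind these per-class panels isn't shown; a minimal sketch for the Loss panel, keeping the add() pipe style (swap the filter value for the other classes):
Ypred_lcmm %>%
  filter(concept_class == "Loss") %>%
  ggplot(aes(x = time, y = Ypred, group = Algorithm, color = Algorithm)) %>%
  add(geom_line(size = 1)) %>%
  add(theme_minimal(16)) %>%
  add(labs(x = "Time (days)", y = "Weight (Lbs.)", title = "Class 1: Loss"))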
Comparing slopes, numerically
# pull the class-specific time slopes from each model
slopes <- sapply(
lca.mods,
function(x) {
time_coefs <- coef(x)
which_coef <- grepl("time", names(time_coefs))
time_coefs <- time_coefs[which_coef]
time_coefs <- round(time_coefs, 5)
time_coefs
}
) %>%
reshape2::melt() %>%
mutate(
orig_class = case_when(
Var1 == "time class1" ~ "Class 1",
Var1 == "time class2" ~ "Class 2",
Var1 == "time class3" ~ "Class 3"
),
orig_class = factor(orig_class)
) %>%
select(-Var1) %>%
rename(Algorithm = Var2, Slope = value) %>%
full_join(traj_edies, by = c("Algorithm", "orig_class"))
slopes %>%
  select(Algorithm, concept_class, Slope) %>%
tableStyle(digits = 5)
Algorithm | concept_class | Slope |
---|---|---|
Raw Weights | Gain | 0.01322 |
Raw Weights | Loss | -0.00906 |
Raw Weights | Maintain | 0.00000 |
Janney 2016 | Maintain | 0.00020 |
Janney 2016 | Gain | 0.02454 |
Janney 2016 | Loss | -0.01548 |
Littman 2012 | Gain | 0.01123 |
Littman 2012 | Loss | -0.01005 |
Littman 2012 | Maintain | -0.00002 |
Maciejewski 2016 | Gain | 0.01405 |
Maciejewski 2016 | Loss | -0.01026 |
Maciejewski 2016 | Maintain | 0.00003 |
Breland 2017 | Loss | -0.01013 |
Breland 2017 | Gain | 0.01482 |
Breland 2017 | Maintain | 0.00002 |
Maguen 2013 | Gain | 0.00248 |
Maguen 2013 | Maintain | -0.00038 |
Maguen 2013 | Loss | -0.01869 |
Goodrich 2016 | Gain | 0.02909 |
Goodrich 2016 | Maintain | 0.00025 |
Goodrich 2016 | Loss | -0.01617 |
Chan 2017 | Gain | 0.01470 |
Chan 2017 | Loss | -0.00983 |
Chan 2017 | Maintain | 0.00001 |
Buta 2018 | Gain | 0.01346 |
Buta 2018 | Loss | -0.00913 |
Buta 2018 | Maintain | -0.00001 |
Kazerooni 2016 | Maintain | 0.00004 |
Kazerooni 2016 | Loss | -0.00090 |
Kazerooni 2016 | Gain | 0.06236 |
Rosenberger 2011 | Gain | 0.02456 |
Rosenberger 2011 | Maintain | 0.00002 |
Rosenberger 2011 | Loss | -0.00884 |
Jackson 2015 | Maintain | -0.00015 |
Jackson 2015 | Gain | 0.00972 |
Jackson 2015 | Loss | -0.00991 |
Noel 2012 | Maintain | -0.00007 |
Noel 2012 | Gain | 0.00406 |
Noel 2012 | Loss | -0.00457 |
slopes %>%
ggplot(aes(
x = concept_class,
y = Slope,
group = Algorithm,
color = Algorithm
)) %>%
add(geom_line()) %>%
add(directlabels::geom_dl(
data = slopes %>%
filter(Algorithm %in% algos),
aes(label = Algorithm),
method = list("last.polygons")
)) %>%
add(theme_fivethirtyeight(16)) %>%
add(theme(legend.position = "none", axis.title = element_text())) %>%
add(labs(
x = "",
y = "Estimated Slope"
))
Membership Posterior Probabilities
For each algorithm, weight was modeled with a random slope and intercept. To illustrate the concept, the output from the Raw Weights (no cleaning algorithm) model is displayed:
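This output can be reproduced by summarizing the fitted model; the element name "Raw Weights" is an assumption matching the algorithm labels in the tables below:
summary(lca.mods[["Raw Weights"]])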
General latent class mixed model
fitted by maximum likelihood method
lcmm(fixed = Weight ~ time, mixture = ~time, random = ~time,
subject = "PatientICN", ng = 3, idiag = TRUE, data = x)
Statistical Model:
Dataset: x
Number of subjects: 993
Number of observations: 7329
Number of latent classes: 3
Number of parameters: 11
Link function: linear
Iteration process:
Convergence criteria satisfied
Number of iterations: 51
Convergence criteria: parameters= 9.6e-05
: likelihood= 2e-06
: second derivatives= 9.8e-07
Goodness-of-fit statistics:
maximum log-likelihood: -27768.14
AIC: 55558.27
BIC: 55612.18
Maximum Likelihood Estimates:
Fixed effects in the class-membership model:
(the class of reference is the last class)
coef Se Wald p-value
intercept class1 -4.52848 0.34988 -12.943 0.00000
intercept class2 -2.93765 0.22929 -12.812 0.00000
Fixed effects in the longitudinal model:
coef Se Wald p-value
intercept class1 (not estimated) 0
intercept class2 -1.22305 2.76107 -0.443 0.65779
intercept class3 -6.62143 2.47619 -2.674 0.00749
time class1 0.01322 0.00100 13.272 0.00000
time class2 -0.00906 0.00067 -13.428 0.00000
time class3 0.00000 0.00011 0.022 0.98252
Variance-covariance matrix of the random-effects:
intercept time
intercept 42.42468
time 0.00000 0
Residual standard error (not estimated) = 1
Parameters of the link function:
coef Se Wald p-value
Linear 1 (intercept) 251.15091 17.02209 14.754 0.00000
Linear 2 (std err) 6.93123 0.06530 106.151 0.00000
Each class has a slightly different slope on time. The posterior classifications can be examined for each algorithm; the blocks below are labeled accordingly.
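The summaries that follow come from lcmm's postprob(); a minimal sketch of how the pprobs list used further below was presumably built (postprob() prints each table and invisibly returns it):
# one postprob() result per algorithm; the printed output appears below
pprobs <- lapply(lca.mods, postprob)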
Raw Weights:
Posterior classification:
class1 class2 class3
N 9.00 33.00 951.00
% 0.91 3.32 95.77
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9631 0.0000 0.0369
class2 0.0000 0.9081 0.0919
class3 0.0015 0.0205 0.9780
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100.00 90.91 98.21
prob>0.8 88.89 84.85 96.64
prob>0.9 88.89 75.76 94.43
Janney 2016:
Posterior classification:
class1 class2 class3
N 958.00 3.00 14.00
% 98.26 0.31 1.44
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9891 0.0013 0.0096
class2 0.0015 0.9985 0.0000
class3 0.1426 0.0000 0.8574
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 99.69 100 78.57
prob>0.8 98.96 100 71.43
prob>0.9 97.29 100 57.14
Littman 2012:
Posterior classification:
class1 class2 class3
N 7.00 32.0 930.00
% 0.72 3.3 95.98
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9923 0.0000 0.0077
class2 0.0000 0.8442 0.1558
class3 0.0045 0.0177 0.9778
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100 75.00 98.17
prob>0.8 100 65.62 96.34
prob>0.9 100 59.38 93.66
Maciejewski 2016:
Posterior classification:
class1 class2 class3
N 7.0 38.00 948.00
% 0.7 3.83 95.47
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9847 0.0000 0.0153
class2 0.0000 0.8570 0.1430
class3 0.0017 0.0187 0.9795
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100 76.32 98.31
prob>0.8 100 68.42 97.05
prob>0.9 100 60.53 94.51
Breland 2017:
Posterior classification:
class1 class2 class3
N 35.00 8.00 950.00
% 3.52 0.81 95.67
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.8810 0.0000 0.1190
class2 0.0000 0.9684 0.0316
class3 0.0191 0.0014 0.9795
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 82.86 100.0 98.21
prob>0.8 77.14 100.0 96.95
prob>0.9 65.71 87.5 94.42
Maguen 2013:
Posterior classification:
class1 class2 class3
N 63.00 902.00 14.00
% 6.44 92.13 1.43
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.7793 0.2164 0.0043
class2 0.0364 0.9591 0.0045
class3 0.0165 0.1173 0.8662
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 66.67 96.34 78.57
prob>0.8 46.03 93.35 57.14
prob>0.9 34.92 87.80 57.14
Goodrich 2016:
Posterior classification:
class1 class2 class3
N 3.00 958.00 14.00
% 0.31 98.26 1.44
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9983 0.0017 0.0000
class2 0.0012 0.9874 0.0114
class3 0.0000 0.1175 0.8825
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100 99.37 85.71
prob>0.8 100 98.23 71.43
prob>0.9 100 96.97 71.43
Chan 2017:
Posterior classification:
class1 class2 class3
N 9.00 38.00 922.00
% 0.93 3.92 95.15
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9635 0.0000 0.0365
class2 0.0000 0.8557 0.1443
class3 0.0013 0.0175 0.9812
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100.00 76.32 98.48
prob>0.8 88.89 68.42 97.29
prob>0.9 88.89 57.89 95.34
Buta 2018:
Posterior classification:
class1 class2 class3
N 9.00 33.00 868.00
% 0.99 3.63 95.38
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9668 0.0000 0.0332
class2 0.0000 0.8972 0.1028
class3 0.0014 0.0204 0.9782
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100.00 84.85 98.16
prob>0.8 88.89 81.82 96.31
prob>0.9 88.89 72.73 94.47
Kazerooni 2016:
Posterior classification:
class1 class2 class3
N 0 262.00 1.00
% 0 99.62 0.38
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 NaN NaN NaN
class2 0.0037 0.9963 0
class3 0.0000 0.0000 1
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 NaN 100 100
prob>0.8 NaN 100 100
prob>0.9 NaN 100 100
Rosenberger 2011:
Posterior classification:
class1 class2 class3
N 2.0 634.0 21.0
% 0.3 96.5 3.2
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 1 0.0000 0.0000
class2 0 0.9870 0.0130
class3 0 0.2019 0.7981
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 100 98.42 66.67
prob>0.8 100 97.79 57.14
prob>0.9 100 96.21 42.86
Jackson 2015:
Posterior classification:
class1 class2 class3
N 940.00 11.00 26.00
% 96.21 1.13 2.66
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9726 0.0081 0.0193
class2 0.0487 0.9510 0.0002
class3 0.1397 0.0000 0.8603
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 98.30 90.91 88.46
prob>0.8 96.38 90.91 69.23
prob>0.9 93.51 90.91 53.85
Noel 2012:
Posterior classification:
class1 class2 class3
N 919.0 35.0 46.0
% 91.9 3.5 4.6
Posterior classification table:
--> mean of posterior probabilities in each class
prob1 prob2 prob3
class1 0.9537 0.0247 0.0216
class2 0.1849 0.8150 0.0002
class3 0.1795 0.0000 0.8205
Posterior probabilities above a threshold (%):
class1 class2 class3
prob>0.7 96.41 65.71 71.74
prob>0.8 92.82 51.43 65.22
prob>0.9 86.18 40.00 43.48
Posterior Classifications, by Algorithm
# N and % classified per class: first element of each postprob() result
post_class <- vector("list", length(pprobs))
for (i in seq_along(pprobs)) {
  post_class[[i]] <- reshape2::melt(pprobs[[i]][[1]])
  post_class[[i]]$Algorithm <- names(pprobs)[[i]]
}
post_class %>%
bind_rows() %>%
mutate(
orig_class = case_when(
Var2 == "class1" ~ "Class 1",
Var2 == "class2" ~ "Class 2",
Var2 == "class3" ~ "Class 3"
)
) %>%
left_join(traj_edies, by = c("Algorithm", "orig_class")) %>%
select(Algorithm, concept_class, value, Var1) %>%
reshape2::dcast(Algorithm + concept_class ~ Var1, value.var = "value") %>%
scrollTable()
Algorithm | concept_class | N | % |
---|---|---|---|
Breland 2017 | Loss | 35 | 3.52 |
Breland 2017 | Maintain | 950 | 95.67 |
Breland 2017 | Gain | 8 | 0.81 |
Buta 2018 | Loss | 33 | 3.63 |
Buta 2018 | Maintain | 868 | 95.38 |
Buta 2018 | Gain | 9 | 0.99 |
Chan 2017 | Loss | 38 | 3.92 |
Chan 2017 | Maintain | 922 | 95.15 |
Chan 2017 | Gain | 9 | 0.93 |
Goodrich 2016 | Loss | 14 | 1.44 |
Goodrich 2016 | Maintain | 958 | 98.26 |
Goodrich 2016 | Gain | 3 | 0.31 |
Jackson 2015 | Loss | 26 | 2.66 |
Jackson 2015 | Maintain | 940 | 96.21 |
Jackson 2015 | Gain | 11 | 1.13 |
Janney 2016 | Loss | 14 | 1.44 |
Janney 2016 | Maintain | 958 | 98.26 |
Janney 2016 | Gain | 3 | 0.31 |
Kazerooni 2016 | Loss | 262 | 99.62 |
Kazerooni 2016 | Maintain | 0 | 0.00 |
Kazerooni 2016 | Gain | 1 | 0.38 |
Littman 2012 | Loss | 32 | 3.30 |
Littman 2012 | Maintain | 930 | 95.98 |
Littman 2012 | Gain | 7 | 0.72 |
Maciejewski 2016 | Loss | 38 | 3.83 |
Maciejewski 2016 | Maintain | 948 | 95.47 |
Maciejewski 2016 | Gain | 7 | 0.70 |
Maguen 2013 | Loss | 14 | 1.43 |
Maguen 2013 | Maintain | 902 | 92.13 |
Maguen 2013 | Gain | 63 | 6.44 |
Noel 2012 | Loss | 46 | 4.60 |
Noel 2012 | Maintain | 919 | 91.90 |
Noel 2012 | Gain | 35 | 3.50 |
Raw Weights | Loss | 33 | 3.32 |
Raw Weights | Maintain | 951 | 95.77 |
Raw Weights | Gain | 9 | 0.91 |
Rosenberger 2011 | Loss | 21 | 3.20 |
Rosenberger 2011 | Maintain | 634 | 96.50 |
Rosenberger 2011 | Gain | 2 | 0.30 |
# mean posterior probabilities per class: second element of each postprob() result
meanPP <- vector("list", length(pprobs))
for (i in seq_along(pprobs)) {
  meanPP[[i]] <- reshape2::melt(pprobs[[i]][[2]])
  meanPP[[i]]$Algorithm <- names(pprobs)[[i]]
}
meanPP %>%
bind_rows() %>%
mutate(
orig_class = case_when(
Var1 == "class1" ~ "Class 1",
Var1 == "class2" ~ "Class 2",
Var1 == "class3" ~ "Class 3"
)
) %>%
left_join(traj_edies, by = c("Algorithm", "orig_class")) %>%
select(Algorithm, concept_class, value, Var2) %>%
rename(cProb = Var2) %>%
reshape2::dcast(Algorithm + cProb ~ concept_class, value.var = "value") %>%
scrollTable()
Algorithm | cProb | Loss | Maintain | Gain |
---|---|---|---|---|
Breland 2017 | prob1 | 0.88 | 0.02 | 0.00 |
Breland 2017 | prob2 | 0.00 | 0.00 | 0.97 |
Breland 2017 | prob3 | 0.12 | 0.98 | 0.03 |
Buta 2018 | prob1 | 0.00 | 0.00 | 0.97 |
Buta 2018 | prob2 | 0.90 | 0.02 | 0.00 |
Buta 2018 | prob3 | 0.10 | 0.98 | 0.03 |
Chan 2017 | prob1 | 0.00 | 0.00 | 0.96 |
Chan 2017 | prob2 | 0.86 | 0.02 | 0.00 |
Chan 2017 | prob3 | 0.14 | 0.98 | 0.04 |
Goodrich 2016 | prob1 | 0.00 | 0.00 | 1.00 |
Goodrich 2016 | prob2 | 0.12 | 0.99 | 0.00 |
Goodrich 2016 | prob3 | 0.88 | 0.01 | 0.00 |
Jackson 2015 | prob1 | 0.14 | 0.97 | 0.05 |
Jackson 2015 | prob2 | 0.00 | 0.01 | 0.95 |
Jackson 2015 | prob3 | 0.86 | 0.02 | 0.00 |
Janney 2016 | prob1 | 0.14 | 0.99 | 0.00 |
Janney 2016 | prob2 | 0.00 | 0.00 | 1.00 |
Janney 2016 | prob3 | 0.86 | 0.01 | 0.00 |
Kazerooni 2016 | prob1 | 0.00 | NaN | 0.00 |
Kazerooni 2016 | prob2 | 1.00 | NaN | 0.00 |
Kazerooni 2016 | prob3 | 0.00 | NaN | 1.00 |
Littman 2012 | prob1 | 0.00 | 0.00 | 0.99 |
Littman 2012 | prob2 | 0.84 | 0.02 | 0.00 |
Littman 2012 | prob3 | 0.16 | 0.98 | 0.01 |
Maciejewski 2016 | prob1 | 0.00 | 0.00 | 0.98 |
Maciejewski 2016 | prob2 | 0.86 | 0.02 | 0.00 |
Maciejewski 2016 | prob3 | 0.14 | 0.98 | 0.02 |
Maguen 2013 | prob1 | 0.02 | 0.04 | 0.78 |
Maguen 2013 | prob2 | 0.12 | 0.96 | 0.22 |
Maguen 2013 | prob3 | 0.87 | 0.00 | 0.00 |
Noel 2012 | prob1 | 0.18 | 0.95 | 0.18 |
Noel 2012 | prob2 | 0.00 | 0.02 | 0.82 |
Noel 2012 | prob3 | 0.82 | 0.02 | 0.00 |
Raw Weights | prob1 | 0.00 | 0.00 | 0.96 |
Raw Weights | prob2 | 0.91 | 0.02 | 0.00 |
Raw Weights | prob3 | 0.09 | 0.98 | 0.04 |
Rosenberger 2011 | prob1 | 0.00 | 0.00 | 1.00 |
Rosenberger 2011 | prob2 | 0.20 | 0.99 | 0.00 |
Rosenberger 2011 | prob3 | 0.80 | 0.01 | 0.00 |