Chapter 5 Extending Further with ggplot2

While the built in plot() function make it easy to quickly visualize the derived VPC, the tidyvpcobj can be plotted using ggplot2 for complete plot customization.

5.1 Plot VPC

obs_data$LLOQ <- ifelse(obs_data$STUDY == "Study A", 50, 25)

vpc <- observed(obs_data, x = TIME, y = DV) %>%
  simulated(sim_data, y = DV) %>%
  censoring(blq = DV < LLOQ, lloq = LLOQ) %>%
  stratify(~STUDY) %>%
  binning(bin = NTIME) %>%
  vpcstats(qpred = c(0.1, 0.5, 0.9))

ggplot(vpc$stats, aes(x = xbin)) + 
  facet_grid(~STUDY, scales = "free", as.table = FALSE) + 
  geom_ribbon(aes(ymin = lo, ymax = hi, fill = qname, col = qname, group = qname),alpha = 0.1, col = NA) + 
  geom_line(aes(y = md, col = qname, group = qname)) +
  geom_line(aes(y = y, linetype = qname), size = 1) + 
  geom_hline(data=unique(obs_data[, .(STUDY, LLOQ)]), aes(yintercept=LLOQ), linetype="dotted", size=1) +
  geom_text(data = unique(vpc$data[, .(LLOQ), by = "STUDY"]), 
            aes(x = 10, y = LLOQ, label = paste("LLOQ", LLOQ, sep = "="), ), vjust = 1, hjust = 1) +
  scale_colour_manual(name = "Simulated Percentiles\nMedian (lines) 95% CI (areas)",
                    breaks = c("q0.1", "q0.5", "q0.9"), 
                    values = c("red", "blue", "red"), 
                    labels = c("10%", "50%", "90%")) + 
  scale_fill_manual(name = "Simulated Percentiles\nMedian (lines) 95% CI (areas)", 
                    breaks = c("q0.1", "q0.5", "q0.9"), 
                    values = c("red", "blue", "red"), 
                    labels = c("10%", "50%", "90%")) + 
  scale_linetype_manual(name = "Observed Percentiles\nMedian (lines) 95% CI (areas)", 
                    breaks = c("q0.1", "q0.5", "q0.9"), 
                    values = c("dotted", "solid", "dashed"), 
                    labels = c("10%", "50%", "90%")) + 
  guides(fill = guide_legend(order = 2), colour = guide_legend(order = 2), linetype = guide_legend(order = 1)) + 
  theme(legend.position = "top", legend.key.width = grid::unit(1, "cm")) + 
  labs(x = "TIME", y = "Concentration") + 
  geom_point(data = vpc$obs, aes(x = x, y = y), size = 1, alpha = 0.1, show.legend = FALSE) + 
  geom_vline(data = bininfo(vpc)[, .(x = sort(unique(c(xleft, xright)))), by = names(vpc$strat)],aes(xintercept = x), size = rel(0.5), col = "gray80") + 
  theme(panel.grid = element_blank()) + 
  geom_rug(data = bininfo(vpc)[, .(x = sort(unique(c(xleft, xright)))), by = names(vpc$strat)],aes(x = x), sides = "t", size = 1)

5.2 Plot Rectangles

The results from bininfo() make it easy to plot a rectangle VPC using ggplot2.

##             bin      xbin qname      y        lo       md        hi nobs
## 1: [0.158,1.89) 0.8418133 q0.05  22.94  19.05306  22.1295  26.32546  213
## 2: [0.158,1.89) 0.8418133  q0.5  61.50  56.61835  61.7225  68.59207  213
## 3: [0.158,1.89) 0.8418133 q0.95 144.40 108.56340 132.0360 156.84105  213
## 4:  [1.89,4.83) 2.8021930 q0.05  29.38  23.01417  28.2300  32.95001  187
## 5:  [1.89,4.83) 2.8021930  q0.5  74.10  66.70315  75.9475  82.94110  187
## 6:  [1.89,4.83) 2.8021930 q0.95 179.00 135.74530 155.3395 180.72525  187
##      xmedian     xmean      xmin     xmax     xmid     xleft   xright  xcenter
## 1: 0.8418133 0.8717959 0.1575342 1.860852 1.009193 0.1575342 1.872980 1.015257
## 2: 0.8418133 0.8717959 0.1575342 1.860852 1.009193 0.1575342 1.872980 1.015257
## 3: 0.8418133 0.8717959 0.1575342 1.860852 1.009193 0.1575342 1.872980 1.015257
## 4: 2.8021930 2.9458444 1.8851078 4.772347 3.328727 1.8729798 4.800346 3.336663
## 5: 2.8021930 2.9458444 1.8851078 4.772347 3.328727 1.8729798 4.800346 3.336663
## 6: 2.8021930 2.9458444 1.8851078 4.772347 3.328727 1.8729798 4.800346 3.336663
##     ymin  ymax
## 1: 22.94 144.4
## 2: 22.94 144.4
## 3: 22.94 144.4
## 4: 29.38 179.0
## 5: 29.38 179.0
## 6: 29.38 179.0

Plot rectangles using ymin and ymax, the min/max y values in vpc$stats grouped by bin.

Alternatively, we can obtain the required data for plotting used in the above bin_information data frame by merging vpc$stats and bininfo(vpc) on bin in the ggplot2 data argument. If stratifying you will need to include the name of the stratification variable(s) in the data.table merge i.e. vpc$stats[bininfo(vpc), on=c("STUDY", "bin")]. In the rectangle vpc below, we will stratify on STUDY and plot rectangles for each quantile.

obs_data$LLOQ <- obs_data[, ifelse(STUDY == "Study A", 50, 25)]

vpc <- observed(obs_data, x = TIME, y = DV) %>%
  simulated(sim_data, y = DV) %>%
  censoring(blq = DV < LLOQ, lloq = LLOQ) %>%
  stratify(~STUDY) %>%
  binning(bin = NTIME) %>%
  vpcstats(qpred = c(0.1, 0.5, 0.9))


ggplot(vpc$stats[bininfo(vpc), on=c("STUDY", "bin")], aes(x = xbin)) + 
  facet_grid(~STUDY, scales = "free", as.table = FALSE) + 
  geom_rect(aes(xmin = xleft, xmax = xright, ymin = lo, ymax = hi, fill = qname, col = qname, group = qname),alpha = 0.1, col = NA) + 
  geom_segment(aes(x = xleft, xend = xright, y = md, yend = md, col = qname, group = qname)) +
  geom_segment(aes(x = xleft, xend = xright, y = y, yend = y, linetype = qname), size = 1) +
  geom_line(aes(y = md, col = qname, group = qname)) +
  geom_line(aes(y = y, linetype = qname), size = 1) + 
  geom_hline(data=unique(obs_data[, list(STUDY, LLOQ)]), aes(yintercept=LLOQ), linetype="dotted", size=1) +
  geom_text(data = unique(vpc$data[, list(LLOQ), by = "STUDY"]), 
            aes(x = 10, y = LLOQ, label = paste("LLOQ", LLOQ, sep = "="), ), vjust = 1, hjust = 1) +
  scale_colour_manual(name = "Simulated Percentiles\nMedian (lines) 95% CI (areas)",
                      breaks = c("q0.1", "q0.5", "q0.9"), 
                      values = c("red", "blue", "red"), 
                      labels = c("10%", "50%", "90%")) + 
  scale_fill_manual(name = "Simulated Percentiles\nMedian (lines) 95% CI (areas)", 
                    breaks = c("q0.1", "q0.5", "q0.9"), 
                    values = c("red", "blue", "red"), 
                    labels = c("10%", "50%", "90%")) + 
  scale_linetype_manual(name = "Observed Percentiles\nMedian (lines) 95% CI (areas)", 
                        breaks = c("q0.1", "q0.5", "q0.9"), 
                        values = c("dotted", "solid", "dashed"), 
                        labels = c("10%", "50%", "90%")) + 
  guides(fill = guide_legend(order = 2), colour = guide_legend(order = 2), linetype = guide_legend(order = 1)) + 
  theme(legend.position = "top", legend.key.width = grid::unit(1, "cm")) + 
  labs(x = "TIME", y = "Concentration") + 
  geom_point(data = vpc$obs, aes(x = x, y = y), size = 1, alpha = 0.1, show.legend = FALSE) + 
  geom_vline(data = bininfo(vpc)[, list(x = sort(unique(c(xleft, xright)))), by = names(vpc$strat)],aes(xintercept = x), size = rel(0.5), col = "gray80") + 
  theme(panel.grid = element_blank()) + 
  geom_rug(data = bininfo(vpc)[, list(x = sort(unique(c(xleft, xright)))), by = names(vpc$strat)],aes(x = x), sides = "t", size = 1)

5.3 Plot Below Quantification Limit (BQL)

If using the censoring() function, the resulting tidyvpcobj will also contain a pctblq table. Use ggplot2 to plot the percentage of data below the limit of quantification across bins.

We can include geom_ribbon() using the lo and hi columns in the vpc$pctblq table to denote the lower/upper bounds of our confidence interval. Let’s also plot the median %blq of the simulated data using the md column in the vpc$pctblq table.