Chapter 5 Phenotypic analysis

5.1 Phenotypic data analysis

In this section, simulated phenotypes are analyzed by fitting a linear mixed model and calculating Best Linear Unbiased Predictors (BLUPs) for each trait using the lme4 R package (Bates et al. 2021).

More information about lmer can be found here. Also, a brief introduction to linear mixed models can be found here.

Additional packages used in this section are data.table (Dowle and Srinivasan 2021), tidyverse (Wickham 2021), cowplot (Wilke 2020).

Load packages

library(lme4)
library(data.table)
library(tidyverse)

Set working directory to where simulated phenotypes file is in workshop materials

setwd("~/PGRP_mapping_workshop/data/genotypic_and_phenotypic_data/")

Read in the simulated data

my.dat = fread("Seed_Color_Simulated.txt", data.table = FALSE)

Check the structure of the data and change Genotype and rep to factors

str(my.dat)
my.dat$Genotype = as.factor(my.dat$Genotype)
my.dat$rep = as.factor(my.dat$rep)
str(my.dat)

Fit mixed model for low heritability trait and calculate BLUPs

## Fit the model
low.mod = lmer(Color_low ~ (1|Genotype) + (1|rep), data = my.dat) # Genotype and rep fit as random effects
## Summary of model 
low.sum = summary(low.mod)
## Repeatability of trait (ratio of genotypic variance to total variance)
low.repeatability = 100*(data.frame(low.sum$varcor)$sdcor[1]^2)/((data.frame(low.sum$varcor)$sdcor[1]^2)+((data.frame(low.sum$varcor)$sdcor[3]^2)/3))
## BLUPs
low.blup = coef(low.mod)$'Genotype'
low.blup$Genotype = rownames(low.blup)
colnames(low.blup) = c("Color_low_h2","Genotype")

Fit mixed model for high heritability trait and calculate BLUPs

high.mod = lmer(Color_high ~ (1|Genotype) + (1|rep), data = my.dat)
high.sum = summary(high.mod)
high.repeatability = 100*(data.frame(high.sum$varcor)$sdcor[1]^2)/((data.frame(high.sum$varcor)$sdcor[1]^2)+((data.frame(high.sum$varcor)$sdcor[3]^2)/3))
high.blup = coef(high.mod)$'Genotype'
high.blup$Genotype = rownames(high.blup)
colnames(high.blup) = c("Color_high_h2","Genotype")

Save BLUPs in a combined text file

high_low = left_join(low.blup,high.blup)
high_low = high_low[,c(2,1,3)]
fwrite(high_low,"BLUPs_Seed_Color_Simulated.txt",sep="\t", quote = FALSE,col.names = TRUE)

Make Density histograms

## Density histogram of low heritability trait
p1 = ggplot(high_low, aes(x=Color_high_h2)) +
  geom_histogram(aes(y=..density..),position="identity", alpha=0.5, colour = "white")+
  geom_density(alpha=0, fill="white")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)),linetype="dashed",colour = "#FF3721")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)-sd(Color_high_h2,na.rm = TRUE)),linetype="dashed")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)+sd(Color_high_h2,na.rm = TRUE)),linetype="dashed")+
  theme_classic(base_size = 18)+
  ylab("Density")+
  xlab("Seed redness (nm) High h2 BLUPs")+
  theme(plot.title = element_text(hjust = 0.5))
p1

## Density histogram of high heritability trait
p2 = ggplot(high_low, aes(x=Color_low_h2)) +
  geom_histogram(aes(y=..density..),position="identity", alpha=0.5, colour = "white")+
  geom_density(alpha=0, fill="white")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)),linetype="dashed",colour = "#FF3721")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)-sd(Color_low_h2,na.rm = TRUE)),linetype="dashed")+
  geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)+sd(Color_low_h2,na.rm = TRUE)),linetype="dashed")+
  theme_classic(base_size = 18)+
  ylab("Density")+
  xlab("Seed redness (nm) Low h2 BLUPs")+
  theme(plot.title = element_text(hjust = 0.5))
p2

## Combine the plots
library(cowplot)
combined_plot = plot_grid(p1,p2, ncol = 2, labels = c("A","B"),label_size=18,
                          label_fontfamily = 'serif',label_fontface = 'bold')

## Save plot in figures directory
setwd("~/PGRP_mapping_workshop/figures/phenotypes/")
pdf("WIDIV_942_simulated_pheno_distributions.pdf", width = 10, height = 4)
combined_plot
dev.off()

Note the greater variance among lines in (A) when simulating a high heritability trait compared to (B) with a low heritability. The red dash line is the mean +/- one standard deviation in black dashed lines.

References

Bates, Douglas, Martin Maechler, Ben Bolker, and Steven Walker. 2021. Lme4: Linear Mixed-Effects Models Using Eigen and S4. https://github.com/lme4/lme4/.
Dowle, Matt, and Arun Srinivasan. 2021. Data.table: Extension of ‘Data.frame‘. https://CRAN.R-project.org/package=data.table.
Wickham, Hadley. 2021. Tidyverse: Easily Install and Load the Tidyverse. https://CRAN.R-project.org/package=tidyverse.
Wilke, Claus O. 2020. Cowplot: Streamlined Plot Theme and Plot Annotations for Ggplot2. https://wilkelab.org/cowplot/.