Chapter 5 Phenotypic analysis
5.1 Phenotypic data analysis
In this section, simulated phenotypes are analyzed by fitting a linear mixed model and calculating Best Linear Unbiased Predictors (BLUPs) for each trait using the lme4 R package (Bates et al. 2021).
More information about lmer can be found here. Also, a brief introduction to linear mixed models can be found here.
Additional packages used in this section are data.table (Dowle and Srinivasan 2021), tidyverse (Wickham 2021), cowplot (Wilke 2020).
Load packages
library(lme4)
library(data.table)
library(tidyverse)
Set working directory to where simulated phenotypes file is in workshop materials
setwd("~/PGRP_mapping_workshop/data/genotypic_and_phenotypic_data/")
Read in the simulated data
= fread("Seed_Color_Simulated.txt", data.table = FALSE) my.dat
Check the structure of the data and change Genotype and rep to factors
str(my.dat)
$Genotype = as.factor(my.dat$Genotype)
my.dat$rep = as.factor(my.dat$rep)
my.datstr(my.dat)
Fit mixed model for low heritability trait and calculate BLUPs
## Fit the model
= lmer(Color_low ~ (1|Genotype) + (1|rep), data = my.dat) # Genotype and rep fit as random effects
low.mod ## Summary of model
= summary(low.mod)
low.sum ## Repeatability of trait (ratio of genotypic variance to total variance)
= 100*(data.frame(low.sum$varcor)$sdcor[1]^2)/((data.frame(low.sum$varcor)$sdcor[1]^2)+((data.frame(low.sum$varcor)$sdcor[3]^2)/3))
low.repeatability ## BLUPs
= coef(low.mod)$'Genotype'
low.blup $Genotype = rownames(low.blup)
low.blupcolnames(low.blup) = c("Color_low_h2","Genotype")
Fit mixed model for high heritability trait and calculate BLUPs
= lmer(Color_high ~ (1|Genotype) + (1|rep), data = my.dat)
high.mod = summary(high.mod)
high.sum = 100*(data.frame(high.sum$varcor)$sdcor[1]^2)/((data.frame(high.sum$varcor)$sdcor[1]^2)+((data.frame(high.sum$varcor)$sdcor[3]^2)/3))
high.repeatability = coef(high.mod)$'Genotype'
high.blup $Genotype = rownames(high.blup)
high.blupcolnames(high.blup) = c("Color_high_h2","Genotype")
Save BLUPs in a combined text file
= left_join(low.blup,high.blup)
high_low = high_low[,c(2,1,3)]
high_low fwrite(high_low,"BLUPs_Seed_Color_Simulated.txt",sep="\t", quote = FALSE,col.names = TRUE)
Make Density histograms
## Density histogram of low heritability trait
= ggplot(high_low, aes(x=Color_high_h2)) +
p1 geom_histogram(aes(y=..density..),position="identity", alpha=0.5, colour = "white")+
geom_density(alpha=0, fill="white")+
geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)),linetype="dashed",colour = "#FF3721")+
geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)-sd(Color_high_h2,na.rm = TRUE)),linetype="dashed")+
geom_vline(data=high_low, aes(xintercept=mean(Color_high_h2,na.rm = TRUE)+sd(Color_high_h2,na.rm = TRUE)),linetype="dashed")+
theme_classic(base_size = 18)+
ylab("Density")+
xlab("Seed redness (nm) High h2 BLUPs")+
theme(plot.title = element_text(hjust = 0.5))
p1
## Density histogram of high heritability trait
= ggplot(high_low, aes(x=Color_low_h2)) +
p2 geom_histogram(aes(y=..density..),position="identity", alpha=0.5, colour = "white")+
geom_density(alpha=0, fill="white")+
geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)),linetype="dashed",colour = "#FF3721")+
geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)-sd(Color_low_h2,na.rm = TRUE)),linetype="dashed")+
geom_vline(data=high_low, aes(xintercept=mean(Color_low_h2,na.rm = TRUE)+sd(Color_low_h2,na.rm = TRUE)),linetype="dashed")+
theme_classic(base_size = 18)+
ylab("Density")+
xlab("Seed redness (nm) Low h2 BLUPs")+
theme(plot.title = element_text(hjust = 0.5))
p2
## Combine the plots
library(cowplot)
= plot_grid(p1,p2, ncol = 2, labels = c("A","B"),label_size=18,
combined_plot label_fontfamily = 'serif',label_fontface = 'bold')
## Save plot in figures directory
setwd("~/PGRP_mapping_workshop/figures/phenotypes/")
pdf("WIDIV_942_simulated_pheno_distributions.pdf", width = 10, height = 4)
combined_plotdev.off()
Note the greater variance among lines in (A) when simulating a high heritability trait compared to (B) with a low heritability. The red dash line is the mean +/- one standard deviation in black dashed lines.