5 Results
5.1 Exploratory Data Analysis
5.1.1 Descriptive Statistics
pheno_qc %>%
group_by(Subgroup_Number, Subgroup) %>%
summarise(N = n(),
Mean = round(mean(Age), 1),
Median = median(Age),
SD = round(sd(Age), 2),
Minimum = min(Age),
Maximum = max(Age)) %>%
rename(`Subgroup` = Subgroup_Number,
`Genetic lines` = Subgroup) %>%
kable(.,
row.names = FALSE,
align = 'c',
caption = "Descriptive statistics AGE by subgroup after genotype quality control.")
Subgroup | Genetic lines | N | Mean | Median | SD | Minimum | Maximum |
---|---|---|---|---|---|---|---|
1 | Crossbred | 8447 | 142.8 | 142 | 17.20 | 110 | 164 |
2 | Duroc | 16633 | 144.9 | 144 | 20.47 | 55 | 171 |
3 | Landrace | 18834 | 143.2 | 145 | 19.50 | 55 | 171 |
4 | Yorkshire | 17764 | 143.3 | 145 | 19.36 | 60 | 171 |
5 | Crossbred and Duroc | 25091 | 144.2 | 144 | 19.46 | 55 | 171 |
6 | Crossbred and Landrace | 27281 | 143.1 | 144 | 18.82 | 55 | 171 |
7 | Crossbred and Yorkshire | 26221 | 143.1 | 144 | 18.70 | 60 | 171 |
8 | Duroc and Landrace | 35480 | 144.0 | 145 | 19.98 | 55 | 171 |
9 | Duroc and Yorkshire | 34417 | 144.0 | 144 | 19.93 | 55 | 171 |
10 | Landrace and Yorkshire | 36609 | 143.2 | 145 | 19.43 | 55 | 171 |
11 | Duroc, Landrace, and Yorkshire | 53256 | 143.7 | 145 | 19.78 | 55 | 171 |
12 | Crossbred, Duroc, Landrace, and Yorkshire | 61702 | 143.6 | 144 | 19.45 | 55 | 171 |
5.1.2 Raw Distributions
5.1.2.1 One Breed
ggplot(filter(pheno_qc, Subgroup_Code %in% c("c", "d", "l", "y")), aes(x=Age)) +
geom_histogram(color="white", fill="steelblue1", position = "dodge") +
facet_wrap(~Subgroup, nrow = 2) +
scale_x_continuous(expand = c(0,3)) +
scale_y_continuous(expand = c(0,0), limits = c(0, 4000)) +
geom_hline(yintercept = 0) +
labs(y = "No. of pigs", x = "Age, months") +
theme_classic() +
theme(strip.background = element_rect(fill="grey70")) +
theme(strip.text = element_text(color = 'white', face = "bold", size = "12"),
axis.title = element_text(face = "bold"))
5.1.2.2 Two Breeds
ggplot(filter(pheno_qc, Subgroup_Code %in% c("cd", "cl", "cy", "dl", "dy", "ly")), aes(x=Age)) +
geom_histogram(color="white", fill="steelblue1", position = "dodge") +
facet_wrap(~Subgroup, nrow = 3) +
scale_x_continuous(expand = c(0,3)) +
scale_y_continuous(expand = c(0,0), limits = c(0, 4000)) +
geom_hline(yintercept = 0) +
labs(y = "No. of pigs", x = "Age, months") +
theme_classic() +
theme(strip.background = element_rect(fill="grey70")) +
theme(strip.text = element_text(color = 'white', face = "bold", size = "12"),
axis.title = element_text(face = "bold"))
5.1.2.3 Three+ Breeds
ggplot(filter(pheno_qc, Subgroup_Code %in% c("dly", "cdly")), aes(x=Age)) +
geom_histogram(color="white", fill="steelblue1", position = "dodge") +
facet_wrap(~Plot_Code, nrow = 1) +
scale_x_continuous(expand = c(0,3)) +
scale_y_continuous(expand = c(0,0), limits = c(0, 5000)) +
geom_hline(yintercept = 0) +
labs(y = "No. of pigs", x = "Age, months") +
theme_classic() +
theme(strip.background = element_rect(fill="grey70")) +
theme(strip.text = element_text(color = 'white', face = "bold", size = "12"),
axis.title = element_text(face = "bold"))
5.2 Univariate Variance Component Estimation
uvce <- read.csv("results/uvce.csv",
header = TRUE)
uvce$Breeds_N <- factor(uvce$Breeds_N,
levels = c("One Breed", "Two Breeds", "Three+ Breeds"))
uvce$Plot_Code <- factor(uvce$Plot_Code,
levels = c("C", "D", "L", "Y", "C and D", "C and L", "C and Y", "D and L", "D and Y", "L and Y", "D, L, and Y", "C, D, L, and Y"))
library(RColorBrewer)
ggplot(uvce, aes(x = Plot_Code, y = PVE, color = SNP_Group)) +
geom_point(aes(size = Pigs_n)) +
scale_y_continuous(limits = c(0,1), breaks = round(seq(0, 1, by = 0.1), 2), expand = c(0,0)) +
labs(x = "Subgroup", y = "Proportion of Variance Explained by SNPs",
color = "GRM", size = "Sample Size") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
axis.title = element_text(face = "bold"),
strip.background = element_rect(fill="grey70"),
strip.text = element_text(color = 'white', face = "bold", size = "12"),
legend.title = element_text(face = "bold")) +
facet_grid(~Breeds_N, scales = "free") +
scale_color_brewer(palette="RdBu")
5.3 Bivariate Variance Component Estimation
bvce <- read.csv("results/bvce.csv",
header = TRUE)
bvce$Plot_Code <- factor(bvce$Plot_Code,
levels = c("C", "D", "L", "Y", "C and D", "C and L", "C and Y", "D and L", "D and Y", "L and Y", "D, L, and Y", "C, D, L, and Y"))
ggplot(bvce, aes(x = Plot_Code, y = rG, color = SNP_Group, shape = Breed)) +
geom_point(size = 4) +
scale_y_continuous(limits = c(-0.2,1), breaks = round(seq(-0.2, 1, by = 0.1), 2), expand = c(0,0)) +
labs(x = "Subgroup", y = "Genetic correlation (rG)",
color = "SNP Group", shape = "GRM") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
axis.title = element_text(face = "bold"),
strip.background = element_rect(fill="grey70"),
strip.text = element_text(color = 'white', face = "bold", size = "12"),
legend.title = element_text(face = "bold")) +
scale_color_brewer(palette="RdBu") +
geom_hline(yintercept = 0, size = 1)
5.4 Genome-wide Association Analyses
5.4.1 Frequency of Detected Associations
mlma %>%
mutate(Threshold = 0.05,
Adjusted_Threshold = Threshold/SNPs_n,
p_sig = p < Adjusted_Threshold,
q_sig = q < 0.10) %>%
group_by(Subgroup_Number, Subgroup, Pigs_n, SNPs_n) %>%
summarise(N_p = sum(p_sig),
N_q = sum(q_sig)) %>%
rename(`Subgroup` = Subgroup_Number,
`Genetic lines` = Subgroup,
`Pigs, n` = Pigs_n,
`SNPs, n` = SNPs_n,
`Signifigant SNPs (Bonferroni), n` = N_p,
`Signifigant SNPs (Q-value), n` = N_q) %>%
kable(.,
row.names = FALSE,
align = 'c',
caption = "Number of statistically significant SNPs per subgroup based on Bonferroni (alpha/N, SNPs) and Q-value (Q < 0.10) thresholds.")
Subgroup | Genetic lines | Pigs, n | SNPs, n | Signifigant SNPs (Bonferroni), n | Signifigant SNPs (Q-value), n |
---|---|---|---|---|---|
1 | Crossbred | 8447 | 46529 | 23 | 49 |
2 | Duroc | 16633 | 38377 | 41 | 98 |
2 | Simulated Duroc | 16633 | 38376 | 0 | 0 |
3 | Landrace | 18834 | 45316 | 56 | 134 |
3 | Simulated Landrace | 18834 | 45324 | 0 | 0 |
4 | Simulated Yorkshire | 17764 | 45198 | 0 | 0 |
4 | Yorkshire | 17764 | 45196 | 51 | 121 |
5 | Crossbred and Duroc | 25091 | 46348 | 77 | 147 |
6 | Crossbred and Landrace | 27281 | 46431 | 72 | 178 |
7 | Crossbred and Yorkshire | 26221 | 46426 | 72 | 169 |
8 | Duroc and Landrace | 35480 | 46064 | 177 | 486 |
9 | Duroc and Yorkshire | 34417 | 46143 | 215 | 554 |
10 | Landrace and Yorkshire | 36609 | 46236 | 76 | 250 |
11 | Duroc, Landrace, and Yorkshire | 53256 | 46421 | 250 | 865 |
12 | Crossbred, Duroc, Landrace, and Yorkshire | 61702 | 46453 | 297 | 1202 |
5.4.2 Allele Substitution Effects
top_betas <- mlma %>%
filter(Subgroup_Code %in% c("c", "d", "l", "y")) %>%
filter(q < 0.10) %>%
group_by(Plot_Code) %>%
slice_max(order_by = b, n = 10) %>%
arrange(-b)
top_betas %>%
mutate(SNP = reorder(SNP, b)) %>%
group_by(Subgroup, SNP) %>%
arrange(b) %>%
ungroup() %>%
mutate(SNP = factor(paste(SNP, Subgroup, sep = "__"),
levels = rev(paste(SNP, Subgroup, sep = "__")))) %>%
ggplot(., aes(x = SNP, y = b)) +
geom_errorbar(aes(ymin = b - se*1.96, ymax = b + se*1.96), color = "black", width = .5) +
geom_point(aes(color = Subgroup), size = 2) +
facet_wrap(~Subgroup, scales = "free_x") +
scale_x_discrete(labels = function(x) gsub("__.+$", "", x)) +
scale_y_continuous(limits = c(0,15)) +
scale_color_brewer(palette = "RdBu") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
axis.title = element_text(face = "bold"),
strip.background = element_rect(fill="grey70"),
strip.text = element_text(color = 'white', face = "bold", size = "12"),
legend.position = "none") +
geom_hline(yintercept = 0, size = 1) +
labs(y = "Allele Substitution Effect (beta)")
top_betas <- mlma %>%
filter(Subgroup_Code %in% c("cd", "cl", "cy", "dl", "dy", "ly")) %>%
filter(q < 0.10) %>%
group_by(Plot_Code) %>%
slice_max(order_by = b, n = 10) %>%
arrange(-b)
top_betas %>%
mutate(SNP = reorder(SNP, b)) %>%
group_by(Plot_Code, SNP) %>%
arrange(b) %>%
ungroup() %>%
mutate(SNP = factor(paste(SNP, Plot_Code, sep = "__"),
levels = rev(paste(SNP, Plot_Code, sep = "__")))) %>%
ggplot(., aes(x = SNP, y = b)) +
geom_errorbar(aes(ymin = b - se*1.96, ymax = b + se*1.96), color = "black", width = .5) +
geom_point(aes(color = Plot_Code), size = 2) +
facet_wrap(~Plot_Code, scales = "free_x") +
scale_x_discrete(labels = function(x) gsub("__.+$", "", x)) +
scale_y_continuous(limits = c(0,13)) +
scale_color_brewer(palette = "RdBu") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
axis.title = element_text(face = "bold"),
strip.background = element_rect(fill="grey70"),
strip.text = element_text(color = 'white', face = "bold", size = "12"),
legend.position = "none") +
geom_hline(yintercept = 0, size = 1) +
labs(y = "Allele Substitution Effect (beta)")
top_betas <- mlma %>%
filter(Subgroup_Code %in% c("dly", "cdly")) %>%
filter(q < 0.10) %>%
group_by(Plot_Code) %>%
slice_max(order_by = b, n = 10) %>%
arrange(-b)
top_betas$Plot_Code <- factor(top_betas$Plot_Code,
levels = c("D, L, and Y", "C, D, L, and Y"))
top_betas %>%
mutate(SNP = reorder(SNP, b)) %>%
group_by(Plot_Code, SNP) %>%
arrange(b) %>%
ungroup() %>%
mutate(SNP = factor(paste(SNP, Plot_Code, sep = "__"),
levels = rev(paste(SNP, Plot_Code, sep = "__")))) %>%
ggplot(., aes(x = SNP, y = b)) +
geom_errorbar(aes(ymin = b - se*1.96, ymax = b + se*1.96), color = "black", width = .5, size = .7) +
geom_point(aes(color = Plot_Code), size = 2.5) +
facet_wrap(~Plot_Code, scales = "free_x") +
scale_x_discrete(labels = function(x) gsub("__.+$", "", x)) +
scale_y_continuous(limits = c(0,5)) +
scale_color_manual(values = c("firebrick3", "royalblue4")) +
theme_light() +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
axis.title = element_text(face = "bold"),
strip.background = element_rect(fill="grey70"),
strip.text = element_text(color = 'white', face = "bold", size = "12"),
legend.position = "none") +
geom_hline(yintercept = 0, size = 1) +
labs(y = "Allele Substitution Effect (beta)")
5.5 QTL Enrichment
load("QTL_Enrich_Results.RData")
QTLenrich_plot(filter(qtl.enrich, Line == "All") %>%
top_n(15, -adj.pval), x = "QTL", pval = "adj.pval")
load("QTL_Enrich_Results.RData")
QTLenrich_plot(filter(qtl.enrich, Line == "1006") %>%
top_n(15, -adj.pval), x = "QTL", pval = "adj.pval")
load("QTL_Enrich_Results.RData")
QTLenrich_plot(filter(qtl.enrich, Line == "10") %>%
top_n(15, -adj.pval), x = "QTL", pval = "adj.pval")
load("QTL_Enrich_Results.RData")
QTLenrich_plot(filter(qtl.enrich, Line == "11") %>%
top_n(15, -adj.pval), x = "QTL", pval = "adj.pval")
5.6 Gene Enrichment
load("GeneAnnotation.RData")
gene_list <- gene.annotation %>%
filter(Breed == "All") %>%
select(gene_id)
gene_list <- gene_list$gene_id
gostres <- gost(query = gene_list,
organism = "sscrofa", ordered_query = FALSE,
multi_query = FALSE, significant = TRUE, exclude_iea = FALSE,
measure_underrepresentation = FALSE, evcodes = FALSE,
user_threshold = 0.05, correction_method = "fdr",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = NULL, as_short_link = FALSE)
gostplot(gostres, capped = FALSE, interactive = FALSE)
publish_gosttable(gostres, highlight_terms = gostres$result,
use_colors = TRUE,
show_columns = c("source", "term_name", "term_size", "intersection_size"),
filename = NULL)