Visualise hit guides
A volcano plot is a scatter plot that visualizes the differential abundance of guides. The fold change is typically displayed on the x-axis, while the y-axis represents the corresponding p-values. The significantly differentially abundant genes are the ones found in the upper-left and upper-right corners. Below we visualise the hits for ToxA vs Control and ToxB vs Control samples, respectively.
Volcano plot (ToxA vs Control)
# remove rows that contain NA values
de <- lrt2.toxa[complete.cases(lrt2.toxa), ]
colnames(de)[colnames(de) == "logFC"] <- "log2FoldChange"
colnames(de)[colnames(de) == "PValue"] <- "pvalue"
colnames(de)[colnames(de) == "Symbol"] <- "gene_symbol"
colnames(de)[colnames(de) == "FDR"] <- "adjusted_pvalue"
de$diffabundant <- "Pass p-value cuttoff"
de$diffabundant[de$adjusted_pvalue > 0.05] <- "Not Significant"
de$diffabundant[de$log2FoldChange < -5 & de$log2FoldChange >= 5 & de$adjusted_pvalue <
0.05] <- "Pass p-value & Log2FC cuttoff"
de$diffabundant[de$log2FoldChange >= 5 & de$adjusted_pvalue <= 0.05] <- "Up"
de$diffabundant[de$log2FoldChange < -5 & de$adjusted_pvalue <= 0.05] <- "Down"
head(de)
gene_symbol UID seq log2FoldChange
HGLibB_38324 PRDM13 HGLibB_38324 GCAAGTACCTGTCAGACCGC 11.209176
HGLibA_32966 NUDT2 HGLibA_32966 ATGAGCACCAAGCCTACCGC 11.594388
HGLibB_42762 SBSPON HGLibB_42762 GACAAGCTACGTCTCCACAC 12.255728
HGLibB_44908 SLC35B2 HGLibB_44908 TCCGCCTGAAGTACTGCACC 10.286574
HGLibB_45454 SLIT2 HGLibB_45454 TCTCTAGTTCTTTAAGATCC 11.404868
HGLibB_23505 IQGAP3 HGLibB_23505 CACTCACAGGCTGCCCAGCC 9.781491
logCPM LR pvalue adjusted_pvalue diffabundant
HGLibB_38324 10.829302 89.05910 3.831913e-21 2.143074e-16 Up
HGLibA_32966 8.780164 73.01061 1.289546e-17 3.195504e-13 Up
HGLibB_42762 9.530267 72.44893 1.714112e-17 3.195504e-13 Up
HGLibB_44908 11.311403 69.70313 6.893661e-17 9.638544e-13 Up
HGLibB_45454 8.763229 68.20288 1.475103e-16 1.649962e-12 Up
HGLibB_23505 9.434533 66.97834 2.745064e-16 2.491273e-12 Up
g_down <- which(de$log2FoldChange > 5 & de$adjusted_pvalue <= 0.05)
g_up <- which(de$log2FoldChange < -5 & de$adjusted_pvalue <= 0.05)
de$delabel <- NA
de$delabel[c(g_down[1:20], g_up[1:20])] <- de$gene_symbol[c(g_down[1:20], g_up[1:20])]
ggplot(data = de, aes(x = log2FoldChange, y = -log10(adjusted_pvalue), col = diffabundant,
label = delabel)) + geom_point() + theme_minimal() + geom_text_repel(max.overlaps = Inf) +
scale_color_manual(values = c("blue", "black", "gray", "red")) + geom_vline(xintercept = c(-5,
5), col = "darkred") + geom_hline(yintercept = -log10(0.05), col = "darkred")
Volcano plot (ToxB vs Control)
# remove rows that contain NA values
de <- lrt2.toxb[complete.cases(lrt2.toxb), ]
colnames(de)[colnames(de) == "logFC"] <- "log2FoldChange"
colnames(de)[colnames(de) == "PValue"] <- "pvalue"
colnames(de)[colnames(de) == "Symbol"] <- "gene_symbol"
colnames(de)[colnames(de) == "FDR"] <- "adjusted_pvalue"
de$diffabundant <- "Pass p-value cuttoff"
de$diffabundant[de$adjusted_pvalue > 0.05] <- "Not Significant"
de$diffabundant[de$log2FoldChange < -5 & de$log2FoldChange >= 5 & de$adjusted_pvalue <
0.05] <- "Pass p-value & Log2FC cuttoff"
de$diffabundant[de$log2FoldChange >= 5 & de$adjusted_pvalue <= 0.05] <- "Up"
de$diffabundant[de$log2FoldChange < -5 & de$adjusted_pvalue <= 0.05] <- "Down"
head(de)
gene_symbol UID seq log2FoldChange
HGLibA_02011 ANKRD16 HGLibA_02011 CGACGTCGATGTGCCCACAC 10.154406
HGLibA_27301 LRRC29 HGLibA_27301 CTGTGTCCAGTCCCGCTTCG 7.300759
HGLibB_00250 ABHD14B HGLibB_00250 TTAAGTACCTGGCAGGTCAA 7.407535
HGLibA_42214 RPUSD4 HGLibA_42214 GACCCGAGGAATTCTCCACC 10.519154
HGLibA_39135 PSMC3IP HGLibA_39135 AATCGTGGCCCTCACTGCTA 9.567023
HGLibB_42757 SBNO2 HGLibB_42757 AGGGTATGGCGGCAGCGAGA 9.895113
logCPM LR pvalue adjusted_pvalue diffabundant
HGLibA_02011 11.875792 94.27706 2.743108e-22 1.534138e-17 Up
HGLibA_27301 11.784015 81.55213 1.706991e-19 4.773343e-15 Up
HGLibB_00250 12.149716 78.52597 7.895333e-19 1.471874e-14 Up
HGLibA_42214 10.140291 75.48743 3.677413e-18 5.141667e-14 Up
HGLibA_39135 11.552013 68.32588 1.385901e-16 1.550186e-12 Up
HGLibB_42757 9.521769 67.73967 1.865714e-16 1.739064e-12 Up
g_down <- which(de$log2FoldChange > 5 & de$adjusted_pvalue <= 0.05)
g_up <- which(de$log2FoldChange < -5 & de$adjusted_pvalue <= 0.05)
de$delabel <- NA
de$delabel[c(g_down[1:20], g_up[1:20])] <- de$gene_symbol[c(g_down[1:20], g_up[1:20])]
ggplot(data = de, aes(x = log2FoldChange, y = -log10(adjusted_pvalue), col = diffabundant,
label = delabel)) + geom_point() + theme_minimal() + geom_text_repel(max.overlaps = Inf) +
scale_color_manual(values = c("blue", "black", "gray", "red")) + geom_vline(xintercept = c(-5,
5), col = "darkred") + geom_hline(yintercept = -log10(0.05), col = "darkred")