Chapter 4 Seurat QC Cell-level Filtering
library(Seurat)
library(data.table)
library(tidyverse)
library(magrittr)
library(gridExtra)

4.1 Description
Basic quality control for snRNA-seq: check the distribution of
number of UMIs per cell
- should be above 500
number of genes detected per cell
number of genes detected per UMI
- checks the complexity: outlier cells may be cells with less complex RNA species, such as red blood cells. Expected to be higher than 0.8
mitochondrial ratio
- dead or dying cells cause a large amount of mitochondrial contamination
4.2 Load seurat object
# Load the saved workspace and bind the object it contains to `combined`.
# load() restores the saved objects under their original names and returns
# those names; get() needs exactly one name, so check that first and fail
# with a readable message otherwise.
loaded.names <- load("data/Demo_CombinedSeurat_SCT_Preprocess.RData")
if (length(loaded.names) != 1L) {
  stop(
    "Expected exactly one object in the .RData file, found ",
    length(loaded.names),
    call. = FALSE
  )
}
combined <- get(loaded.names)

# 4.3 Add other meta info ----
- fraction of reads mapping to mitochondrial genes
# For macaque, not every gene whose symbol starts with "MT" is a
# mitochondrial gene, so the gene set is listed explicitly rather than
# matched by a name pattern.
# NOTE(review): several of these symbols (e.g. MTO1, MTCH1, MTPAP) look like
# nuclear-encoded genes rather than genes of the mitochondrial genome
# (usually named like ND1 / COX1 / CYTB or MT-ND1) -- confirm that this is
# the intended gene set for `percent.mt`.
mt.gene <- c(
  "MTARC2", "MTFR1L", "MTERF1", "MTFR2", "MTRF1L", "MTRES1",
  "MTO1", "MTCH1", "MTFMT", "MTFR1", "MTERF3", "MTERF2", "MTPAP",
  "MTERF4", "MTCH2", "MTIF2", "MTG2", "MTIF3", "MTRF1", "MTCL1"
)

# Per-cell percentage of counts coming from the genes in `mt.gene`, stored
# as the `percent.mt` metadata column.
# NOTE(review): no `assay` is passed, so the object's default assay is used
# -- confirm that it is the assay the RNA-based metrics below refer to.
combined[["percent.mt"]] <- PercentageFeatureSet(combined, features = mt.gene)

# - number of genes detected per UMI
# Complexity score: log10(genes detected) / log10(UMIs) for each cell.
combined$log10GenesPerUMI <-
  log10(combined$nFeature_RNA) / log10(combined$nCount_RNA)

# 4.4 Violin plots to check ----
- get the meta data
# Per-cell QC columns to keep: sample identity plus the four metrics that
# are plotted below.
sel <- c(
  "orig.ident",
  "nCount_RNA", "nFeature_RNA", "percent.mt", "log10GenesPerUMI"
)

# Convert the Seurat metadata to a data.table and keep only those columns.
df <- as.data.table(combined@meta.data)[, sel, with = FALSE]

# Preview the first three rows.
df[1:3, ]
## orig.ident nCount_RNA nFeature_RNA percent.mt log10GenesPerUMI
## 1: SeuratProject 2740 1705 0.10795250 0.9400695
## 2: SeuratProject 3140 1687 0.09593860 0.9228424
## 3: SeuratProject 2539 1456 0.03738318 0.9290675
- define plotting function
# Text size and axis line width shared by every QC panel.
fontsize <- 10
linesize <- 0.35

# Fill colour for each QC metric. The names double as the list of `df`
# columns to plot, so columns are selected by name instead of by position.
col.ls <- setNames(
  c("lightpink2", "lightblue2", "lightgreen", "coral1"),
  c("nCount_RNA", "nFeature_RNA", "percent.mt", "log10GenesPerUMI")
)

# Label helper for stat_summary(fun.data = ...): the label is the median
# rounded to two decimals, drawn above the median by one tenth of the
# maximum value. Returns a one-row data frame with columns `y` and `label`.
give.n <- function(x) {
  data.frame(y = median(x) + max(x) / 10, label = round(median(x), 2))
}

# Build one violin plot per metric; the result is a list named by metric.
# setNames(nm = ...) is used because magrittr is attached after tidyverse
# and its set_names() has no default for the names argument.
gp.ls <- map(setNames(nm = names(col.ls)), function(metric) {
  ggplot(data = df, aes(x = orig.ident, y = .data[[metric]])) +
    geom_violin(trim = FALSE, fill = col.ls[[metric]]) +
    ggtitle(label = metric) +
    ylab(label = metric) +
    theme_bw() +
    # Strip grid lines, facet strip background and panel border.
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      strip.background = element_blank(),
      panel.border = element_blank()
    ) +
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
    # favour of `linewidth`; `size` is kept so older versions still work.
    theme(
      axis.text = element_text(size = fontsize),
      axis.line = element_line(colour = "black", size = linesize),
      axis.ticks = element_line(size = linesize),
      axis.title.x = element_blank(),
      axis.ticks.length = unit(.05, "cm"),
      plot.title = element_text(size = fontsize + 2, hjust = 0.5),
      legend.position = "none"
    ) +
    # Mark the median with a point and print its value next to it.
    stat_summary(fun = median, geom = "point", colour = "black") +
    stat_summary(fun.data = give.n, geom = "text", colour = "black")
})

# Lay the panels out in a two-column grid.
grid.arrange(grobs = gp.ls, ncol = 2)