Chapter 10 diffBD genes expression
10.1 Description
check the expression levels for genes associated with lineage-specific CBS
10.2 Load expression mat
## Section: load expression table
##################################################
<- fread("~/lustrelyt/ChenQ/public/Human-Multitissue-CTCF/RNA-seq/Hep.Myo.Bcell_FPKM.table") mat
10.3 Load lineage-specific/conserved CBS
## Section: load peaks
##################################################
<-
file.ls list.files('data/DiffBD/', full.names = T, pattern = "bed")
<- list(
peaks stable = file.ls[5],
liver = file.ls[2],
muscle = file.ls[3],
bcell = file.ls[1]
)
10.4 Annotate genes for CBS
## Section: annotate genes by promoters
##################################################
= TxDb.Hsapiens.UCSC.hg19.knownGene
txdb <- peaks %>% map( ~ {
gene.ls <- readPeakFile(.x)
peak <-
gene seq2gene(
peak,tssRegion = c(-1000, 1000),
flankDistance = 0,
TxDb = txdb
)
})
10.5 Extract submat
## Section: subset fpkm table based on genes
##################################################
<- gene.ls %>% map( ~ {
submat.ls
<- match(.x, mat$entrez)
loci <- loci[which(!is.na(loci))]
loci
# subset fpkm table
<- mat[loci, 2:4]
submat
# add gene symbol
<- mat$symbol[loci]
gid rownames(submat) <- make.names(gid, unique = TRUE)
# log norm
<- log2(submat + 1)
submat #submat <- t(scale(t(submat)))
return(submat)
})
10.6 Boxplot to explore the expression levels across cells
<- function(y, upper_limit = quantile(df$value, 0.50) ) {
stat_box_data return( data.frame( y = upper_limit,
label = paste('count =', length(y), '\n','mean =', round(mean(y), 3), '\n')))
}<- list(c('hepatocyte', 'myocyte'), c('hepatocyte', 'Bcell'), c('myocyte', 'Bcell'))
my_comparisons
<- submat.ls %>% imap(~ {
gp.ls <- .y
sample <- .x
submat
<- melt(submat)
df colnames(df) <- c('type', 'value')
$type <- factor(df$type, levels = c('hepatocyte', 'myocyte', 'Bcell'))
df$value <- as.numeric(as.character(df$value))
df
= 10
fontsize = 1
linesize
<- ggboxplot(df, x = "type", y = "value",
g color = 'type', size = .3, font.label = list(size = fontsize), outlier.shape = NA)+
ggtitle(sample) +
ylab('log2 FPKM')+ xlab(paste0("genes n=", nrow(submat) ))+
stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,vjust = 0.9)+
theme(legend.position = "none", axis.ticks.x = element_blank())+
stat_compare_means(comparisons = my_comparisons, label.y = c(6,7,8),method = 't.test', size = 2)+
scale_color_manual(values = c("hepatocyte" = rgb(223, 97, 20, maxColorValue = 255),
"myocyte" = rgb(66, 117, 224, maxColorValue = 255),
"Bcell" = rgb(125, 0, 250, maxColorValue = 255)))
return(g)
})
do.call(cowplot::plot_grid, c(list(ncol = 4), gp.ls))
10.7 Save plots
pdf('results/Genes/N10-Hep.Myo.Bcell-specificCBS-AssociatedGenes-log2FPKM-fromENCODEData-boxplot.pdf',
width = 2.5*4, height = 3.5)
do.call(cowplot::plot_grid, c(list(ncol = 4), gp.ls))
dev.off()