Chapter 13 HiC CBS Density around Boundary CurvePlot
13.1 Description
Plot the CBS density distribution around boundaries in three lineages; lineage-conserved and lineage-specific CBS sets.
13.2 Load CBS density data
## Load data
##################################################
file.ls <- list.files('data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries/', full.names = T)
file.ls## [1] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_GSE93431_UNTR.50kb.cool.HDF5_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
## [2] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_GSE98119_Vian-2018-activated_B_cells_24_hours_WT.hic.50kb_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
## [3] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_WT_DM.allValidPairs.res50kb_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
13.3 Calculate density
## Section: read the CBS density distribution
##################################################
# absolute density
mat.ls <- lapply(file.ls, function(f){
df <- fread(f, skip = 1)
df <- df[, 7:ncol(df)]
df[is.na(df)] <- 0
den <- colMeans(df)
n <- ncol(df)
n.ls <- seq(0,n, length = 5)[-1]
mat <- data.frame(dist = seq(1,n/4), stable = den[1:n.ls[1]], hep = den[(n.ls[1]+1):n.ls[2]],
myo = den[(n.ls[2]+1):n.ls[3]], bcell = den[(n.ls[3]+1):n.ls[4]])
mat <- melt(mat, id.vars = 'dist')
mat$variable <- factor(mat$variable, levels = c('stable','hep','myo','bcell'))
mat$value <- as.numeric(as.character(mat$value))
return(mat)
})
# normalized density
mat.ls <- lapply(file.ls, function(f){
df <- fread(f, skip = 1)
df <- df[, 7:ncol(df)]
df[is.na(df)] <- 0
n <- ncol(df)
n.ls <- seq(0,n, length = 5)[-1]
stable <- df[,1:n.ls[1]]
hep <- df[, (n.ls[1]+1):n.ls[2]]
myo <- df[, (n.ls[2]+1):n.ls[3]]
bcell <- df[, (n.ls[3]+1):n.ls[4]]
stable <- colMeans(stable)/sum(stable)*1e4
hep <- colMeans(hep)/sum(hep)*1e04
myo <- colMeans(myo)/sum(myo)*1e04
bcell <- colMeans(bcell)/sum(bcell)*1e04
mat <- data.frame(dist = seq(1,n/4), stable = stable, hep = hep, myo = myo, bcell = bcell)
mat <- melt(mat, id.vars = 'dist')
mat$variable <- factor(mat$variable, levels = c('stable','hep','myo','bcell'))
mat$value <- as.numeric(as.character(mat$value))
return(mat)
})13.4 Plot
## Section: plot density
##################################################
names(mat.ls) <- c('hep', 'Bcell', 'myo')
mat.ls %>% imap( ~ {
fontsize = 8
linesize = 1
df <- .x
sample <- .y
ggplot(df, aes(x = dist, y = value)) +
geom_line(aes(color = variable, linetype = variable))+
theme_bw() +
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
strip.background = element_blank())+
theme(text = element_text(size = fontsize), line = element_line(size = linesize),
axis.ticks.length = unit(.1, "cm"), axis.ticks.x = element_blank()) +
ylab('normalized CTCF binding sites per 20kb')+
xlab(paste0(sample, '_boundaries'))+
ggtitle(label = sample)+ # labs
coord_cartesian(ylim = c(0,0.26))
})## $hep

##
## $Bcell

##
## $myo
