Chapter 13 HiC CBS Density around Boundary CurvePlot
13.1 Description
Plot the CBS density distribution around boundaries in three lineages; lineage-conserved and lineage-specific CBS sets.
13.2 Load CBS density data
## Load data
##################################################
<- list.files('data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries/', full.names = T)
file.ls file.ls
## [1] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_GSE93431_UNTR.50kb.cool.HDF5_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
## [2] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_GSE98119_Vian-2018-activated_B_cells_24_hours_WT.hic.50kb_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
## [3] "data/countmat/StableSpecificCBS_peakdensity_on_diamondBoundaries//Mouse.stable.specific.CBS.peakDensity_ON_800kbAround_WT_DM.allValidPairs.res50kb_diamond-insulation.800kb.1000kb.txt_filter-0.1.matrix"
13.3 Calculate density
## Section: read the CBS density distribution
##################################################
# absolute density
<- lapply(file.ls, function(f){
mat.ls <- fread(f, skip = 1)
df <- df[, 7:ncol(df)]
df is.na(df)] <- 0
df[<- colMeans(df)
den
<- ncol(df)
n <- seq(0,n, length = 5)[-1]
n.ls <- data.frame(dist = seq(1,n/4), stable = den[1:n.ls[1]], hep = den[(n.ls[1]+1):n.ls[2]],
mat myo = den[(n.ls[2]+1):n.ls[3]], bcell = den[(n.ls[3]+1):n.ls[4]])
<- melt(mat, id.vars = 'dist')
mat
$variable <- factor(mat$variable, levels = c('stable','hep','myo','bcell'))
mat$value <- as.numeric(as.character(mat$value))
mat
return(mat)
})# normalized density
<- lapply(file.ls, function(f){
mat.ls <- fread(f, skip = 1)
df <- df[, 7:ncol(df)]
df is.na(df)] <- 0
df[
<- ncol(df)
n <- seq(0,n, length = 5)[-1]
n.ls
<- df[,1:n.ls[1]]
stable <- df[, (n.ls[1]+1):n.ls[2]]
hep <- df[, (n.ls[2]+1):n.ls[3]]
myo <- df[, (n.ls[3]+1):n.ls[4]]
bcell
<- colMeans(stable)/sum(stable)*1e4
stable <- colMeans(hep)/sum(hep)*1e04
hep <- colMeans(myo)/sum(myo)*1e04
myo <- colMeans(bcell)/sum(bcell)*1e04
bcell
<- data.frame(dist = seq(1,n/4), stable = stable, hep = hep, myo = myo, bcell = bcell)
mat <- melt(mat, id.vars = 'dist')
mat
$variable <- factor(mat$variable, levels = c('stable','hep','myo','bcell'))
mat$value <- as.numeric(as.character(mat$value))
mat
return(mat)
})
13.4 Plot
## Section: plot density
##################################################
names(mat.ls) <- c('hep', 'Bcell', 'myo')
%>% imap( ~ {
mat.ls = 8
fontsize = 1
linesize
<- .x
df <- .y
sample
ggplot(df, aes(x = dist, y = value)) +
geom_line(aes(color = variable, linetype = variable))+
theme_bw() +
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
strip.background = element_blank())+
theme(text = element_text(size = fontsize), line = element_line(size = linesize),
axis.ticks.length = unit(.1, "cm"), axis.ticks.x = element_blank()) +
ylab('normalized CTCF binding sites per 20kb')+
xlab(paste0(sample, '_boundaries'))+
ggtitle(label = sample)+ # labs
coord_cartesian(ylim = c(0,0.26))
})
## $hep
##
## $Bcell
##
## $myo