Chapter 15 Assign Epigenetic Attributes to Loop Anchors
15.1 Description
Assign epigenetic and CBS attributes to loop anchors, based on the following rules.

Epi:
- Promoters: overlap with TSS +/- 2 kb
- Enhancers: do not contain promoters; overlap with an H3K27ac peak
- Others: neither promoters nor enhancers

CBS (flags are written as conserved/lineage-specific overlap, 1 = overlap, 0 = no overlap):
- Conserved-CBS-anchor: overlaps a conserved CBS and no lineage-specific CBS (1/0)
- Specific-CBS-anchor: overlaps a lineage-specific CBS (1/1 or 0/1)
15.2 Load data
# Input files. Each variable is consumed further down by getGR() / fread().
# Stable (conserved) CTCF binding sites.
cbs.st <- 'data/DiffBD/Mouse-multipleCells-stable-CBS.center400bp.bed'
# Lineage-specific CTCF binding sites (differential binding result).
cbs.sp <- 'data/DiffBD/Mouse-SplenicBcellGSM3027985vsAllotherWithoutSplenicBcelGSM2652790diffBD.minOverlap1.log2FC1FDR01.D210325upinSplenicBcellGSM3027985.sort.bed'
# H3K27ac peaks with the TSS +/- 2 kb windows subtracted (enhancers).
enh <- 'data/peakfiles/Enhancer-groupby-CBS/AllEnh/GSM2184223-mouse-spleen-activatedBcell-H3K27ac_peaks.narrowPeak.subtrac.UCSC.knowngene.TSS.up2kdown2k.bed'
# TSS +/- 2 kb windows (promoters).
tss <- '~/lustrelyt/ChenQ/ChIP/data/public/UCSC.genes.TSS.up2kdn2k.mm9.bed'
# PU.1 peaks (transcription factor track).
tf <- '~/lustrelyt/ChenQ/public/Mouse-otherTF/peakfiles/GSM537989-mouse-resting-splenicBcell-PU1_Q005_peaks.narrowPeak'
# Loop anchors to annotate.
loop <- 'data/HiC/loops/GSE82144_Kieffer-Kwon-2017-activated_B_cells_72_hours_WT.hic_5k10kloops_merged_loops.loopanchor.bed'
15.3 Read regions
## Section: get the location in GR format
##################################################
# Read a headerless, BED-like file and return its first three columns
# (chr, start, end) as a GRanges object.
#
# f: path to the file; only columns 1-3 are used, any others are dropped.
# Returns: a GRanges with one range per input row and no metadata columns.
#
# NOTE(review): start coordinates are passed through unchanged. If the inputs
# are 0-based BED/narrowPeak files, consider
# `starts.in.df.are.0based = TRUE` -- confirm before changing, since it
# shifts every range by 1 bp.
getGR <- function(f){
  mat <- fread(f, header = FALSE)
  mat <- mat[, 1:3]
  colnames(mat) <- c('chr', 'start', 'end')
  bed <- makeGRangesFromDataFrame(mat)
  bed
}
# Load every annotation track as GRanges.
tss.gr <- getGR(tss)
enh.gr <- getGR(enh)
tf.gr <- getGR(tf)
stable.gr <- getGR(cbs.st)
specific.gr <- getGR(cbs.sp)

# Preview the first three promoter windows.
tss.gr[1:3]
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 3193984-3197984 *
## [2] chr1 3202562-3206562 *
## [3] chr1 3636391-3640391 *
## -------
## seqinfo: 33 sequences from an unspecified genome; no seqlengths
15.4 Find Overlap
# Load the loop anchors: one row per anchor with chr, start, end and an id.
df <- fread(loop, header = FALSE)
colnames(df) <- c('chr', 'start', 'end', 'id')
# Overlap flags, all initialised to 0. The chained assignment is evaluated
# right to left, so the columns are created in the order
# tss, enh, stable, specific, tf.
df$tf <- df$specific <- df$stable <- df$enh <- df$tss <- 0
df.gr <- makeGRangesFromDataFrame(df, keep.extra.columns = TRUE)

## Section: assign attributes 0 means not overlap, 1 means overlap
##################################################
# Each list element is named after the flag column it fills, so the update
# below does not depend on the column positions in df.
query.ls <- list(tss = tss.gr, enh = enh.gr, stable = stable.gr,
                 specific = specific.gr, tf = tf.gr)
for (i in seq_along(query.ls)) {
  comm <- findOverlaps(query.ls[[i]], df.gr)
  # Row indices of the anchors hit by at least one region of this track.
  loci <- unique(subjectHits(comm))
  df[loci, names(query.ls)[i]] <- 1
}
df[1:3,]
## chr start end id tss enh stable specific
## 1: chr10 76300000 76305000 chr10:76300000-76305000 0 0 0 0
## 2: chr10 77740000 77745000 chr10:77740000-77745000 0 0 1 0
## 3: chr10 116895000 116900000 chr10:116895000-116900000 0 0 0 1
## tf
## 1: 0
## 2: 0
## 3: 0
15.5 Assign Attributes
## Section: assign one label per anchor
##################################################
# Epigenetic state, written as tss/enh flags:
# promoter: 1/0 or 1/1; enhancer: 0/1; everything else stays "O".
df$state <- "O"
df$state[which(df$tss == 1)] <- "P"
df$state[which(df$tss == 0 & df$enh == 1)] <- "E"

# CBS class, written as stable/specific flags:
# stable: 1/0; specific: 1/1 or 0/1; none (0/0) stays 'O'.
df$CBS <- 'O'
df$CBS[which(df$stable == 1 & df$specific == 0)] <- 'Stable'
df$CBS[which(df$specific == 1)] <- 'Specific'

df[1:3,]
## chr start end id tss enh stable specific
## 1: chr10 76300000 76305000 chr10:76300000-76305000 0 0 0 0
## 2: chr10 77740000 77745000 chr10:77740000-77745000 0 0 1 0
## 3: chr10 116895000 116900000 chr10:116895000-116900000 0 0 0 1
## tf state CBS
## 1: 0 O O
## 2: 0 O Stable
## 3: 0 O Specific
15.6 Save Data
# Write the annotated anchors as a tab-separated table with a header row,
# without row names and without quoting.
write.table(df, file = 'data/HiC/loops/GSE82144_Kieffer-Kwon-2017-activated_B_cells_72_hours_WT.hic_5k10kloops.loopanchor.with.epiCBS.onlyGSM3027985diffBDFDR01.state.bed', row.names = FALSE,
            col.names = TRUE, quote = FALSE, sep = '\t')