Chapter 15 Assign Epigenetic Attributes to Loop Anchors

15.1 Description

Assign epigenetic and CBS attributes to loop anchors, based on:

Epi:

- Promoters: overlap with TSS +/- 2 kb
- Enhancers: do not contain promoters; overlap with an H3K27ac peak
- Others: neither promoters nor enhancers

CBS (flags written as stable/specific):

- Conserved-CBS-anchor: overlaps a conserved CBS and no lineage-specific CBS (1/0)
- Specific-CBS-anchor: overlaps a lineage-specific CBS (1/1 or 0/1)

15.2 Load data

# Loop anchors to annotate.
loop <- "data/HiC/loops/GSE82144_Kieffer-Kwon-2017-activated_B_cells_72_hours_WT.hic_5k10kloops_merged_loops.loopanchor.bed"

# Annotation tracks that will be intersected with the anchors.
# TSS windows (file name indicates +/- 2 kb around UCSC TSSs, mm9).
tss <- "~/lustrelyt/ChenQ/ChIP/data/public/UCSC.genes.TSS.up2kdn2k.mm9.bed"
# H3K27ac peaks (file name indicates the TSS +/- 2 kb windows were subtracted).
enh <- "data/peakfiles/Enhancer-groupby-CBS/AllEnh/GSM2184223-mouse-spleen-activatedBcell-H3K27ac_peaks.narrowPeak.subtrac.UCSC.knowngene.TSS.up2kdown2k.bed"
# Stable (conserved) CBS set.
cbs.st <- "data/DiffBD/Mouse-multipleCells-stable-CBS.center400bp.bed"
# Lineage-specific CBS set.
cbs.sp <- "data/DiffBD/Mouse-SplenicBcellGSM3027985vsAllotherWithoutSplenicBcelGSM2652790diffBD.minOverlap1.log2FC1FDR01.D210325upinSplenicBcellGSM3027985.sort.bed"
# Transcription-factor peaks (file name indicates PU1 in resting splenic B cells).
tf <- "~/lustrelyt/ChenQ/public/Mouse-otherTF/peakfiles/GSM537989-mouse-resting-splenicBcell-PU1_Q005_peaks.narrowPeak"

15.3 Read regions

## Section:  get the location in GR format
##################################################
#' Read a BED-like file and return its first three columns as a GRanges.
#'
#' @param f Path (single string) to a headerless delimited text file whose
#'   first three columns are chromosome, start and end. Any further columns
#'   are ignored.
#' @param zero_based Forwarded to `makeGRangesFromDataFrame()` as
#'   `starts.in.df.are.0based`. The default `FALSE` keeps the start
#'   coordinates exactly as written in the file.
#'   NOTE(review): BED/narrowPeak starts are 0-based by specification, so
#'   `TRUE` is probably the right setting for these inputs -- confirm before
#'   switching, because it shifts every start by +1 and can change overlaps
#'   between directly adjacent regions.
#' @return The object returned by `makeGRangesFromDataFrame()`: one range per
#'   input row, without metadata columns.
getGR <- function(f, zero_based = FALSE) {
  stopifnot(is.character(f), length(f) == 1L, !is.na(f))
  # TRUE/FALSE instead of T/F: the short forms are ordinary variables and can
  # be reassigned elsewhere in a session.
  mat <- fread(f, header = FALSE)
  if (ncol(mat) < 3L) {
    stop("'", f, "' has fewer than 3 columns; expected chr, start, end",
         call. = FALSE)
  }
  mat <- mat[, 1:3]
  colnames(mat) <- c('chr', 'start', 'end')
  makeGRangesFromDataFrame(mat, starts.in.df.are.0based = zero_based)
}

# Load every annotation track as a set of genomic ranges.
# CBS tracks
stable.gr <- getGR(cbs.st)
specific.gr <- getGR(cbs.sp)
# Epigenetic tracks
tss.gr <- getGR(tss)
enh.gr <- getGR(enh)
# Transcription-factor peaks
tf.gr <- getGR(tf)

# Preview the first three TSS windows.
tss.gr[seq_len(3)]
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames          ranges strand
##          <Rle>       <IRanges>  <Rle>
##   [1]     chr1 3193984-3197984      *
##   [2]     chr1 3202562-3206562      *
##   [3]     chr1 3636391-3640391      *
##   -------
##   seqinfo: 33 sequences from an unspecified genome; no seqlengths

15.4 Find Overlap

# Loop anchors, one row per anchor.
df <- fread(loop, header = FALSE)
colnames(df) <- c('chr', 'start', 'end', 'id')

## Section: assign attributes 0 means not overlap, 1 means overlap
##################################################
# Each region set is keyed by the flag column it fills. Driving both the
# column creation and the overlap loop from these names removes the previous
# dependence on a fixed list length (1:5) and on column positions (i + 4),
# which would silently flag the wrong column if either changed.
query.ls <- list(
  tss = tss.gr,
  enh = enh.gr,
  stable = stable.gr,
  specific = specific.gr,
  tf = tf.gr
)

# Start every flag at 0 (no overlap). Columns are appended in list order, so
# the table layout is chr, start, end, id, tss, enh, stable, specific, tf.
for (nm in names(query.ls)) {
  df[[nm]] <- rep(0, nrow(df))
}
df.gr <- makeGRangesFromDataFrame(df, keep.extra.columns = TRUE)

for (nm in names(query.ls)) {
  comm <- findOverlaps(query.ls[[nm]], df.gr)
  # subjectHits() indexes rows of df.gr, which are in the same order as df.
  loci <- unique(subjectHits(comm))
  df[[nm]][loci] <- 1
}
df[1:3,]
##      chr     start       end                        id tss enh stable specific
## 1: chr10  76300000  76305000   chr10:76300000-76305000   0   0      0        0
## 2: chr10  77740000  77745000   chr10:77740000-77745000   0   0      1        0
## 3: chr10 116895000 116900000 chr10:116895000-116900000   0   0      0        1
##    tf
## 1:  0
## 2:  0
## 3:  0

15.5 Assign Attributes

## Section: assign one label per anchor
##################################################
# Epigenetic state, decided from the (tss, enh) flags:
#   "P" promoter : tss = 1, regardless of enh
#   "E" enhancer : tss = 0 and enh = 1
#   "O" other    : everything else
# The two conditions are mutually exclusive, so their order does not matter.
df$state <- rep("O", nrow(df))
df$state[df$tss %in% 0 & df$enh %in% 1] <- "E"
df$state[df$tss %in% 1] <- "P"

# CBS class, decided from the (stable, specific) flags:
#   'Specific' : specific = 1, regardless of stable
#   'Stable'   : stable = 1 and specific = 0
#   'O'        : neither
df$CBS <- rep('O', nrow(df))
df$CBS[df$specific %in% 1] <- 'Specific'
df$CBS[df$stable %in% 1 & df$specific %in% 0] <- 'Stable'

df[1:3,]
##      chr     start       end                        id tss enh stable specific
## 1: chr10  76300000  76305000   chr10:76300000-76305000   0   0      0        0
## 2: chr10  77740000  77745000   chr10:77740000-77745000   0   0      1        0
## 3: chr10 116895000 116900000 chr10:116895000-116900000   0   0      0        1
##    tf state      CBS
## 1:  0     O        O
## 2:  0     O   Stable
## 3:  0     O Specific

15.6 Save Data

# Save the annotated anchors as tab-separated text with a header row, no row
# names and no quoting. TRUE/FALSE are spelled out because T and F are plain
# variables that can be reassigned.
write.table(
  df,
  file = 'data/HiC/loops/GSE82144_Kieffer-Kwon-2017-activated_B_cells_72_hours_WT.hic_5k10kloops.loopanchor.with.epiCBS.onlyGSM3027985diffBDFDR01.state.bed',
  row.names = FALSE,
  col.names = TRUE,
  quote = FALSE,
  sep = '\t'
)