Chapter 17 Compare Loop Length Distribution
17.1 Description
Comapre loop length distributions for conserved-CBS-loops and specific-CBS-loops
17.2 Load loops
## Section: load data
##################################################
rm(list = ls())
<- list.files('data/HiC/loops/', pattern = "ANDS", full.names = T)
file.ls
<- function(f){
getLen <- fread(f)
loop <- abs(loop$y2 - loop$x2)
len <- log10(len)
len return(len)
}
<- lapply(file.ls, getLen)
len.ls
= len.ls[[2]]
st = len.ls[[1]] de
17.3 Culmulative plot
## Section: plots
##################################################
par(mar = rep(4,4),lwd=0.5)
plot(ecdf(st), verticals=TRUE, do.points=FALSE, col= 'azure3', xlim=c(4.3, 8.2),
main="", xlab=expression(log[10]* '(bp distance)'),
col.01line = NULL, cex.lab=2, ylab=expression(F[n]*'(x)')) #St-St St-Non
plot(ecdf(de), verticals=TRUE, col = 'green3', do.points=FALSE, add=TRUE, col.01line = NULL) #De-De De-Non
legend("bottomright", c('St-St/Non', 'De-De/Non'),
col=c('azure3', 'green3'),
lty=rep(1,2), bty="n")
17.4 Box plot
## Section: boxplots
##################################################
## Section: generate the dataframe
##################################################
<- data.frame(cbind('type' = c(rep('stable',length(st)), rep('specific', length(de))),
df 'value' = c(st, de)))
$type <- factor(df$type, levels = c('stable', 'specific'))
df$value <- as.numeric(as.character(df$value))
df
## Section: seting plot parameters
##################################################
<- list(c('stable','specific'))
my_comparisons <- function(y, upper_limit = quantile(df$value, 0.50) ) {
stat_box_data return( data.frame( y = upper_limit,
label = paste('count =', length(y), '\n','median =', 10^median(y), '\n')))
}
= 4
fontsize = 1
linesize
ggboxplot(df, x = "type", y = "value",
color = 'type', size = .3, font.label = list(size = fontsize), outlier.shape = NA)+
stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,vjust = 0.9)+
theme(legend.position = "none", axis.ticks.x = element_blank())+
stat_compare_means(comparisons = my_comparisons, label.y = c(6),method = 't.test', size = 2) +
ylab('log10 loop size')+ xlab('loop types by CBS')+ ggtitle(label = "wtdm") +# labs
coord_cartesian(ylim = c(4.5,7))