Chapter 19 Create ArrowFiles and ArrowProject

19.1 Description

  • This is a demo dataset: 3000 randomly selected cells from fetal macaque cortex
  • Create arrow files from fragment files generated by 10X Cellranger ATAC
  • Create an arrow project by merging the arrow files for four individuals

19.2 Load ArchRGenome and set threads

## Section: load annotation data
##################################################
# Restore the saved macaque annotation objects into the current workspace.
# Presumably this supplies `genomeAnnotation` and `geneAnnotationSubset`,
# which the later sections reference -- TODO confirm against the .RData file.
load(file = "data/AchrR/Macaca_mulatta_genomeAnnotation_geneAnnotationSubset.RData")


## Section: set default para
##################################################
# Register the default thread count used by the ArchR calls that follow.
# NOTE(review): 20 is hard-coded -- confirm it suits the machine running this.
addArchRThreads(threads = 20) # setting default number of parallel threads

19.3 Create arrow files

## Section: creating arrow files
##################################################
# Collect the gzipped fragment files found under cellranger-res/.
inputFiles <- list.files('cellranger-res/', pattern = 'gz$', full.names = TRUE)

# Entries 1-3 and 6 of the listing are the four samples used in this demo.
# Fail early when the listing is too short: indexing past the end would
# silently put NA paths into the vector handed to createArrowFiles().
if (length(inputFiles) < 6) {
  stop(
    "Expected at least 6 files matching 'gz$' in 'cellranger-res/', found ",
    length(inputFiles), ".",
    call. = FALSE
  )
}
inputFiles <- inputFiles[c(1:3, 6)]

# Build the arrow files for the selected fragment files. The four sample
# names are given in the same order as the four selected input files.
ArrowFiles <- createArrowFiles(
  inputFiles = inputFiles,
  sampleNames = c("68A", "68B", "84B", "84C"),
  minTSS = 4, # Don't set this too high because you can always increase later
  minFrags = 1000,
  addTileMat = TRUE,
  addGeneScoreMat = TRUE,
  geneAnnotation = geneAnnotationSubset,
  genomeAnnotation = genomeAnnotation
)

19.4 Add doublet scores

## Section: infer doublet
##################################################
# Compute doublet scores for the arrow files created above.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  # Embedding in which the nearest-neighbor search is carried out.
  knnMethod = "UMAP",
  # Number of cells near each "pseudo-doublet" that get counted.
  k = 10,
  LSIMethod = 1
)

19.5 Create arrow project

## Section: create ArchRProject and filter doublets
##################################################
# Assemble the arrow files into one project stored under ArrowProject/Merged.
proj <- ArchRProject(
  ArrowFiles = ArrowFiles,
  geneAnnotation = geneAnnotationSubset,
  genomeAnnotation = genomeAnnotation,
  outputDirectory = "ArrowProject/Merged",
  # Copying is recommended so that an unaltered set of arrow files is kept
  # for later usage.
  copyArrows = TRUE
)

19.6 Remove doublets

# Run ArchR's doublet filter on the project and keep the result in `proj`.
proj <- filterDoublets(ArchRProj = proj)

19.7 Save arrow project

# Save the filtered project. No output location is passed here, so it is
# presumably written to the project's own output directory -- TODO confirm.
saveArchRProject(ArchRProj = proj)