tomsing1 · January 11, 2022 01:11
diff --git a/mapping_read_with_subread_or_sublong.R b/mapping_read_with_subread_or_sublong.R
 library(BSgenome.Hsapiens.UCSC.hg38)
 library(Biostrings)
 library(Rsubread)
 library(GenomicRanges)
 library(parallel)
 library(GenomicAlignments)

 kCores <- parallel::detectCores() - 1L
 kQuery <- GRanges(seqnames = "chr12", IRanges(40263807, 40264221))

 # read reference sequences into memory
 chromosomes <- standardChromosomes(BSgenome.Hsapiens.UCSC.hg38)
 seqs <- getSeq(x = BSgenome.Hsapiens.UCSC.hg38, 
               names = chromosomes)

 # write reference files to compressed FASTA file
 ref_file <- tempfile(fileext = ".fasta.gz")
 writeXStringSet(seqs, filepath = ref_file, compress = TRUE)

 # build subread / sublong index
 index_dir <- tempfile()
 Rsubread::buildindex(basename = index_dir, reference = ref_file,
                     gappedIndex = FALSE, indexSplit = FALSE)

 # write reads to disk (here we just pull a sequence from the genome)
 read <- getSeq(BSgenome.Hsapiens.UCSC.hg38, kQuery)
 names(read) <- "query"
 read_file <- tempfile(fileext = ".fastq")
 writeXStringSet(read, filepath = read_file, compress = FALSE, format = "fastq")

 # align read(s) to the index with align (subread/subjunc) or sublong (for long reads)
 alignment_file <- tempfile(fileext = ".bam")
 sublong(index_dir, read_file, alignment_file, nthreads = kCores)

 # read results into memory
 ga <- GenomicAlignments::readGAlignments(alignment_file)
 seqlevels(ga) <- seqlevels(genome)
 seqinfo(ga) <- seqinfo(genome)
 ga <- keepStandardChromosomes(ga)

 # the mapped sequence matches the query sequence
 getSeq(genome, GRanges(ga)) == read
	library(BSgenome.Hsapiens.UCSC.hg38)
	library(Biostrings)
	library(Rsubread)
	library(GenomicRanges)
	library(parallel)
	library(GenomicAlignments)

	kCores <- parallel::detectCores() - 1L
	kQuery <- GRanges(seqnames = "chr12", IRanges(40263807, 40264221))

	# read reference sequences into memory
	chromosomes <- standardChromosomes(BSgenome.Hsapiens.UCSC.hg38)
	seqs <- getSeq(x = BSgenome.Hsapiens.UCSC.hg38,
	names = chromosomes)

	# write reference files to compressed FASTA file
	ref_file <- tempfile(fileext = ".fasta.gz")
	writeXStringSet(seqs, filepath = ref_file, compress = TRUE)

	# build subread / sublong index
	index_dir <- tempfile()
	Rsubread::buildindex(basename = index_dir, reference = ref_file,
	gappedIndex = FALSE, indexSplit = FALSE)

	# write reads to disk (here we just pull a sequence from the genome)
	read <- getSeq(BSgenome.Hsapiens.UCSC.hg38, kQuery)
	names(read) <- "query"
	read_file <- tempfile(fileext = ".fastq")
	writeXStringSet(read, filepath = read_file, compress = FALSE, format = "fastq")

	# align read(s) to the index with align (subread/subjunc) or sublong (for long reads)
	alignment_file <- tempfile(fileext = ".bam")
	sublong(index_dir, read_file, alignment_file, nthreads = kCores)

	# read results into memory
	ga <- GenomicAlignments::readGAlignments(alignment_file)
	seqlevels(ga) <- seqlevels(genome)
	seqinfo(ga) <- seqinfo(genome)
	ga <- keepStandardChromosomes(ga)

	# the mapped sequence matches the query sequence
	getSeq(genome, GRanges(ga)) == read