Created
January 11, 2022 01:11
-
-
Save tomsing1/ff6188f994821f3c322cca44fe4a9023 to your computer and use it in GitHub Desktop.
Mapping long read to a genome in R with the RSubread Bioconductor package
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(BSgenome.Hsapiens.UCSC.hg38) | |
library(Biostrings) | |
library(Rsubread) | |
library(GenomicRanges) | |
library(parallel) | |
library(GenomicAlignments) | |
kCores <- parallel::detectCores() - 1L | |
kQuery <- GRanges(seqnames = "chr12", IRanges(40263807, 40264221)) | |
# read reference sequences into memory | |
chromosomes <- standardChromosomes(BSgenome.Hsapiens.UCSC.hg38) | |
seqs <- getSeq(x = BSgenome.Hsapiens.UCSC.hg38, | |
names = chromosomes) | |
# write reference files to compressed FASTA file | |
ref_file <- tempfile(fileext = ".fasta.gz") | |
writeXStringSet(seqs, filepath = ref_file, compress = TRUE) | |
# build subread / sublong index | |
index_dir <- tempfile() | |
Rsubread::buildindex(basename = index_dir, reference = ref_file, | |
gappedIndex = FALSE, indexSplit = FALSE) | |
# write reads to disk (here we just pull a sequence from the genome) | |
read <- getSeq(BSgenome.Hsapiens.UCSC.hg38, kQuery) | |
names(read) <- "query" | |
read_file <- tempfile(fileext = ".fastq") | |
writeXStringSet(read, filepath = read_file, compress = FALSE, format = "fastq") | |
# align read(s) to the index with align (subread/subjunc) or sublong (for long reads) | |
alignment_file <- tempfile(fileext = ".bam") | |
sublong(index_dir, read_file, alignment_file, nthreads = kCores) | |
# read results into memory | |
ga <- GenomicAlignments::readGAlignments(alignment_file) | |
seqlevels(ga) <- seqlevels(genome) | |
seqinfo(ga) <- seqinfo(genome) | |
ga <- keepStandardChromosomes(ga) | |
# the mapped sequence matches the query sequence | |
getSeq(genome, GRanges(ga)) == read |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment