Skip to content

Instantly share code, notes, and snippets.

@nh13
Created April 17, 2020 03:21
Show Gist options
  • Save nh13/35fff91660b05b3acbf74b32f092d051 to your computer and use it in GitHub Desktop.
Save nh13/35fff91660b05b3acbf74b32f092d051 to your computer and use it in GitHub Desktop.
Add a read group (and sample) per read in a SAM/BAM
import $ivy.`com.fulcrumgenomics::fgbio:1.1.0`
import com.fulcrumgenomics.FgBioDef._
import java.nio.file.{Path, Paths}
import com.fulcrumgenomics.commons.util.{LazyLogging, Logger}
import com.fulcrumgenomics.util.ProgressLogger
import com.fulcrumgenomics.bam.api._
import htsjdk.samtools.SAMReadGroupRecord
// So that a [[Path]] can be built from a [[String]] on the command line
implicit val pathRead: scopt.Read[Path] = scopt.Read.reads(path => Paths.get(path))
// So we can have a logger.
private object AddRgPerRead extends LazyLogging {
def toLogger: Logger = this.logger
}
@doc("Tags each read with its own read group")
@main
def main(in: PathToBam @doc("The input SAM/BAM file."),
out: PathToBam@doc("The output SAM/BAM file.")): Unit = {
val logger = AddRgPerRead.toLogger
val progress = ProgressLogger(logger, verb="Read", unit=500000)
val source = SamSource(in)
val header = source.header.clone()
val records = source.toSeq // NB: loads all the reads into memory!
// update the header
val readGroups = records.map { rec =>
val readGroup = new SAMReadGroupRecord(rec.name)
readGroup.setSample(rec.name)
readGroup
}
header.setReadGroups(readGroups.iterator.toJavaList)
// write the records
val writer = SamWriter(out, header)
records.foreach { rec =>
progress.record(rec)
rec("RG") = rec.name
writer += rec
}
progress.logLast()
// clean up
writer.close()
source.safelyClose()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment