Skip to content

Instantly share code, notes, and snippets.

@nh13
Created November 6, 2017 04:16
Show Gist options
  • Save nh13/2338f8d21b3a07b41c219063216d81ae to your computer and use it in GitHub Desktop.
Save nh13/2338f8d21b3a07b41c219063216d81ae to your computer and use it in GitHub Desktop.
package com.nilshomer.bfx.examples.utilities
import com.fulcrumgenomics.bam.api.SamSource
import com.fulcrumgenomics.cmdline.ClpGroups
import com.fulcrumgenomics.commons.CommonsDef.{PathToBam, SafelyClosable}
import com.fulcrumgenomics.commons.util.{LazyLogging, NumericCounter, SimpleCounter}
import com.fulcrumgenomics.sopt.{arg, clp}
import com.fulcrumgenomics.util.{Io, ProgressLogger}
import com.nilshomer.bfx.examples.cmdline.BfxExamplesTool
@clp(
description = "Collects statistics about the RG, NM, NH, and IH SAM tags. Assumes the RG SAM tag is present on all mapped records.",
group = ClpGroups.Utilities
)
class CountSamTags
( @arg(flag='i', doc="Input SAM or BAM file.") val input: PathToBam
) extends BfxExamplesTool with LazyLogging {
Io.assertReadable(input)
def execute(): Unit = {
val in = SamSource(this.input)
val rgCounter = new SimpleCounter[String]()
val nmCounter = new NumericCounter[Int]()
val nhCounter = new NumericCounter[Int]()
val ihCounter = new NumericCounter[Int]()
val progress = ProgressLogger(logger, unit=1e6.toInt)
in.foreach { rec =>
rgCounter.count(rec[String]("RG"))
rec.get[Int]("NM").foreach { nm => nmCounter.count(nm) }
rec.get[Int]("NH").foreach { nh => nhCounter.count(nh) }
rec.get[Int]("IH").foreach { ih => ihCounter.count(ih) }
progress.record(rec)
}
in.safelyClose()
println(s"RG: unique RGs in records: ${rgCounter.iterator.length} unique RGs in header:${in.header.getReadGroups.size}")
println(f"NM: count: ${nmCounter.total} mean: ${nmCounter.mean}%.5f median: ${nmCounter.median} stddev: ${nmCounter.stddev()}%.5f")
println(f"NH: count: ${nmCounter.total} mean: ${nhCounter.mean}%.5f median: ${nhCounter.median} stddev: ${nhCounter.stddev()}%.5f")
println(f"IH: count: ${nmCounter.total} mean: ${ihCounter.mean}%.5f median: ${ihCounter.median} stddev: ${ihCounter.stddev()}%.5f")
}
}
#java -Xmx2g -jar tools/target/scala-2.12/bfx-examples-tools.jar CountSamTags -i in.bam
...
RG: unique RGs in records: 4 unique RGs in header:4
NM: count: 9216420 mean: 0.70909 median: 0.0 stddev: 1.69551
NH: count: 9216420 mean: 0.00000 median: 0.0 stddev: 0.00000
IH: count: 9216420 mean: 0.00000 median: 0.0 stddev: 0.00000
[2017/11/05 21:13:47 | BfxExamplesMain | Info] CountSamTags completed. Elapsed time: 0.46 minutes.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment