Skip to content

Instantly share code, notes, and snippets.

@brentp
Last active November 30, 2018 21:32
Show Gist options
  • Save brentp/ac0169cce003ddc8d7b4b50940dc3f42 to your computer and use it in GitHub Desktop.
Save brentp/ac0169cce003ddc8d7b4b50940dc3f42 to your computer and use it in GitHub Desktop.
import os
import stats
import hts
import plotly
import strformat
import bpbiopkg/pedfile
#[
look at allele balance on mitochondrial chromosome.
this only plots sites for each sample that have an allele balance (alt / ref + alt) between 0.01 and 0.99.
In most cases, the depth on the MT chrom is more than 10X (and often 100X) of the autosome so we can get
a reliable count.
]#
var vcf:VCF
if not open(vcf, commandLineParams()[0]):
quit "couldn't open VCF"
var samples = parse_ped(commandLineParams()[1])
samples = samples.match(vcf)
var kids = newSeq[Sample]()
for s in samples:
if s.dad != nil and s.mom != nil:
kids.add(s)
# initialize the traces used for plotting.
var traces = newSeqOfCap[Trace[float64]](kids.len)
for kid in kids:
var t = Trace[float64](mode:PlotMode.Markers, `type`: PlotType.Scatter)
t.marker = Marker[float64](size: @[6'f64])
t.name = kid.id
traces.add(t)
#[
var traces = newSeqOfCap[Trace[float64]](samples.len)
for sample in samples:
var t = Trace[float64](mode:PlotMode.Markers, `type`: PlotType.Scatter)
t.marker = Marker[float64](size: @[6'f64])
t.name = sample.id
traces.add(t)
]#
proc allele_balance(r:int32, a: int32): float64 {.inline.} =
return a.float64 / float64(r + a)
# ad gets filled with the AD (allele depth) values for each sample.
var ad = newSeq[int32]()
for v in vcf.query("MT"):
if v.start < 200 or v.start > 16000: continue
if v.format.get("AD", ad) != Status.OK:
quit "no AD"
# limit to bi-allelic sites.
if ad.len != v.n_samples * 2:
continue
for i, kid in kids:
# skip unknown (./.)
var ok = true
for c in @[ad[2*kid.i], ad[2*kid.i+1],
ad[2*kid.mom.i],ad[2*kid.mom.i+1],
ad[2*kid.dad.i],ad[2*kid.dad.i+1]]:
if c < 0:
ok = false
break
if not ok:
continue
var (kr, ka) = (ad[2*kid.i], ad[2*kid.i+1])
var (mr, ma) = (ad[2*kid.mom.i], ad[2*kid.mom.i+1])
var (dr, da) = (ad[2*kid.dad.i], ad[2*kid.dad.i+1])
var ab = allele_balance(kr, ka)
var mab = allele_balance(mr, ma)
if (ab < 0.01 or ab > 0.99) and (mab < 0.01 or mab > 0.99): continue
traces[i].xs.add(allele_balance(kr, ka))
traces[i].ys.add(allele_balance(mr, ma))
#traces[i].text.add(&"position: {v.start}, {kid.id}:{kr},{ka}, mom:{mr},{ma}, dad:{dr},{da}")
#echo traces[i].xs[traces[i].xs.high], traces[i].ys[traces[i].xs.high]
#[
traces[kid.i].xs.add(v.start.float64)
traces[kid.i].ys.add(allele_balance(kr, ka))
traces[kid.mom.i].xs.add(v.start.float64)
traces[kid.mom.i].ys.add(allele_balance(mr, ma))
traces[kid.dad.i].xs.add(v.start.float64)
traces[kid.dad.i].ys.add(allele_balance(dr, da))
]#
var delete = newSeq[int]()
for i, t in traces.mpairs:
if t.xs.len == 0: continue
t.text = @["sample:" & kids[i].id & " sites:" & $t.xs.len]
t.marker.size = @[t.xs.len.float64 / 1.2'f64]
var s = RunningStat()
for x in t.xs:
s.push(if x > 0.5: 1 - x else: x)
t.xs = @[s.mean.float64]
t.xs_err = newErrorBar[float64](s.standardDeviation.float64)
s = RunningStat()
for x in t.ys:
s.push(if x > 0.5: 1 - x else: x)
t.ys = @[s.mean.float64]
t.ys_err = newErrorBar[float64](s.standardDeviation.float64)
if t.xs[0] < 0.025 and t.ys[0] < 0.025:
delete.add(i)
var nTraces = newSeqOfCap[Trace[float64]](traces.len)
for i, t in traces:
if i in delete: continue
nTraces.add(t)
traces = nTraces
let
layout = Layout(title: "MT allele balance", width: 600, height: 600,
xaxis: Axis(title:"kid AB +/ std"),
yaxis: Axis(title: "mom AB +/ std"),
autosize: false)
p = Plot[float64](layout: layout, traces: traces)
discard p.save("/tmp/x.html")
#p.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment