Skip to content

Instantly share code, notes, and snippets.

@brentp
Last active September 29, 2015 14:38
Show Gist options
  • Save brentp/5b111b8f1fb1562e35f6 to your computer and use it in GitHub Desktop.
Save brentp/5b111b8f1fb1562e35f6 to your computer and use it in GitHub Desktop.
expand <DEL>, etc into the sequence field so we can do graph stuff.
from __future__ import print_function
from pyfaidx import Fasta
from cyvcf2 import VCF
import string
import re
import sys
try: # 2.x
maketrans = string.maketrans
except AttributeError: # 3.x
maketrans = str.maketrans
_complement = maketrans('ATCGatcgNnXx', 'TAGCtagcNnXx')
if sys.version_info[0] < 3:
_complement = _complement.decode('latin-1')
complement = lambda s: s.translate(_complement)
def inseq(fa, vcf):
print(vcf.raw_header, end='')
patt = re.compile("^[ACTG]+$")
last_chrom, last_o = None, None
for v in vcf:
if patt.match(v.REF) and any(patt.match(a) for a in v.ALT):
print(str(v))
else:
alt = v.ALT
assert len(alt) == 1, alt
alt = alt[0]
end = v.INFO.get("END")
start = v.POS - 1
toks = str(v).rstrip("\r\n").split("\t")
if alt.startswith(("<DEL", "<DUP", "<INV")):
toks[4] = toks[3]
toks[3] = seq
if v.CHROM != last_chrom:
last_chrom = v.CHROM
last_o = fa[v.CHROM]
seq = str(last_o[start: end])
if alt.startswith("<INV"):
toks[3] = complement(toks[3])[::-1]
print("\t".join(toks))
def main(fa, vcf):
fa = Fasta(fa, read_ahead=100000)
vcf = VCF(vcf)
inseq(fa, vcf)
if __name__ == "__main__":
import argparse
p = argparse.ArgumentParser()
p.add_argument("fasta", help="samtools faidx indexed fasta")
p.add_argument("vcf")
a = p.parse_args()
main(a.fasta, a.vcf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment