Skip to content

Instantly share code, notes, and snippets.

@brantfaircloth
Created February 13, 2015 22:49
Show Gist options
  • Save brantfaircloth/e51911187da80c127c63 to your computer and use it in GitHub Desktop.
Save brantfaircloth/e51911187da80c127c63 to your computer and use it in GitHub Desktop.
Extract most variable SNP from a "stack" of sequence (from stacks)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
(c) 2015 Brant Faircloth || http://faircloth-lab.org/
All rights reserved.
This code is distributed under a 3-clause BSD license. Please see
LICENSE.txt for more information.
Created on 13 February 2015 12:37 CST (-0600)
"""
import sys
import vcf
import numpy
import argparse
from collections import Counter
#import pdb
def get_args():
parser = argparse.ArgumentParser(description='Extract most variable SNP from a stack')
parser.add_argument(
'--input',
required=True,
help="The vcf file to process"
)
parser.add_argument(
'--output',
required=True,
help="The vcf file to write"
)
return parser.parse_args()
def main():
args = get_args()
count = 0
with open(args.input, 'r') as infile:
with open(args.output, 'w') as outfile:
vcf_reader = vcf.Reader(infile)
vcf_writer = vcf.Writer(outfile, vcf_reader)
results = []
previous = None
for pos, record in enumerate(vcf_reader):
#pdb.set_trace()
if results == [] or record.ID == previous:
results.append(record)
previous = record.ID
elif record.ID != previous:
# do some work on results
if len(results) > 1:
freq_diff = numpy.array([0.5 - (old_record.INFO['AF'][0] - old_record.INFO['AF'][1]) for old_record in results])
freq_diff = numpy.absolute(freq_diff)
min_freq = numpy.argmin(freq_diff)
if results[min_freq].INFO['AF'][0] <= 0.99:
vcf_writer.write_record(results[min_freq])
else:
if results[0].INFO['AF'][0] <= 0.99:
vcf_writer.write_record(results[0])
# create a new results and add record to it
results = []
results.append(record)
previous = record.ID
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment