Last active
July 8, 2018 21:52
-
-
Save wdecoster/26c7de0e9b0dd7c2f2fed321f50bd182 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from cyvcf2 import VCF | |
| from argparse import ArgumentParser | |
| import matplotlib.pyplot as plt | |
| import sys | |
| def main(): | |
| args = get_args() | |
| sample_dict = get_sample_dict(args.samples) | |
| vcf = VCF(args.vcf) | |
| test_vcf(vcf, sample_dict) | |
| count_dict = get_counts(vcf, sample_dict) | |
| make_barchart(count_dict) | |
| def get_args(): | |
| parser = ArgumentParser(description="make hist type plot for number of mutations per type") | |
| parser.add_argument( | |
| "--samples", | |
| help="file matching samples to types. no headers, two columns, no funny spaces anywhere", | |
| required=True) | |
| parser.add_argument( | |
| "--vcf", | |
| help="vcf containing same samples as --samples", | |
| required=True) | |
| return parser.parse_args() | |
| def test_vcf(vcf, sample_dict): | |
| for sample in vcf.samples: | |
| if sample not in sample_dict.keys(): | |
| sys.exit("Sample {} from vcf not found in file supplied with --samples".format(sample)) | |
| def get_counts(vcf, sample_dict): | |
| index_dict = {i: sample_dict[sample] for i, sample in enumerate(vcf.samples)} | |
| count_dict = {sample_type: 0 for sample_type in set(sample_dict.values())} | |
| for variant in vcf: | |
| for index, call in enumerate(variant.gt_types): | |
| if is_variant(call): | |
| count_dict[index_dict[index]] += 1 | |
| return count_dict | |
| def get_sample_dict(samples_file): | |
| return {line.split()[0]: line.strip().split()[1] for line in open(samples_file) if line} | |
| def is_variant(call): | |
| """Check if a variant position qualifies as a variant""" | |
| if call == 1 or call == 3: | |
| return True | |
| else: | |
| return False | |
| def make_barchart(count_dict): | |
| categories = range(len(count_dict.keys())) | |
| plt.bar(categories, list(count_dict.values()), align="center") | |
| plt.xticks(categories, list(count_dict.keys())) | |
| plt.show() | |
| if __name__ == '__main__': | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment