Skip to content

Instantly share code, notes, and snippets.

@wdecoster
Created January 29, 2020 09:04
Show Gist options
  • Save wdecoster/6c6cfe30a1af4d67b5ac64dee9294fba to your computer and use it in GitHub Desktop.
Save wdecoster/6c6cfe30a1af4d67b5ac64dee9294fba to your computer and use it in GitHub Desktop.
from cyvcf2 import VCF
import pandas as pd
import plotly.graph_objects as go
import plotly
import sys
def main():
    """Plot, per sample, how many variant calls carry each FILTER status.

    Reads the VCF whose path is the first command-line argument, tallies for
    every sample the FILTER label of each HET / HOM_ALT call, and writes a
    stacked bar chart to "Variants_per_sample.html" in the working directory.

    Exits with a usage message when no input path is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <input.vcf>".format(sys.argv[0]))

    vcf = VCF(sys.argv[1])
    samples = vcf.samples

    # One record per variant: the genotype type of every sample followed by
    # the variant's FILTER value.
    records = [list(v.gt_types) + [v.FILTER] for v in vcf]

    filter_counts = (
        pd.DataFrame.from_records(records, columns=samples + ['filter'])
        # Replace each call by its filter label (variant) or 0 (no variant).
        .apply(is_variant, axis=1, result_type='expand')
        # Per sample column, count how often each label occurs.
        .apply(pd.Series.value_counts)
        # 'expand' yields positional column labels; restore the sample names.
        .rename(columns=dict(enumerate(samples)))
    )

    # One bar series per filter label; the 0 row counts non-variant calls and
    # is therefore left out of the chart.
    trace = [
        go.Bar(name=label, x=filter_counts.columns, y=filter_counts.loc[label])
        for label in filter_counts.index
        if label != 0
    ]
    # Samples are on the x axis and counts on the y axis, so the count label
    # belongs to the y axis.
    layout = dict(
        yaxis=dict(title='Number of variants'),
        title="Number of variants per sample",
        barmode='stack',
    )
    with open("Variants_per_sample.html", 'w') as output:
        output.write(plot(trace, layout))
def is_variant(s):
    """Label every genotype call of one variant row with its filter status.

    The row `s` holds one genotype type per sample followed, in the last
    position, by the variant's filter value. Genotype types are encoded as
    0, 1, 2, 3 == HOM_REF, HET, UNKNOWN, HOM_ALT.

    Returns a list with one entry per genotype call: the filter label for
    HET / HOM_ALT calls (1 or 3) and 0 for every other call. The label is
    'PASS' when the filter value is None, otherwise the first of the
    ';'-separated filter terms.
    """
    raw_filter = s.iloc[-1]
    label = 'PASS' if raw_filter is None else raw_filter.split(';')[0]

    labelled = []
    for call in s.iloc[:-1]:
        if call in (1, 3):
            labelled.append(label)
        else:
            labelled.append(0)
    return labelled
def plot(trace, layout):
    """Render the given traces and layout with plotly's offline plotter.

    Returns whatever `plotly.offline.plot` produces for output_type="div"
    (main() writes it straight to an HTML file), with the plotly link hidden
    and include_plotlyjs set to 'cdn'.
    """
    figure = dict(data=trace, layout=layout)
    render_options = dict(
        output_type="div",
        show_link=False,
        include_plotlyjs='cdn',
    )
    return plotly.offline.plot(figure, **render_options)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment