Skip to content

Instantly share code, notes, and snippets.

@clintval
Created July 1, 2019 20:45
Show Gist options
  • Select an option

  • Save clintval/6d1bb6bbba599bf04819e248ff2897b0 to your computer and use it in GitHub Desktop.

Select an option

Save clintval/6d1bb6bbba599bf04819e248ff2897b0 to your computer and use it in GitHub Desktop.
Scratch
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import vcf
from palettable.colorbrewer.qualitative import Paired_12
from scipy.stats import gaussian_kde
from vcf_figures.helpers import *
__all__ = [
'VAF_distribution']
def VAF_distribution(
vcf_file,
var_type=None,
minimum=0.0,
maximum=1.0,
colors=Paired_12.hex_colors,
resolution=int(5e5),
show_het_bands=True,
samples=None,
figsize=(13, 6),
height_ratios=(1, 1),
):
handle = vcf.Reader(open(vcf_file))
if samples is None:
samples = handle.samples
if not 0 <= minimum < 1 and not 0 < maximum <= 1:
raise ValueError('`minimum` and `maximum` bust be in [0, 1].')
elif minimum >= maximum:
raise ValueError('`minimum` must be less than `maximum`.')
elif var_type not in ('snp', 'sv', 'indel', 'unknown'):
raise ValueError('Call type must be `snp`/`sv`/`indel`/`unknown`.')
elif len(colors) < len(samples):
raise ValueError('Length of `colors` must be >= number of samples')
fig, (ax_density, ax_rug) = plt.subplots(
2, 1,
figsize=figsize,
gridspec_kw={'height_ratios': height_ratios, 'hspace': 0.075})
sample_distributions = defaultdict(list)
xs = np.linspace(minimum, maximum, resolution)
for record in handle:
for call in filter(lambda _: _.is_variant, record.samples):
for ALT, ALT_depth in zip(call.site.ALT, call.data.AD[1:]):
REF, ALT = map(lambda _: str(_).upper(), [call.site.REF, ALT])
if ALT == REF or ALT_depth is None:
continue
# Calculate AF as not all tags have accurate values in VCF.
VD, DP = call.data.VD, call.data.DP
if call.site.var_type == var_type:
sample_distributions[call.sample].append(VD / DP)
for i, sample in list(enumerate(samples)):
vector = np.array(sample_distributions[sample])
vector = vector[(vector >= minimum) & (vector <= maximum)]
if len(vector) > 1:
y = gaussian_kde(vector)(xs)
ys = y / max(y)
else:
ys = [0] * len(xs)
ax_density.plot(xs, ys, color=colors[i], label=sample)
ax_rug.axhline(i, color=str(0.9), zorder=0)
ax_rug.eventplot(vector, color=colors[i], lineoffsets=i)
ticks_off(ax_density, 'x')
ax_density.set(
xlim=(minimum, maximum),
xticklabels=(),
ylabel='Variant Allele Frequency')
ax_rug.set(
xlabel='VAF',
xlim=(minimum, maximum),
yticks=range(len(samples)),
yticklabels=samples)
if show_het_bands:
ax_density.fill_between(
x=(0.4, 0.6),
y1=1, y2=0,
color='#ffff00', alpha=0.075)
ax_rug.fill_between(
x=(0.4, 0.6),
y1=len(samples), y2=-1,
color='#ffff00', alpha=0.075)
ax_density.legend(loc=1, fontsize=8, frameon=False)
return fig, (ax_density, ax_rug)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment