Created
July 1, 2019 20:45
-
-
Save clintval/6d1bb6bbba599bf04819e248ff2897b0 to your computer and use it in GitHub Desktop.
Scratch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import defaultdict | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import vcf | |
| from palettable.colorbrewer.qualitative import Paired_12 | |
| from scipy.stats import gaussian_kde | |
| from vcf_figures.helpers import * | |
| __all__ = [ | |
| 'VAF_distribution'] | |
| def VAF_distribution( | |
| vcf_file, | |
| var_type=None, | |
| minimum=0.0, | |
| maximum=1.0, | |
| colors=Paired_12.hex_colors, | |
| resolution=int(5e5), | |
| show_het_bands=True, | |
| samples=None, | |
| figsize=(13, 6), | |
| height_ratios=(1, 1), | |
| ): | |
| handle = vcf.Reader(open(vcf_file)) | |
| if samples is None: | |
| samples = handle.samples | |
| if not 0 <= minimum < 1 and not 0 < maximum <= 1: | |
| raise ValueError('`minimum` and `maximum` bust be in [0, 1].') | |
| elif minimum >= maximum: | |
| raise ValueError('`minimum` must be less than `maximum`.') | |
| elif var_type not in ('snp', 'sv', 'indel', 'unknown'): | |
| raise ValueError('Call type must be `snp`/`sv`/`indel`/`unknown`.') | |
| elif len(colors) < len(samples): | |
| raise ValueError('Length of `colors` must be >= number of samples') | |
| fig, (ax_density, ax_rug) = plt.subplots( | |
| 2, 1, | |
| figsize=figsize, | |
| gridspec_kw={'height_ratios': height_ratios, 'hspace': 0.075}) | |
| sample_distributions = defaultdict(list) | |
| xs = np.linspace(minimum, maximum, resolution) | |
| for record in handle: | |
| for call in filter(lambda _: _.is_variant, record.samples): | |
| for ALT, ALT_depth in zip(call.site.ALT, call.data.AD[1:]): | |
| REF, ALT = map(lambda _: str(_).upper(), [call.site.REF, ALT]) | |
| if ALT == REF or ALT_depth is None: | |
| continue | |
| # Calculate AF as not all tags have accurate values in VCF. | |
| VD, DP = call.data.VD, call.data.DP | |
| if call.site.var_type == var_type: | |
| sample_distributions[call.sample].append(VD / DP) | |
| for i, sample in list(enumerate(samples)): | |
| vector = np.array(sample_distributions[sample]) | |
| vector = vector[(vector >= minimum) & (vector <= maximum)] | |
| if len(vector) > 1: | |
| y = gaussian_kde(vector)(xs) | |
| ys = y / max(y) | |
| else: | |
| ys = [0] * len(xs) | |
| ax_density.plot(xs, ys, color=colors[i], label=sample) | |
| ax_rug.axhline(i, color=str(0.9), zorder=0) | |
| ax_rug.eventplot(vector, color=colors[i], lineoffsets=i) | |
| ticks_off(ax_density, 'x') | |
| ax_density.set( | |
| xlim=(minimum, maximum), | |
| xticklabels=(), | |
| ylabel='Variant Allele Frequency') | |
| ax_rug.set( | |
| xlabel='VAF', | |
| xlim=(minimum, maximum), | |
| yticks=range(len(samples)), | |
| yticklabels=samples) | |
| if show_het_bands: | |
| ax_density.fill_between( | |
| x=(0.4, 0.6), | |
| y1=1, y2=0, | |
| color='#ffff00', alpha=0.075) | |
| ax_rug.fill_between( | |
| x=(0.4, 0.6), | |
| y1=len(samples), y2=-1, | |
| color='#ffff00', alpha=0.075) | |
| ax_density.legend(loc=1, fontsize=8, frameon=False) | |
| return fig, (ax_density, ax_rug) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment