Created
May 17, 2021 22:37
-
-
Save peterk87/8cb494bdff433e2afe51a9d44ecf42af to your computer and use it in GitHub Desktop.
Create coverage plots for multiple genomes from "samtools depth" tabular outputs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from pathlib import Path | |
import logging | |
import typer | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.gridspec as gridspec | |
import matplotlib.pyplot as plt | |
import pylab | |
from rich.logging import RichHandler | |
logger = logging.getLogger(__name__) | |
def read_depths(fpath: Path, name: str = None) -> pd.DataFrame: | |
df = pd.read_table(fpath, names=["reference", "pos", "depth"], header=None) | |
if name: | |
df["genome"] = name | |
else: | |
df["genome"] = fpath.stem | |
return df | |
def main( | |
depths_dir: Path, | |
depth_file_pattern: str = typer.Option( | |
"**/*.tsv", help="samtools depth file glob pattern" | |
), | |
width: float = typer.Option(16.0, help="Output figure width"), | |
height: float = typer.Option(10.0, help="Output figure height"), | |
xlabel: str = typer.Option("SARS-CoV-2 Wuhan-Hu-1 Genome Position"), | |
output: str = typer.Option( | |
"covplot.pdf", | |
help='Output figure file path. ".pdf" for PDF output, ".png" for PNG output', | |
), | |
verbose: bool = typer.Option(False, help="Verbose logs"), | |
): | |
from rich.traceback import install | |
install(show_locals=True) | |
logging.basicConfig( | |
format="%(message)s", | |
datefmt="[%Y-%m-%d %X]", | |
level=logging.INFO if not verbose else logging.DEBUG, | |
handlers=[RichHandler(rich_tracebacks=True, tracebacks_show_locals=True)], | |
) | |
depths_paths = Path(depths_dir) | |
df_depths = pd.concat( | |
[read_depths(p, p.stem) for p in depths_paths.glob(depth_file_pattern)] | |
) | |
df_depths.sort_values(["genome", "pos"], inplace=True, ascending=True) | |
samples = df_depths.genome.unique() | |
logger.info(f"Read depths for {len(samples)}") | |
df_depths_log = df_depths.copy() | |
df_depths_log.depth = np.log10(df_depths_log.depth) | |
df_depths_log_pivot = pd.pivot( | |
df_depths_log, index="genome", columns="pos", values="depth" | |
) | |
sns.set_context("talk", font_scale=0.8) | |
fig = plt.figure(figsize=(width, height)) | |
count = 0 | |
pylab.subplots_adjust(hspace=0.1) | |
n_samples = df_depths_log_pivot.index.size | |
gs = gridspec.GridSpec(n_samples * 4, 1) | |
for idx in samples: | |
row = df_depths_log_pivot.loc[idx, :] | |
ax1 = fig.add_subplot(gs[count : count + 3, 0]) | |
ax1.set_ylim(top=df_depths_log_pivot.values.max()) | |
ax1.set_xlim(left=1, right=row.size) | |
ax1.set_yticks([0, 1, 2, 3, 4]) | |
ax1.set_yticklabels([1, 10, 100, 1000, 10000]) | |
ax1.fill_between(np.arange(row.size), row, 0, color="steelblue") | |
ax1.set_xticks([]) | |
ax1.set_xticklabels([]) | |
ax1.set_ylabel(idx, rotation=0, labelpad=70, fontdict=dict(fontsize=16)) | |
for edge, spine in ax1.spines.items(): | |
spine.set_visible(False) | |
ax2 = fig.add_subplot(gs[count + 3, 0]) | |
arr = row.values.copy() | |
arr[arr > 1.0] = 2 | |
arr[(arr > -np.inf) & (arr <= 1.0)] = 1 | |
arr[arr == -np.inf] = 0 | |
arr.shape = (1, arr.shape[0]) | |
im = ax2.imshow(arr, aspect="auto", cmap=plt.cm.Greys_r) | |
ax2.set_yticks([]) | |
ax2.set_yticklabels([]) | |
for edge, spine in ax2.spines.items(): | |
spine.set_visible(False) | |
count += 4 | |
if count < n_samples * 4: | |
ax2.set_xticks([]) | |
ax2.set_xticklabels([]) | |
else: | |
ticks = list(range(0, row.values.size, 2000)) | |
ticks[0] = 1 | |
mticks = range(1000, row.values.size, 500) | |
ax2.set_xticks(ticks) | |
ax2.set_xticks(mticks, minor=True) | |
ax2.set_xlabel(xlabel, fontdict=dict(fontsize=16), labelpad=15) | |
plt.savefig(output) | |
logger.info(f'Saved covplot to "{output}"') | |
if __name__ == "__main__": | |
typer.run(main) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment