Last active
February 9, 2016 04:11
-
-
Save matsen/a28a737269552a2a022a to your computer and use it in GitHub Desktop.
A simple script to render a molecular sequence file as a PNG, with a specified number of pixels per symbol.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Turn a sequence file into a PNG, with a specified number of pixels per symbol. | |
Gaps are shown in light gray. | |
Unknown symbols, including N and X, are shown in pink. | |
Requires matplotlib, seqmagick, and their dependencies. | |
""" | |
import argparse | |
import matplotlib | |
matplotlib.use('agg') | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import os | |
import seqmagick | |
from Bio import SeqIO | |
from matplotlib import colors | |
from seqmagick.fileformat import from_filename | |
aa_symbols = list('-ACDEFGHIKLMNPQRSTVWY') | |
old_paper = '#F8F4DA' | |
# Default colors from | |
# http://www.bioinformatics.org/sms/multi_align.html | |
aa_colors = [ | |
old_paper, | |
'#999999', | |
'#FFFF00', | |
'#33CCFF', | |
'#33CCFF', | |
'#FF9900', | |
'#999999', | |
'#CC0000', | |
'#FF6666', | |
'#CC0000', | |
'#FF6666', | |
'#3366FF', | |
'#3366FF', | |
'#CC33CC', | |
'#3366FF', | |
'#CC0000', | |
'#999999', | |
'#3366FF', | |
'#FF6666', | |
'#FF9900', | |
'#FF9900', | |
'pink', | |
'white' | |
] | |
nt_symbols = list('-AGCT') | |
nt_colors = [old_paper, 'red', 'yellow', 'blue', 'green', 'pink', 'white'] | |
hipster_colors = \ | |
[old_paper, '#e7298a', '#d95f02','#7570b3', '#1b9e77', 'pink', 'white'] | |
def array_of_sequence_file(path, symbol_d): | |
unknown_symbol_index = max(symbol_d.values())+1 # Unknown is pink. | |
aln_ll = [[symbol_d.get(b, unknown_symbol_index) for b in list(str(s.seq))] | |
for s in SeqIO.parse(path, from_filename(path))] | |
m = np.zeros((len(aln_ll), max(len(r) for r in aln_ll)), dtype=np.int) | |
m[:,:] = unknown_symbol_index+1 # Default is white. | |
for i, r in enumerate(aln_ll): | |
m[i, 0:len(r)] = r | |
return m | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser(description="Make a PNG of a sequence file.") | |
parser.add_argument('in_path', type=str, | |
help="Sequence file name.") | |
parser.add_argument( | |
'-o', type=str, default=None, | |
help="Output file name. Default is the same name, with .png") | |
parser.add_argument( | |
'-p', type=int, default=3, help="Pixels on a side per symbol.") | |
parser.add_argument( | |
'--aa', action='store_true', default=False, help="Amino acid sequences.") | |
parser.add_argument( | |
'--hipster', action='store_true', default=False, | |
help="Use hipster colors for nucleotides.") | |
args = parser.parse_args() | |
pixels_per_symbol = args.p | |
nominal_dpi = 100 | |
factor = float(pixels_per_symbol) / nominal_dpi | |
if args.o is None: | |
base, _ = os.path.splitext(args.in_path) | |
args.o = base + '.png' | |
if args.aa: | |
symbols, symbol_colors = aa_symbols, aa_colors | |
elif args.hipster: | |
symbols, symbol_colors = nt_symbols, hipster_colors | |
else: | |
symbols, symbol_colors = nt_symbols, nt_colors | |
symbol_d = {c: np.int(n) for n, c in enumerate(symbols)} | |
symbol_d['.'] = symbol_d['-'] | |
if not args.aa: | |
symbol_d['U'] = symbol_d['T'] | |
for s in symbols: | |
symbol_d[s.lower()] = symbol_d[s] | |
# make a color map of fixed colors | |
# http://stackoverflow.com/questions/9707676/defining-a-discrete-colormap-for-imshow-in-matplotlib | |
cmap = colors.ListedColormap(symbol_colors) | |
bounds = np.array(range(len(symbol_colors)+1))-0.5 | |
norm = colors.BoundaryNorm(bounds, cmap.N) | |
m = array_of_sequence_file(args.in_path, symbol_d) | |
fig, ax = plt.subplots( | |
figsize=(m.shape[1] * factor, m.shape[0] * factor), dpi=nominal_dpi) | |
ax.imshow(m, aspect='equal', interpolation='nearest', origin='upper', | |
cmap=cmap, norm=norm) | |
for x in 'top bottom left right'.split(): | |
ax.spines[x].set_visible(False) | |
fig.savefig(args.o, dpi=nominal_dpi, bbox_inches='tight') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment