Created
December 3, 2010 05:35
-
-
Save dwf/726620 to your computer and use it in GitHub Desktop.
Make some plots for our 2010 NAR Web Server paper.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Make some plots for our 2010 NAR Web Server paper.""" | |
| import sys | |
| import numpy as np | |
| import matplotlib as mpl | |
| mpl.use('Agg') # Don't pop up figures, just render in memory | |
| import matplotlib.pyplot as plt | |
# Average-precision tables, one dictionary per GO ontology, keyed by organism.
# Every entry is a 4x3 array.  make_subplot() flattens it row by row into four
# category-size groups ('3-10', '11-30', '31-100', '101-300') of three bars
# each (worst performing method, GM-2008, best performing method).

# Biological Process
bp = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([[0.23, 0.23, 0.27],
                       [0.33, 0.37, 0.40],
                       [0.33, 0.38, 0.40],
                       [0.36, 0.42, 0.43]]),
    # Mouse (Mus musculus)
    'mouse': np.array([[0.05, 0.05, 0.07],
                       [0.06, 0.06, 0.08],
                       [0.09, 0.09, 0.09],
                       [0.13, 0.13, 0.13]]),
    # Human
    'human': np.array([[0.04, 0.05, 0.08],
                       [0.05, 0.07, 0.09],
                       [0.07, 0.09, 0.10],
                       [0.09, 0.11, 0.11]]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([[0.07, 0.07, 0.09],
                     [0.08, 0.08, 0.10],
                     [0.10, 0.11, 0.11],
                     [0.15, 0.17, 0.17]]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([[0.05, 0.05, 0.06],
                      [0.05, 0.05, 0.06],
                      [0.04, 0.05, 0.05],
                      [0.06, 0.08, 0.09]]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([[0.18, 0.19, 0.22],
                             [0.22, 0.27, 0.28],
                             [0.21, 0.26, 0.27],
                             [0.22, 0.30, 0.30]]),
}

# Molecular Function
mf = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([[0.36, 0.36, 0.41],
                       [0.50, 0.51, 0.53],
                       [0.56, 0.60, 0.61],
                       [0.63, 0.70, 0.70]]),
    # Mouse (Mus musculus)
    'mouse': np.array([[0.22, 0.22, 0.24],
                       [0.22, 0.22, 0.24],
                       [0.20, 0.20, 0.21],
                       [0.21, 0.21, 0.22]]),
    # Human
    'human': np.array([[0.17, 0.21, 0.25],
                       [0.14, 0.20, 0.22],
                       [0.16, 0.21, 0.22],
                       [0.15, 0.20, 0.23]]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([[0.34, 0.37, 0.39],
                     [0.43, 0.51, 0.51],
                     [0.45, 0.50, 0.50],
                     [0.58, 0.66, 0.66]]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([[0.14, 0.18, 0.20],
                      [0.03, 0.05, 0.17],
                      [0.05, 0.10, 0.16],
                      [0.03, 0.04, 0.16]]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([[0.41, 0.49, 0.50],
                             [0.51, 0.67, 0.67],
                             [0.51, 0.67, 0.68],
                             [0.57, 0.75, 0.76]]),
}

# Cellular Component
cc = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([[0.48, 0.48, 0.55],
                       [0.50, 0.56, 0.58],
                       [0.46, 0.54, 0.57],
                       [0.41, 0.50, 0.51]]),
    # Mouse (Mus musculus)
    'mouse': np.array([[0.15, 0.15, 0.17],
                       [0.13, 0.13, 0.15],
                       [0.11, 0.11, 0.11],
                       [0.15, 0.15, 0.15]]),
    # Human
    'human': np.array([[0.09, 0.09, 0.13],
                       [0.10, 0.12, 0.14],
                       [0.13, 0.15, 0.15],
                       [0.13, 0.17, 0.17]]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([[0.17, 0.17, 0.20],
                     [0.25, 0.28, 0.28],
                     [0.28, 0.32, 0.32],
                     [0.36, 0.40, 0.42]]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([[0.05, 0.08, 0.12],
                      [0.05, 0.10, 0.10],
                      [0.06, 0.09, 0.09],
                      [0.02, 0.03, 0.07]]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([[0.25, 0.28, 0.30],
                             [0.36, 0.47, 0.47],
                             [0.22, 0.32, 0.32],
                             [0.37, 0.46, 0.54]]),
}
def make_subplot(component, organism, title, maxval, barheight=0.6):
    """
    Create one of the three subplots of the figure showcasing GeneMANIA on
    a particular GO ontology relative to the best and worst performing
    methods tested.

    Everything is drawn on the current pyplot axes, so the caller selects
    the target subplot before calling this function.

    Parameters
    ----------
    component : mapping
        Maps an organism name to an array holding 12 values.  The array is
        flattened and drawn as four category-size groups of three bars
        (worst performing, GM-2008, best performing).
    organism : hashable
        Key used to look up the scores in `component`.
    title : str
        Title placed above the subplot.
    maxval : float
        Largest score to accommodate; sets the x-axis limit and the range
        of x tick marks.
    barheight : float, optional
        Height of each bar in y-axis units.

    Returns
    -------
    sequence
        The last three bar rectangles drawn (one per method), intended for
        building a legend.
    """
    labels_classsize = ['3-10', '11-30', '31-100', '101-300']
    # Set the position array so that the bars are grouped into 4, leaving
    # one empty slot between consecutive groups.
    positions = np.concatenate((np.arange(12, 9, -1),
                                np.arange(8, 5, -1),
                                np.arange(4, 1, -1),
                                np.arange(0, -3, -1)))
    # Colours we will use for each group of 3
    acolors = ['b', 'g', 'orange']
    data = component[organism].reshape(12)
    # Ask for centred bars explicitly instead of offsetting the positions by
    # half a bar: the offset is only correct when the default alignment is
    # 'edge', and that default differs between matplotlib versions.
    barstuff = plt.barh(positions, data, height=barheight, align='center',
                        color=acolors * 4,  # repeat the colours per group
                        linewidth=0,        # get rid of pesky outline
                        aa=False)           # don't antialias
    # At the center of each group place a class size label
    plt.yticks(positions[1::3], labels_classsize, fontsize=8, rotation=90)
    plt.ylabel('Category size', fontsize=10)
    plt.ylim(-2 - barheight / 2., 12 + barheight / 2.)
    # One x tick every 0.1 from 0 up to maxval rounded to one decimal.  The
    # count is rounded rather than truncated so that floating-point error in
    # high / 0.1 can neither drop a tick nor produce duplicated ones.
    high = np.round(maxval, 1)
    n_ticks = int(np.round(high / 0.1)) + 1
    ticks = np.round(np.linspace(0, high, n_ticks), 1)
    # Slight fudge to get the xticks and yticks not to hit each other
    plt.xticks(ticks + 0.005, ['%.1f' % tick for tick in ticks], fontsize=8)
    # Hide the axes frame; a boolean is required here because a string such
    # as 'off' is truthy and would leave the frame switched on.
    plt.box(False)
    plt.title(title, fontsize=10)
    plt.xlabel('Average precision', fontsize=10)
    # Leave a little bit of horizontal space after the largest bar on the right
    plt.xlim(0, maxval + 0.01)
    # Turn off the tick markers
    axes = plt.gca()
    for tick in list(axes.get_yticklines()) + list(axes.get_xticklines()):
        tick.set_markersize(0)
    # Return 3 of the rectangles for legend-making purposes
    return barstuff[-3:]
def generate_figure(org):
    """
    Build the three-panel GeneMANIA performance figure for one organism.

    One panel is drawn per GO tree (Molecular Function, Cellular Component,
    Biological Process, left to right), all sharing the same x-axis extent,
    followed by a figure-wide legend and title.  `org` is the key looked up
    in the module-level `mf`, `cc` and `bp` tables.
    """
    figure = plt.figure()
    # Common upper bound so the three panels are directly comparable
    peak = max(scores[org].max() for scores in (mf, cc, bp))
    # Make room for everything (legend on the right, titles on top)
    figure.subplots_adjust(left=0.05, wspace=0.22, top=0.82, bottom=0.2,
                           right=0.83)
    panels = (
        (mf, 'Molecular Function'),
        (cc, 'Cellular Component'),
        (bp, 'Biological Process'),
    )
    legend_bars = None
    for column, (scores, heading) in enumerate(panels, 1):
        plt.subplot(1, 3, column)
        # The bars of the last panel drawn are the ones kept for the legend
        legend_bars = make_subplot(scores, org, heading, peak)
    # Make a figure-wide legend, size down the font
    method_names = ('worst performing', 'GM-2008', 'best performing')
    legend_font = mpl.font_manager.FontProperties(size=10)
    plt.figlegend(legend_bars, method_names, prop=legend_font,
                  loc='center right')
    # Figure wide title
    plt.suptitle('Performance on %s' % org, fontsize=11)
    # Programmatically set the size of the figure, in inches
    figure.set_size_inches((11, 2.8))
    plt.show()
if __name__ == "__main__":
    # Each command-line argument names an organism; render its figure and
    # write it to '<organism>.png' in the current directory.
    for arg in sys.argv[1:]:
        generate_figure(arg)
        plt.savefig(arg + '.png')
        # pyplot keeps every figure alive until it is closed explicitly;
        # release this one before moving on to the next organism.
        plt.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment