Skip to content

Instantly share code, notes, and snippets.

@dwf
Created December 3, 2010 05:35
Show Gist options
  • Save dwf/726620 to your computer and use it in GitHub Desktop.
Save dwf/726620 to your computer and use it in GitHub Desktop.
Make some plots for our 2010 NAR Web Server paper.
"""Make some plots for our 2010 NAR Web Server paper."""
import sys
import numpy as np
import matplotlib as mpl
mpl.use('Agg') # Don't pop up figures, just render in memory
import matplotlib.pyplot as plt
# Average-precision tables, one dictionary per GO ontology, keyed by organism.
# Every entry is a 4x3 array: rows are the category-size bins, in the order
# '3-10', '11-30', '31-100', '101-300'; columns are the methods, in the order
# worst performing, GM-2008, best performing.

# Biological Process
bp = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([
        [0.23, 0.23, 0.27],
        [0.33, 0.37, 0.40],
        [0.33, 0.38, 0.40],
        [0.36, 0.42, 0.43],
    ]),
    # Mouse (Mus musculus)
    'mouse': np.array([
        [0.05, 0.05, 0.07],
        [0.06, 0.06, 0.08],
        [0.09, 0.09, 0.09],
        [0.13, 0.13, 0.13],
    ]),
    # Human
    'human': np.array([
        [0.04, 0.05, 0.08],
        [0.05, 0.07, 0.09],
        [0.07, 0.09, 0.10],
        [0.09, 0.11, 0.11],
    ]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([
        [0.07, 0.07, 0.09],
        [0.08, 0.08, 0.10],
        [0.10, 0.11, 0.11],
        [0.15, 0.17, 0.17],
    ]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([
        [0.05, 0.05, 0.06],
        [0.05, 0.05, 0.06],
        [0.04, 0.05, 0.05],
        [0.06, 0.08, 0.09],
    ]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([
        [0.18, 0.19, 0.22],
        [0.22, 0.27, 0.28],
        [0.21, 0.26, 0.27],
        [0.22, 0.30, 0.30],
    ]),
}

# Molecular Function
mf = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([
        [0.36, 0.36, 0.41],
        [0.50, 0.51, 0.53],
        [0.56, 0.60, 0.61],
        [0.63, 0.70, 0.70],
    ]),
    # Mouse (Mus musculus)
    'mouse': np.array([
        [0.22, 0.22, 0.24],
        [0.22, 0.22, 0.24],
        [0.20, 0.20, 0.21],
        [0.21, 0.21, 0.22],
    ]),
    # Human
    'human': np.array([
        [0.17, 0.21, 0.25],
        [0.14, 0.20, 0.22],
        [0.16, 0.21, 0.22],
        [0.15, 0.20, 0.23],
    ]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([
        [0.34, 0.37, 0.39],
        [0.43, 0.51, 0.51],
        [0.45, 0.50, 0.50],
        [0.58, 0.66, 0.66],
    ]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([
        [0.14, 0.18, 0.20],
        [0.03, 0.05, 0.17],
        [0.05, 0.10, 0.16],
        [0.03, 0.04, 0.16],
    ]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([
        [0.41, 0.49, 0.50],
        [0.51, 0.67, 0.67],
        [0.51, 0.67, 0.68],
        [0.57, 0.75, 0.76],
    ]),
}

# Cellular Component
cc = {
    # Yeast (Saccharomyces cerevisiae)
    'yeast': np.array([
        [0.48, 0.48, 0.55],
        [0.50, 0.56, 0.58],
        [0.46, 0.54, 0.57],
        [0.41, 0.50, 0.51],
    ]),
    # Mouse (Mus musculus)
    'mouse': np.array([
        [0.15, 0.15, 0.17],
        [0.13, 0.13, 0.15],
        [0.11, 0.11, 0.11],
        [0.15, 0.15, 0.15],
    ]),
    # Human
    'human': np.array([
        [0.09, 0.09, 0.13],
        [0.10, 0.12, 0.14],
        [0.13, 0.15, 0.15],
        [0.13, 0.17, 0.17],
    ]),
    # Fly (Drosophila melanogaster)
    'fly': np.array([
        [0.17, 0.17, 0.20],
        [0.25, 0.28, 0.28],
        [0.28, 0.32, 0.32],
        [0.36, 0.40, 0.42],
    ]),
    # Worm (Caenorhabditis elegans)
    'worm': np.array([
        [0.05, 0.08, 0.12],
        [0.05, 0.10, 0.10],
        [0.06, 0.09, 0.09],
        [0.02, 0.03, 0.07],
    ]),
    # Arabidopsis (Arabidopsis thaliana)
    'arabidopsis': np.array([
        [0.25, 0.28, 0.30],
        [0.36, 0.47, 0.47],
        [0.22, 0.32, 0.32],
        [0.37, 0.46, 0.54],
    ]),
}
def make_subplot(component, organism, title, maxval, barheight=0.6):
    """
    Create one of the three subplots of the figure showcasing GeneMANIA on
    a particular GO ontology relative to the best and worst performing
    methods tested.

    The bars are drawn into the current pyplot axes, so the caller is
    expected to have selected the target subplot beforehand.

    Parameters
    ----------
    component : dict
        Maps an organism name to an array holding 12 values, read in
        row-major order as 4 category-size groups of 3 methods each.
    organism : str
        Key to look up in `component`.
    title : str
        Title placed above the subplot.
    maxval : float
        Largest value to accommodate on the x-axis; determines the x-limit
        and the tick range.
    barheight : float, optional
        Height of each horizontal bar, in y-axis units.

    Returns
    -------
    The last three bar rectangles (one per method), for use as legend
    handles.
    """
    labels_classsize = ['3-10', '11-30', '31-100', '101-300']
    # Set the position array so that the bars are grouped into 4, with a
    # one-unit gap between consecutive groups
    positions = np.concatenate((np.arange(12, 9, -1),
                                np.arange(8, 5, -1),
                                np.arange(4, 1, -1),
                                np.arange(0, -3, -1)))
    # Colours we will use for each group of 3
    acolors = ['b', 'g', 'orange']
    data = component[organism].reshape(12)
    # Ask for centred bars explicitly rather than shifting the positions by
    # half a bar: the default alignment differs between Matplotlib versions,
    # and the manual shift puts the bars off-centre when the default is
    # already 'center'.
    barstuff = plt.barh(positions, data, height=barheight,
                        align='center',
                        color=acolors * 4,  # repeat the list of colors 4 times
                        linewidth=0,        # get rid of pesky outline
                        aa=False)           # Don't antialias
    # At the center of each group place a class size label
    plt.yticks(positions[1::3], labels_classsize, fontsize=8, rotation=90)
    # Label the y-axis
    plt.ylabel('Category size', fontsize=10)
    plt.ylim(-2 - barheight / 2., 12 + barheight / 2.)
    # Determine the right number of xticks: one every 0.1 from 0 to `high`.
    # Round the quotient before truncating so that floating point error in
    # high / 0.1 can neither drop a tick nor add a duplicate one.
    high = np.round(maxval, 1)
    n_ticks = int(round(high / 0.1)) + 1
    ticks = np.round(np.linspace(0, high, n_ticks), 1)
    # Slight fudge to get the xticks and yticks not to hit each other
    plt.xticks(ticks + 0.005, ticks, fontsize=8)
    # Hide the axes frame. A boolean is required here: the string 'off' is
    # truthy and would leave the frame switched on.
    plt.box(False)
    # Cool code for placing several labels to the left of the yticks,
    # sadly it wasn't needed.
    #for idx, pos in enumerate(np.mgrid[0.1:1:12j][1::3]-.05):
    #    plt.text(-0.4, pos, labels_classsize[idx], axes=None, rotation=90,
    #             horizontalalignment='center', verticalalignment='center',
    #             transform=plt.gca().transAxes, fontsize=8)
    plt.title(title, fontsize=10)
    plt.xlabel('Average precision', fontsize=10)
    # Leave a little bit of horizontal space after the largest bar on the right
    plt.xlim(0, maxval + 0.01)
    # Turn off the tick markers
    ax = plt.gca()
    for tick in list(ax.get_yticklines()) + list(ax.get_xticklines()):
        tick.set_markersize(0)
    # Return 3 of the rectangles for legend-making purposes
    return barstuff[-3:]
def generate_figure(org):
    """
    Generate a figure showcasing GeneMANIA's performance on all three GO
    trees for a variety of category sizes, for a specific organism.

    Parameters
    ----------
    org : str
        Organism name; must be a key of the `mf`, `cc` and `bp` tables.

    Returns
    -------
    The newly created figure. It is also left as pyplot's current figure,
    so a following `plt.savefig(...)` call writes it out.

    Raises
    ------
    KeyError
        If `org` is missing from any of the three tables.
    """
    # Look the data up before touching pyplot, so that an unknown organism
    # fails with a readable message and without leaving an empty figure
    # behind.
    for table in (mf, cc, bp):
        if org not in table:
            raise KeyError('no data for organism %r (known organisms: %s)'
                           % (org, ', '.join(sorted(table))))
    # Shared x-axis extent so the three subplots are directly comparable
    maxval = max(mf[org].max(), cc[org].max(), bp[org].max())
    fig = plt.figure()
    # Make room for everything
    fig.subplots_adjust(
        left=0.05,
        wspace=0.22,
        top=0.82,
        bottom=0.2,
        right=0.83
    )
    # Make the MF subplot
    plt.subplot(131)
    make_subplot(mf, org, 'Molecular Function', maxval)
    # Make the CC subplot
    plt.subplot(132)
    make_subplot(cc, org, 'Cellular Component', maxval)
    # Make the BP subplot, save the bars
    plt.subplot(133)
    bars = make_subplot(bp, org, 'Biological Process', maxval)
    # Make a figure-wide legend, size down the font
    plt.figlegend(bars, ('worst performing', 'GM-2008', 'best performing'),
                  prop=mpl.font_manager.FontProperties(size=10),
                  loc='center right')
    # Figure wide title
    plt.suptitle('Performance on %s' % org, fontsize=11)
    # Programmatically set the size of the figure. This eluded me for a while.
    fig.set_size_inches((11, 2.8))
    # No plt.show() here: this module selects the non-interactive Agg
    # backend, where show() cannot display anything, and under a GUI backend
    # it would tear the figure down before the caller gets to save it.
    return fig
# Command-line entry point: render one PNG per organism named on the command
# line, written to '<organism>.png' in the current directory.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Nothing to do; say so instead of exiting silently.
        sys.exit('usage: %s ORGANISM [ORGANISM ...]\nknown organisms: %s'
                 % (sys.argv[0], ', '.join(sorted(bp))))
    for arg in sys.argv[1:]:
        generate_figure(arg)
        plt.savefig(arg + '.png')
        # Release the figure once it is on disk so that figures do not pile
        # up in memory when several organisms are requested.
        plt.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment