Skip to content

Instantly share code, notes, and snippets.

@joferkington
Created July 11, 2015 20:18
Show Gist options
  • Save joferkington/4eb02c5f1c051772e590 to your computer and use it in GitHub Desktop.
Save joferkington/4eb02c5f1c051772e590 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.cbook import get_sample_data

try:
    from matplotlib.mlab import csv2rec
except ImportError:
    # csv2rec is no longer shipped by recent Matplotlib releases; keep the
    # name defined so the import block itself never aborts the script.
    csv2rec = None
# CSV file with one row per year and one column per major.  A copy has also
# been distributed with Matplotlib's sample data, in which case
# get_sample_data('percent_bachelors_degrees_women_usa.csv') can be used
# instead of a local path.
fname = 'percent_bachelors_degrees_women_usa.csv'

# Read the table with NumPy rather than the removed matplotlib.mlab.csv2rec.
# names=True takes the field names from the header row; spaces in a header
# become underscores and case_sensitive='lower' lower-cases the result, which
# yields the same 'art_and_performance'-style field names csv2rec produced.
# All columns (including the year) are read as floats.  Viewing the result as
# a recarray keeps attribute access such as gender_degree_data.year working.
gender_degree_data = np.genfromtxt(
    fname, delimiter=',', names=True, case_sensitive='lower'
).view(np.recarray)
# Line colors, handed out in this order to the plotted series.
color_sequence = [
    '#1f77b4', '#aec7e8',
    '#ff7f0e', '#ffbb78',
    '#2ca02c', '#98df8a',
    '#d62728', '#ff9896',
    '#9467bd', '#c5b0d5',
    '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2',
    '#7f7f7f', '#c7c7c7',
    '#bcbd22', '#dbdb8d',
    '#17becf', '#9edae5',
]
# A single axes on a 12 x 14 inch figure.
fig, ax = plt.subplots(figsize=(12, 14))

# Hide every spine so the data lines are not boxed in by the axes frame.
for spine in ax.spines.values():
    spine.set_visible(False)

# Provide tick lines across the plot to help your viewers trace along
# the axis ticks. Make sure that the lines are light and small so they
# don't obscure the primary data lines.
ax.grid(axis='y', ls='--', lw=0.5, color='black', alpha=0.3)
ax.set(yticks=range(10, 100, 10), xticks=range(1970, 2020, 10))

# Remove the tick marks; they are unnecessary with the tick lines we just
# plotted. Make sure your axis ticklabels are large enough to be easily read.
# You don't want your viewers squinting to read your plot.
# The switches must be real booleans: the strings 'off'/'on' are both truthy,
# so passing them leaves the tick marks switched on.
ax.tick_params(axis='both', which='both', labelsize=14,
               bottom=False, top=False, left=False, right=False,
               labelbottom=True, labelleft=True)
# Series to draw, listed from the highest to the lowest percentage in the
# final year.  Each entry doubles as the text label placed next to its line,
# which is why one of them carries an embedded line break.
majors = [
    'Health Professions',
    'Public Administration',
    'Education',
    'Psychology',
    'Foreign Languages',
    'English',
    'Communications\nand Journalism',
    'Art and Performance',
    'Biology',
    'Agriculture',
    'Social Sciences and History',
    'Business',
    'Math and Statistics',
    'Architecture',
    'Physical Sciences',
    'Computer Science',
    'Engineering',
]

# Vertical nudges (in data units) applied to individual labels so that
# neighbouring labels do not overlap.  Majors not listed here get no nudge.
y_offsets = {
    'Foreign Languages': 0.5,
    'English': -0.5,
    'Communications\nand Journalism': 2.5,
    'Art and Performance': -0.25,
    'Agriculture': 1.25,
    'Social Sciences and History': 0.25,
    'Business': -0.75,
    'Math and Statistics': 0.75,
    'Architecture': -0.75,
    'Computer Science': 0.75,
    'Engineering': -0.25,
}
# Draw one line per major and label it at its right-hand end.  Colors are
# taken from color_sequence in order.
for color, column in zip(color_sequence, majors):
    # Translate the display label into the field name used by the loaded
    # data: line breaks and spaces become underscores, all lower-case.
    column_rec_name = column.replace('\n', '_').replace(' ', '_').lower()
    series = gender_degree_data[column_rec_name]

    # Plot each line separately with its own color.
    ax.plot(gender_degree_data['year'], series, lw=2.5, color=color)

    # Add a text label to the right end of every line, starting from the
    # line's last value.  Some labels would overlap there, so those majors
    # get a specific y offset; all others are left where they are.
    y_pos = series[-1] + y_offsets.get(column, 0)

    # Place the labels at the y-position, but 10 points to the right of the
    # axes border. Again, make sure that all labels are large enough to be
    # easily read by the viewer.
    ax.annotate(column, xy=(1, y_pos), xytext=(10, 0), fontsize=14,
                color=color, xycoords=('axes fraction', 'data'),
                textcoords='offset points', va='center')
# The title should span the whole plot without wrapping onto a second line.
# A sufficiently descriptive title also makes axis labels unnecessary here.
title_text = ("Percentage of Bachelor's degrees conferred to women in "
              "the U.S.A. by major (1970-2011)\n")
fig.suptitle(title_text, fontsize=18, y=0.95)

# Pad the data by 5% vertically, not at all horizontally, and then pin the
# lower y-limit to 0.
ax.margins(x=0, y=0.05)
ax.set_ylim(bottom=0)

# Shrink the axes towards the left so the line labels fit on the right.
fig.subplots_adjust(left=0.05, right=0.75, bottom=0.05)

# Write the figure to disk as a PNG; changing the file extension selects
# another format such as PDF or JPEG.  Then display it.
output_name = 'percent-bachelors-degrees-women-usa.png'
plt.savefig(output_name, bbox_inches='tight')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment