Skip to content

Instantly share code, notes, and snippets.

@TomAugspurger
Last active August 29, 2015 13:56
Show Gist options
  • Select an option

  • Save TomAugspurger/9156696 to your computer and use it in GitHub Desktop.

Select an option

Save TomAugspurger/9156696 to your computer and use it in GitHub Desktop.
missingmap
from itertools import cycle
try:
    from itertools import izip
except ImportError:  # Python 3: itertools.izip is gone, builtin zip is already lazy
    izip = zip

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import collections as collections
from matplotlib.patches import Rectangle
def missmap(df, ax=None, colors=None, aspect=4, sort='descending',
            title=None, **kwargs):
    """
    Plot the missing values of df.

    Every column of ``df`` is drawn as a vertical stack of ``len(df)``
    rectangles, one per row, coloured by whether that cell is null.

    Parameters
    ----------
    df : pandas DataFrame
    ax : matplotlib axes
        if None then a new figure and axes will be created
    colors : dict
        dict with {True: c1, False: c2} where the values are
        matplotlib colors; ``True`` is looked up for null cells and
        ``False`` for non-null cells. Defaults to
        ``{True: '#EAF205', False: 'k'}``.
    aspect : int
        the width, in data units, given to each rectangle (the height
        is always 1).
    sort : one of {'descending', 'ascending', None}
        order the columns by their count of null values. Any other
        value leaves the column order of ``df`` unchanged.
    title : str
        axes title; skipped when empty or None.
    kwargs : dict
        forwarded to every ``Rectangle`` and to each column's
        ``PatchCollection``.

    Returns
    -------
    ax : matplotlib axes
    """
    if ax is None:
        _, ax = plt.subplots()

    # Boolean frame: True where the corresponding cell of df is null.
    dfn = pd.isnull(df)
    if sort in ('ascending', 'descending'):
        # sort_values returns a new Series; the in-place Series.sort that
        # used to be called here no longer exists in pandas.
        counts = dfn.sum().sort_values(ascending=(sort == 'ascending'))
        dfn = dfn[counts.index]

    ny = len(df)
    nx = len(df.columns)

    if colors is None:
        colors = {True: '#EAF205', False: 'k'}

    # NOTE(review): columns sit one data unit apart while each rectangle is
    # `aspect` units wide and the x-limits are (0, nx), so for aspect > 1 a
    # column overlaps its right-hand neighbours. Geometry is kept as it was;
    # confirm whether columns were meant to be spaced `aspect` apart.
    for x, col in enumerate(dfn.columns):
        # Hand matplotlib a plain list so colour handling does not depend
        # on how it treats pandas objects.
        cell_colors = dfn[col].map(colors).tolist()
        # (x, y) is the lower-left corner of the patch for row y.
        rects = [Rectangle((x, y), aspect, 1, **kwargs) for y in range(ny)]
        p_coll = collections.PatchCollection(rects, color=cell_colors,
                                             edgecolor=cell_colors, **kwargs)
        ax.add_collection(p_coll, autolim=False)

    # post plot aesthetics
    ax.set_xlim(0, nx)
    ax.set_ylim(0, ny)
    ax.set_xticks(.5 + np.arange(nx))  # half a unit right of each column's left edge
    ax.set_xticklabels(dfn.columns)
    for label in ax.get_xticklabels():
        label.set_rotation(90)

    # remove tick lines; real booleans are required here because the
    # string 'off' is truthy and is not treated as False by newer matplotlib.
    ax.tick_params(axis='both', which='both', bottom=False, left=False,
                   labelleft=False)
    ax.grid(False)

    if title:
        ax.set_title(title)
    return ax
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment