# Plot a horizontal bar plot and the lower triangle of a heatmap aligned at the base of the bars.
#!/usr/bin/env python
"""
Kamil Slowikowski
April 4, 2014

This module has a function for creating a horizontal bar plot with an adjacent
heatmap rotated 45 degrees to show the lower triangle of a correlation
matrix comparing all pairs of bars.
"""
import matplotlib as mp
import numpy as np
import pandas as pd
import pylab as pl
import scipy.cluster.hierarchy as sch
import string
def main():
    """Render a demo figure from random data to ``barplot_heatmap.png``.

    Ten distinct uppercase letters are drawn as bar labels. Each bar gets a
    random height in [0, 5), and a random 5 x 10 matrix (one column per bar)
    supplies the data that ``barplot`` forwards to the heatmap triangle.
    """
    mp.rc("font", family="serif")

    n_observations, n_bars = 5, 10

    # Sample without replacement so every bar has a unique label.
    alphabet = list(string.ascii_uppercase)
    bar_names = np.random.choice(alphabet, n_bars, replace=False)

    bar_heights = np.random.random(n_bars) * 5.0
    series = pd.Series(bar_heights, index=bar_names)

    observations = np.random.random((n_observations, n_bars))
    matrix = pd.DataFrame(observations, columns=bar_names)

    barplot(series, matrix, "barplot_heatmap.png")
def barplot(series, matrix, outfile,
            figsize=(6, 6), fontsize=10, title=None):
    """Create a bar plot and place the lower triangle of a heatmap directly
    adjacent so that the bases of the bars line up with the diagonal of the
    heatmap.

    Parameters
    ----------
    series : pandas.Series
        The bar heights and labels. Its index must contain the column
        labels of ``matrix``.
    matrix : pandas.DataFrame
        A matrix where each column corresponds to a bar in the bar plot.
    outfile : str
        Full path to the output file.
    figsize : (width, height)
        Figure size, passed to ``pl.figure``.
    fontsize : float
        Font size of the y-axis tick labels.
    title : str
        Title passed to the bar plot.
    """
    # Create a figure.
    fig = pl.figure(figsize=figsize)

    # Axes for the heatmap triangle.
    ax = fig.add_subplot(121, frame_on=False, aspect=2.0)

    # Draw the heatmap triangle and get the order of the clustered samples.
    _, order = heatmap_triangle(matrix, ax)

    # Adjust spacing between the heatmap triangle and the barplot.
    fig.subplots_adjust(wspace=-0.12, hspace=0, left=0, right=1)

    # Axes for the barplot.
    ax = fig.add_subplot(122, frame_on=False)

    # Put gridlines beneath the bars.
    ax.set_axisbelow(True)

    # Order the bars by the clustering. ``order`` holds labels, so use
    # label-based indexing (``.ix`` no longer exists in current pandas).
    series = series.loc[order]

    ax = series.plot(ax=ax, kind='barh', title=title, linewidth=0,
                     grid=False, color='grey')

    # Set the font size for the y-axis labels.
    ax.tick_params(axis='y', which='major', labelsize=fontsize)

    # Grid lines. The on/off flag is passed positionally because its keyword
    # name differs between matplotlib releases (``b`` vs ``visible``).
    ax.grid(True, which='major', axis='both', alpha=0.5)

    # Tick marks for the x-axis.
    ax.set_xticks(np.arange(0, round(series.max() + 1)))

    # Put the y-axis marks on the right.
    ax.yaxis.tick_right()

    # Adjust tick length.
    ax.tick_params(length=0, axis='x')
    ax.tick_params(length=0, axis='y')

    # NOTE(review): a "Labels." step followed here, but its statements are
    # not present in this file, so the axis labels are left unset.

    # Save.
    fig.savefig(outfile, bbox_inches='tight')
def heatmap_triangle(dataframe, axes):
    """Create a heatmap of the lower triangle of a pairwise correlation
    matrix of all pairs of columns in the given dataframe. The heatmap
    triangle is rotated 45 degrees clockwise and drawn on the given axes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data whose columns are correlated pairwise (Pearson).
    axes : matplotlib.axes.Axes
        Axes on which the triangle is drawn.

    Returns
    -------
    caxes
        The object returned by ``axes.pcolormesh``.
    order : pandas.Index
        The column labels in hierarchical-clustering order.
    """
    N = dataframe.shape[1]
    D = dataframe.corr(method='pearson')

    # UPGMA clustering, but other methods are also available.
    Z = sch.linkage(D, method='average')
    R = sch.dendrogram(Z, no_plot=True)
    cluster_order = R['leaves']
    # The dendrogram leaves are integer positions, so index positionally.
    D = D.iloc[cluster_order, cluster_order]

    # Keep the strictly lower triangle; k=-1 also zeroes the diagonal, so the
    # self-correlations are drawn in the neutral (white) colour.
    lower = np.tril(D.values, k=-1)
    # Mask the upper triangle by position rather than by value, so a genuine
    # zero correlation in the lower triangle is still drawn.
    upper = np.triu(np.ones((N, N), dtype=bool), k=1)
    C = np.ma.masked_array(lower, mask=upper)

    # Transformation matrix for rotating the heatmap.
    A = np.array([(y, x) for x in range(N, -1, -1) for y in range(N + 1)])
    t = np.array([[0.5, 1], [0.5, -1]])
    A = np.dot(A, t)

    # -1.0 correlation is blue, 0.0 is white, 1.0 is red.
    # NOTE(review): the colormap expression was truncated in this file;
    # RdBu_r is chosen to match the description above -- confirm.
    cmap = pl.cm.RdBu_r
    norm = mp.colors.BoundaryNorm(np.linspace(-1, 1, 14), cmap.N)

    # This MUST be before the call to pcolormesh() to align properly.
    axes.set_xticks([])
    axes.set_yticks([])

    # Plot the correlation heatmap triangle.
    X = A[:, 1].reshape(N + 1, N + 1)
    Y = A[:, 0].reshape(N + 1, N + 1)
    caxes = axes.pcolormesh(X, Y, np.flipud(C), cmap=cmap, norm=norm)

    # Reset the x limit. After the rotation the unmasked cells span
    # x in [-N, 1], so this crops away the masked half of the square.
    axes.set_xlim(right=1)

    # Add a colorbar below the heatmap triangle.
    # NOTE(review): the original call had one more trailing argument that is
    # not recoverable from this file.
    cb = pl.colorbar(caxes, ax=axes, orientation='horizontal', shrink=0.5825,
                     fraction=0.05, pad=-0.035, ticks=np.linspace(-1, 1, 5))
    # Raw string: same text as before, without invalid escape sequences.
    cb.set_label(r"$\mathrm{Pearson's}\ r$")

    return caxes, D.index
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()