Skip to content

Instantly share code, notes, and snippets.

@cjgunase
Forked from slowkow/barplot_heatmap.png
Created January 2, 2018 16:41
Show Gist options
  • Save cjgunase/983bd1a70a84d49e02f56a3ca7e66985 to your computer and use it in GitHub Desktop.
Save cjgunase/983bd1a70a84d49e02f56a3ca7e66985 to your computer and use it in GitHub Desktop.
Plot a horizontal bar plot and the lower triangle of a heatmap aligned at the base of the bars
#!/usr/bin/env python
"""
barplot_heatmap.py
Kamil Slowikowski
April 4, 2014
This module has a function for creating a horizontal bar plot with an adjacent
heatmap rotated 45 degrees to show the lower triangle of a correlation
matrix comparing all pairs of bars.
References:
http://stackoverflow.com/questions/12848581/is-there-a-way-to-rotate-a-matplotlib-plot-by-45-degrees
http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
"""
import matplotlib as mp
import numpy as np
import pandas as pd
import pylab as pl
import scipy.cluster.hierarchy as sch
import string
def main():
    """Render a demo figure from random data and save it as a PNG.

    Ten distinct uppercase letters are drawn as bar labels. Each bar gets a
    random height in [0, 5), and a random 5 x 10 matrix (one column per bar)
    supplies the data for the correlation heatmap. The result is written to
    ``barplot_heatmap.png`` in the current working directory.
    """
    mp.rc("font", family="serif")

    n_samples, n_bars = 5, 10

    # Sample without replacement so that every bar has a unique label.
    alphabet = list(string.ascii_uppercase)
    bar_names = np.random.choice(alphabet, n_bars, replace=False)

    heights = np.random.random(n_bars) * 5.0
    bars = pd.Series(heights, index=bar_names)

    observations = np.random.random((n_samples, n_bars))
    table = pd.DataFrame(observations, columns=bar_names)

    barplot(bars, table, "barplot_heatmap.png")
def barplot(series, matrix, outfile,
            figsize=(6, 6), fontsize=10, title=None):
    """Create a bar plot and place the lower triangle of a heatmap directly
    adjacent so that the bases of the bars line up with the diagonal of the
    heatmap.

    Parameters
    ----------
    series : pandas.Series
        The bar heights and labels.
    matrix : pandas.DataFrame
        A matrix where each column corresponds to a bar in the bar plot.
        Its column labels must match the index labels of ``series``.
    outfile : str
        Full path to the output file.
    figsize : (width, height)
        Figure size passed to ``pl.figure``.
    fontsize : float
        Font size of the bar (y-axis) labels.
    title : str
        Optional title for the bar plot.

    Returns
    -------
    None
        The figure is written to ``outfile``.

    Raises
    ------
    KeyError
        If a column label of ``matrix`` is missing from ``series.index``.
    """
    # Create a figure.
    fig = pl.figure(figsize=figsize)

    # Axes for the heatmap triangle.
    ax = fig.add_subplot(121, frame_on=False, aspect=2.0)

    # Draw the heatmap triangle and get the order of the clustered samples.
    _, order = heatmap_triangle(matrix, ax)

    # Adjust spacing between the heatmap triangle and the barplot.
    fig.subplots_adjust(wspace=-0.12, hspace=0, left=0, right=1)

    # Axes for the barplot.
    ax = fig.add_subplot(122, frame_on=False)

    # Put gridlines beneath the bars.
    ax.set_axisbelow(True)

    # Order the bars by the clustering. ``order`` holds labels, so select by
    # label with ``.loc`` (the old ``.ix`` indexer was removed from pandas).
    series = series.loc[order]
    ax = series.plot(ax=ax, kind='barh', title=title, linewidth=0,
                     grid=False, color='grey')

    # Set the font size for the y-axis labels.
    ax.tick_params(axis='y', which='major', labelsize=fontsize)

    # Grid lines. The on/off flag is passed positionally because its keyword
    # name differs between matplotlib versions (``b`` vs ``visible``).
    ax.grid(True, which='major', axis='both', alpha=0.5)

    # Tick marks for the x-axis: one per integer up to the tallest bar.
    ax.set_xticks(np.arange(0, round(series.max() + 1)))

    # Put the y-axis marks on the right, away from the heatmap.
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position('right')

    # Hide the tick marks themselves; only labels and grid lines remain.
    ax.tick_params(length=0, axis='x')
    ax.tick_params(length=0, axis='y')

    # No axis labels.
    ax.set_xlabel('')
    ax.set_ylabel('')

    # Save.
    fig.savefig(outfile, bbox_inches='tight')
def heatmap_triangle(dataframe, axes):
    """Create a heatmap of the lower triangle of a pairwise correlation
    matrix of all pairs of columns in the given dataframe. The heatmap
    triangle is rotated 45 degrees clockwise and drawn on the given axes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Each column is one variable; Pearson correlations are computed
        between every pair of columns.
    axes : matplotlib.axes.Axes
        The axes to draw the triangle on. A horizontal colorbar is added to
        the figure that owns these axes.

    Returns
    -------
    caxes : object
        The mappable returned by ``axes.pcolormesh``.
    order : pandas.Index
        The column labels in hierarchical-clustering order.
    """
    N = dataframe.shape[1]
    D = dataframe.corr(method='pearson')

    # UPGMA clustering, but other methods are also available.
    Z = sch.linkage(D, method='average')
    R = sch.dendrogram(Z, no_plot=True)

    # The dendrogram leaves are integer positions, not labels, so reorder the
    # rows and columns positionally with ``.iloc``.
    cluster_order = R['leaves']
    D = D.iloc[cluster_order, cluster_order]

    # Keep the strictly lower triangle; k=-1 also sets the diagonal to zero.
    C = np.tril(D.values, k=-1)

    # Mask the upper triangle by position rather than by value, so that a
    # correlation of exactly 0 in the lower triangle is still drawn.
    upper = np.triu(np.ones((N, N), dtype=bool), k=1)
    C = np.ma.masked_array(C, mask=upper)

    # Cell-corner coordinates of an (N + 1) x (N + 1) grid, rotated by the
    # linear map (y, x) -> (0.5 * (y + x), y - x).
    A = np.array([(y, x) for x in range(N, -1, -1) for y in range(N + 1)])
    t = np.array([[0.5, 1], [0.5, -1]])
    A = np.dot(A, t)

    # -1.0 correlation is blue, 0.0 is white, 1.0 is red.
    cmap = pl.cm.RdBu_r
    norm = mp.colors.BoundaryNorm(np.linspace(-1, 1, 14), cmap.N)

    # This MUST be before the call to pcolormesh() to align properly.
    axes.set_xticks([])
    axes.set_yticks([])

    # Plot the correlation heatmap triangle on the axes that were passed in,
    # instead of relying on pyplot's notion of the current axes.
    X = A[:, 1].reshape(N + 1, N + 1)
    Y = A[:, 0].reshape(N + 1, N + 1)
    caxes = axes.pcolormesh(X, Y, np.flipud(C), cmap=cmap, norm=norm)

    # Reset the x limit so that the right edge sits at x = 0.
    axes.set_xlim(right=0)

    # Add a colorbar below the heatmap triangle.
    cb = axes.figure.colorbar(caxes, ax=axes, orientation='horizontal',
                              shrink=0.5825, fraction=0.05, pad=-0.035,
                              ticks=np.linspace(-1, 1, 5),
                              use_gridspec=True)
    # Raw string: the mathtext backslashes are not Python escape sequences.
    cb.set_label(r"$\mathrm{Pearson's}\ r$")

    return caxes, D.index
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment