Last active
September 11, 2023 09:33
-
-
Save lkilcher/8722204056c117bc1ff0edc5b4b4a29e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
def map_hist(x, y, h, bins): | |
xi = np.digitize(x, bins[0]) - 1 | |
yi = np.digitize(y, bins[1]) - 1 | |
inds = np.ravel_multi_index((xi, yi), | |
(len(bins[0]) - 1, len(bins[1]) - 1), | |
mode='clip') | |
vals = h.flatten()[inds] | |
bads = ((x < bins[0][0]) | (x > bins[0][-1]) | | |
(y < bins[1][0]) | (y > bins[1][-1])) | |
vals[bads] = np.NaN | |
return vals | |
def scatter_hist2d(x, y, | |
s=20, marker=u'o', | |
mode='mountain', | |
bins=10, range=None, | |
normed=False, weights=None, # np.histogram2d args | |
ax=None, dens_func=None, | |
**kwargs): | |
""" | |
Make a scattered-histogram plot. | |
Parameters | |
---------- | |
x, y : array_like, shape (n, ) | |
Input data | |
s : scalar or array_like, shape (n, ), optional, default: 20 | |
size in points^2. | |
marker : `~matplotlib.markers.MarkerStyle`, optional, default: 'o' | |
See `~matplotlib.markers` for more information on the different | |
styles of markers scatter supports. `marker` can be either | |
an instance of the class or the text shorthand for a particular | |
marker. | |
mode: [None | 'mountain' | 'valley' | 'clip'] | |
Possible values are: | |
- None : The points are plotted as one scatter object, in the | |
order in-which they are specified at input. | |
- 'mountain' : The points are sorted/plotted in the order of | |
the number of points in their 'bin'. This means that points | |
in the highest density will be plotted on-top of others. This | |
cleans-up the edges a bit, the points near the edges will | |
overlap. | |
- 'valley' : The reverse order of 'mountain'. The low density | |
bins are plotted on top of the high ones. | |
bins : int or array_like or [int, int] or [array, array], optional | |
The bin specification: | |
* If int, the number of bins for the two dimensions (nx=ny=bins). | |
* If array_like, the bin edges for the two dimensions | |
(x_edges=y_edges=bins). | |
* If [int, int], the number of bins in each dimension | |
(nx, ny = bins). | |
* If [array, array], the bin edges in each dimension | |
(x_edges, y_edges = bins). | |
* A combination [int, array] or [array, int], where int | |
is the number of bins and array is the bin edges. | |
range : array_like, shape(2,2), optional | |
The leftmost and rightmost edges of the bins along each dimension | |
(if not specified explicitly in the `bins` parameters): | |
``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range | |
will be considered outliers and not tallied in the histogram. | |
normed : bool, optional | |
If False, returns the number of samples in each bin. If True, | |
returns the bin density ``bin_count / sample_count / bin_area``. | |
weights : array_like, shape(N,), optional | |
An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. | |
Weights are normalized to 1 if `normed` is True. If `normed` is | |
False, the values of the returned histogram are equal to the sum of | |
the weights belonging to the samples falling into each bin. | |
ax : an axes instance to plot into. | |
dens_func : function or callable (default: None) | |
A function that modifies (inputs and returns) the dens | |
values (e.g., np.log10). The default is to not modify the | |
values. | |
kwargs : these are all passed on to scatter. | |
Returns | |
------- | |
paths : `~matplotlib.collections.PathCollection` | |
The scatter instance. | |
""" | |
if ax is None: | |
ax = plt.gca() | |
h, xe, ye = np.histogram2d(x, y, bins=bins, | |
range=range, normed=normed, | |
weights=weights) | |
# bins = (xe, ye) | |
dens = map_hist(x, y, h, bins=(xe, ye)) | |
if dens_func is not None: | |
dens = dens_func(dens) | |
iorder = slice(None) # No ordering by default | |
if mode == 'mountain': | |
iorder = np.argsort(dens) | |
elif mode == 'valley': | |
iorder = np.argsort(dens)[::-1] | |
x = x[iorder] | |
y = y[iorder] | |
dens = dens[iorder] | |
return ax.scatter(x, y, | |
s=s, c=dens, | |
marker=marker, | |
**kwargs) | |
def scatter_hexbin( | |
x, y, s=20, C=None, marker='o', | |
gridsize=100, mode='mountain', | |
xscale='linear', yscale='linear', extent=None, | |
cmap=None, norm=None, vmin=None, vmax=None, | |
reduce_C_function=np.mean, | |
ax=None, | |
**kwargs): | |
""" | |
Make a scatter plot where points are grouped into 'hexagonal bins'. | |
If *C* is *None*, the color-value of the points in each bin is | |
determined by the number of points in the hexagon (i.e., a | |
hexagonal histogram). Otherwise, *C* specifies values at the | |
coordinate (x[i], y[i]). For each hexagon, these values are | |
reduced using *reduce_C_function*. The 'color-values' are then | |
mapped to colors using the specified colormap. | |
Parameters | |
---------- | |
x, y : array-like | |
The data positions. *x* and *y* must be of the same length. | |
s : array-like, default: 20 | |
The marker size in points**2. Default is 20. | |
C : array-like, optional | |
If given, these values are accumulated in the bins. Otherwise, | |
every point has a value of 1. Must be of the same length as *x* | |
and *y*. | |
marker : MarkerStyle, default: 'o' | |
The marker style. This is passed directly to scatter. | |
gridsize : int or (int, int), default: 100 | |
If a single int, the number of hexagons in the *x*-direction. | |
The number of hexagons in the *y*-direction is chosen such that | |
the hexagons are approximately regular. | |
Alternatively, if a tuple (*nx*, *ny*), the number of hexagons | |
in the *x*-direction and the *y*-direction. | |
mode: [None | 'mountain' | 'valley'] | |
Possible values are: | |
- None : The points are plotted as one scatter object, in the | |
order in-which they are specified at input. This leaves the | |
edges of hexbins somewhat chaotic. | |
- 'mountain' : The points are sorted/plotted in the order of | |
the number of points in their 'bin'. This means that points | |
in the highest density will be plotted on-top of others. This | |
cleans-up the edges a bit, the points near the edges will | |
overlap. | |
- 'valley' : The reverse order of 'mountain'. The low density | |
bins are plotted on top of the high ones. | |
xscale : {'linear', 'log'}, default: 'linear' | |
Use a linear or log10 scale on the horizontal axis. | |
yscale : {'linear', 'log'}, default: 'linear' | |
Use a linear or log10 scale on the vertical axis. | |
extent : 4-tuple of float, default: *None* | |
The limits of the bins (xmin, xmax, ymin, ymax). | |
The default assigns the limits based on | |
*gridsize*, *x*, *y*, *xscale* and *yscale*. | |
If *xscale* or *yscale* is set to 'log', the limits are | |
expected to be the exponent for a power of 10. E.g. for | |
x-limits of 1 and 50 in 'linear' scale and y-limits | |
of 10 and 1000 in 'log' scale, enter (1, 50, 1, 3). | |
Returns | |
------- | |
paths : `~matplotlib.collections.PathCollection` | |
The scatter instance. | |
Other Parameters | |
---------------- | |
cmap : str or `~matplotlib.colors.Colormap`, default: :rc:`image.cmap` | |
The Colormap instance or registered colormap name used to map | |
the bin values to colors. | |
norm : `~matplotlib.colors.Normalize`, optional | |
The Normalize instance scales the bin values to the canonical | |
colormap range [0, 1] for mapping to colors. By default, the data | |
range is mapped to the colorbar range using linear scaling. | |
vmin, vmax : float, default: None | |
The colorbar range. If *None*, suitable min/max values are | |
automatically chosen by the `.Normalize` instance (defaults to | |
the respective min/max values of the bins in case of the default | |
linear scaling). | |
It is an error to use *vmin*/*vmax* when *norm* is given. | |
reduce_C_function : callable, default: `numpy.mean` | |
The function to aggregate *C* within the bins. It is ignored if | |
*C* is not given. This must have the signature:: | |
def reduce_C_function(C: array) -> float | |
Commonly used functions are: | |
- `numpy.mean`: average of the points | |
- `numpy.sum`: integral of the point values | |
- `numpy.amax`: value taken from the largest point | |
**kwargs : These are all passed on to scatter | |
Notes | |
----- | |
For best results, you typically have to make the marker size | |
(``s``) much smaller than the hexbin size. Setting | |
edgecolor='none' can also be helpful to reduce markersize. | |
See Also | |
-------- | |
scatter : a scatter plot. | |
hexbin : 2D hexagonal binning plot of points x, y. | |
hist2d : A 2D histogram | |
scatter_hist2d : a 2D 'scatter histogram' with rectangular bins. | |
""" | |
if ax is None: | |
ax = plt.gca() | |
# Much of this was copied <matplotlib>/lib/matplotlib/axes/_axes.py | |
# Set the size of the hexagon grid | |
if np.iterable(gridsize): | |
nx, ny = gridsize | |
else: | |
nx = gridsize | |
ny = int(nx / np.sqrt(3)) | |
# Count the number of data in each hexagon | |
x = np.asarray(x, float) | |
y = np.asarray(y, float) | |
# Will be log()'d if necessary, and then rescaled. | |
tx = x | |
ty = y | |
if xscale == 'log': | |
if np.any(x <= 0.0): | |
raise ValueError("x contains non-positive values, so can not " | |
"be log-scaled") | |
tx = np.log10(tx) | |
if yscale == 'log': | |
if np.any(y <= 0.0): | |
raise ValueError("y contains non-positive values, so can not " | |
"be log-scaled") | |
ty = np.log10(ty) | |
if extent is not None: | |
xmin, xmax, ymin, ymax = extent | |
else: | |
xmin, xmax = (tx.min(), tx.max()) if len(x) else (0, 1) | |
ymin, ymax = (ty.min(), ty.max()) if len(y) else (0, 1) | |
##### | |
# Start hexagon magic | |
nx1 = nx + 1 | |
ny1 = ny + 1 | |
nx2 = nx | |
ny2 = ny | |
n = nx1 * ny1 + nx2 * ny2 | |
# In the x-direction, the hexagons exactly cover the region from | |
# xmin to xmax. Need some padding to avoid roundoff errors. | |
padding = 1.e-9 * (xmax - xmin) | |
xmin -= padding | |
xmax += padding | |
sx = (xmax - xmin) / nx | |
sy = (ymax - ymin) / ny | |
# Positions in hexagon index coordinates. | |
ix = (tx - xmin) / sx | |
iy = (ty - ymin) / sy | |
ix1 = np.round(ix).astype(int) | |
iy1 = np.round(iy).astype(int) | |
ix2 = np.floor(ix).astype(int) | |
iy2 = np.floor(iy).astype(int) | |
# flat indices, plus one so that out-of-range points go to position 0. | |
i1 = np.where((0 <= ix1) & (ix1 < nx1) & (0 <= iy1) & (iy1 < ny1), | |
ix1 * ny1 + iy1 + 1, 0) | |
i2 = np.where((0 <= ix2) & (ix2 < nx2) & (0 <= iy2) & (iy2 < ny2), | |
ix2 * ny2 + iy2 + 1, 0) | |
d1 = (ix - ix1) ** 2 + 3.0 * (iy - iy1) ** 2 | |
d2 = (ix - ix2 - 0.5) ** 2 + 3.0 * (iy - iy2 - 0.5) ** 2 | |
# Which points are in the 'first set' vs. 'second set'? | |
bdist = (d1 < d2) | |
# This is the index of the hexagon 'number' | |
ihex = i2 + nx1 * ny1 - 1 | |
ihex[bdist] = i1[bdist] - 1 | |
# End hexagon magic | |
##### | |
if C is None: # [1:] drops out-of-range points. | |
counts1 = np.bincount(i1[bdist], minlength=1 + nx1 * ny1)[1:] | |
counts2 = np.bincount(i2[~bdist], minlength=1 + nx2 * ny2)[1:] | |
accum = np.concatenate([counts1, counts2]).astype(float) | |
C = np.ones(len(x)) | |
else: | |
# store the C values in a list per hexagon index | |
Cs_at_i1 = [[] for _ in range(1 + nx1 * ny1)] | |
Cs_at_i2 = [[] for _ in range(1 + nx2 * ny2)] | |
for i in range(len(x)): | |
if bdist[i]: | |
Cs_at_i1[i1[i]].append(C[i]) | |
else: | |
Cs_at_i2[i2[i]].append(C[i]) | |
accum = np.array( | |
[reduce_C_function(acc) if len(acc) > mincnt else np.nan | |
for Cs_at_i in [Cs_at_i1, Cs_at_i2] | |
for acc in Cs_at_i[1:]], # [1:] drops out-of-range points. | |
float) | |
good_idxs = ~np.isnan(accum) | |
offsets = np.zeros((n, 2), float) | |
offsets[:nx1 * ny1, 0] = np.repeat(np.arange(nx1), ny1) | |
offsets[:nx1 * ny1, 1] = np.tile(np.arange(ny1), nx1) | |
offsets[nx1 * ny1:, 0] = np.repeat(np.arange(nx2) + 0.5, ny2) | |
offsets[nx1 * ny1:, 1] = np.tile(np.arange(ny2), nx2) + 0.5 | |
offsets[:, 0] *= sx | |
offsets[:, 1] *= sy | |
offsets[:, 0] += xmin | |
offsets[:, 1] += ymin | |
# remove accumulation bins with no data | |
offsets = offsets[good_idxs, :] | |
accum = accum[good_idxs] | |
cvals = accum[ihex] | |
iorder = slice(None) # No ordering by default | |
if mode == 'mountain': | |
iorder = np.argsort(cvals) | |
elif mode == 'valley': | |
iorder = np.argsort(cvals)[::-1] | |
x = x[iorder] | |
y = y[iorder] | |
cvals = cvals[iorder] | |
return ax.scatter(x, y, | |
s=s, c=cvals, marker=marker, | |
cmap=None, norm=None, vmin=None, vmax=None, | |
**kwargs) | |
if __name__ == '__main__': | |
import matplotlib as mpl | |
mpl.use('TkAgg') | |
plt.ion() | |
randgen = np.random.RandomState(84309242) | |
npoint = 10000 | |
x = randgen.randn(npoint) | |
y = 2 * randgen.randn(npoint) + x | |
extent = [-10, 10, -10, 10] | |
bins = np.linspace(extent[0], extent[1], 50) | |
hexbin_gridsize = [50, 20] | |
fig, axs = plt.subplots(3, 2, figsize=[6, 8], | |
gridspec_kw=dict(hspace=0.3), | |
sharex=True, sharey=True) | |
ax = axs[0, 0] | |
ax.plot(x, y, '.', color='b', ) | |
ax.set_title("Traditional Scatterplot") | |
ax = axs[1, 0] | |
ax.hist2d(x, y, bins=[bins, bins]) | |
ax.set_title("Traditional 2-D Histogram") | |
ax = axs[2, 0] | |
scatter_hist2d(x, y, bins=[ bins, bins], ax=ax, s=5, edgecolor='none') | |
ax.set_title("Scatter histogram combined!") | |
axs[0,1].set_visible(False) | |
ax = axs[1, 1] | |
ax.hexbin(x, y, gridsize=hexbin_gridsize, extent=extent, linewidths=0.2) | |
ax.set_title("A 'hexbin' histogram") | |
ax = axs[2, 1] | |
scatter_hexbin(x, y, gridsize=hexbin_gridsize, extent=extent, ax=ax, s=5, edgecolor='none') | |
ax.set_title("Scatter hexbin combined!") | |
ax.set_xlim([-10,10]) | |
ax.set_ylim([-5,5]) | |
fig.text(0.5, 0.02, "Note: The hexbin (right) doesn't have as much 'striping'\n" | |
"as a rectangular 2-D histogram (left).", ha='center', va='bottom', size='large') | |
fig.savefig('ScatterHist_Example.png', dpi=200) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment