Last active
February 3, 2023 15:51
-
-
Save jquacinella/1a6341f0f1446973714c to your computer and use it in GitHub Desktop.
Violin Plots for Weighted Data in Matplotlib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import statsmodels.api as sm
import weighted
from matplotlib.cbook import violin_stats
from scipy import stats
def vdensity_with_weights(weights):
    '''Build a weighted density function for matplotlib's violin_stats.

    The density callback is expected to take only (data, coords), so the
    weights are captured in a closure rather than passed as an argument.
    Returns the inner callable.
    '''
    def vdensity(data, coords):
        '''Evaluate a weighted statsmodels KDE of data at coords.'''
        # Fit without FFT, using the weights captured from the enclosing call
        kde = sm.nonparametric.KDEUnivariate(data)
        kde.fit(weights=weights, fft=False)

        # The KDE evaluated on coords gives the y-values of the violin outline
        density = kde.evaluate(coords)
        return density

    return vdensity
def custom_violin_stats(data, weights):
    '''Compute violin-plot statistics for weighted data.

    Args:
        data: Observations to summarise. Only the first entry of the
            violin_stats result is updated, so this is presumably a single
            1-D dataset -- confirm against callers.
        weights: One weight per observation in data.

    Returns:
        The list returned by matplotlib's violin_stats, with the "mean" and
        "median" of its first element replaced by their weighted versions.
    '''
    # Weighted median comes from the `weighted` module, weighted mean from
    # numpy (requires `import numpy as np` at module level).
    median = weighted.quantile_1D(data, weights, 0.5)
    mean = np.ma.average(data, weights=list(weights))

    # violin_stats expects a density function of (data, coords); the closure
    # supplies the weights to it.
    results = violin_stats(data, vdensity_with_weights(weights))

    # Overwrite the unweighted summary values with the weighted ones.
    # "min" and "max" are left as populated by violin_stats.
    results[0]["mean"] = mean
    results[0]["median"] = median
    return results
### Example
#vpstats1 = custom_violin_stats(np.asarray(df_column_data), np.asarray(df_column_weights))
#vplot = ax.violin(vpstats1, [pos_idx], vert=False, showmeans=True, showextrema=True, showmedians=True)
#current_color_palette = ...
#for pc in vplot['bodies']:
#    pc.set_facecolor(current_color_palette[pos_idx])
#    pc.set_edgecolor('black')
Thanks for this! I added a couple pieces to make this fully runnable, you can see it here: https://colab.research.google.com/drive/1cSnJGKJEqbllkPbF2z0cnfdwT40sUKKR
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello!
It looks like you use the `np` module here (I assume that this is an alias for `numpy`) but you never import it. Am I right?