Skip to content

Instantly share code, notes, and snippets.

@khangvan
Created June 23, 2020 06:17
Show Gist options
  • Save khangvan/a32c8bb1062cfa7ade8867a7d569bd99 to your computer and use it in GitHub Desktop.
Save khangvan/a32c8bb1062cfa7ade8867a7d569bd99 to your computer and use it in GitHub Desktop.
sidetable_pareto
# Based on code developed by Tyler Marrs:
# https://tylermarrs.com/posts/pareto-plot-with-matplotlib/
def pareto_plot(df, x, y="Count", title=None, show_pct_y=True, pct_format='{0:.0%}'):
    """Draw a Pareto chart (sorted bars plus cumulative-share line) for ``df``.

    The frequency table is produced by the sidetable accessor
    (``df.stb.freq``), sorted by ``y`` in descending order, and plotted as
    bars. A red line on a secondary axis shows the running share of the
    plotted ``y`` values.

    Args:
        df: DataFrame to summarise. ``sidetable`` must have been imported so
            that the ``.stb`` accessor is available.
        x: Column label, or a list/tuple of column labels, to count. When
            several columns are given, each bar is labelled with the row's
            values joined by ``","``.
        y: Name of the frequency-table column used for bar heights and for
            sorting.
        title: Optional figure title.
        show_pct_y: When false, the tick marks of the right-hand (percentage)
            axis are removed.
        pct_format: ``str.format`` template applied to each cumulative share
            (a fraction of 1) to build the point annotations.

    Raises:
        KeyError: If ``y`` cannot be matched to a column of the frequency
            table.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Local import: matplotlib is already a dependency of this file (plt).
    from matplotlib.ticker import FuncFormatter

    # Normalise ``x`` to a list of column labels. The previous ``len(x) > 1``
    # check measured the length of the column-name string, not the number of
    # columns, and ``freq([x])`` nested an already-list argument.
    columns = list(x) if isinstance(x, (list, tuple)) else [x]
    freq = df.stb.freq(columns)  # use sidetable

    y_col = y
    if y_col not in freq.columns:
        # NOTE(review): sidetable releases appear to differ in how they name
        # the frequency columns ("Count" vs "count") - confirm against the
        # installed version. Fall back to a case/spacing-insensitive match.
        def _normalise(name):
            return str(name).lower().replace(" ", "_")

        by_normalised = {_normalise(col): col for col in freq.columns}
        if _normalise(y) not in by_normalised:
            raise KeyError(
                "column {!r} not found in frequency table; available: {}".format(
                    y, list(freq.columns)
                )
            )
        y_col = by_normalised[_normalise(y)]

    ranked = freq.sort_values(y_col, ascending=False)

    # Bar labels are always strings so matplotlib places the bars in the
    # sorted (Pareto) order instead of at numeric x positions.
    if len(columns) > 1:
        labels = ranked[columns].astype(str).apply(",".join, axis=1).values
    else:
        labels = ranked[columns[0]].astype(str).values

    heights = ranked[y_col].values

    # Derive the cumulative share from the plotted values themselves, so the
    # line always matches the bars' order and is a fraction of 1, as the
    # percent format strings below expect.
    total = heights.sum()
    cumulative = heights.cumsum() / total if total else heights.cumsum() * 0.0

    _, ax1 = plt.subplots()
    ax1.bar(labels, heights)
    ax1.set_xlabel(x if len(columns) == 1 else ", ".join(str(c) for c in columns))
    ax1.set_ylabel(y)

    ax2 = ax1.twinx()
    ax2.plot(labels, cumulative, '-ro', alpha=0.5)
    ax2.set_ylabel('', color='r')
    ax2.tick_params('y', colors='r')

    if show_pct_y:
        # A formatter (rather than fixed tick-label strings) keeps the labels
        # in sync with the ticks if the axis limits change later.
        ax2.yaxis.set_major_formatter(
            FuncFormatter(lambda value, _pos: '{:,.2%}'.format(value))
        )
    else:
        # hide y-labels on right side
        ax2.set_yticks([])

    for label, share in zip(labels, cumulative):
        ax2.annotate(pct_format.format(share), (label, share), fontweight='heavy')

    if title:
        plt.title(title)

    plt.tight_layout()
    plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
import sidetable as stb
# Demo: load the "diamonds" dataset through seaborn and draw a Pareto chart
# for its "cut" column.
# NOTE(review): the title mentions "cost value", but pareto_plot is called
# with its default y="Count", so the bars show how often each cut occurs -
# confirm the intended wording.
df=sns.load_dataset("diamonds")
pareto_plot(df,x="cut",title="Cost value by Diamond types")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment