Created
April 9, 2025 10:53
-
-
Save 1504168/5fc35ddd09e1c4c7e5238291528f1af1 to your computer and use it in GitHub Desktop.
Stacked Bar Chart Alternative
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
def segmented_total_bar_chart(
        df: pd.DataFrame,
        group_col: str,
        sub_group_col: str,
        value_col: str,
        left_margin: float = 0.03,
        sub_group_width: float = 0.25,
        distance: float = 0.05,
        right_margin: float = 0.03,
        data_label_offset: float = 1000,
        figsize: tuple = (10, 6),
        title: str = "Grouped Data by Sub-Group",
        title_pad: float = 40,
        title_loc: str = 'left',
        title_x: float = 0.04,
        legend_offset: float = 0.1
) -> None:
    """
    Create a segmented total bar chart with dynamic spacing and labels.

    It is a better alternative to the stacked bar chart: every group gets one wide
    light-gray bar showing the group total, and the sub-group values are drawn as
    narrower colored bars side by side in front of it, so all segments share a
    common baseline and can be compared directly.
    Groups are ordered by total, largest first. Margins, widths, label offsets and
    title placement are customizable; sub-group colors are taken from matplotlib's
    ``tab10`` palette (cycled when there are more than ten sub-groups).
    Credit Reference: https://www.flerlagetwins.com/2025/04/the-best-alternative-to-stacked-bar.html

    :param df: DataFrame containing the data to be plotted
    :param group_col: Column name for the main grouping variable
    :param sub_group_col: Column name for the sub-grouping variable
    :param value_col: Column name for the values to be plotted; rows sharing the same
        (group, sub-group) pair are summed
    :param left_margin: gap between the left edge of the gray total bar and the first
        sub-group bar (x-axis units; consecutive groups are 1 apart)
    :param sub_group_width: width of each sub-group bar (x-axis units)
    :param distance: distance between two sub-group bars in the same group (x-axis units)
    :param right_margin: gap between the last sub-group bar and the right edge of the
        gray total bar (x-axis units)
    :param data_label_offset: vertical offset, in value units, added to a bar's height
        to position its data label
    :param figsize: size of the figure
    :param title: title of the chart
    :param title_pad: padding for the title
    :param title_loc: location of the title ('left', 'center', 'right')
    :param title_x: x-coordinate (axes fraction) of the title and of the first legend entry
    :param legend_offset: horizontal step (axes fraction) between consecutive legend entries
    :return: None
    :raises KeyError: if any of ``group_col``, ``sub_group_col`` or ``value_col`` is not
        a column of ``df``
    """
    # Fail early, before any figure is created, with a message naming the bad columns.
    missing = [col for col in (group_col, sub_group_col, value_col) if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in DataFrame: {missing}")

    # Compute total values per group and sort, largest total first
    total_values = df.groupby(group_col)[value_col].sum().sort_values(ascending=False)

    # Sub-group values by group, in the same order as the totals.
    # A groupby-sum (instead of DataFrame.pivot) tolerates repeated (group, sub-group)
    # rows and stays consistent with how the totals above are aggregated.
    sub_group_values = (
        df.groupby([group_col, sub_group_col])[value_col]
        .sum()
        .unstack(fill_value=0)
        .reindex(total_values.index, fill_value=0)
    )

    # Define spacing parameters
    n_sub_groups = len(sub_group_values.columns)  # Number of sub-group types
    white_bar_width = (n_sub_groups - 1) * distance + n_sub_groups * sub_group_width + left_margin + right_margin

    # Plot
    fig, ax = plt.subplots(figsize=figsize)
    # Adjust subplot parameters to fit the figure. Experimental values.
    plt.subplots_adjust(left=0.01, right=0.99, top=0.875, bottom=0.04)

    # Positioning for each group
    groups = sub_group_values.index
    x_positions = np.arange(len(groups))

    # Plot total values (background gray bar) using calculated width
    ax.bar(x_positions, total_values.values, color='lightgray', width=white_bar_width)

    # One color per sub-group; cycle the palette so that sub-groups beyond the
    # palette size are still drawn instead of being silently dropped.
    palette = plt.cm.tab10.colors
    colors = {sub_group: palette[i % len(palette)] for i, sub_group in enumerate(sub_group_values.columns)}

    # Plot sub-group bars dynamically with spacing
    for i, (sub_group, color) in enumerate(colors.items()):
        # Center of the i-th sub-group bar relative to the center of the total bar
        offset = left_margin + i * (sub_group_width + distance) + (sub_group_width / 2) - (white_bar_width / 2)
        heights = sub_group_values[sub_group]
        ax.bar(x_positions + offset, heights, color=color, width=sub_group_width)
        # Add sub-group value labels above each bar (default text color)
        for x, y in zip(x_positions, heights):
            # int() keeps the label as "$364k" even when the values are floats
            ax.text(x + offset, y + data_label_offset, f'${int(y // 1000)}k', ha='center')

    # Add group name and total label on top of each total bar
    for x, y, group in zip(x_positions, total_values, groups):
        ax.text(x, y + data_label_offset, f'{group}\n${int(y // 1000)}k', ha='center', va='bottom')

    # Labels & Formatting
    ax.set_title(title, pad=title_pad, loc=title_loc, x=title_x, color='gray', fontweight='bold', fontsize=14)
    ax.axis('off')  # Hide both axes entirely (lines, ticks and labels)

    # Horizontal Legend with Colored Text (Top Left)
    for i, (sub_group, color) in enumerate(colors.items()):
        ax.text(title_x + (i * legend_offset), 1.06, sub_group, transform=ax.transAxes, fontweight='bold',
                color=color)

    plt.show()
# Demo data: sales for every (region, segment) combination, listed region by region.
data = {
    'Region': ['West'] * 3 + ['East'] * 3 + ['Central'] * 3 + ['South'] * 3,
    'Segment Type': ['Consumer', 'Corporate', 'Home Office'] * 4,
    'Sales': [
        364000, 232000, 143000,  # West
        357000, 204000, 131000,  # East
        254000, 158000, 91000,  # Central
        196000, 122000, 74000,  # South
    ],
}
df = pd.DataFrame(data)

# Render the example chart: one gray total bar per region, one colored bar per segment.
segmented_total_bar_chart(
    df,
    title="Regions-Revenue by Segment",
    group_col='Region',
    sub_group_col='Segment Type',
    value_col='Sales',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment