Last active
March 23, 2026 18:20
-
-
Save cavedave/779193b08673c735f2f0a9e048e0f79f to your computer and use it in GitHub Desktop.
Skittles charts: fun-size (Clare Wallace Excel) + full-size packs (possibly-wrong/skittles). https://www.clarewallace.co.uk/skittles | https://github.com/possibly-wrong/skittles | https://github.com/cavedave/skittles
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Skittles Analysis — Possibly Wrong dataset | |
| ------------------------------------------- | |
| Analyzes 468 full-size Skittles packs from the possibly-wrong dataset. | |
| Same chart style as the Clare Wallace fun-size analysis. | |
| Source: https://github.com/possibly-wrong/skittles | |
| Usage: | |
| pip install pandas matplotlib | |
| python skittles_possiblywrong_analysis.py | |
| Sort modes (change SORT_MODE below): | |
| total_then_rainbow — sort by pack size, then R→O→Y→G→P (chart v4) | |
| rainbow_count — sort by red, then orange, … (chart v3) | |
| """ | |
| import pandas as pd | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from collections import Counter | |
| # Recreate v4 (recommended) or v3: change this and re-run. | |
| SORT_MODE = "total_then_rainbow" # or "rainbow_count" | |
| # ── 1. LOAD ─────────────────────────────────────────────────────────────────── | |
| DATA_FILE = "skittles_possiblywrong.txt" | |
| df = pd.read_csv(DATA_FILE, sep="\t") | |
| df = df.drop(columns=["Uncounted"]) | |
| df.columns = ["red", "orange", "yellow", "green", "purple"] | |
| print(f"Loaded: {len(df)} packs") | |
| # ── 2. CLEAN & STATS ───────────────────────────────────────────────────────── | |
| totals = df.sum(axis=1) | |
| print(f"Before cleaning: min={totals.min()}, max={totals.max()}, mean={totals.mean():.1f}") | |
| MIN_SKITTLES = 54 | |
| MAX_SKITTLES = 65 | |
| outliers = df[~totals.between(MIN_SKITTLES, MAX_SKITTLES)] | |
| df = df[totals.between(MIN_SKITTLES, MAX_SKITTLES)].reset_index(drop=True) | |
| print(f"Removed {len(outliers)} outlier packs (totals: {sorted(outliers.sum(axis=1).tolist())})") | |
| totals = df.sum(axis=1) | |
| print(f"Clean dataset: {len(df)} packs, min={totals.min()}, max={totals.max()}, mean={totals.mean():.1f}") | |
| # ── 3. PREPARE ──────────────────────────────────────────────────────────────── | |
| COLORS = ["red", "orange", "yellow", "green", "purple"] | |
| COLOR_HEX = { | |
| "red": "#c0043f", | |
| "orange": "#e64808", | |
| "yellow": "#f1be02", | |
| "green": "#048207", | |
| "purple": "#441349", | |
| } | |
| color_order = ["red", "orange", "yellow", "green", "purple"] | |
| print(f"Colour order (rainbow): {color_order}") | |
| print(f"Sort mode: {SORT_MODE}") | |
| if SORT_MODE == "total_then_rainbow": | |
| sorted_indices = sorted( | |
| range(len(df)), | |
| key=lambda i: [df.loc[i, color_order].sum()] + [df.loc[i, c] for c in color_order], | |
| reverse=True, | |
| ) | |
| chart_version = "v4" | |
| elif SORT_MODE == "rainbow_count": | |
| sorted_indices = sorted( | |
| range(len(df)), | |
| key=lambda i: [df.loc[i, c] for c in color_order], | |
| reverse=True, | |
| ) | |
| chart_version = "v3" | |
| else: | |
| raise ValueError(f"Unknown SORT_MODE: {SORT_MODE!r} (use total_then_rainbow or rainbow_count)") | |
| df_sorted = df.iloc[sorted_indices].reset_index(drop=True) | |
| bag_tuples = [tuple(row) for row in df_sorted[color_order].values.tolist()] | |
| counts = Counter(bag_tuples) | |
| is_duplicate = [counts[t] > 1 for t in bag_tuples] | |
| n_dupes = sum(is_duplicate) | |
| n_unique = len(df_sorted) - n_dupes | |
| print(f"Duplicates: {n_dupes} of {len(df_sorted)} packs") | |
| # ── 4. PLOT ─────────────────────────────────────────────────────────────────── | |
| n_bags = len(df_sorted) | |
| bag_to_group = {} | |
| group_id = 0 | |
| tuple_to_group = {} | |
| for i, t in enumerate(bag_tuples): | |
| if counts[t] > 1: | |
| if t not in tuple_to_group: | |
| tuple_to_group[t] = group_id | |
| group_id += 1 | |
| bag_to_group[i] = tuple_to_group[t] | |
| DUP_MARKER_COLORS = [ | |
| "#000000", "#e60000", "#0066cc", "#ff9900", "#00aa44", | |
| "#9933cc", "#cc0066", "#006666", "#996600", "#3366ff", | |
| ] | |
| fig, ax = plt.subplots(figsize=(14, 24)) | |
| plt.subplots_adjust(top=0.93) | |
| for i, (_, row) in enumerate(df_sorted.iterrows()): | |
| x_start = 0 | |
| for color in color_order: | |
| count = row[color] | |
| if count > 0: | |
| ax.barh(i, count, left=x_start, height=1.0, | |
| color=COLOR_HEX[color], edgecolor="none") | |
| x_start += count | |
| if is_duplicate[i]: | |
| gid = bag_to_group[i] | |
| marker_color = DUP_MARKER_COLORS[gid % len(DUP_MARKER_COLORS)] | |
| ax.plot(-1.0, i, "s", color=marker_color, markersize=6, | |
| markeredgecolor="white", markeredgewidth=0.3, zorder=5) | |
| for i in range(1, n_bags): | |
| ax.axhline(y=i - 0.5, color="white", linewidth=0.3, zorder=4) | |
| ax.set_yticks([]) | |
| max_skittles = int(df_sorted[color_order].sum(axis=1).max()) | |
| ax.set_xlim(-2.0, max_skittles + 0.5) | |
| ax.set_xticks(range(0, max_skittles + 1, 5)) | |
| ax.set_ylim(-0.5, n_bags - 0.5) | |
| ax.set_xlabel("Skittles per pack", fontsize=12) | |
| ax.spines["top"].set_visible(False) | |
| ax.spines["right"].set_visible(False) | |
| ax.spines["left"].set_visible(False) | |
| fig.text(0.5, 0.965, "Colors in Full-Size Packs of Skittles", | |
| ha="center", va="center", fontsize=22, fontweight="bold") | |
| fig.text(0.5, 0.943, | |
| f'{n_bags} packs (~60 Skittles each) \u2014 only {n_dupes} are identical to another', | |
| ha="center", va="center", fontsize=14, fontstyle="italic", color="#333333") | |
| ax.text(0.02, 0.02, | |
| f"\u25a0 = duplicate pack ({n_dupes} of {n_bags})\n" | |
| f" matching packs share a colour", | |
| transform=ax.transAxes, fontsize=9, ha="left", va="bottom", | |
| bbox=dict(boxstyle="round,pad=0.4", facecolor="white", edgecolor="#cccccc")) | |
| ax.text(max_skittles + 0.5, -n_bags * 0.03, | |
| "Data possibly-wrong \u2022 Graph by @iamreddave", | |
| ha="right", va="top", | |
| fontsize=9, style="italic", color="gray", clip_on=False) | |
| OUTPUT_FILE = f"skittles_possiblywrong_chart_{chart_version}.png" | |
| plt.savefig(OUTPUT_FILE, dpi=300, bbox_inches="tight", facecolor="white") | |
| print(f"Chart saved to {OUTPUT_FILE}") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Skittles Analysis (fun-size) | |
| ----------------------------- | |
| Loads Clare Wallace's Excel data, cleans outliers, and plots stacked bars: | |
| • Colors left→right: red, orange, yellow, green, purple (rainbow / ROYGBV) | |
| • Rows sorted by total Skittles per bag (largest first), then by those counts | |
| Usage: | |
| pip install pandas openpyxl matplotlib | |
| python skittles_analysis.py | |
| """ | |
| import pandas as pd | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from collections import Counter | |
| # ── 1. LOAD ─────────────────────────────────────────────────────────────────── | |
| EXCEL_FILE = "skittles-megalist-3-6-25.xlsx" | |
| df = pd.read_excel(EXCEL_FILE, sheet_name="Basic data") | |
| df = df.drop(columns=["Unnamed: 0", "Matches?"]) | |
| df.columns = ["red", "orange", "yellow", "green", "purple"] | |
| df = df.dropna(how="all").fillna(0).astype(int) | |
| print(f"Loaded: {len(df)} bags") | |
| # ── 2. CLEAN ────────────────────────────────────────────────────────────────── | |
| # Keep only bags with 15–18 skittles (standard fun-size range). | |
| # Outliers are consistent with data-entry errors, e.g. "26" instead of "2" + "6". | |
| MIN_SKITTLES = 15 | |
| MAX_SKITTLES = 18 | |
| totals = df.sum(axis=1) | |
| outliers = df[~totals.between(MIN_SKITTLES, MAX_SKITTLES)] | |
| df = df[totals.between(MIN_SKITTLES, MAX_SKITTLES)].reset_index(drop=True) | |
| print(f"Removed {len(outliers)} outlier bags (totals: {sorted(outliers.sum(axis=1).tolist())})") | |
| print(f"Clean dataset: {len(df)} bags") | |
| # ── 3. PREPARE ──────────────────────────────────────────────────────────────── | |
| COLORS = ["red", "orange", "yellow", "green", "purple"] | |
| # COLOR_HEX = { | |
| # "red": "#e60000", | |
| # "orange": "#ff9900", | |
| # "yellow": "#ffff00", | |
| # "green": "#00ff00", | |
| # "purple": "#660066", | |
| # } | |
| COLOR_HEX = { | |
| "red": "#c0043f", | |
| "orange": "#e64808", | |
| "yellow": "#f1be02", | |
| "green": "#048207", | |
| "purple": "#441349", | |
| } | |
| # Stack and tie-break order: rainbow (no blue in Skittles → purple as violet) | |
| color_order = ["red", "orange", "yellow", "green", "purple"] | |
| print(f"Colour order (rainbow): {color_order}") | |
| # Sort bags: total Skittles first (largest at top), then red, orange, yellow, green, purple | |
| sorted_indices = sorted( | |
| range(len(df)), | |
| key=lambda i: [df.loc[i, color_order].sum()] + [df.loc[i, c] for c in color_order], | |
| reverse=True, | |
| ) | |
| df_sorted = df.iloc[sorted_indices].reset_index(drop=True) | |
| # Find duplicates | |
| bag_tuples = [tuple(row) for row in df_sorted[color_order].values.tolist()] | |
| counts = Counter(bag_tuples) | |
| is_duplicate = [counts[t] > 1 for t in bag_tuples] | |
| n_dupes = sum(is_duplicate) | |
| n_unique = len(df_sorted) - n_dupes | |
| # ── 4. PLOT (stacked horizontal bars, v3 style improved) ───────────────────── | |
| n_bags = len(df_sorted) | |
| # Assign a duplicate group ID so matching bags can be linked | |
| bag_to_group = {} | |
| group_id = 0 | |
| tuple_to_group = {} | |
| for i, t in enumerate(bag_tuples): | |
| if counts[t] > 1: | |
| if t not in tuple_to_group: | |
| tuple_to_group[t] = group_id | |
| group_id += 1 | |
| bag_to_group[i] = tuple_to_group[t] | |
| DUP_MARKER_COLORS = [ | |
| "#000000", "#e60000", "#0066cc", "#ff9900", "#00aa44", | |
| "#9933cc", "#cc0066", "#006666", "#996600", "#3366ff", | |
| ] | |
| fig, ax = plt.subplots(figsize=(14, 22)) | |
| plt.subplots_adjust(top=0.93) | |
| for i, (_, row) in enumerate(df_sorted.iterrows()): | |
| x_start = 0 | |
| for color in color_order: | |
| count = row[color] | |
| if count > 0: | |
| ax.barh(i, count, left=x_start, height=1.0, | |
| color=COLOR_HEX[color], edgecolor="none") | |
| x_start += count | |
| if is_duplicate[i]: | |
| gid = bag_to_group[i] | |
| marker_color = DUP_MARKER_COLORS[gid % len(DUP_MARKER_COLORS)] | |
| ax.plot(-0.5, i, "s", color=marker_color, markersize=6, | |
| markeredgecolor="white", markeredgewidth=0.3, zorder=5) | |
| for i in range(1, n_bags): | |
| ax.axhline(y=i - 0.5, color="white", linewidth=0.3, zorder=4) | |
| ax.set_yticks([]) | |
| max_skittles = int(df_sorted[color_order].sum(axis=1).max()) | |
| ax.set_xlim(-1.0, max_skittles + 0.5) | |
| ax.set_xticks(range(0, max_skittles + 1)) | |
| ax.set_ylim(-0.5, n_bags - 0.5) | |
| ax.set_xlabel("Skittles per bag", fontsize=12) | |
| ax.spines["top"].set_visible(False) | |
| ax.spines["right"].set_visible(False) | |
| ax.spines["left"].set_visible(False) | |
| fig.text(0.5, 0.965, "Fun-size Skittles: every bag, rainbow order", | |
| ha="center", va="center", fontsize=22, fontweight="bold") | |
| fig.text(0.5, 0.943, | |
| f"Sorted by Skittles per bag (then R\u2192O\u2192Y\u2192G\u2192P) \u2022 " | |
| f'{n_dupes} of {n_bags} bags identical to another \u2022 "No two Rainbows are the same"', | |
| ha="center", va="center", fontsize=12, fontstyle="italic", color="#333333") | |
| ax.text(0.02, 0.02, | |
| f"\u25a0 = duplicate bag ({n_dupes} of {n_bags})\n" | |
| f" matching bags share a colour", | |
| transform=ax.transAxes, fontsize=9, ha="left", va="bottom", | |
| bbox=dict(boxstyle="round,pad=0.4", facecolor="white", edgecolor="#cccccc")) | |
| ax.text(max_skittles + 0.5, -n_bags * 0.03, | |
| "Data Clare Wallace \u2022 Graph by @iamreddave", | |
| ha="right", va="top", | |
| fontsize=9, style="italic", color="gray", clip_on=False) | |
| OUTPUT_FILE = "skittles_chart_v10.png" | |
| plt.savefig(OUTPUT_FILE, dpi=300, bbox_inches="tight", facecolor="white") | |
| print(f"Chart saved to {OUTPUT_FILE}") |
Author
cavedave
commented
Mar 15, 2026
Author
Author
Author
Author
You could graph summary values like Per-Color Totals Color Total Mean/bag Median Std Dev Min Max Orange 1,411 3.37 3.0 1.59 0 9 Red 1,386 3.31 3.0 1.78 0 10 Purple 1,392 3.32 3.0 1.60 0 8 Green 1,375 3.28 3.0 1.59 0 8 Yellow 1,322 3.16 3.0 1.66 0 8 Per-Color Totals Big bags Color Total Mean/pack Median Std Dev Min Max Yellow (Lemon) 5,663 12.15 12.0 3.24 2 24 Purple (Grape) 5,648 12.12 12.0 3.36 3 24 Red (Strawberry) 5,559 11.93 12.0 3.23 3 22 Orange 5,483 11.77 12.0 3.21 2 22 Green (Apple) 5,269 11.31 11.0 3.21 2 22 but I didn't
Author
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment




