Created
April 4, 2025 05:59
-
-
Save peterk/0488e5349057c8e5c3323864113446d9 to your computer and use it in GitHub Desktop.
MAFS Sankey diagram
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Sankey diagram visualization for Married at First Sight Australia data | |
import pandas as pd | |
import plotly.graph_objects as go | |
import numpy as np | |
# Sankey pipeline: individual couples -> final decision -> current status.
#
# The diagram has three columns ("stages"). Every node is identified by a
# (stage, label) pair rather than by its label text alone, so a label that
# happens to occur in two stages (e.g. the same word used as both a
# "Final Decision" and a "Status" value) still yields two distinct nodes.

CSV_PATH = "mafs.csv"
OUTPUT_HTML = "mafs_sankey.html"

# Stage identifiers, in left-to-right order.
COUPLE = "couple"
DECISION = "decision"
STATUS = "status"

# Horizontal position of each stage column.
# NOTE(review): 0.01 / 0.99 are used instead of exactly 0 / 1 — presumably to
# keep the outer columns inside the plot area; confirm against Plotly docs.
STAGE_X = {COUPLE: 0.01, DECISION: 0.5, STATUS: 0.99}

BLUE = "rgba(31, 119, 180, 0.8)"
GREEN = "rgba(44, 160, 44, 0.8)"
RED = "rgba(214, 39, 40, 0.8)"
LINK_GRAY = "rgba(217, 217, 217, 0.5)"  # Light gray with transparency


def prepare_couples(df):
    """Return a copy of *df* ordered by numeric couple number.

    Args:
        df: DataFrame with a ``Couple #`` column (plus ``Final Decision``
            and ``Status``, which are carried along unchanged).

    Returns:
        A new DataFrame whose ``Couple #`` column has been passed through
        ``pd.to_numeric`` and whose rows are sorted by it, with a fresh
        0..n-1 index. The input frame is not modified.
    """
    ordered = df.copy()
    ordered["Couple #"] = pd.to_numeric(ordered["Couple #"])
    return ordered.sort_values(by="Couple #").reset_index(drop=True)


def load_couples(path=CSV_PATH):
    """Read the semicolon-separated couples file and order it by couple number."""
    return prepare_couples(pd.read_csv(path, sep=";"))


def build_nodes(df):
    """List every Sankey node as a ``(stage, label)`` pair.

    Couples come first (ascending couple number), then the distinct final
    decisions, then the distinct statuses; decisions and statuses keep the
    order in which they first appear in *df*. A node's position in the
    returned list is its Sankey node index.
    """
    couples = [f"{int(number)}" for number in sorted(df["Couple #"].unique())]
    decisions = list(df["Final Decision"].unique())
    statuses = list(df["Status"].unique())
    return (
        [(COUPLE, label) for label in couples]
        + [(DECISION, label) for label in decisions]
        + [(STATUS, label) for label in statuses]
    )


def spread_evenly(rank, count):
    """Map position *rank* (0-based) out of *count* items onto [0.01, 0.99].

    A lone item is centred at 0.5, which also avoids dividing by zero.
    """
    if count > 1:
        return 0.01 + (0.98 * rank / (count - 1))
    return 0.5


def node_positions(nodes):
    """Compute explicit x/y coordinates for *nodes*.

    Returns:
        ``(xs, ys)``: two lists aligned with *nodes*. ``x`` is fixed per
        stage; ``y`` spreads the nodes of a stage evenly. Couples keep their
        listed (numeric) order, while decisions and statuses are ranked
        alphabetically so their vertical order does not depend on which
        value happens to appear first in the data.
    """
    labels_by_stage = {
        stage: [label for node_stage, label in nodes if node_stage == stage]
        for stage in STAGE_X
    }
    rank = {}
    for stage, labels in labels_by_stage.items():
        ordered = labels if stage == COUPLE else sorted(labels)
        for position, label in enumerate(ordered):
            rank[(stage, label)] = position

    xs = [STAGE_X[stage] for stage, _ in nodes]
    ys = [
        spread_evenly(rank[(stage, label)], len(labels_by_stage[stage]))
        for stage, label in nodes
    ]
    return xs, ys


def build_links(df, nodes):
    """Build the Sankey link arrays for *df*.

    Args:
        df: prepared couples DataFrame.
        nodes: node list as returned by :func:`build_nodes` for the same data.

    Returns:
        A dict with parallel lists ``source``, ``target`` and ``value``.
        First come one unit-weight link per row (couple -> final decision),
        then one link per (final decision, status) combination that occurs
        at least once, weighted by the number of rows with that combination.
    """
    index = {node: position for position, node in enumerate(nodes)}
    sources, targets, values = [], [], []

    # Stage 1 -> Stage 2: each couple is one unit flowing into its decision.
    for number, decision in zip(df["Couple #"], df["Final Decision"]):
        sources.append(index[(COUPLE, f"{int(number)}")])
        targets.append(index[(DECISION, decision)])
        values.append(1)

    # Count every (decision, status) combination in a single pass instead of
    # re-filtering the whole frame once per pair.
    pair_counts = {}
    for pair in zip(df["Final Decision"], df["Status"]):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    # Stage 2 -> Stage 3: emitted in node order (decision-major) so the link
    # order is independent of row order; zero-count pairs are skipped.
    decisions = [label for stage, label in nodes if stage == DECISION]
    statuses = [label for stage, label in nodes if stage == STATUS]
    for decision in decisions:
        for status in statuses:
            count = pair_counts.get((decision, status), 0)
            if count > 0:
                sources.append(index[(DECISION, decision)])
                targets.append(index[(STATUS, status)])
                values.append(count)

    return {"source": sources, "target": targets, "value": values}


def node_labels(nodes, links):
    """Return display labels; status nodes get their incoming total appended.

    For example a status node ``Together`` receiving 6 units is shown as
    ``Together (6)``. Couple and decision nodes keep their plain label.
    """
    incoming = [0] * len(nodes)
    for target, value in zip(links["target"], links["value"]):
        incoming[target] += value
    return [
        f"{label} ({incoming[position]})" if stage == STATUS else label
        for position, (stage, label) in enumerate(nodes)
    ]


def node_colors(nodes):
    """Return one colour per node.

    Couples are blue; the decision ``Yes`` and the status ``Together`` are
    green; every other decision or status is red.
    """
    colors = []
    for stage, label in nodes:
        if stage == COUPLE:
            colors.append(BLUE)
        elif (stage == DECISION and label == "Yes") or (
            stage == STATUS and label == "Together"
        ):
            colors.append(GREEN)
        else:
            colors.append(RED)
    return colors


def build_figure(df):
    """Assemble the Plotly Sankey figure for the prepared couples data."""
    nodes = build_nodes(df)
    node_x, node_y = node_positions(nodes)
    links = build_links(df, nodes)

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels(nodes, links),
            color=node_colors(nodes),
            x=node_x,
            y=node_y,
        ),
        link=dict(
            source=links["source"],
            target=links["target"],
            value=links["value"],
            color=LINK_GRAY,
        ),
    )])

    # Column headings, placed just above the plot area over each stage.
    headings = {
        COUPLE: "<b>Couples</b>",
        DECISION: "<b>Final Decision</b>",
        STATUS: "<b>Current Status</b>",
    }
    stage_labels = [
        dict(x=STAGE_X[stage], y=1.05, text=text, showarrow=False,
             font=dict(size=14), xanchor='center')
        for stage, text in headings.items()
    ]

    fig.update_layout(
        title_text="Married at First Sight Australia: Couple Journey",
        font_size=12,
        width=800,
        height=500,
        annotations=stage_labels,
    )
    return fig


def main():
    """Load the data, render the diagram, save it as HTML and display it."""
    fig = build_figure(load_couples())
    fig.write_html(OUTPUT_HTML)
    fig.show()
    print("Visualization complete! The result has been saved as 'mafs_sankey.html'")


# Run the pipeline only when executed as a script, so the helpers above can
# be imported without touching the filesystem or opening a browser.
if __name__ == "__main__":
    main()
We can make this file beautiful and searchable if this error is corrected: It looks like row 57 should actually have 1 column, instead of 2 in line 56.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Couple #;Final Decision;Status
1;Yes;Separated
2;Broke up before final decision;Separated
3;Yes;Separated
4;Yes;Separated
5;Yes;Together
6;Yes;Separated
7;Broke up before final decision;Separated
8;No;Separated
9;Yes;Separated
10;Yes;Separated
11;Broke up before final decision;Separated
12;Yes;Separated
13;Broke up before final decision;Separated
14;No;Separated
15;Broke up before final decision;Separated
16;Broke up before final decision;Separated
17;Yes;Separated
18;Yes;Separated
19;Yes;Separated
20;Broke up before final decision;Separated
21;Broke up before final decision;Separated
22;No;Separated
23;Yes;Separated
24;Broke up before final decision;Separated
25;Yes;Separated
26;No;Separated
27;Broke up before final decision;Separated
28;Broke up before final decision;Separated
29;Broke up before final decision;Separated
30;Yes;Separated
31;Yes;Separated
32;No;Separated
33;Broke up before final decision;Separated
34;Broke up before final decision;Separated
35;Broke up before final decision;Separated
36;Yes;Together
37;Broke up before final decision;Separated
38;Broke up before final decision;Separated
39;Broke up before final decision;Separated
40;Yes;Separated
41;No;Separated
42;Broke up before final decision;Separated
43;Broke up before final decision;Separated
44;Broke up before final decision;Separated
45;Yes;Together
46;Broke up before final decision;Separated
47;Broke up before final decision;Separated
48;Yes;Separated
49;Broke up before final decision;Separated
50;Broke up before final decision;Separated
51;Broke up before final decision;Separated
52;Broke up before final decision;Separated
53;Removed from experiment;Separated
54;Broke up before final decision;Separated
55;No;Separated
56;Left experiment, but reconciled;Separated
57;No;Separated
58;Yes;Separated
59;Yes;Separated
60;Yes;Separated
61;Yes;Together
62;Yes;Separated
63;Broke up before final decision;Separated
64;Broke up before final decision;Separated
65;Broke up before final decision;Separated
66;Yes;Separated
67;Broke up before final decision;Separated
68;Yes;Separated
69;Broke up before final decision;Separated
70;No;Separated
71;Yes;Together
72;Broke up before final decision;Separated
73;Broke up before final decision;Separated
74;No;Separated
75;Yes;Separated
76;No;Separated
77;Broke up before final decision;Separated
78;Yes;Separated
79;Yes;Separated
80;Broke up before final decision;Separated
81;Broke up before final decision;Separated
82;Broke up before final decision;Separated
83;Broke up before final decision;Separated
84;No;Separated
85;Broke up before final decision;Separated
86;Broke up before final decision;Separated
87;Broke up before final decision;Separated
88;Yes;Separated
89;Broke up before final decision;Separated
90;Broke up before final decision;Separated
91;No;Separated
92;Yes;Separated
93;Broke up before final decision;Separated
94;Broke up before final decision;Separated
95;No;Separated
96;Yes;Separated
97;Broke up before final decision;Separated
98;Broke up before final decision;Separated
99;Yes;Separated
100;Broke up before final decision;Separated
101;Yes;Separated
102;Broke up before final decision;Separated
103;Broke up before final decision;Separated
104;No;Separated
105;Broke up before final decision;Separated
106;Broke up before final decision;Separated
107;Yes;Together
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment