Skip to content

Instantly share code, notes, and snippets.

@peterk
Created April 4, 2025 05:59
Show Gist options
  • Save peterk/0488e5349057c8e5c3323864113446d9 to your computer and use it in GitHub Desktop.
Save peterk/0488e5349057c8e5c3323864113446d9 to your computer and use it in GitHub Desktop.
MAFS Sankey diagram
#!/usr/bin/env python3
# Sankey diagram visualization for Married at First Sight Australia data
import pandas as pd
import plotly.graph_objects as go
import numpy as np
# Load the semicolon-separated data, coerce the couple number to a numeric
# dtype, then order the rows by couple number with a fresh 0..n-1 index
df = pd.read_csv('mafs.csv', sep=';')
df = (
    df.assign(**{'Couple #': pd.to_numeric(df['Couple #'])})
    .sort_values(by='Couple #')
    .reset_index(drop=True)
)
# Get unique values for final decision and status
final_decisions = df['Final Decision'].unique()
statuses = df['Status'].unique()

# Create nodes for each stage: couples (as integer strings, ascending),
# then final decisions, then statuses
couple_nodes = [f"{int(couple)}" for couple in sorted(df['Couple #'].unique())]
decision_nodes = list(final_decisions)
status_nodes = list(statuses)

# Combine all nodes in the right order
nodes = couple_nodes + decision_nodes + status_nodes

# Nodes are looked up by label, so a label that occurs in more than one stage
# (e.g. a decision and a status with identical text) would collapse into a
# single index and mis-route links. Fail loudly instead of drawing a wrong
# diagram.
seen_labels = set()
duplicate_labels = []
for label in nodes:
    if label in seen_labels:
        duplicate_labels.append(label)
    seen_labels.add(label)
if duplicate_labels:
    raise ValueError(
        f"Node labels must be unique across stages; duplicated: {duplicate_labels}"
    )

# Create a mapping of node labels to indices
node_indices = {node: i for i, node in enumerate(nodes)}
# Calculate explicit node coordinates (one x/y entry per node, in 'nodes' order)
def _stage_y(rank, count):
    """Return the y position of item `rank` among `count` evenly spaced items.

    Positions run from 0.01 (rank 0) to 0.99 (rank count - 1); when there is
    only a single item it is placed at 0.5.
    """
    if count > 1:
        return 0.01 + (0.98 * rank / (count - 1))
    return 0.5


num_couples = len(couple_nodes)
num_decisions = len(decision_nodes)
num_statuses = len(status_nodes)

# Decisions and statuses are positioned by alphabetical rank so their vertical
# order is consistent regardless of the order they have in the 'nodes' list
sorted_decisions = sorted(decision_nodes)
decision_indices_sorted = {label: rank for rank, label in enumerate(sorted_decisions)}
sorted_statuses = sorted(status_nodes)
status_indices_sorted = {label: rank for rank, label in enumerate(sorted_statuses)}

# Horizontal position: one column per stage (couples, decisions, statuses)
node_x = [0.01] * num_couples + [0.5] * num_decisions + [0.99] * num_statuses

# Vertical position: built stage by stage, iterating each stage in its
# original order so the entries line up with the 'nodes' list
node_y = [_stage_y(rank, num_couples) for rank in range(num_couples)]
node_y += [
    _stage_y(decision_indices_sorted[label], num_decisions)
    for label in decision_nodes
]
node_y += [
    _stage_y(status_indices_sorted[label], num_statuses)
    for label in status_nodes
]
# Create source, target and value arrays for the Sankey diagram.
# Entry k of each list describes link k: sources[k] -> targets[k] with
# weight values[k]; sources/targets hold positions in the 'nodes' list.
sources = []
targets = []
values = []

# Stage 1 to Stage 2: Individual couples to Final decision
for couple, decision in zip(df['Couple #'], df['Final Decision']):
    sources.append(node_indices[f"{int(couple)}"])
    targets.append(node_indices[decision])
    values.append(1)  # Each couple is one unit

# Stage 2 to Stage 3: Final decision to Status
# Count every (decision, status) pair once up front instead of re-filtering
# the whole frame for each combination.
pair_counts = df.groupby(['Final Decision', 'Status']).size().to_dict()
status_incoming_totals = {node: 0 for node in status_nodes}  # Initialize totals
for decision in final_decisions:
    for status in statuses:
        count = int(pair_counts.get((decision, status), 0))
        if count > 0:  # Only add links with non-zero values
            sources.append(node_indices[decision])
            targets.append(node_indices[status])
            values.append(count)
            status_incoming_totals[status] += count  # Accumulate incoming values
# Build the display labels: status nodes get their incoming total appended
# in parentheses, every other node keeps its label unchanged
modified_labels = [
    f"{node} ({status_incoming_totals[node]})"
    if node in status_incoming_totals
    else node
    for node in nodes
]
# Create the Sankey diagram
# Node settings: spacing, bar thickness, outline, labels (with counts) and
# the explicit x/y coordinates computed earlier
sankey_nodes = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": modified_labels,
    "x": node_x,
    "y": node_y,
}
# Link settings: parallel source/target/value lists, all links drawn in a
# light, semi-transparent gray
sankey_links = {
    "source": sources,
    "target": targets,
    "value": values,
    "color": "rgba(217, 217, 217, 0.5)",
}
fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])
# Define stage labels and positions: one bold, centred heading per stage,
# placed at the same x as that stage's nodes and at y=1.05
stage_labels = [
    {
        "x": column_x,
        "y": 1.05,
        "text": heading,
        "showarrow": False,
        "font": {"size": 14},
        "xanchor": "center",
    }
    for column_x, heading in (
        (0.01, "<b>Couples</b>"),
        (0.5, "<b>Final Decision</b>"),
        (0.99, "<b>Current Status</b>"),
    )
]

# Update the layout: title, base font size, fixed canvas size and the
# stage headings as annotations
layout_settings = {
    "title_text": "Married at First Sight Australia: Couple Journey",
    "font_size": 12,
    "width": 800,
    "height": 500,
    "annotations": stage_labels,
}
fig.update_layout(**layout_settings)
# Customize node colors (one entry per node, in the same stage order as 'nodes')
COUPLE_COLOR = "rgba(31, 119, 180, 0.8)"    # Blue, shared by all couples
POSITIVE_COLOR = "rgba(44, 160, 44, 0.8)"   # Green
NEGATIVE_COLOR = "rgba(214, 39, 40, 0.8)"   # Red

# Stage 1: every couple node gets the same blue
node_colors = [COUPLE_COLOR] * len(couple_nodes)
# Stage 2: green for a "Yes" decision, red for all other decisions
node_colors += [
    POSITIVE_COLOR if outcome == "Yes" else NEGATIVE_COLOR
    for outcome in final_decisions
]
# Stage 3: green for "Together", red for any other status
node_colors += [
    POSITIVE_COLOR if state == "Together" else NEGATIVE_COLOR
    for state in statuses
]

# Update node colors
fig.update_traces(node_color=node_colors)
# Save the figure as an HTML file named 'mafs_sankey.html'
# (relative path, so it is written to the current working directory)
fig.write_html("mafs_sankey.html")
# Show the figure; this runs after the file has already been written
fig.show()
# Tell the user where the saved output can be found
print("Visualization complete! The result has been saved as 'mafs_sankey.html'")
We can make this file beautiful and searchable if this error is corrected: It looks like row 57 should actually have 1 column, instead of 2 in line 56.
Couple #;Final Decision;Status
1;Yes;Separated
2;Broke up before final decision;Separated
3;Yes;Separated
4;Yes;Separated
5;Yes;Together
6;Yes;Separated
7;Broke up before final decision;Separated
8;No;Separated
9;Yes;Separated
10;Yes;Separated
11;Broke up before final decision;Separated
12;Yes;Separated
13;Broke up before final decision;Separated
14;No;Separated
15;Broke up before final decision;Separated
16;Broke up before final decision;Separated
17;Yes;Separated
18;Yes;Separated
19;Yes;Separated
20;Broke up before final decision;Separated
21;Broke up before final decision;Separated
22;No;Separated
23;Yes;Separated
24;Broke up before final decision;Separated
25;Yes;Separated
26;No;Separated
27;Broke up before final decision;Separated
28;Broke up before final decision;Separated
29;Broke up before final decision;Separated
30;Yes;Separated
31;Yes;Separated
32;No;Separated
33;Broke up before final decision;Separated
34;Broke up before final decision;Separated
35;Broke up before final decision;Separated
36;Yes;Together
37;Broke up before final decision;Separated
38;Broke up before final decision;Separated
39;Broke up before final decision;Separated
40;Yes;Separated
41;No;Separated
42;Broke up before final decision;Separated
43;Broke up before final decision;Separated
44;Broke up before final decision;Separated
45;Yes;Together
46;Broke up before final decision;Separated
47;Broke up before final decision;Separated
48;Yes;Separated
49;Broke up before final decision;Separated
50;Broke up before final decision;Separated
51;Broke up before final decision;Separated
52;Broke up before final decision;Separated
53;Removed from experiment;Separated
54;Broke up before final decision;Separated
55;No;Separated
56;Left experiment, but reconciled;Separated
57;No;Separated
58;Yes;Separated
59;Yes;Separated
60;Yes;Separated
61;Yes;Together
62;Yes;Separated
63;Broke up before final decision;Separated
64;Broke up before final decision;Separated
65;Broke up before final decision;Separated
66;Yes;Separated
67;Broke up before final decision;Separated
68;Yes;Separated
69;Broke up before final decision;Separated
70;No;Separated
71;Yes;Together
72;Broke up before final decision;Separated
73;Broke up before final decision;Separated
74;No;Separated
75;Yes;Separated
76;No;Separated
77;Broke up before final decision;Separated
78;Yes;Separated
79;Yes;Separated
80;Broke up before final decision;Separated
81;Broke up before final decision;Separated
82;Broke up before final decision;Separated
83;Broke up before final decision;Separated
84;No;Separated
85;Broke up before final decision;Separated
86;Broke up before final decision;Separated
87;Broke up before final decision;Separated
88;Yes;Separated
89;Broke up before final decision;Separated
90;Broke up before final decision;Separated
91;No;Separated
92;Yes;Separated
93;Broke up before final decision;Separated
94;Broke up before final decision;Separated
95;No;Separated
96;Yes;Separated
97;Broke up before final decision;Separated
98;Broke up before final decision;Separated
99;Yes;Separated
100;Broke up before final decision;Separated
101;Yes;Separated
102;Broke up before final decision;Separated
103;Broke up before final decision;Separated
104;No;Separated
105;Broke up before final decision;Separated
106;Broke up before final decision;Separated
107;Yes;Together
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment