Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save melvinwevers/75288399ddbac765623422d0b2097215 to your computer and use it in GitHub Desktop.
Save melvinwevers/75288399ddbac765623422d0b2097215 to your computer and use it in GitHub Desktop.
Modularity Effect Sizes
#!/usr/bin/env python3
import csv
import statistics
import sys
from collections import defaultdict
from datetime import datetime

from cliffs_delta import cliffs_delta
from scipy.stats import mannwhitneyu
def interpret_cliffs_delta(delta):
    """Map a Cliff's delta value onto a qualitative magnitude label.

    The cut-offs (0.15, 0.33, 0.47) are applied to the absolute value of
    ``delta`` and follow the thresholds attributed to Meissel & Yao (2024).
    Returns one of "negligible", "small", "medium" or "large".
    """
    magnitude = abs(delta)
    # Upper bounds are exclusive and checked in ascending order; anything
    # that clears none of them is "large".
    bands = ((0.15, "negligible"), (0.33, "small"), (0.47, "medium"))
    for upper_bound, label in bands:
        if magnitude < upper_bound:
            return label
    return "large"
def calculate_cliffs_delta(x1, x2):
    """Compute Cliff's delta for two samples and label its magnitude.

    Returns a ``(delta, label)`` tuple. The label is produced by
    ``interpret_cliffs_delta``; the interpretation returned by the
    ``cliffs_delta`` library is discarded.
    """
    # The library hands back (value, its own label); only the value is kept.
    library_result = cliffs_delta(x1, x2)
    delta_value = library_result[0]
    magnitude_label = interpret_cliffs_delta(delta_value)
    return delta_value, magnitude_label
def mann_whitney_u(x1, x2):
    """Run a two-sided Mann-Whitney U test on two samples via scipy.

    Returns a ``(statistic, p_value)`` tuple as reported by
    ``scipy.stats.mannwhitneyu``.
    """
    result = mannwhitneyu(x1, x2, alternative="two-sided")
    return result.statistic, result.pvalue
def _load_modularity_series(csv_file):
    """Read ``(date, modularity)`` pairs from *csv_file*, sorted by date.

    The first column is parsed as a ``%Y-%m-%d`` date and the third column as
    a float. Rows with fewer than three columns, or whose values fail to
    parse (for example a header row), are skipped.
    """
    records = []
    # newline="" is what the csv module recommends for files given to reader().
    with open(csv_file, "r", newline="") as f:
        for row in csv.reader(f):
            if len(row) < 3:
                continue
            try:
                date = datetime.strptime(row[0], "%Y-%m-%d")
                modularity = float(row[2])
            except ValueError:
                # Best-effort parsing: unparsable rows are ignored on purpose.
                continue
            records.append((date, modularity))
    records.sort(key=lambda record: record[0])
    return records


def _significance_marker(p_value):
    """Return the star notation for *p_value* (``ns`` when p >= 0.05)."""
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return "ns"


def _print_comparison(earlier_label, later_label, earlier, later):
    """Print medians, Cliff's delta and Mann-Whitney U for two samples.

    Both samples must be non-empty.
    """
    # The later sample is passed first so that a positive delta is reported
    # as an increase over time.
    delta, delta_interp = calculate_cliffs_delta(later, earlier)
    # U is computed with the earlier sample first; the test is two-sided.
    u_stat, p_value = mann_whitney_u(earlier, later)

    print(f"{earlier_label} → {later_label} (n₁={len(earlier)}, n₂={len(later)}):")
    print(f" Median {earlier_label}: {statistics.median(earlier):.4f}")
    print(f" Median {later_label}: {statistics.median(later):.4f}")
    print(f" Cliff's δ: {delta:7.4f} ({delta_interp})")
    print(f" Mann-Whitney U: {u_stat:7.1f}")
    if p_value is not None:
        print(f" p-value (approx): {p_value:7.4f} {_significance_marker(p_value)}")
    else:
        # Defensive fallback in case the test helper cannot supply a p-value.
        print(" p-value: [small sample - exact test needed]")

    # A delta of exactly zero is neither an increase nor a decrease.
    if delta > 0:
        direction = "increase"
    elif delta < 0:
        direction = "decrease"
    else:
        direction = "no change"
    print(f" Direction: {direction}")
    print()


def analyze_modularity_nonparametric(csv_file):
    """Analyze modularity over time with non-parametric methods.

    Reads dated modularity values from *csv_file* (date in the first column,
    modularity in the third), then prints:

    * a comparison of each pair of adjacent decades present in the data,
    * a comparison of the chronological first half against the second half
      (skipped when there are fewer than two data points),
    * a guide to reading the effect sizes and significance markers.

    Each comparison reports the medians, Cliff's delta with its magnitude
    label, the Mann-Whitney U statistic and p-value, and the direction of
    change. Returns ``None``; all results are written to standard output.
    """
    data = _load_modularity_series(csv_file)
    modularities = [mod for _, mod in data]

    # Group by decades
    decade_data = defaultdict(list)
    for date, mod in data:
        decade_data[(date.year // 10) * 10].append(mod)

    # Compare each decade with the next one that has data.
    decades = sorted(decade_data)
    for decade1, decade2 in zip(decades, decades[1:]):
        _print_comparison(
            f"{decade1}s", f"{decade2}s", decade_data[decade1], decade_data[decade2]
        )

    # Overall comparison: chronological first half vs. second half. With
    # fewer than two points the first half is empty and nothing is compared.
    midpoint = len(modularities) // 2
    first_half = modularities[:midpoint]
    second_half = modularities[midpoint:]
    if first_half and second_half:
        _print_comparison("first half", "second half", first_half, second_half)

    print("=== Interpretation Guide ===")
    # Meissel & Yao (2024). Practical Assessment, Research & Evaluation, Vol 29 No 2
    print("Cliff's δ magnitude: |δ| < 0.15 (negligible), 0.15-0.33 (small),")
    print(" 0.33-0.47 (medium), ≥ 0.47 (large)")
    print("Reference: Meissel & Yao (2024), equivalent to Cohen's d thresholds")
    print("Significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant")
if __name__ == "__main__":
    # An optional first command-line argument overrides the default input
    # file; with no argument the script behaves as before.
    csv_path = sys.argv[1] if len(sys.argv) > 1 else "mod-dens-diachronic.csv"
    analyze_modularity_nonparametric(csv_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment