Created
August 5, 2025 18:27
-
-
Save melvinwevers/75288399ddbac765623422d0b2097215 to your computer and use it in GitHub Desktop.
Modularity Effect Sizes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import csv | |
from datetime import datetime | |
from cliffs_delta import cliffs_delta | |
from scipy.stats import mannwhitneyu | |
def interpret_cliffs_delta(delta):
    """Classify the magnitude of a Cliff's delta value.

    The sign of *delta* is ignored; only its absolute value is compared
    against the cut-offs this file attributes to Meissel & Yao (2024).

    Returns one of "negligible", "small", "medium" or "large".
    """
    magnitude = abs(delta)
    # Exclusive upper bounds, checked in ascending order.
    cutoffs = (
        (0.15, "negligible"),
        (0.33, "small"),
        (0.47, "medium"),
    )
    for upper_bound, label in cutoffs:
        if magnitude < upper_bound:
            return label
    # Anything that falls below none of the bounds is reported as large.
    return "large"
def calculate_cliffs_delta(x1, x2):
    """Return Cliff's delta for two samples plus this file's magnitude label.

    The delta itself comes from the ``cliffs_delta`` package. The
    interpretation that package returns alongside it is discarded, and the
    label is taken from ``interpret_cliffs_delta`` instead.

    Returns a ``(delta, label)`` tuple.
    """
    effect_size, _library_label = cliffs_delta(x1, x2)
    magnitude_label = interpret_cliffs_delta(effect_size)
    return effect_size, magnitude_label
def mann_whitney_u(x1, x2):
    """Run SciPy's two-sided Mann-Whitney U test on two samples.

    Returns a ``(statistic, p_value)`` tuple.
    """
    outcome = mannwhitneyu(x1, x2, alternative="two-sided")
    # Read the named fields of the result instead of unpacking it.
    return outcome.statistic, outcome.pvalue
def _load_modularity_series(csv_file):
    """Return ``(date, modularity)`` pairs read from *csv_file*, oldest first."""
    series = []
    # newline="" lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to csv.reader.
    with open(csv_file, "r", newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            if len(row) < 3:
                continue
            try:
                date = datetime.strptime(row[0], "%Y-%m-%d")
                modularity = float(row[2])
            except ValueError:
                # Rows whose date or value cannot be parsed (e.g. a header
                # line) are skipped on purpose.
                continue
            series.append((date, modularity))
    series.sort(key=lambda pair: pair[0])
    return series


def _significance_stars(p_value):
    """Map a p-value to the star notation listed in the interpretation guide."""
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return "ns"


def _print_comparison(label1, label2, earlier, later):
    """Print medians, Cliff's delta and Mann-Whitney U for two non-empty samples."""
    from statistics import median

    # Delta is requested as (later, earlier); the report below labels a
    # positive value as an increase over time.
    delta, magnitude = calculate_cliffs_delta(later, earlier)
    u_stat, p_value = mann_whitney_u(earlier, later)

    print(f"{label1} → {label2} (n₁={len(earlier)}, n₂={len(later)}):")
    print(f" Median {label1}: {median(earlier):.4f}")
    print(f" Median {label2}: {median(later):.4f}")
    print(f" Cliff's δ: {delta:7.4f} ({magnitude})")
    print(f" Mann-Whitney U: {u_stat:7.1f}")
    if p_value is None:
        # Kept as a safeguard in case the test helper ever reports no p-value.
        print(" p-value: [small sample - exact test needed]")
    else:
        print(f" p-value (approx): {p_value:7.4f} {_significance_stars(p_value)}")

    # A delta of exactly zero is neither an increase nor a decrease.
    if delta > 0:
        direction = "increase"
    elif delta < 0:
        direction = "decrease"
    else:
        direction = "no change"
    print(f" Direction: {direction}")


def analyze_modularity_nonparametric(csv_file):
    """Print a non-parametric analysis of modularity values over time.

    The CSV is read with column 0 as a ``YYYY-MM-DD`` date and column 2 as a
    float modularity value. Rows with fewer than three columns, or whose date
    or value cannot be parsed, are ignored.

    Three sections are printed to stdout:

    1. One comparison per pair of consecutive decades that occur in the data
       (a decade with no rows is simply absent, so the pair may span a gap).
    2. An overall comparison of the chronologically first half of the series
       against the second half.
    3. A short interpretation guide for the effect-size and significance
       notation.

    Each comparison reports both medians, Cliff's delta with its magnitude
    label, the two-sided Mann-Whitney U statistic with its p-value, and the
    direction of change.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        None. All results are printed.

    Raises:
        OSError: If *csv_file* cannot be opened.
    """
    data = _load_modularity_series(csv_file)
    modularities = [mod for _, mod in data]

    # Group by decades
    decade_data = {}
    for date, mod in data:
        decade_data.setdefault((date.year // 10) * 10, []).append(mod)

    decades = sorted(decade_data)
    for decade1, decade2 in zip(decades, decades[1:]):
        _print_comparison(
            f"{decade1}s",
            f"{decade2}s",
            decade_data[decade1],  # Earlier period
            decade_data[decade2],  # Later period
        )
        print()

    # Overall comparison: first half of the sorted series vs. second half.
    midpoint = len(modularities) // 2
    first_half = modularities[:midpoint]
    second_half = modularities[midpoint:]
    print("=== Overall Comparison ===")
    if first_half:
        # A non-empty first half implies a non-empty second half.
        _print_comparison("first half", "second half", first_half, second_half)
    else:
        print("Skipped: at least two valid observations are required.")
    print()

    print("=== Interpretation Guide ===")
    # Meissel & Yao (2024). Practical Assessment, Research & Evaluation, Vol 29 No 2
    print("Cliff's δ magnitude: |δ| < 0.15 (negligible), 0.15-0.33 (small),")
    print(" 0.33-0.47 (medium), ≥ 0.47 (large)")
    print("Reference: Meissel & Yao (2024), equivalent to Cohen's d thresholds")
    print("Significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant")
# Script entry point: analyse the CSV expected in the current working directory.
if __name__ == "__main__":
    analyze_modularity_nonparametric(csv_file="mod-dens-diachronic.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment