melvinwevers · August 5, 2025 18:27
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/usr/bin/env python3
 import csv
 from datetime import datetime

 from cliffs_delta import cliffs_delta
 from scipy.stats import mannwhitneyu


 def interpret_cliffs_delta(delta):
    """Classify the magnitude of a Cliff's delta value.

    The sign of ``delta`` is ignored; only its absolute value is compared
    against the Meissel & Yao (2024) cut-offs.

    Args:
        delta: Cliff's delta effect size.

    Returns:
        "negligible" (|delta| < 0.15), "small" (< 0.33), "medium" (< 0.47),
        or "large" for everything else.
    """
    magnitude = abs(delta)
    # Upper bounds are exclusive and scanned in ascending order, so the
    # first bound that exceeds the magnitude decides the label.
    cutoffs = ((0.15, "negligible"), (0.33, "small"), (0.47, "medium"))
    for upper_bound, label in cutoffs:
        if magnitude < upper_bound:
            return label
    return "large"


 def calculate_cliffs_delta(x1, x2):
    """Compute Cliff's delta for two samples and label its magnitude.

    Args:
        x1: First sample, passed as the first argument to ``cliffs_delta``.
        x2: Second sample, passed as the second argument to ``cliffs_delta``.

    Returns:
        A ``(delta, label)`` tuple, where ``label`` comes from
        ``interpret_cliffs_delta`` rather than from the library.
    """
    # The library also returns its own magnitude label; it is discarded in
    # favour of this script's thresholds.
    effect_size, _library_label = cliffs_delta(x1, x2)
    magnitude_label = interpret_cliffs_delta(effect_size)
    return effect_size, magnitude_label


 def mann_whitney_u(x1, x2):
    """Run SciPy's two-sided Mann-Whitney U test on two samples.

    Args:
        x1: First sample.
        x2: Second sample.

    Returns:
        A plain ``(statistic, p_value)`` tuple.
    """
    outcome = mannwhitneyu(x1, x2, alternative="two-sided")
    return outcome.statistic, outcome.pvalue


 def analyze_modularity_nonparametric(csv_file):
    """Analyze modularity with non-parametric methods"""

    data = []
    with open(csv_file, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            if len(row) >= 3:
                try:
                    date = datetime.strptime(row[0], "%Y-%m-%d")
                    modularity = float(row[2])
                    data.append((date, modularity))
                except (ValueError, IndexError):
                    continue

    data.sort(key=lambda x: x[0])
    modularities = [mod for _, mod in data]

    # Group by decades
    decade_data = {}
    for date, mod in data:
        decade = (date.year // 10) * 10
        if decade not in decade_data:
            decade_data[decade] = []
        decade_data[decade].append(mod)

    decades = sorted(decade_data.keys())

    for i in range(len(decades) - 1):
        decade1 = decades[i]
        decade2 = decades[i + 1]

        mod1 = decade_data[decade1]  # Earlier period
        mod2 = decade_data[decade2]  # Later period

        # Cliff's delta
        cliffs_delta, cliff_interp = calculate_cliffs_delta(mod2, mod1)

        # Mann-Whitney U
        u_stat, p_value = mann_whitney_u(mod1, mod2)

        # Medians
        mod1_sorted = sorted(mod1)
        mod2_sorted = sorted(mod2)
        median1 = (
            mod1_sorted[len(mod1) // 2]
            if len(mod1) % 2 == 1
            else (mod1_sorted[len(mod1) // 2 - 1] + mod1_sorted[len(mod1) // 2]) / 2
        )
        median2 = (
            mod2_sorted[len(mod2) // 2]
            if len(mod2) % 2 == 1
            else (mod2_sorted[len(mod2) // 2 - 1] + mod2_sorted[len(mod2) // 2]) / 2
        )

        print(f"{decade1}s → {decade2}s (n₁={len(mod1)}, n₂={len(mod2)}):")
        print(f"  Median {decade1}s:     {median1:.4f}")
        print(f"  Median {decade2}s:     {median2:.4f}")
        print(f"  Cliff's δ:           {cliffs_delta:7.4f} ({cliff_interp})")
        print(f"  Mann-Whitney U:      {u_stat:7.1f}")
        if p_value is not None:
            significance = (
                "***"
                if p_value < 0.001
                else "**" if p_value < 0.01 else "*" if p_value < 0.05 else "ns"
            )
            print(f"  p-value (approx):    {p_value:7.4f} {significance}")
        else:
            print(f"  p-value:             [small sample - exact test needed]")
        print(
            f"  Direction:           {'increase' if cliffs_delta > 0 else 'decrease'}"
        )
        print()

    # Overall comparison
    midpoint = len(modularities) // 2
    first_half = modularities[:midpoint]
    second_half = modularities[midpoint:]

    cliffs_delta_overall, cliff_interp_overall = calculate_cliffs_delta(
        second_half, first_half
    )
    u_stat_overall, p_value_overall = mann_whitney_u(first_half, second_half)

    # Calculate medians
    first_sorted = sorted(first_half)
    second_sorted = sorted(second_half)
    median_first = (
        first_sorted[len(first_half) // 2]
        if len(first_half) % 2 == 1
        else (
            first_sorted[len(first_half) // 2 - 1] + first_sorted[len(first_half) // 2]
        )
        / 2
    )
    median_second = (
        second_sorted[len(second_half) // 2]
        if len(second_half) % 2 == 1
        else (
            second_sorted[len(second_half) // 2 - 1]
            + second_sorted[len(second_half) // 2]
        )
        / 2
    )

    print("=== Interpretation Guide ===")
    # Meissel & Yao (2024). Practical Assessment, Research & Evaluation, Vol 29 No 2
    print("Cliff's δ magnitude: |δ| < 0.15 (negligible), 0.15-0.33 (small),")
    print("                     0.33-0.47 (medium), ≥ 0.47 (large)")
    print("Reference: Meissel & Yao (2024), equivalent to Cohen's d thresholds")
    print("Significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant")


 if __name__ == "__main__":
    import sys  # local import: only the script entry point reads argv

    # An optional first command-line argument overrides the default input
    # file; with no argument the original hard-coded path is used.
    analyze_modularity_nonparametric(
        sys.argv[1] if len(sys.argv) > 1 else "mod-dens-diachronic.csv"
    )
	#!/usr/bin/env python3
	import csv
	from datetime import datetime

	from cliffs_delta import cliffs_delta
	from scipy.stats import mannwhitneyu


	def interpret_cliffs_delta(delta):
	    """Classify the magnitude of a Cliff's delta value.

	    Uses the Meissel & Yao (2024) thresholds on the absolute value.

	    Args:
	        delta: Cliff's delta effect size; its sign is ignored.

	    Returns:
	        "negligible" (|delta| < 0.15), "small" (< 0.33), "medium" (< 0.47),
	        or "large" otherwise.
	    """
	    abs_delta = abs(delta)
	    if abs_delta < 0.15:
	        return "negligible"
	    elif abs_delta < 0.33:
	        return "small"
	    elif abs_delta < 0.47:
	        return "medium"
	    else:
	        return "large"


	def calculate_cliffs_delta(x1, x2):
	    """Calculate Cliff's delta (non-parametric effect size).

	    Args:
	        x1: First sample, passed as the first argument to ``cliffs_delta``.
	        x2: Second sample, passed as the second argument to ``cliffs_delta``.

	    Returns:
	        A ``(delta, label)`` tuple, where ``label`` comes from
	        ``interpret_cliffs_delta``.
	    """
	    delta_value, _ = cliffs_delta(x1, x2)  # Ignore library interpretation
	    return delta_value, interpret_cliffs_delta(delta_value)


	def mann_whitney_u(x1, x2):
	    """Calculate the Mann-Whitney U statistic and two-sided p-value using scipy.

	    Args:
	        x1: First sample.
	        x2: Second sample.

	    Returns:
	        A ``(statistic, p_value)`` tuple.
	    """
	    statistic, p_value = mannwhitneyu(x1, x2, alternative="two-sided")
	    return statistic, p_value


	def analyze_modularity_nonparametric(csv_file):
	"""Analyze modularity with non-parametric methods"""

	data = []
	with open(csv_file, "r") as f:
	reader = csv.reader(f)
	for row in reader:
	if len(row) >= 3:
	try:
	date = datetime.strptime(row[0], "%Y-%m-%d")
	modularity = float(row[2])
	data.append((date, modularity))
	except (ValueError, IndexError):
	continue

	data.sort(key=lambda x: x[0])
	modularities = [mod for _, mod in data]

	# Group by decades
	decade_data = {}
	for date, mod in data:
	decade = (date.year // 10) * 10
	if decade not in decade_data:
	decade_data[decade] = []
	decade_data[decade].append(mod)

	decades = sorted(decade_data.keys())

	for i in range(len(decades) - 1):
	decade1 = decades[i]
	decade2 = decades[i + 1]

	mod1 = decade_data[decade1] # Earlier period
	mod2 = decade_data[decade2] # Later period

	# Cliff's delta
	cliffs_delta, cliff_interp = calculate_cliffs_delta(mod2, mod1)

	# Mann-Whitney U
	u_stat, p_value = mann_whitney_u(mod1, mod2)

	# Medians
	mod1_sorted = sorted(mod1)
	mod2_sorted = sorted(mod2)
	median1 = (
	mod1_sorted[len(mod1) // 2]
	if len(mod1) % 2 == 1
	else (mod1_sorted[len(mod1) // 2 - 1] + mod1_sorted[len(mod1) // 2]) / 2
	)
	median2 = (
	mod2_sorted[len(mod2) // 2]
	if len(mod2) % 2 == 1
	else (mod2_sorted[len(mod2) // 2 - 1] + mod2_sorted[len(mod2) // 2]) / 2
	)

	print(f"{decade1}s → {decade2}s (n₁={len(mod1)}, n₂={len(mod2)}):")
	print(f" Median {decade1}s: {median1:.4f}")
	print(f" Median {decade2}s: {median2:.4f}")
	print(f" Cliff's δ: {cliffs_delta:7.4f} ({cliff_interp})")
	print(f" Mann-Whitney U: {u_stat:7.1f}")
	if p_value is not None:
	significance = (
	"***"
	if p_value < 0.001
	else "*" if p_value < 0.01 else "" if p_value < 0.05 else "ns"
	)
	print(f" p-value (approx): {p_value:7.4f} {significance}")
	else:
	print(f" p-value: [small sample - exact test needed]")
	print(
	f" Direction: {'increase' if cliffs_delta > 0 else 'decrease'}"
	)
	print()

	# Overall comparison
	midpoint = len(modularities) // 2
	first_half = modularities[:midpoint]
	second_half = modularities[midpoint:]

	cliffs_delta_overall, cliff_interp_overall = calculate_cliffs_delta(
	second_half, first_half
	)
	u_stat_overall, p_value_overall = mann_whitney_u(first_half, second_half)

	# Calculate medians
	first_sorted = sorted(first_half)
	second_sorted = sorted(second_half)
	median_first = (
	first_sorted[len(first_half) // 2]
	if len(first_half) % 2 == 1
	else (
	first_sorted[len(first_half) // 2 - 1] + first_sorted[len(first_half) // 2]
	)
	/ 2
	)
	median_second = (
	second_sorted[len(second_half) // 2]
	if len(second_half) % 2 == 1
	else (
	second_sorted[len(second_half) // 2 - 1]
	+ second_sorted[len(second_half) // 2]
	)
	/ 2
	)

	print("=== Interpretation Guide ===")
	# Meissel & Yao (2024). Practical Assessment, Research & Evaluation, Vol 29 No 2
	print("Cliff's δ magnitude: \|δ\| < 0.15 (negligible), 0.15-0.33 (small),")
	print(" 0.33-0.47 (medium), ≥ 0.47 (large)")
	print("Reference: Meissel & Yao (2024), equivalent to Cohen's d thresholds")
	print("Significance: * p<0.001, p<0.01, * p<0.05, ns = not significant")


	if __name__ == "__main__":
	    import sys  # local import: only the script entry point reads argv

	    # An optional first command-line argument overrides the default input
	    # file; with no argument the original hard-coded path is used.
	    analyze_modularity_nonparametric(
	        sys.argv[1] if len(sys.argv) > 1 else "mod-dens-diachronic.csv"
	    )