nickodell · March 16, 2025 01:35
diff --git a/1.14.1 bench.sh b/1.14.1 bench.sh
 # 1.14.1 was benchmarked outside of SciPy's build directory
 # It was done with the following command
 for i in $(seq 1 7); do python3 bug-22655b.py; done
 # This runs the benchmark fewer times than normal, but this is fine because 1.14.1 is very clearly faster.
diff --git a/bug-22655-bench.py b/bug-22655-bench.py
 # This script is responsible for rebuilding, and runnning benchmarks.

 import subprocess
 import random
 import time

 def run_bench_one_iter():
    # Run benchmarks in random order
    commits = ["cf073e0", "f8ffbb3", "0d022b7", "e83cfb1"]
    random.shuffle(commits)
    for commit in commits:
        subprocess.check_call(['git', 'checkout', commit])
        subprocess.check_call(['python3', 'dev.py', 'build'])
        # Reduce interference of build process
        time.sleep(1)
        for i in range(7):
            # run proc multiple times, to allow for ASAN to affect cache timings
            subprocess.check_call(['./wrapper.sh', '../foo/bug-22655b.py'])
        print(f"{commit} done")

 for i in range(7):
    run_bench_one_iter()
diff --git a/bug-22655-dataviz.py b/bug-22655-dataviz.py
 import pandas as pd
 df = pd.read_csv('bench-results.csv')
 df['version'] = df['version'].replace({
    '1.16.0.dev0+git20250315.cf073e0': 'main',
    '1.16.0.dev0+git20250315.f8ffbb3': 'loop with func',
    '1.16.0.dev0+git20250315.0d022b7': 'numpy with func',
    '1.16.0.dev0+git20250315.e83cfb1': 'numpy+operator',
 })

 agg_by_version = df.groupby('version')['timing'].agg(['mean', 'std', 'sem'])
 print("summary stats without filtering")
 print(agg_by_version)
 min_only = df.groupby(['version', 'run_id']).agg(['min'])
 min_only.columns = min_only.columns.droplevel(level=1)
 mean_baseline = min_only.groupby('version')['timing'].mean().loc['main']
 print("Dropping all but min()")
 print(min_only.groupby('version')['timing'].agg(['mean', 'std', 'sem']))
 print("From now on, express timings as % faster or slower than mean of main branch")
 min_only['timing'] = 100 * ((min_only['timing'] / mean_baseline) - 1)
 summary_df = min_only.groupby('version')['timing'].agg(['mean', 'std', 'sem']).round(5)
 summary_df['t'] = summary_df['mean'] / summary_df['sem']
 summary_df['plus_minus'] = summary_df['sem'] * 1.96
 print(summary_df)
diff --git a/bug-22655b.py b/bug-22655b.py
 from time import perf_counter
 import time
 import csv

 import numpy as np
 from scipy import sparse
 from scipy.optimize import linprog
 from scipy.sparse import csr_matrix
 import functools
 import scipy
 import os


 def coherent_linear_quantile_regression(
    X,
    y,
    *,
    quantiles,
    sample_weight = None,
    coherence_buffer = 3,
 ):
    """Solve a Coherent Linear Quantile Regression problem.

    Minimizes the quantile loss:

        ∑ᵢ,ⱼ {
                 qⱼ (yᵢ - ŷ⁽ʲ⁾ᵢ) : yᵢ ≥ ŷ⁽ʲ⁾ᵢ,
            (1 - qⱼ)(ŷ⁽ʲ⁾ᵢ - yᵢ) : ŷ⁽ʲ⁾ᵢ > yᵢ
        }

    for the linear model ŷ⁽ʲ⁾ := Xβ⁽ʲ⁾, given an input dataset X, target y, and quantile ranks qⱼ.

    We achieve so-called 'coherent' quantiles by enforcing monotonicity of the predicted quantiles
    with the constraint Xβ⁽ʲ⁾ ≤ Xβ⁽ʲ⁺¹⁾ for each consecutive pair of quantile ranks in an extended
    set of quantile ranks that comprises the requested quantile ranks and a number of auxiliary
    quantile ranks in between.

    The optimization problem is formulated as a linear program by introducing the auxiliary residual
    vectors Δ⁽ʲ⁾⁺, Δ⁽ʲ⁾⁻ ≥ 0 so that Xβ⁽ʲ⁾ - y = Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻. The objective then becomes
    ∑ᵢ,ⱼ qⱼΔ⁽ʲ⁾⁻ᵢ + (1 - qⱼ)Δ⁽ʲ⁾⁺ᵢ + αt⁽ʲ⁾ᵢ for t⁽ʲ⁾ := |β⁽ʲ⁾|. The L1 regularization parameter α is
    automatically determined to minimize the impact on the solution β.

    Parameters
    ----------
    X
        The feature matrix.
    y
        The target values.
    quantiles
        The quantiles to estimate (between 0 and 1).
    sample_weight
        The optional sample weight to use for each sample.
    coherence_buffer
        The number of auxiliary quantiles to introduce. Smaller is faster, larger yields more
        coherent quantiles.

    Returns
    -------
    β
        The estimated regression coefficients so that Xβ produces quantile predictions ŷ.
    β_full
        The estimated regression coefficients including all auxiliary quantiles.
    """
    # Learn the input dimensions.
    num_samples, num_features = X.shape
    # Add buffer quantile ranks in between the given quantile ranks so that we have an even stronger
    # guarantee on the monotonicity of the predicted quantiles.
    quantiles = np.interp(
        x=np.linspace(0, len(quantiles) - 1, (len(quantiles) - 1) * (1 + coherence_buffer) + 1),
        xp=np.arange(len(quantiles)),
        fp=quantiles,
    ).astype(quantiles.dtype)
    num_quantiles = len(quantiles)
    # Validate the input.
    assert np.array_equal(quantiles, np.sort(quantiles)), "Quantile ranks must be sorted."
    assert sample_weight is None or np.all(sample_weight >= 0), "Sample weights must be >= 0."
    # Normalise the sample weights.
    sample_weight = np.ones(num_samples, dtype=y.dtype) if sample_weight is None else sample_weight
    sample_weight /= np.sum(sample_weight)
    eps = np.finfo(y.dtype).eps
    α = eps**0.25 / (num_quantiles * num_features)
    # Construct the objective function ∑ᵢ,ⱼ qⱼΔ⁽ʲ⁾⁻ᵢ + (1 - qⱼ)Δ⁽ʲ⁾⁺ᵢ + αt⁽ʲ⁾ᵢ for t⁽ʲ⁾ := |β⁽ʲ⁾|.
    c = np.hstack(
        [
            np.zeros(num_quantiles * num_features, dtype=y.dtype),  # β⁽ʲ⁾ for each qⱼ
            α * np.ones(num_quantiles * num_features, dtype=y.dtype),  # t⁽ʲ⁾ for each qⱼ
            np.kron((1 - quantiles) / num_quantiles, sample_weight),  # Δ⁽ʲ⁾⁺ for each qⱼ
            np.kron(quantiles / num_quantiles, sample_weight),  # Δ⁽ʲ⁾⁻ for each qⱼ
        ]
    )
    # Construct the equalities Xβ⁽ʲ⁾ - y = Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻ for each quantile rank qⱼ.
    A_eq = sparse.hstack(
        [
            # Xβ⁽ʲ⁾ for each qⱼ (block diagonal matrix)
            sparse.kron(sparse.eye(num_quantiles, dtype=X.dtype), X),
            # t⁽ʲ⁾ not used in this constraint
            csr_matrix((num_quantiles * num_samples, num_quantiles * num_features), dtype=X.dtype),
            # -Δ⁽ʲ⁾⁺ for each qⱼ (block diagonal matrix)
            -sparse.eye(num_quantiles * num_samples, dtype=X.dtype),
            # Δ⁽ʲ⁾⁻ for each qⱼ (block diagonal matrix)
            sparse.eye(num_quantiles * num_samples, dtype=X.dtype),
        ]
    )
    b_eq = np.tile(y, num_quantiles)
    # Construct the inequalities -t⁽ʲ⁾ <= β⁽ʲ⁾ <= t⁽ʲ⁾ for each quantile rank qⱼ so that
    # t⁽ʲ⁾ := |β⁽ʲ⁾|. Also construct the monotonicity constraint Xβ⁽ʲ⁾ <= Xβ⁽ʲ⁺¹⁾ for each qⱼ,
    # equivalent to Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻ <= Δ⁽ʲ⁺¹⁾⁺ - Δ⁽ʲ⁺¹⁾⁻.
    zeros_Δ = csr_matrix(
        (num_quantiles * num_features, 2 * num_quantiles * num_samples), dtype=X.dtype
    )
    zeros_βt = csr_matrix(
        ((num_quantiles - 1) * num_samples, 2 * num_quantiles * num_features), dtype=X.dtype
    )
    A_ub = sparse.vstack(
        [
            sparse.hstack(
                [
                    sparse.eye(num_quantiles * num_features, dtype=X.dtype),  # β⁽ʲ⁾
                    -sparse.eye(num_quantiles * num_features, dtype=X.dtype),  # -t⁽ʲ⁾
                    zeros_Δ,  # Δ⁽ʲ⁾⁺ and Δ⁽ʲ⁾⁺ not used for this constraint
                ]
            ),
            sparse.hstack(
                [
                    -sparse.eye(num_quantiles * num_features, dtype=X.dtype),  # -β⁽ʲ⁾
                    -sparse.eye(num_quantiles * num_features, dtype=X.dtype),  # -t⁽ʲ⁾
                    zeros_Δ,  # Δ⁽ʲ⁾⁺ and Δ⁽ʲ⁾⁺ not used for this constraint
                ]
            ),
            sparse.hstack(
                [
                    zeros_βt,
                    sparse.kron(
                        sparse.diags(
                            diagonals=[1, -1],  # Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁺¹⁾⁺
                            offsets=[0, 1],
                            shape=(num_quantiles - 1, num_quantiles),
                            dtype=X.dtype,
                        ),
                        sparse.eye(num_samples, dtype=X.dtype),
                    ),
                    sparse.kron(
                        sparse.diags(
                            diagonals=[-1, 1],  # -Δ⁽ʲ⁾⁻ + Δ⁽ʲ⁺¹⁾⁻
                            offsets=[0, 1],
                            shape=(num_quantiles - 1, num_quantiles),
                            dtype=X.dtype,
                        ),
                        sparse.eye(num_samples, dtype=X.dtype),
                    ),
                ]
            ),
        ]
    )
    b_ub = np.zeros(A_ub.shape[0], dtype=X.dtype)
    # Construct the bounds.
    bounds = (
        ([(None, None)] * num_quantiles * num_features)  # β⁽ʲ⁾ for each qⱼ
        + ([(0, None)] * num_quantiles * num_features)  # t⁽ʲ⁾ for each qⱼ
        + ([(0, None)] * num_quantiles * num_samples)  # Δ⁽ʲ⁾⁺ for each qⱼ
        + ([(0, None)] * num_quantiles * num_samples)  # Δ⁽ʲ⁾⁻ for each qⱼ
    )
    # Solve the Coherent Quantile Regression LP.
    t0 = perf_counter()
    result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")
    t1 = perf_counter()
    # Extract the solution.
    β_full = result.x[: num_quantiles * num_features].astype(y.dtype)
    β_full = β_full.reshape(num_quantiles, num_features).T
    # Drop the buffer quantile ranks we introduced earlier.
    β = β_full[:, 0 :: (coherence_buffer + 1)]
    return β, β_full, t1 - t0


 # Construct a problem instance.
 np.random.seed(42)
 n, d = 500, 5
 X = np.random.randn(n, d)
 y = np.random.randn(n)
 quantiles = np.asarray((0.25, 0.5, 0.75))
 #timings = {}

 # Not used by benchmark
 def time_func_transparent(func, name):
    @functools.wraps(func)
    def func(*args, **kwargs):
        print(f'in {name}')
        t0 = perf_counter()
        ret = func(*args, **kwargs)
        t1 = perf_counter()
        duration = t1 - t0
        timings['name'].append(duration)
        return ret
    if name not in timings:
        timings[name] = []
    return func


 from scipy.optimize._highspy import _highs_wrapper
 #if hasattr(_highs_wrapper, '_get_marg_bnds'):
 #    _get_marg_bnds = _highs_wrapper._get_marg_bnds
 #    _highs_wrapper._get_marg_bnds = time_func_transparent(_get_marg_bnds, '_get_marg_bnds')
 #    print(id(_get_marg_bnds))
 #    print(id(_highs_wrapper._get_marg_bnds))
 #_highs_wrapper._highs_wrapper = time_func_transparent(_highs_wrapper._highs_wrapper, 'wrapper')

 # Time how long it takes to solve the instance.
 durations = []
 run_id = os.getpid()
 for i in range(7):
    _, _, duration = coherent_linear_quantile_regression(X, y, quantiles=quantiles)
    durations.append(duration)

 print("version", scipy.__version__)
 print(f"Solved in {min(durations)}s")
 path = os.path.expanduser('~/scipy/bench-results.csv')
 with open('bench-results.csv', 'at') as f:
    for duration in durations:
        line = ",".join((str(run_id), scipy.__version__, str(duration)))
        f.write(line + '\n')
diff --git a/dataviz_output.txt b/dataviz_output.txt
 summary stats without filtering
                     mean       std       sem
 version                                      
 1.14.1           1.570627  0.009306  0.001329
 loop with func   1.916735  0.013079  0.000706
 main             1.918645  0.012619  0.000681
 numpy with func  1.904207  0.013091  0.000707
 numpy+operator   1.905540  0.012783  0.000690
 Dropping all but min()
                     mean       std       sem
 version                                      
 1.14.1           1.563484  0.005320  0.002011
 loop with func   1.906295  0.006383  0.000912
 main             1.909204  0.005652  0.000807
 numpy with func  1.893368  0.005159  0.000737
 numpy+operator   1.894948  0.005898  0.000843
 From now on, express timings as % faster or slower than mean of main branch
                     mean      std      sem           t  plus_minus
 version                                                            
 1.14.1          -18.10808  0.27865  0.10532 -171.933916    0.206427
 loop with func   -0.15237  0.33435  0.04776   -3.190327    0.093610
 main              0.00000  0.29604  0.04229    0.000000    0.082888
 numpy with func  -0.82947  0.27022  0.03860  -21.488860    0.075656
 numpy+operator   -0.74667  0.30890  0.04413  -16.919782    0.086495
diff --git a/wrapper.sh b/wrapper.sh
 #!/usr/bin/env bash
 # Wrapper for running Python scripts inside the SciPy build dir without use of dev.py (introduces extra noise)
 # Adjust PYTHONPATH for your sys.path
 export PYTHONPATH='/home/nodell/scipy/build-install/lib/python3.13/site-packages:/home/nodell/scipy:/home/nodell/.miniforge3/envs/scipy-dev/lib/python313.zip:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13/lib-dynload:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13/site-packages'
 export PYTHONSAFEPATH=1
 python "$@"
	# 1.14.1 was benchmarked outside of SciPy's build directory
	# It was done with the following command
	for i in $(seq 1 7); do python3 bug-22655b.py; done
	# This runs the benchmark fewer times than normal, but this is fine because 1.14.1 is very clearly faster.
	# This script is responsible for rebuilding, and runnning benchmarks.

	import subprocess
	import random
	import time

	def run_bench_one_iter():
	# Run benchmarks in random order
	commits = ["cf073e0", "f8ffbb3", "0d022b7", "e83cfb1"]
	random.shuffle(commits)
	for commit in commits:
	subprocess.check_call(['git', 'checkout', commit])
	subprocess.check_call(['python3', 'dev.py', 'build'])
	# Reduce interference of build process
	time.sleep(1)
	for i in range(7):
	# run proc multiple times, to allow for ASAN to affect cache timings
	subprocess.check_call(['./wrapper.sh', '../foo/bug-22655b.py'])
	print(f"{commit} done")

	for i in range(7):
	run_bench_one_iter()
	import pandas as pd
	df = pd.read_csv('bench-results.csv')
	df['version'] = df['version'].replace({
	'1.16.0.dev0+git20250315.cf073e0': 'main',
	'1.16.0.dev0+git20250315.f8ffbb3': 'loop with func',
	'1.16.0.dev0+git20250315.0d022b7': 'numpy with func',
	'1.16.0.dev0+git20250315.e83cfb1': 'numpy+operator',
	})

	agg_by_version = df.groupby('version')['timing'].agg(['mean', 'std', 'sem'])
	print("summary stats without filtering")
	print(agg_by_version)
	min_only = df.groupby(['version', 'run_id']).agg(['min'])
	min_only.columns = min_only.columns.droplevel(level=1)
	mean_baseline = min_only.groupby('version')['timing'].mean().loc['main']
	print("Dropping all but min()")
	print(min_only.groupby('version')['timing'].agg(['mean', 'std', 'sem']))
	print("From now on, express timings as % faster or slower than mean of main branch")
	min_only['timing'] = 100 * ((min_only['timing'] / mean_baseline) - 1)
	summary_df = min_only.groupby('version')['timing'].agg(['mean', 'std', 'sem']).round(5)
	summary_df['t'] = summary_df['mean'] / summary_df['sem']
	summary_df['plus_minus'] = summary_df['sem'] * 1.96
	print(summary_df)
	from time import perf_counter
	import time
	import csv

	import numpy as np
	from scipy import sparse
	from scipy.optimize import linprog
	from scipy.sparse import csr_matrix
	import functools
	import scipy
	import os


	def coherent_linear_quantile_regression(
	X,
	y,
	*,
	quantiles,
	sample_weight = None,
	coherence_buffer = 3,
	):
	"""Solve a Coherent Linear Quantile Regression problem.

	Minimizes the quantile loss:

	∑ᵢ,ⱼ {
	qⱼ (yᵢ - ŷ⁽ʲ⁾ᵢ) : yᵢ ≥ ŷ⁽ʲ⁾ᵢ,
	(1 - qⱼ)(ŷ⁽ʲ⁾ᵢ - yᵢ) : ŷ⁽ʲ⁾ᵢ > yᵢ
	}

	for the linear model ŷ⁽ʲ⁾ := Xβ⁽ʲ⁾, given an input dataset X, target y, and quantile ranks qⱼ.

	We achieve so-called 'coherent' quantiles by enforcing monotonicity of the predicted quantiles
	with the constraint Xβ⁽ʲ⁾ ≤ Xβ⁽ʲ⁺¹⁾ for each consecutive pair of quantile ranks in an extended
	set of quantile ranks that comprises the requested quantile ranks and a number of auxiliary
	quantile ranks in between.

	The optimization problem is formulated as a linear program by introducing the auxiliary residual
	vectors Δ⁽ʲ⁾⁺, Δ⁽ʲ⁾⁻ ≥ 0 so that Xβ⁽ʲ⁾ - y = Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻. The objective then becomes
	∑ᵢ,ⱼ qⱼΔ⁽ʲ⁾⁻ᵢ + (1 - qⱼ)Δ⁽ʲ⁾⁺ᵢ + αt⁽ʲ⁾ᵢ for t⁽ʲ⁾ := \|β⁽ʲ⁾\|. The L1 regularization parameter α is
	automatically determined to minimize the impact on the solution β.

	Parameters
	----------
	X
	The feature matrix.
	y
	The target values.
	quantiles
	The quantiles to estimate (between 0 and 1).
	sample_weight
	The optional sample weight to use for each sample.
	coherence_buffer
	The number of auxiliary quantiles to introduce. Smaller is faster, larger yields more
	coherent quantiles.

	Returns
	-------
	β
	The estimated regression coefficients so that Xβ produces quantile predictions ŷ.
	β_full
	The estimated regression coefficients including all auxiliary quantiles.
	"""
	# Learn the input dimensions.
	num_samples, num_features = X.shape
	# Add buffer quantile ranks in between the given quantile ranks so that we have an even stronger
	# guarantee on the monotonicity of the predicted quantiles.
	quantiles = np.interp(
	x=np.linspace(0, len(quantiles) - 1, (len(quantiles) - 1) * (1 + coherence_buffer) + 1),
	xp=np.arange(len(quantiles)),
	fp=quantiles,
	).astype(quantiles.dtype)
	num_quantiles = len(quantiles)
	# Validate the input.
	assert np.array_equal(quantiles, np.sort(quantiles)), "Quantile ranks must be sorted."
	assert sample_weight is None or np.all(sample_weight >= 0), "Sample weights must be >= 0."
	# Normalise the sample weights.
	sample_weight = np.ones(num_samples, dtype=y.dtype) if sample_weight is None else sample_weight
	sample_weight /= np.sum(sample_weight)
	eps = np.finfo(y.dtype).eps
	α = eps*0.25 / (num_quantiles num_features)
	# Construct the objective function ∑ᵢ,ⱼ qⱼΔ⁽ʲ⁾⁻ᵢ + (1 - qⱼ)Δ⁽ʲ⁾⁺ᵢ + αt⁽ʲ⁾ᵢ for t⁽ʲ⁾ := \|β⁽ʲ⁾\|.
	c = np.hstack(
	[
	np.zeros(num_quantiles * num_features, dtype=y.dtype), # β⁽ʲ⁾ for each qⱼ
	α * np.ones(num_quantiles * num_features, dtype=y.dtype), # t⁽ʲ⁾ for each qⱼ
	np.kron((1 - quantiles) / num_quantiles, sample_weight), # Δ⁽ʲ⁾⁺ for each qⱼ
	np.kron(quantiles / num_quantiles, sample_weight), # Δ⁽ʲ⁾⁻ for each qⱼ
	]
	)
	# Construct the equalities Xβ⁽ʲ⁾ - y = Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻ for each quantile rank qⱼ.
	A_eq = sparse.hstack(
	[
	# Xβ⁽ʲ⁾ for each qⱼ (block diagonal matrix)
	sparse.kron(sparse.eye(num_quantiles, dtype=X.dtype), X),
	# t⁽ʲ⁾ not used in this constraint
	csr_matrix((num_quantiles * num_samples, num_quantiles * num_features), dtype=X.dtype),
	# -Δ⁽ʲ⁾⁺ for each qⱼ (block diagonal matrix)
	-sparse.eye(num_quantiles * num_samples, dtype=X.dtype),
	# Δ⁽ʲ⁾⁻ for each qⱼ (block diagonal matrix)
	sparse.eye(num_quantiles * num_samples, dtype=X.dtype),
	]
	)
	b_eq = np.tile(y, num_quantiles)
	# Construct the inequalities -t⁽ʲ⁾ <= β⁽ʲ⁾ <= t⁽ʲ⁾ for each quantile rank qⱼ so that
	# t⁽ʲ⁾ := \|β⁽ʲ⁾\|. Also construct the monotonicity constraint Xβ⁽ʲ⁾ <= Xβ⁽ʲ⁺¹⁾ for each qⱼ,
	# equivalent to Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁾⁻ <= Δ⁽ʲ⁺¹⁾⁺ - Δ⁽ʲ⁺¹⁾⁻.
	zeros_Δ = csr_matrix(
	(num_quantiles * num_features, 2 * num_quantiles * num_samples), dtype=X.dtype
	)
	zeros_βt = csr_matrix(
	((num_quantiles - 1) * num_samples, 2 * num_quantiles * num_features), dtype=X.dtype
	)
	A_ub = sparse.vstack(
	[
	sparse.hstack(
	[
	sparse.eye(num_quantiles * num_features, dtype=X.dtype), # β⁽ʲ⁾
	-sparse.eye(num_quantiles * num_features, dtype=X.dtype), # -t⁽ʲ⁾
	zeros_Δ, # Δ⁽ʲ⁾⁺ and Δ⁽ʲ⁾⁺ not used for this constraint
	]
	),
	sparse.hstack(
	[
	-sparse.eye(num_quantiles * num_features, dtype=X.dtype), # -β⁽ʲ⁾
	-sparse.eye(num_quantiles * num_features, dtype=X.dtype), # -t⁽ʲ⁾
	zeros_Δ, # Δ⁽ʲ⁾⁺ and Δ⁽ʲ⁾⁺ not used for this constraint
	]
	),
	sparse.hstack(
	[
	zeros_βt,
	sparse.kron(
	sparse.diags(
	diagonals=[1, -1], # Δ⁽ʲ⁾⁺ - Δ⁽ʲ⁺¹⁾⁺
	offsets=[0, 1],
	shape=(num_quantiles - 1, num_quantiles),
	dtype=X.dtype,
	),
	sparse.eye(num_samples, dtype=X.dtype),
	),
	sparse.kron(
	sparse.diags(
	diagonals=[-1, 1], # -Δ⁽ʲ⁾⁻ + Δ⁽ʲ⁺¹⁾⁻
	offsets=[0, 1],
	shape=(num_quantiles - 1, num_quantiles),
	dtype=X.dtype,
	),
	sparse.eye(num_samples, dtype=X.dtype),
	),
	]
	),
	]
	)
	b_ub = np.zeros(A_ub.shape[0], dtype=X.dtype)
	# Construct the bounds.
	bounds = (
	([(None, None)] * num_quantiles * num_features) # β⁽ʲ⁾ for each qⱼ
	+ ([(0, None)] * num_quantiles * num_features) # t⁽ʲ⁾ for each qⱼ
	+ ([(0, None)] * num_quantiles * num_samples) # Δ⁽ʲ⁾⁺ for each qⱼ
	+ ([(0, None)] * num_quantiles * num_samples) # Δ⁽ʲ⁾⁻ for each qⱼ
	)
	# Solve the Coherent Quantile Regression LP.
	t0 = perf_counter()
	result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")
	t1 = perf_counter()
	# Extract the solution.
	β_full = result.x[: num_quantiles * num_features].astype(y.dtype)
	β_full = β_full.reshape(num_quantiles, num_features).T
	# Drop the buffer quantile ranks we introduced earlier.
	β = β_full[:, 0 :: (coherence_buffer + 1)]
	return β, β_full, t1 - t0


	# Construct a problem instance.
	np.random.seed(42)
	n, d = 500, 5
	X = np.random.randn(n, d)
	y = np.random.randn(n)
	quantiles = np.asarray((0.25, 0.5, 0.75))
	#timings = {}

	# Not used by benchmark
	def time_func_transparent(func, name):
	@functools.wraps(func)
	def func(args, *kwargs):
	print(f'in {name}')
	t0 = perf_counter()
	ret = func(args, *kwargs)
	t1 = perf_counter()
	duration = t1 - t0
	timings['name'].append(duration)
	return ret
	if name not in timings:
	timings[name] = []
	return func


	from scipy.optimize._highspy import _highs_wrapper
	#if hasattr(_highs_wrapper, '_get_marg_bnds'):
	# _get_marg_bnds = _highs_wrapper._get_marg_bnds
	# _highs_wrapper._get_marg_bnds = time_func_transparent(_get_marg_bnds, '_get_marg_bnds')
	# print(id(_get_marg_bnds))
	# print(id(_highs_wrapper._get_marg_bnds))
	#_highs_wrapper._highs_wrapper = time_func_transparent(_highs_wrapper._highs_wrapper, 'wrapper')

	# Time how long it takes to solve the instance.
	durations = []
	run_id = os.getpid()
	for i in range(7):
	_, _, duration = coherent_linear_quantile_regression(X, y, quantiles=quantiles)
	durations.append(duration)

	print("version", scipy.__version__)
	print(f"Solved in {min(durations)}s")
	path = os.path.expanduser('~/scipy/bench-results.csv')
	with open('bench-results.csv', 'at') as f:
	for duration in durations:
	line = ",".join((str(run_id), scipy.__version__, str(duration)))
	f.write(line + '\n')
	summary stats without filtering
	mean std sem
	version
	1.14.1 1.570627 0.009306 0.001329
	loop with func 1.916735 0.013079 0.000706
	main 1.918645 0.012619 0.000681
	numpy with func 1.904207 0.013091 0.000707
	numpy+operator 1.905540 0.012783 0.000690
	Dropping all but min()
	mean std sem
	version
	1.14.1 1.563484 0.005320 0.002011
	loop with func 1.906295 0.006383 0.000912
	main 1.909204 0.005652 0.000807
	numpy with func 1.893368 0.005159 0.000737
	numpy+operator 1.894948 0.005898 0.000843
	From now on, express timings as % faster or slower than mean of main branch
	mean std sem t plus_minus
	version
	1.14.1 -18.10808 0.27865 0.10532 -171.933916 0.206427
	loop with func -0.15237 0.33435 0.04776 -3.190327 0.093610
	main 0.00000 0.29604 0.04229 0.000000 0.082888
	numpy with func -0.82947 0.27022 0.03860 -21.488860 0.075656
	numpy+operator -0.74667 0.30890 0.04413 -16.919782 0.086495
	#!/usr/bin/env bash
	# Wrapper for running Python scripts inside the SciPy build dir without use of dev.py (introduces extra noise)
	# Adjust PYTHONPATH for your sys.path
	export PYTHONPATH='/home/nodell/scipy/build-install/lib/python3.13/site-packages:/home/nodell/scipy:/home/nodell/.miniforge3/envs/scipy-dev/lib/python313.zip:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13/lib-dynload:/home/nodell/.miniforge3/envs/scipy-dev/lib/python3.13/site-packages'
	export PYTHONSAFEPATH=1
	python "$@"