Self-contained benchmarking script for vanilla regressions with Pyfixest and Statsmodels
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy>=1.21.0",
#     "pandas>=1.3.0",
#     "statsmodels>=0.13.0",
#     "pyfixest>=0.11.0",
#     "matplotlib>=3.4.0",
# ]
# ///
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from pyfixest import feols


def generate_data(n_samples, n_features):
    """Generate synthetic data for regression testing."""
    np.random.seed(42)
    X = np.random.randn(n_samples, n_features)
    coefficients = np.random.randn(n_features)
    y = X @ coefficients + np.random.randn(n_samples) * 0.1
    return X, y


def run_benchmark(func, X, y, iterations=100):
    """Run a timing benchmark on a regression function."""
    times = []
    for _ in range(iterations):
        start = time.perf_counter()
        func(X, y)
        times.append(time.perf_counter() - start)
    return np.mean(times), np.std(times)


def statsmodels_regression(X, y):
    """Run OLS regression using statsmodels."""
    X_with_constant = add_constant(X)
    return OLS(y, X_with_constant).fit()


def pyfixest_regression(X, y):
    """Run OLS regression using pyfixest."""
    df = pd.DataFrame(X, columns=[f'x{i}' for i in range(X.shape[1])])
    df['y'] = y
    formula = 'y ~ ' + ' + '.join(df.columns[:-1])
    return feols(formula, data=df)


def create_comparison_plot(title, xlabel):
    """Create a plot with common styling."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Time (seconds)')
    ax.set_title(title)
    ax.grid(True)
    ax.set_xscale('log')
    ax.set_yscale('log')
    return fig, ax


def add_result_to_plot(ax, x_values, times, label, marker):
    """Add benchmark results to the plot."""
    means, stds = zip(*times)
    ax.errorbar(x_values, means, yerr=stds,
                label=label, marker=marker,
                linestyle='-', capsize=3)


def run_size_comparison():
    """Compare performance across different sample sizes."""
    sizes = [100, 500, 1000, 5000, 10000, 100000]
    n_features = 10
    results = {'statsmodels': [], 'pyfixest': []}
    for size in sizes:
        print(f"\nTesting with {size} samples...")
        X, y = generate_data(size, n_features)
        results['statsmodels'].append(run_benchmark(statsmodels_regression, X, y))
        results['pyfixest'].append(run_benchmark(pyfixest_regression, X, y))
    return sizes, results


def run_feature_comparison():
    """Compare performance across different numbers of features."""
    n_samples = 10000
    features = [2, 10, 100, 200]
    results = {'statsmodels': [], 'pyfixest': []}
    for n_features in features:
        print(f"\nTesting with {n_features} features...")
        X, y = generate_data(n_samples, n_features)
        results['statsmodels'].append(run_benchmark(statsmodels_regression, X, y))
        results['pyfixest'].append(run_benchmark(pyfixest_regression, X, y))
    return features, results


# Run both comparisons
print("Running sample size comparison...")
sizes, size_results = run_size_comparison()
print("\nRunning feature count comparison...")
features, feature_results = run_feature_comparison()

# Create sample size comparison plot
fig1, ax1 = create_comparison_plot(
    'OLS Performance vs Sample Size', 'Number of Samples'
)
add_result_to_plot(ax1, sizes, size_results['statsmodels'], 'statsmodels', 'o')
add_result_to_plot(ax1, sizes, size_results['pyfixest'], 'pyfixest', 's')
ax1.legend()
ax1.set_xlim(10, max(sizes)*5)
plt.tight_layout()

# Create feature count comparison plot
fig2, ax2 = create_comparison_plot(
    'OLS Performance vs Number of Features', 'Number of Features'
)
add_result_to_plot(ax2, features, feature_results['statsmodels'], 'statsmodels', 'o')
add_result_to_plot(ax2, features, feature_results['pyfixest'], 'pyfixest', 's')
ax2.legend()
ax2.set_xlim(1, max(features)*5)
plt.tight_layout()
plt.show()
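A note on running it: because the script declares its dependencies in a PEP 723 inline metadata block (the `# /// script` section at the top), a metadata-aware runner such as uv can execute it directly, for example `uv run benchmark_ols.py` (the filename here is only an assumption), which creates an environment with the listed dependencies before running the script.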
Hi, thanks @aeturrell! I updated your script to add a comparison against the statsmodels formula API: link. Things start to look slightly more favorable for pyfixest, but statsmodels is still faster (though not by a significant margin).
Possible reasons I can think of for the remaining performance differences between the two formula APIs (a sketch of a formula-API benchmark function follows this list):
- different default checks
- different OLS solvers (pyfixest supports a range of options; I'm not 100% sure what statsmodels uses)
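For reference, here is a minimal sketch of what such a formula-API comparison could look like inside the same harness. This is an assumption about the linked script rather than its actual contents; it uses statsmodels.formula.api.ols so that both libraries pay the formula-parsing cost, and the hypothetical function name statsmodels_formula_regression is chosen here for illustration.

import pandas as pd
import statsmodels.formula.api as smf

def statsmodels_formula_regression(X, y):
    """Run OLS via the statsmodels formula API (includes formula parsing, like pyfixest)."""
    # Hypothetical addition, not the commenter's exact code.
    df = pd.DataFrame(X, columns=[f'x{i}' for i in range(X.shape[1])])
    df['y'] = y
    formula = 'y ~ ' + ' + '.join(df.columns[:-1])
    return smf.ols(formula, data=df).fit()

# It can be timed with the existing harness, e.g.:
# mean_t, std_t = run_benchmark(statsmodels_formula_regression, X, y)

Building the DataFrame inside the timed function mirrors pyfixest_regression, so both formula paths include the same data-preparation overhead.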
The charts below show the results of running the code.
[Charts: 'OLS Performance vs Sample Size' and 'OLS Performance vs Number of Features', both plotting mean time in seconds (with error bars) on log-log axes.]