Created
June 14, 2022 16:41
-
-
Save Micky774/9f72b1d9532b1bc73b8aeac299276c77 to your computer and use it in GitHub Desktop.
assert_all_finite benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # %% | |
| import numpy as np | |
| import scipy.sparse as sp | |
def generate_data(
    n_samples,
    n_features,
    X_density=1,
    y_sparse=False,
    dtype=np.float64,
    random_state=None,
):
    """Generate a random feature matrix ``X`` and target vector ``y``.

    Parameters
    ----------
    n_samples : int
        Number of rows of ``X`` and number of entries of ``y``.
    n_features : int
        Number of columns of ``X``.
    X_density : float, default=1
        When strictly below 1, ``X`` is a random CSR matrix with this
        density. Otherwise ``X`` is a dense array of uniform draws scaled
        by 50 and rounded.
    y_sparse : bool, default=False
        When true, ``y`` is wrapped in a ``csr_matrix`` and transposed to a
        column if the wrapping produced a single row.
    dtype : data-type, default=np.float64
        Element type of both ``X`` and ``y``.
    random_state : int or None, default=None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    X, y : tuple
        The generated matrix and target.
    """
    rng = np.random.RandomState(random_state)

    if X_density < 1:
        # Forward ``dtype`` so the sparse path honours it just like the
        # dense path does (previously the sparse result was always float64).
        X = sp.random(
            n_samples,
            n_features,
            format="csr",
            density=X_density,
            dtype=dtype,
            random_state=rng,
        )
    else:
        X = np.round(rng.rand(n_samples, n_features) * 50).astype(dtype)

    # Uniform draws shifted by one and rounded: every entry is 1 or 2.
    y = np.round(rng.rand(n_samples) + 1).astype(dtype)

    if y_sparse:
        y = sp.csr_matrix(y)
        # A 1-D input becomes a single row; flip it into a column.
        if y.shape[0] == 1:
            y = y.T
    return X, y
def make_non_finite(X, p_inf=0, p_nan=0, random_state=None):
    """Overwrite a random fraction of the entries of ``X`` with inf / nan.

    Parameters
    ----------
    X : ndarray
        Array to corrupt. It is flattened with ``ravel()``, which returns a
        view when it can, so the caller's array may be modified in place.
    p_inf : float, default=0
        Fraction of entries to set to ``np.inf``.
    p_nan : float, default=0
        Fraction of entries to set to ``np.nan``.
    random_state : int or None, default=None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    ndarray
        ``X`` itself, untouched, when ``p_inf + p_nan == 0``; otherwise the
        flattened (1-D) array with the non-finite values written in.
    """
    rng = np.random.RandomState(random_state)
    total_fraction = p_inf + p_nan
    if total_fraction == 0:
        return X

    flat = X.ravel()
    n_corrupted = int(flat.size * total_fraction)
    # Sample distinct positions so no entry is overwritten twice.
    chosen = rng.choice(np.arange(flat.size), replace=False, size=n_corrupted)

    # The first ``n_inf`` sampled positions receive inf, the rest nan.
    n_inf = int(chosen.size * (p_inf / total_fraction))
    inf_positions, nan_positions = chosen[:n_inf], chosen[n_inf:]

    if p_inf > 0:
        flat[inf_positions] = np.inf
    if p_nan > 0:
        flat[nan_positions] = np.nan
    return flat
def generate_non_finite_data(p_inf=0, p_nan=0, *args, **kwargs):
    """Generate a feature matrix and inject inf / nan values into it.

    Parameters
    ----------
    p_inf : float, default=0
        Fraction of entries to set to ``np.inf``.
    p_nan : float, default=0
        Fraction of entries to set to ``np.nan``.
    *args, **kwargs
        Forwarded unchanged to ``generate_data``. A ``random_state`` keyword,
        if given, also seeds the selection of corrupted entries.

    Returns
    -------
    The value returned by ``make_non_finite`` for the generated ``X``.
    """
    X = generate_data(*args, **kwargs)[0]
    # ``random_state`` is optional for ``generate_data``; use ``.get`` so
    # omitting it falls back to ``None`` instead of raising ``KeyError``.
    return make_non_finite(X, p_inf, p_nan, kwargs.get("random_state"))
| # %% | |
| from functools import partial | |
| from time import perf_counter | |
| from statistics import mean, stdev | |
| from itertools import product | |
| import csv | |
| from sklearn.utils.validation import _assert_all_finite | |
# Prefix (directory, with trailing slash) under which benchmark CSVs live.
results_path = "local_artifacts/benchmarks/assert_all_finite/"
# Label of the code version being timed; used as the CSV file's base name.
branch = "main"
def __assert_all_finite(*args, **kwargs):
    """Call ``_assert_all_finite`` and discard any ``ValueError`` it raises.

    All arguments are forwarded unchanged. The wrapper always returns
    ``None``, so a benchmark can time the check on inputs that make it
    raise ``ValueError`` without the run being aborted.
    """
    try:
        _assert_all_finite(*args, **kwargs)
    except ValueError:
        pass
# Each entry is (function to time, data factory, parameter combinations).
# The combinations are (p_inf, p_nan, allow_nan) triples.
benchmark_config = [
    (
        __assert_all_finite,
        partial(generate_non_finite_data, n_samples=10_000, n_features=1_000),
        # Materialize the product: a bare ``product`` object is a one-shot
        # iterator, so a second pass over the config would silently yield
        # no cases at all.
        list(
            product(
                [0, 0.01],
                [0, 0.01],
                [True, False],
            )
        ),
    ),
]

# Number of timed runs per parameter combination.
N_REPEATS = 10
import os

# Make sure the target directory exists: ``open(..., 'w')`` creates the file
# but not its missing parent directories.
_results_file = f'{results_path}{branch}.csv'
_results_dir = os.path.dirname(_results_file)
if _results_dir:
    os.makedirs(_results_dir, exist_ok=True)

# Time every configured case N_REPEATS times, writing one CSV row per run
# and printing a mean +/- stdev summary per parameter combination.
with open(_results_file, 'w', newline='') as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=[
            "p_inf",
            "p_nan",
            "allow_nan",
            "n_repeat",
            "duration",
        ],
    )
    writer.writeheader()
    for func, make_data, items in benchmark_config:
        for p_inf, p_nan, allow_nan in items:
            time_results = []
            for n_repeat in range(N_REPEATS):
                # The repeat index doubles as the seed, so every repeat
                # sees different but reproducible data. Data generation
                # happens outside the timed region.
                X = make_data(random_state=n_repeat, p_inf=p_inf, p_nan=p_nan)
                start = perf_counter()
                func(X, allow_nan=allow_nan)
                duration = perf_counter() - start
                time_results.append(duration)
                writer.writerow(
                    {
                        "p_inf": p_inf,
                        "p_nan": p_nan,
                        "allow_nan": allow_nan,
                        "n_repeat": n_repeat,
                        "duration": duration,
                    }
                )
            # ``stdev`` needs at least two samples, so N_REPEATS must be >= 2.
            results_mean, results_stdev = mean(time_results), stdev(time_results)
            print(
                f" {p_inf=} {p_nan=} {allow_nan=}|"
                f" {results_mean:.3f} +/- {results_stdev:.3f}"
            )
| # %% | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import seaborn as sns | |
plt.rc("font", size=12)

# Load the per-branch result CSVs and stack them into one frame, tagging
# every row with the branch it came from.
_branches = ("main", "PR")
branches = {name: pd.read_csv(f"{results_path}{name}.csv") for name in _branches}
df = pd.concat([frame.assign(branch=name) for name, frame in branches.items()])

# One group per (p_inf, p_nan) combination found in the data.
group_by_attrs = ["p_inf", "p_nan"]
grouped = list(df.groupby(group_by_attrs))

fig, axis = plt.subplots(nrows=2, ncols=2, figsize=(9, 8), constrained_layout=True)
fig.patch.set_facecolor("white")

# Draw each group on its own subplot of the 2x2 grid (row-major order);
# the violins are split by ``allow_nan`` and compared across branches.
for (grouped_attrs, subset), ax in zip(grouped, axis.flat):
    sns.violinplot(
        x="branch",
        y="duration",
        hue="allow_nan",
        split=True,
        data=subset,
        ax=ax,
    )
    title_parts = [f"{k}={v}" for k, v in zip(group_by_attrs, grouped_attrs)]
    ax.set_title(" | ".join(title_parts))
    ax.set_xlabel("")

# Only the left-most column keeps its y-axis label.
for ax in axis[:, 1:].flat:
    ax.set_ylabel("")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment