Skip to content

Instantly share code, notes, and snippets.

View ogrisel's full-sized avatar

Olivier Grisel ogrisel

View GitHub Profile
import numpy as np
import pytest
from sklearn.datasets import load_breast_cancer
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
@pytest.mark.parametrize("loss", ['huber', 'ls', 'lad', 'quantile'])
@pytest.mark.parametrize("use_sample_weight", [False, True])
def test_regressor_train_loss_convergence(loss, use_sample_weight):
rng = np.random.RandomState(42)
n_samples, n_features = 30, 5
n_estimators = 300
# Make random data (without duplicated samples) to make sure
# it's possible to build an invertible (overfitting) mapping
# from X to y that therefore should lead to a regression loss
(conda-forge-compilers) 0 [~/code/scikit-learn (master)]$ pip install -e . -v
Created temporary directory: /private/var/folders/69/7jxl92h50w10b4v998qt4tj00000gn/T/pip-ephem-wheel-cache-cn0u3xn5
Created temporary directory: /private/var/folders/69/7jxl92h50w10b4v998qt4tj00000gn/T/pip-req-tracker-7xtixh31
Created requirements tracker '/private/var/folders/69/7jxl92h50w10b4v998qt4tj00000gn/T/pip-req-tracker-7xtixh31'
Created temporary directory: /private/var/folders/69/7jxl92h50w10b4v998qt4tj00000gn/T/pip-install-q8mggn78
Obtaining file:///Users/ogrisel/code/scikit-learn
Added file:///Users/ogrisel/code/scikit-learn to build tracker '/private/var/folders/69/7jxl92h50w10b4v998qt4tj00000gn/T/pip-req-tracker-7xtixh31'
Running setup.py (path:/Users/ogrisel/code/scikit-learn/setup.py) egg_info for package from file:///Users/ogrisel/code/scikit-learn
Running command python setup.py egg_info
running egg_info
from time import time
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import expon, randint, uniform
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder
@ogrisel
ogrisel / debug_hist_gbdt_missing_values.ipynb
Last active July 18, 2019 15:52
debug missing values for hist GBDT
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ogrisel
ogrisel / ms-python-server.log
Created January 9, 2019 09:54
Microsoft Python Language Server version 0.1.75.0 on scikit-learn
Starting Microsoft Python language server.
##########Linting Output - flake8##########
Microsoft Python Language Server version 0.1.75.0
Initializing for /opt/venvs/py37/bin/python
Loading files from /home/ogrisel/code/scikit-learn
Parsing document file:///home/ogrisel/code/scikit-learn/setup.py
Parse complete for file:///home/ogrisel/code/scikit-learn/setup.py at version -1
Analysis queued for file:///home/ogrisel/code/scikit-learn/setup.py
Parsing document file:///home/ogrisel/code/scikit-learn/conftest.py
Parse complete for file:///home/ogrisel/code/scikit-learn/conftest.py at version -1
@ogrisel
ogrisel / non_degenerate_mlp_gram.py
Last active March 8, 2022 22:30
Spectrum of the extended feature Gram matrix of a single hidden layer ReLU MLP
"""Empirical evaluation of the extended feature Gram matrix of a ReLU MLP
Here we try to estimate the spectrum of the H^\infty matrix as defined in:
Gradient Descent Provably Optimizes Over-parameterized Neural Networks (2018)
Simon S. Du, Xiyu Zhai, Barnabas Poczos, Aarti Singh
https://arxiv.org/abs/1810.02054
Theorem 4.1 relies on the assumption that H^\infty has a strictly positive
minimum eigenvalue. The following computes an estimate of this eigenvalue
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.externals import joblib
# Create a joblib Memory object configured with '/tmp/joblib' as its cachedir.
m = joblib.Memory(cachedir='/tmp/joblib')
# Rebind make_blobs to the wrapper returned by m.cache, so the call below
# goes through the Memory object instead of calling scikit-learn directly.
make_blobs = m.cache(make_blobs)
# Generate 10**5 samples with 50 features around 10 centers, using a large
# cluster_std of 100; random_state is fixed to 777 for reproducibility.
data, labels = make_blobs(n_samples=10**5, n_features=50, cluster_std=100,
centers=10, random_state=777)
@ogrisel
ogrisel / numpy_pickle_protocol_5.py
Last active October 13, 2019 09:17
Draft use of pickle protocol 5 (PEP 574) for zero-copy numpy array pickling
from pickle import Pickler, load
try:
from pickle import PickleBuffer
except ImportError:
PickleBuffer = None
import copyreg
import os
import numpy as np
import time
@ogrisel
ogrisel / large_pickle_dump.py
Last active April 20, 2018 09:06
Memory profiling for Python pickling of large buffers
from pickle import Pickler, _Pickler, Unpickler, _Unpickler, HIGHEST_PROTOCOL
import os
import time
import sys
import gc
from multiprocessing import get_context
# Module-level alias for pickle.HIGHEST_PROTOCOL.
PROTOCOL = HIGHEST_PROTOCOL
# Multiprocessing context obtained with the 'spawn' start method.
ctx = get_context('spawn')