Last active
October 14, 2021 17:46
-
-
Save fwhigh/c6f9c88cf94cedf2e96d6900ac0f1226 to your computer and use it in GitHub Desktop.
Blog post: lightgbm-vs-keras-metaflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a directory you can blow away in full later
mkdir -p aicamp_demo && cd aicamp_demo
# Clone the fwhigh/metaflow-helper git repo and pin it to the v0.0.1 tag
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# Set up and activate a virtual environment
python -m venv venv && . venv/bin/activate
# Install the metaflow-helper package in editable mode, then the example dependencies
python -m pip install --upgrade pip
python -m pip install -e .
# LightGBM is installed with Homebrew here, before the example requirements
brew install lightgbm
python -m pip install -r example-requirements.txt
# Test runs and flow visualization
python examples/model-selection/train.py run --help
python examples/model-selection/train.py run --configuration test_randomized_config
# Graphviz supplies the `dot` command that renders the flow graph to a PNG
brew install graphviz
python examples/model-selection/train.py output-dot | dot -Tpng -o model-selection-flow.png
# Full run
python examples/model-selection/train.py run --configuration randomized_config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess
import sys
import time
from importlib import import_module
def system_command_with_retry(cmd: list, *, max_attempts: int = 5, base_delay: float = 1) -> bool:
    """Run *cmd*, retrying with exponential backoff until it exits with status 0.

    Args:
        cmd: Command and its arguments, passed to ``subprocess.run`` as a
            list (no shell is involved).
        max_attempts: Maximum number of times the command is run.
        base_delay: Seconds to wait after the first failed attempt; the wait
            doubles after every further failure.

    Returns:
        True as soon as one attempt exits with status 0, False when every
        attempt exited with a non-zero status.

    Raises:
        OSError: If the command cannot be started at all (for example the
            executable does not exist). This is not retried.
    """
    for attempt in range(max_attempts):
        # ``check=True`` is deliberately not used: a non-zero exit is reported
        # through ``returncode`` rather than by raising CalledProcessError.
        status = subprocess.run(cmd)
        if status.returncode == 0:
            return True
        if attempt + 1 == max_attempts:
            # Nothing left to retry, so do not waste time sleeping.
            break
        wait_seconds = base_delay * 2 ** attempt
        print(f'command status was {status}, retrying after {wait_seconds} seconds')
        time.sleep(wait_seconds)
    print(f'command failed after {max_attempts} attempts: {cmd}')
    return False
def install_dependencies(dependencies: list) -> None:
    """Pip-install every dependency whose module cannot be imported.

    Args:
        dependencies: List of dicts, each mapping an importable module name
            to the pip requirement that provides it, e.g.
            ``[{'sklearn': 'scikit-learn'}]``.

    Modules that import successfully are left alone; installation is
    delegated to ``system_command_with_retry``.
    """
    for dependency in dependencies:
        for module_name, requirement in dependency.items():
            try:
                import_module(module_name)
            except ModuleNotFoundError:
                # Run pip through the current interpreter so the package is
                # installed into the same environment that failed the import.
                system_command_with_retry([sys.executable, '-m', 'pip', 'install', requirement])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import randint, loguniform | |
# Each entry describes one contender model and the hyperparameter space to
# search for it.
contenders_spec = []

# Contender 1: LightGBM regressor.
contenders_spec.append({
    # This is the algo
    '__model': ['metaflow_helper.models.LightGBMRegressor'],
    # These go to the model initializer
    '__model__init_kwargs__learning_rate': loguniform(1e-2, 1e-1),
    '__model__init_kwargs__max_depth': randint(1, 4),
    '__model__init_kwargs__n_estimators': [10_000],
    # These go to the model fitter
    '__model__fit_kwargs__eval_metric': ['mse'],
    '__model__fit_kwargs__early_stopping_rounds': [10],
    '__model__fit_kwargs__verbose': [0],
    # The presence of this key triggers randomized search
    '__n_iter': 5,
})

# Contender 2: Keras regressor.
contenders_spec.append({
    # This is the algo
    '__model': ['metaflow_helper.models.KerasRegressor'],
    # These go to the model initializer
    '__model__init_kwargs__build_model': ['metaflow_helper.models.build_keras_regression_model'],
    '__model__init_kwargs__metric': ['mse'],
    # Candidate layouts: no hidden layer, one or two layers of width 15,
    # or a single layer of width 15 * 15.
    '__model__init_kwargs__dense_layer_widths': [(), (15,), (15, 15), (15 * 15,)],
    '__model__init_kwargs__l1_factor': loguniform(1e-8, 1e-2),
    '__model__init_kwargs__l2_factor': loguniform(1e-8, 1e-2),
    # These go to the model fitter
    '__model__fit_kwargs__batch_size': [None],
    '__model__fit_kwargs__epochs': [10_000],
    '__model__fit_kwargs__validation_split': [0.2],
    '__model__fit_kwargs__monitor': ['val_mse'],
    '__model__fit_kwargs__verbose': [0],
    '__model__fit_kwargs__patience': [10],
    '__model__fit_kwargs__min_delta': [0.1],
    # The presence of this key triggers randomized search
    '__n_iter': 5,
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create and activate a dedicated virtual environment
python -m venv metaflow-helper-venv && . metaflow-helper-venv/bin/activate
# Clone the repo and check out a tagged commit
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# The package is also available via `pip install metaflow-helper==0.0.1`
python -m pip install --upgrade pip
# Install the checked-out package in editable mode
python -m pip install -e .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the dependencies needed by the examples
python -m pip install -r example-requirements.txt
# Show the options of the `run` command; relevant excerpt of its output:
python examples/model-selection/train.py run --help
# --configuration TEXT  Which config.py file to use. Available configs:
#                       randomized_config (default),
#                       test_randomized_config, grid_config,
#                       test_grid_config  [default: randomized_config]
# Run the flow with the test configuration
python examples/model-selection/train.py run --configuration test_randomized_config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from metaflow import FlowSpec, Parameter, step

import common


class Train(FlowSpec):
    """Training flow with a ``test_mode`` switch for quick, small runs."""

    test_mode = Parameter(
        'test_mode',
        help="Run in test mode?",
        type=bool,
        default=False,
    )

    @step
    def start(self):
        """Load the data and set the training limits according to ``test_mode``.

        Both branches set the same attributes (``df``, ``max_epochs``,
        ``patience``) so later code can read them regardless of the mode.
        """
        if self.test_mode:
            # Get a subset of data and reduce parallelism here
            self.df = common.get_dataframe(max_rows=100)
            self.max_epochs = 10
            self.patience = 1
        else:
            self.df = common.get_dataframe()
            # Same attribute name as the test branch (was ``self.epochs``).
            self.max_epochs = 10_000
            self.patience = 50
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step of the flow; does nothing."""
        pass


if __name__ == '__main__':
    Train()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from metaflow import FlowSpec, step | |
import common | |
class Train(FlowSpec):
    """Minimal two-step flow: load a dataframe in ``start``, then ``end``."""

    @step
    def start(self):
        """Fetch the dataframe from ``common`` and keep it on the flow as ``df``."""
        dataframe = common.get_df()
        self.df = dataframe
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step of the flow; does nothing."""
        pass


if __name__ == '__main__':
    Train()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment