Skip to content

Instantly share code, notes, and snippets.

@stsievert
Last active June 20, 2018 22:30
Show Gist options
  • Save stsievert/b2441010b855169f2a062cc1fa87eb42 to your computer and use it in GitHub Desktop.
Save stsievert/b2441010b855169f2a062cc1fa87eb42 to your computer and use it in GitHub Desktop.
Hyperband sketch
import numpy as np
from distributed import Client, as_completed
from sklearn.base import clone
from sklearn.svm import SVC
import dask.array as da
from time import sleep
from sklearn.base import BaseEstimator
def set_and_clone(x):
    """Placeholder hook: currently returns ``x`` unchanged.

    No parameters are set and no copy is made yet; callers get back the
    very same object they passed in.
    """
    # TODO
    return x
def get_params(x=0):
    """Placeholder parameter sampler.

    ``x`` is accepted but currently ignored; every call returns a fresh
    ``{"x": 1}`` dictionary.
    """
    # TODO
    return {"x": 1}
def get_n():
    """Placeholder: return the iterable of ``n`` indices (currently ``range(2)``)."""
    # TODO
    return range(2)
def get_r():
    """Placeholder: return the iterable of ``r`` values (currently ``range(4)``)."""
    # TODO
    return range(4)
def partial_fit(model_and_meta, x_block, y_block, meta=None):
    """Placeholder training step for one model.

    Parameters
    ----------
    model_and_meta : tuple
        ``(model, meta)`` pair; the second element is used as the metadata
        when ``meta`` is not supplied.
    x_block, y_block
        Data blocks. The sketch only supports ``None`` for both.
    meta : dict, optional
        Metadata overriding the one stored in ``model_and_meta``. Must
        contain at least ``resource``, ``eta``, ``model_id`` and
        ``iterations``.

    Returns
    -------
    tuple
        ``(model, new_meta)`` where ``new_meta`` is a copy of the metadata
        with ``iterations`` incremented by one.
    """
    # TODO
    model, m = model_and_meta
    if meta is None:
        meta = m
    assert {'resource', 'eta', 'model_id', 'iterations'}.issubset(meta.keys())
    assert isinstance(model, BaseEstimator) and x_block is None and y_block is None
    assert isinstance(meta, dict)
    sleep(1)  # stand-in for the actual training work
    # Increment on a copy so the dictionary owned by the caller (which may
    # also live in a history list) is not modified behind its back.
    meta = dict(meta)
    meta["iterations"] += 1
    return model, meta
def score(model_and_meta, x, y):
    """Placeholder scorer: attach a random score to the model's metadata.

    Parameters
    ----------
    model_and_meta : tuple
        ``(model, meta)`` pair as returned by ``partial_fit``.
    x, y
        Currently unused.

    Returns
    -------
    dict
        All entries of ``meta`` plus a ``"score"`` key holding a fresh
        value from ``np.random.rand()``.
    """
    # TODO
    assert isinstance(model_and_meta, tuple)
    model, meta = model_and_meta
    assert isinstance(model, BaseEstimator) and isinstance(meta, dict)
    # The new score goes last: later keys win in a dict display, and the
    # metadata of a promoted model already carries the previous "score".
    return {**meta, "score": np.random.rand()}
def should_promote(result, history, max_history=20):
    """Placeholder promotion rule.

    Parameters
    ----------
    result : dict
        The score record that just completed.
    history : list
        All records seen so far.
    max_history : int, optional
        Once ``history`` holds more than this many records nothing is
        promoted any more. Defaults to 20.

    Returns
    -------
    list
        ``[result]`` while the history is short enough, otherwise ``[]``.
    """
    # TODO
    assert isinstance(result, dict)
    assert isinstance(history, list)
    if len(history) > max_history:
        return []
    return [result]
def get_submit_args(result):
    """Build the metadata for the next training round of a promoted model.

    Parameters
    ----------
    result : dict
        A score record containing at least ``resource`` and ``eta``.

    Returns
    -------
    dict
        A copy of ``result`` whose ``resource`` is multiplied by ``eta``.
        ``result`` itself is left untouched, so a record that was already
        stored elsewhere keeps the values it was recorded with.
    """
    # TODO
    assert isinstance(result, dict)
    args = dict(result)
    args["resource"] *= args["eta"]
    return args
def create_model(model, p):
    """Placeholder model factory.

    Parameters
    ----------
    model
        Must be ``None`` in this sketch.
    p : dict
        Parameters for the model; currently not applied.

    Returns
    -------
    tuple
        A new ``SVC()`` and an initial metadata dict ``{'iterations': 0}``.
    """
    # TODO
    assert model is None
    assert isinstance(p, dict)
    return SVC(), {'iterations': 0}
def _hyperband(client, model, param_space, x, y):
    """Drive the asynchronous train/score/promote loop on a dask client.

    One model is created for every ``(r, n)`` pair from ``get_r()`` and
    ``get_n()``, given an initial ``partial_fit`` and scored. As each score
    completes it is appended to the history, printed, and handed to
    ``should_promote``; every promoted record is trained again with the
    arguments from ``get_submit_args`` and its new score is added to the
    stream being consumed.

    Parameters
    ----------
    client
        Object providing ``submit`` (a ``distributed.Client`` in this file).
    model
        Forwarded to ``create_model``.
    param_space
        Forwarded to ``get_params``.
    x, y
        Forwarded to ``partial_fit`` and ``score``.

    Returns
    -------
    None
    """
    # pure=False: create_model is called with identical arguments for every
    # model id, and by default dask gives identical calls the same task key,
    # which would make all ids share a single model.
    model_futures = {
        f"{r}-{n}": client.submit(
            create_model, model, get_params(param_space), pure=False
        )
        for n in get_n()
        for r in get_r()
    }
    info = {
        r: [
            {"resource": r, "eta": 3, "model_id": f"{r}-{n}", "iterations": 0}
            for n in get_n()
        ]
        for r in get_r()
    }

    model_and_meta_futures = {
        meta["model_id"]: client.submit(
            partial_fit, model_futures[meta["model_id"]], x, y, meta=meta
        )
        for r_metas in info.values()
        for meta in r_metas
    }
    # From here on the latest state of each model is its fitted version, so
    # that a promotion continues training instead of restarting from the
    # freshly created model.
    model_futures.update(model_and_meta_futures)

    # score draws a random number, so it must not be treated as pure either.
    score_futures = [
        client.submit(score, model_and_meta_future, x, y, pure=False)
        for model_and_meta_future in model_and_meta_futures.values()
    ]

    history = []
    seq = as_completed(score_futures)
    for future in seq:
        result = future.result()
        assert set(result.keys()) == {"resource", "eta", "model_id", "iterations", "score"}
        history.append(result)

        # TODO if we want to map partial_fit/score to blocks
        # i = get_xy_block_id(result)
        print(result)

        for promoted in should_promote(result, history):
            args = get_submit_args(promoted)
            model_id = args["model_id"]
            model_future = client.submit(
                partial_fit, model_futures[model_id], x, y, args
            )
            model_futures[model_id] = model_future
            # Feed the follow-up score back into the iterator being consumed.
            seq.add(client.submit(score, model_future, x, y, pure=False))
    return None
class HyperBand:
    """Thin estimator-style wrapper around ``_hyperband``."""

    def __init__(self, client):
        """Store the dask client and draw ten parameter sets via ``get_params``."""
        self.param_space = [get_params() for _ in range(10)]
        self.client = client

    def fit(self, x, y):
        """Run the search and return ``self``.

        The sketch discards the supplied ``x`` and ``y`` (and uses no base
        model): the placeholder ``partial_fit`` only accepts ``None`` data
        blocks.
        """
        x = y = model = None
        _hyperband(self.client, model, self.param_space, x, y)
        # TODO: set best index, best model, etc
        return self
if __name__ == "__main__":
    # Use the client as a context manager so it is closed when the search
    # finishes or fails.
    with Client() as client:
        search = HyperBand(client)
        search.fit(None, None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment