Last active
July 28, 2018 02:42
-
-
Save stsievert/facdf6c1427c810cafecd30848fe20d9 to your computer and use it in GitHub Desktop.
Successive Halving with _incremental.fit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Incremental Model Selection\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"from sklearn.linear_model import SGDClassifier\n", | |
"\n", | |
"import dask\n", | |
"from dask.distributed import Client\n", | |
"from dask_ml.datasets import make_classification\n", | |
"from dask_ml.model_selection._incremental import fit" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Client</h3>\n", | |
"<ul>\n", | |
" <li><b>Scheduler: </b>tcp://127.0.0.1:56260\n", | |
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Cluster</h3>\n", | |
"<ul>\n", | |
" <li><b>Workers: </b>8</li>\n", | |
" <li><b>Cores: </b>8</li>\n", | |
" <li><b>Memory: </b>17.18 GB</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: scheduler='tcp://127.0.0.1:56260' processes=8 cores=8>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client = Client(processes=True)\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Make data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X, y = make_classification(n_samples=5000000, n_features=20,\n", | |
" chunks=100000)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Incremental.fit" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X, y = make_classification(n_samples=5000000, n_features=20,\n", | |
" chunks=100000)\n", | |
"model = SGDClassifier(tol=1e-3, penalty='elasticnet')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_iter = 100\n", | |
"patience = 10\n", | |
"tol = 0.001\n", | |
"\n", | |
"def adapt(info):\n", | |
" [info] = info.values()\n", | |
" if max_iter is not None and len(info) > max_iter:\n", | |
" return {0: 0}\n", | |
"\n", | |
" if len(info) > patience:\n", | |
" old = info[-patience]['score']\n", | |
" if all(d['score'] < old + tol for d in info[-patience:]):\n", | |
" return {0: 0}\n", | |
"\n", | |
" return {0: 1}\n", | |
"\n", | |
"from dask_ml.model_selection._incremental import fit\n", | |
"X_test = X.blocks[-1]\n", | |
"X = X.blocks[:-1]\n", | |
"y_test = y.blocks[-1]\n", | |
"y = y.blocks[:-1]\n", | |
"info, models, history = fit(model, [{}], X, y, X_test, y_test, adapt,\n", | |
" {'classes': [0, 1]})\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'model_id': 0, 'params': {}, 'partial_fit_calls': 19, 'score': 0.58955},\n", | |
" {'model_id': 0, 'params': {}, 'partial_fit_calls': 20, 'score': 0.58888},\n", | |
" {'model_id': 0, 'params': {}, 'partial_fit_calls': 21, 'score': 0.59109},\n", | |
" {'model_id': 0, 'params': {}, 'partial_fit_calls': 22, 'score': 0.58677},\n", | |
" {'model_id': 0, 'params': {}, 'partial_fit_calls': 23, 'score': 0.58385}]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"info[0][-5:]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### RandomSearch" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_models = {}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0}\n" | |
] | |
} | |
], | |
"source": [ | |
"max_iter = 81\n", | |
"patience = 10\n", | |
"tol = 0.001\n", | |
"\n", | |
"def adapt(info):\n", | |
" out = {}\n", | |
" for ident, records in info.items():\n", | |
" if max_iter is not None and len(records) > max_iter:\n", | |
" out[ident] = 0\n", | |
"\n", | |
" elif len(records) > patience:\n", | |
" old = records[-patience]['score']\n", | |
" if all(d['score'] < old + tol for d in records[-patience:]):\n", | |
" out[ident] = 0\n", | |
" else:\n", | |
" out[ident] = 1\n", | |
" \n", | |
" else:\n", | |
" out[ident] = 1\n", | |
" print(out)\n", | |
" return out\n", | |
"\n", | |
"X, y = make_classification(n_samples=5000000, n_features=20,\n", | |
" chunks=100000)\n", | |
"model = SGDClassifier(tol=1e-3, penalty='elasticnet')\n", | |
"\n", | |
"params = {'alpha': np.logspace(-2, 1, num=1000),\n", | |
" 'l1_ratio': np.linspace(0, 1, num=1000),\n", | |
" 'average': [True, False]}\n", | |
"\n", | |
"from sklearn.model_selection import ParameterSampler\n", | |
"params_list = list(ParameterSampler(params, 5))\n", | |
"\n", | |
"from dask_ml.model_selection._incremental import fit\n", | |
"X_test = X.blocks[-1]\n", | |
"X = X.blocks[:-1]\n", | |
"y_test = y.blocks[-1]\n", | |
"y = y.blocks[:-1]\n", | |
"info, models, history = fit(model, params_list, X, y, X_test, y_test, adapt,\n", | |
" {'classes': [0, 1]})\n", | |
"all_models['random'] = info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'model_id': 3,\n", | |
" 'params': {'l1_ratio': 0.5495495495495496,\n", | |
" 'average': True,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 15,\n", | |
" 'score': 0.64588},\n", | |
" {'model_id': 3,\n", | |
" 'params': {'l1_ratio': 0.5495495495495496,\n", | |
" 'average': True,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 16,\n", | |
" 'score': 0.64604},\n", | |
" {'model_id': 3,\n", | |
" 'params': {'l1_ratio': 0.5495495495495496,\n", | |
" 'average': True,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 17,\n", | |
" 'score': 0.64607},\n", | |
" {'model_id': 3,\n", | |
" 'params': {'l1_ratio': 0.5495495495495496,\n", | |
" 'average': True,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 18,\n", | |
" 'score': 0.6459},\n", | |
" {'model_id': 3,\n", | |
" 'params': {'l1_ratio': 0.5495495495495496,\n", | |
" 'average': True,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 19,\n", | |
" 'score': 0.64594}]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"best = max(info, key=lambda k: info[k][-1]['score'])\n", | |
"info[best][-5:]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Successive halving" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import toolz\n", | |
"import math\n", | |
"\n", | |
"class SHA:\n", | |
" def __init__(self, n, r, eta=3):\n", | |
" self.steps = 0\n", | |
" self.n = n\n", | |
" self.r = r\n", | |
" self.eta = eta\n", | |
" \n", | |
" def fit(self, info):\n", | |
" n = self.n\n", | |
" r = self.r\n", | |
" eta = self.eta\n", | |
" \n", | |
" n_i = math.floor(n * eta ** -self.steps)\n", | |
" r_i = r * eta**self.steps\n", | |
" iters = {v[-1]['partial_fit_calls'] for v in info.values()}\n", | |
" if self.steps == 0:\n", | |
" self.steps = 1\n", | |
" return {k: r_i for k in info}\n", | |
" \n", | |
" best = toolz.topk(n_i // eta, info, key=lambda k: info[k][-1]['score'])\n", | |
"\n", | |
" if len(best) == 1:\n", | |
" self._best_arm = best\n", | |
" elif len(best) == 0:\n", | |
" [best] = self._best_arm\n", | |
" print({best: info[best][-1]['partial_fit_calls']})\n", | |
" return {best: 0}\n", | |
"\n", | |
" out = {k: r_i - info[k][-1]['partial_fit_calls']\n", | |
" for k in best}\n", | |
"\n", | |
" print(\"iters =\", iters)\n", | |
" print(out)\n", | |
" self.steps += 1\n", | |
" return out\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(81, 1)" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = SGDClassifier(tol=1e-3, penalty='elasticnet')\n", | |
"\n", | |
"params = {'alpha': np.logspace(-2, 1, num=1000),\n", | |
" 'l1_ratio': np.linspace(0, 1, num=1000),\n", | |
" 'average': [True, False]}\n", | |
"params = {'alpha': np.logspace(-2, 1, num=1000),\n", | |
" 'l1_ratio': np.linspace(0, 1, num=1000),\n", | |
" 'average': [True, False]}\n", | |
"\n", | |
"from sklearn.model_selection import ParameterSampler\n", | |
"\n", | |
"R = 81\n", | |
"eta = 3\n", | |
"# def hyperband(...):\n", | |
"s_max = math.floor(math.log(R, eta))\n", | |
"B = (s_max + 1) * R\n", | |
"# for s in [...]:\n", | |
"s = s_max # pick the most exploratory bracket\n", | |
"n = math.ceil(B / R * eta**s / (s + 1))\n", | |
"r = int(R * eta**-s)\n", | |
"n, r" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"iters = {1}\n", | |
"{46: 2, 8: 2, 36: 2, 28: 2, 64: 2, 24: 2, 21: 2, 27: 2, 40: 2}\n", | |
"iters = {3}\n", | |
"{8: 6, 24: 6, 36: 6}\n", | |
"iters = {9}\n", | |
"{24: 18}\n", | |
"{24: 27}\n" | |
] | |
} | |
], | |
"source": [ | |
"alg = SHA(n, r)\n", | |
"\n", | |
"params_list = list(ParameterSampler(params, n))\n", | |
"\n", | |
"from dask_ml.model_selection._incremental import fit\n", | |
"X_test = X.blocks[-1]\n", | |
"X = X.blocks[:-1]\n", | |
"y_test = y.blocks[-1]\n", | |
"y = y.blocks[:-1]\n", | |
"\n", | |
"info, models, history = fit(model, params_list, X, y, X_test, y_test,\n", | |
" alg.fit, {'classes': [0, 1]})\n", | |
"# all_models['hyperband'] = info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'model_id': 27,\n", | |
" 'params': {'l1_ratio': 0.34634634634634637,\n", | |
" 'average': False,\n", | |
" 'alpha': 0.26510836019085376},\n", | |
" 'partial_fit_calls': 3,\n", | |
" 'score': 0.66754},\n", | |
" {'model_id': 24,\n", | |
" 'params': {'l1_ratio': 0.9159159159159159,\n", | |
" 'average': False,\n", | |
" 'alpha': 0.18891927762076663},\n", | |
" 'partial_fit_calls': 9,\n", | |
" 'score': 0.6679},\n", | |
" {'model_id': 8,\n", | |
" 'params': {'l1_ratio': 0.7927927927927928,\n", | |
" 'average': False,\n", | |
" 'alpha': 0.12563166024741207},\n", | |
" 'partial_fit_calls': 9,\n", | |
" 'score': 0.66776},\n", | |
" {'model_id': 36,\n", | |
" 'params': {'l1_ratio': 0.1931931931931932,\n", | |
" 'average': False,\n", | |
" 'alpha': 0.08890965989529158},\n", | |
" 'partial_fit_calls': 9,\n", | |
" 'score': 0.66755},\n", | |
" {'model_id': 24,\n", | |
" 'params': {'l1_ratio': 0.9159159159159159,\n", | |
" 'average': False,\n", | |
" 'alpha': 0.18891927762076663},\n", | |
" 'partial_fit_calls': 27,\n", | |
" 'score': 0.66742}]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"history[-5:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{0, 1, 3, 9, 27}" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"{v['partial_fit_calls'] for v in history}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment