Last active
July 28, 2018 02:42
-
-
Save stsievert/facdf6c1427c810cafecd30848fe20d9 to your computer and use it in GitHub Desktop.
Successive Halving with _incremental.fit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Incremental Model Selection\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "\n", | |
| "from sklearn.linear_model import SGDClassifier\n", | |
| "\n", | |
| "import dask\n", | |
| "from dask.distributed import Client\n", | |
| "from dask_ml.datasets import make_classification\n", | |
| "from dask_ml.model_selection._incremental import fit" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table style=\"border: 2px solid white;\">\n", | |
| "<tr>\n", | |
| "<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
| "<h3>Client</h3>\n", | |
| "<ul>\n", | |
| " <li><b>Scheduler: </b>tcp://127.0.0.1:56260\n", | |
| " <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n", | |
| "</ul>\n", | |
| "</td>\n", | |
| "<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
| "<h3>Cluster</h3>\n", | |
| "<ul>\n", | |
| " <li><b>Workers: </b>8</li>\n", | |
| " <li><b>Cores: </b>8</li>\n", | |
| " <li><b>Memory: </b>17.18 GB</li>\n", | |
| "</ul>\n", | |
| "</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<Client: scheduler='tcp://127.0.0.1:56260' processes=8 cores=8>" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Start a Dask client with processes=True; the bare `client` expression on the\n", | |
| "# last line renders the scheduler / worker summary as the cell output.\n", | |
| "client = Client(processes=True)\n", | |
| "client" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Make data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Fixed seed so that Restart Kernel -> Run All regenerates the same dataset.\n", | |
| "X, y = make_classification(n_samples=5000000, n_features=20,\n", | |
| "                           chunks=100000, random_state=0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Incremental.fit" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# X and y are reused from the Make data cell above; building the same\n", | |
| "# 5,000,000-sample dataset a second time here only replaced it with new draws.\n", | |
| "model = SGDClassifier(tol=1e-3, penalty='elasticnet')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "max_iter = 100   # stop once the model has more than this many records\n", | |
| "patience = 10    # number of most recent records inspected for improvement\n", | |
| "tol = 0.001      # minimum score gain that counts as an improvement\n", | |
| "\n", | |
| "def adapt(info):\n", | |
| "    \"\"\"Request 0 or 1 additional ``partial_fit`` calls for the single model.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    info : dict\n", | |
| "        Must hold exactly one entry, ``{model_id: records}``, where each\n", | |
| "        record is a dict with a ``'score'`` key. Any other size raises\n", | |
| "        ``ValueError`` from the unpacking below.\n", | |
| "\n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    dict\n", | |
| "        ``{model_id: 0}`` when the model has more than ``max_iter`` records or\n", | |
| "        none of its last ``patience`` scores reaches the score at the start\n", | |
| "        of that window plus ``tol``; ``{model_id: 1}`` otherwise.\n", | |
| "    \"\"\"\n", | |
| "    # Answer under the id that was actually passed in rather than a\n", | |
| "    # hard-coded 0, so the result always refers to a model the caller knows.\n", | |
| "    [(ident, records)] = info.items()\n", | |
| "    if max_iter is not None and len(records) > max_iter:\n", | |
| "        return {ident: 0}\n", | |
| "\n", | |
| "    if len(records) > patience:\n", | |
| "        old = records[-patience]['score']\n", | |
| "        if all(d['score'] < old + tol for d in records[-patience:]):\n", | |
| "            return {ident: 0}\n", | |
| "\n", | |
| "    return {ident: 1}\n", | |
| "\n", | |
| "# Hold out the final block as the test set. The split is bound to new names\n", | |
| "# so the cell is idempotent: re-running it no longer strips another block\n", | |
| "# from X and y each time. `fit` is already imported in the first code cell.\n", | |
| "X_train, X_test = X.blocks[:-1], X.blocks[-1]\n", | |
| "y_train, y_test = y.blocks[:-1], y.blocks[-1]\n", | |
| "info, models, history = fit(model, [{}], X_train, y_train, X_test, y_test,\n", | |
| "                            adapt, {'classes': [0, 1]})\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'model_id': 0, 'params': {}, 'partial_fit_calls': 19, 'score': 0.58955},\n", | |
| " {'model_id': 0, 'params': {}, 'partial_fit_calls': 20, 'score': 0.58888},\n", | |
| " {'model_id': 0, 'params': {}, 'partial_fit_calls': 21, 'score': 0.59109},\n", | |
| " {'model_id': 0, 'params': {}, 'partial_fit_calls': 22, 'score': 0.58677},\n", | |
| " {'model_id': 0, 'params': {}, 'partial_fit_calls': 23, 'score': 0.58385}]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "info[0][-5:]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Random search" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "all_models = {}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 1, 1: 1, 2: 1, 3: 1, 4: 1}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 1, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 1, 2: 0, 3: 0, 4: 0}\n", | |
| "{0: 0, 1: 0, 2: 0, 3: 0, 4: 0}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "max_iter = 81\n", | |
| "patience = 10\n", | |
| "tol = 0.001\n", | |
| "\n", | |
| "def adapt(info):\n", | |
| "    \"\"\"Map every model id in ``info`` to 0 or 1 additional ``partial_fit`` calls.\n", | |
| "\n", | |
| "    A model gets 0 when it has more than ``max_iter`` records, or when it has\n", | |
| "    more than ``patience`` records and none of the last ``patience`` scores\n", | |
| "    reaches the first score of that window plus ``tol``. Every other model\n", | |
| "    gets 1. The resulting dict is printed and then returned.\n", | |
| "    \"\"\"\n", | |
| "    def stalled(scores):\n", | |
| "        # Compare the recent window against its own first entry.\n", | |
| "        baseline = scores[-patience]['score']\n", | |
| "        return all(rec['score'] < baseline + tol for rec in scores[-patience:])\n", | |
| "\n", | |
| "    def calls_for(scores):\n", | |
| "        if max_iter is not None and len(scores) > max_iter:\n", | |
| "            return 0\n", | |
| "        if len(scores) > patience and stalled(scores):\n", | |
| "            return 0\n", | |
| "        return 1\n", | |
| "\n", | |
| "    decisions = {model_id: calls_for(scores)\n", | |
| "                 for model_id, scores in info.items()}\n", | |
| "    print(decisions)\n", | |
| "    return decisions\n", | |
| "\n", | |
| "# Build a fresh dataset and estimator for this experiment.\n", | |
| "X, y = make_classification(n_samples=5000000, n_features=20,\n", | |
| " chunks=100000)\n", | |
| "model = SGDClassifier(tol=1e-3, penalty='elasticnet')\n", | |
| "\n", | |
| "# Candidate values for each SGDClassifier hyper-parameter.\n", | |
| "params = {'alpha': np.logspace(-2, 1, num=1000),\n", | |
| " 'l1_ratio': np.linspace(0, 1, num=1000),\n", | |
| " 'average': [True, False]}\n", | |
| "\n", | |
| "# Draw 5 parameter settings from the candidates (no random_state is passed).\n", | |
| "from sklearn.model_selection import ParameterSampler\n", | |
| "params_list = list(ParameterSampler(params, 5))\n", | |
| "\n", | |
| "from dask_ml.model_selection._incremental import fit\n", | |
| "# The last block becomes the test set; all earlier blocks are training data.\n", | |
| "X_test = X.blocks[-1]\n", | |
| "X = X.blocks[:-1]\n", | |
| "y_test = y.blocks[-1]\n", | |
| "y = y.blocks[:-1]\n", | |
| "# `adapt` is called with the per-model records and decides who keeps training.\n", | |
| "info, models, history = fit(model, params_list, X, y, X_test, y_test, adapt,\n", | |
| " {'classes': [0, 1]})\n", | |
| "# Keep this run's per-model records under the 'random' key.\n", | |
| "all_models['random'] = info" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'model_id': 3,\n", | |
| " 'params': {'l1_ratio': 0.5495495495495496,\n", | |
| " 'average': True,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 15,\n", | |
| " 'score': 0.64588},\n", | |
| " {'model_id': 3,\n", | |
| " 'params': {'l1_ratio': 0.5495495495495496,\n", | |
| " 'average': True,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 16,\n", | |
| " 'score': 0.64604},\n", | |
| " {'model_id': 3,\n", | |
| " 'params': {'l1_ratio': 0.5495495495495496,\n", | |
| " 'average': True,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 17,\n", | |
| " 'score': 0.64607},\n", | |
| " {'model_id': 3,\n", | |
| " 'params': {'l1_ratio': 0.5495495495495496,\n", | |
| " 'average': True,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 18,\n", | |
| " 'score': 0.6459},\n", | |
| " {'model_id': 3,\n", | |
| " 'params': {'l1_ratio': 0.5495495495495496,\n", | |
| " 'average': True,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 19,\n", | |
| " 'score': 0.64594}]" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Model id whose most recent record has the highest score, followed by that\n", | |
| "# model's last five records.\n", | |
| "best = max(info, key=lambda k: info[k][-1]['score'])\n", | |
| "info[best][-5:]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Successive halving" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import toolz\n", | |
| "import math\n", | |
| "\n", | |
| "class SHA:\n", | |
| "    \"\"\"Successive-halving schedule whose ``fit`` method is passed to the\n", | |
| "    incremental ``fit`` function in place of an ``adapt`` callback.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    n : int\n", | |
| "        Number of models at the start of the bracket.\n", | |
| "    r : int\n", | |
| "        ``partial_fit`` calls requested for every model on the first step.\n", | |
| "    eta : int, optional\n", | |
| "        Reduction factor. Step ``i`` keeps ``(n // eta**i) // eta`` models and\n", | |
| "        raises the cumulative call target to ``r * eta**i``.\n", | |
| "    \"\"\"\n", | |
| "\n", | |
| "    def __init__(self, n, r, eta=3):\n", | |
| "        self.steps = 0\n", | |
| "        self.n = n\n", | |
| "        self.r = r\n", | |
| "        self.eta = eta\n", | |
| "        # 1-tuple holding the surviving model id once a step narrows the field\n", | |
| "        # to a single model; None until that happens.\n", | |
| "        self._best_arm = None\n", | |
| "\n", | |
| "    def fit(self, info):\n", | |
| "        \"\"\"Return ``{model_id: additional partial_fit calls}`` for the next step.\n", | |
| "\n", | |
| "        ``info`` maps each model id to its list of records; only the latest\n", | |
| "        record's ``'score'`` and ``'partial_fit_calls'`` entries are read.\n", | |
| "        Ids left out of the result are presumably dropped by the caller\n", | |
| "        (TODO confirm against ``fit``). When no model survives the cut, the\n", | |
| "        best model is returned with 0 additional calls.\n", | |
| "        \"\"\"\n", | |
| "        n = self.n\n", | |
| "        r = self.r\n", | |
| "        eta = self.eta\n", | |
| "\n", | |
| "        # Floor division keeps the count exact. The previous form multiplied\n", | |
| "        # by the float eta ** -steps, which can land just under a whole number\n", | |
| "        # and floor one too low.\n", | |
| "        n_i = int(n // eta ** self.steps)\n", | |
| "        r_i = r * eta ** self.steps\n", | |
| "        iters = {v[-1]['partial_fit_calls'] for v in info.values()}\n", | |
| "        if self.steps == 0:\n", | |
| "            self.steps = 1\n", | |
| "            return {k: r_i for k in info}\n", | |
| "\n", | |
| "        def latest_score(k):\n", | |
| "            return info[k][-1]['score']\n", | |
| "\n", | |
| "        best = toolz.topk(int(n_i // eta), info, key=latest_score)\n", | |
| "\n", | |
| "        if len(best) == 1:\n", | |
| "            self._best_arm = best\n", | |
| "        elif len(best) == 0:\n", | |
| "            if self._best_arm is None:\n", | |
| "                # The field went from several models straight to none (n is\n", | |
| "                # not a power of eta), so no winner was ever recorded. Use the\n", | |
| "                # current top scorer instead of failing on a missing attribute.\n", | |
| "                self._best_arm = toolz.topk(1, info, key=latest_score)\n", | |
| "            [best] = self._best_arm\n", | |
| "            print({best: info[best][-1]['partial_fit_calls']})\n", | |
| "            return {best: 0}\n", | |
| "\n", | |
| "        # Top each survivor up to the cumulative target for this step.\n", | |
| "        out = {k: r_i - info[k][-1]['partial_fit_calls']\n", | |
| "               for k in best}\n", | |
| "\n", | |
| "        print(\"iters =\", iters)\n", | |
| "        print(out)\n", | |
| "        self.steps += 1\n", | |
| "        return out\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(81, 1)" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = SGDClassifier(tol=1e-3, penalty='elasticnet')\n", | |
| "\n", | |
| "# Candidate values for each SGDClassifier hyper-parameter (the dict used to be\n", | |
| "# assigned twice in a row with identical contents).\n", | |
| "params = {'alpha': np.logspace(-2, 1, num=1000),\n", | |
| "          'l1_ratio': np.linspace(0, 1, num=1000),\n", | |
| "          'average': [True, False]}\n", | |
| "\n", | |
| "from sklearn.model_selection import ParameterSampler\n", | |
| "\n", | |
| "R = 81\n", | |
| "eta = 3\n", | |
| "if eta <= 1:\n", | |
| "    raise ValueError('eta must be greater than 1')\n", | |
| "# def hyperband(...):\n", | |
| "# Largest s with eta**s <= R, found with exact arithmetic. The floating-point\n", | |
| "# math.floor(math.log(R, eta)) can be one too small when R is an exact power\n", | |
| "# of eta (math.log(1000, 10) evaluates to 2.9999999999999996).\n", | |
| "s_max = 0\n", | |
| "while eta ** (s_max + 1) <= R:\n", | |
| "    s_max += 1\n", | |
| "B = (s_max + 1) * R\n", | |
| "# for s in [...]:\n", | |
| "s = s_max  # pick the most exploratory bracket\n", | |
| "n = math.ceil(B / R * eta**s / (s + 1))\n", | |
| "# Floor division instead of int(R * eta**-s): the float reciprocal can land\n", | |
| "# just below the true value and truncate one too low.\n", | |
| "r = int(R // eta**s)\n", | |
| "n, r" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "iters = {1}\n", | |
| "{46: 2, 8: 2, 36: 2, 28: 2, 64: 2, 24: 2, 21: 2, 27: 2, 40: 2}\n", | |
| "iters = {3}\n", | |
| "{8: 6, 24: 6, 36: 6}\n", | |
| "iters = {9}\n", | |
| "{24: 18}\n", | |
| "{24: 27}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "alg = SHA(n, r)\n", | |
| "\n", | |
| "params_list = list(ParameterSampler(params, n))\n", | |
| "\n", | |
| "# Hold out the final block as the test set. X and y are not rebuilt in this\n", | |
| "# cell, so the split is bound to new names; rebinding X and y themselves made\n", | |
| "# every re-run drop one more block. `fit` is already imported in the first\n", | |
| "# code cell.\n", | |
| "X_train, X_test = X.blocks[:-1], X.blocks[-1]\n", | |
| "y_train, y_test = y.blocks[:-1], y.blocks[-1]\n", | |
| "\n", | |
| "info, models, history = fit(model, params_list, X_train, y_train, X_test, y_test,\n", | |
| "                            alg.fit, {'classes': [0, 1]})\n", | |
| "# all_models['hyperband'] = info" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'model_id': 27,\n", | |
| " 'params': {'l1_ratio': 0.34634634634634637,\n", | |
| " 'average': False,\n", | |
| " 'alpha': 0.26510836019085376},\n", | |
| " 'partial_fit_calls': 3,\n", | |
| " 'score': 0.66754},\n", | |
| " {'model_id': 24,\n", | |
| " 'params': {'l1_ratio': 0.9159159159159159,\n", | |
| " 'average': False,\n", | |
| " 'alpha': 0.18891927762076663},\n", | |
| " 'partial_fit_calls': 9,\n", | |
| " 'score': 0.6679},\n", | |
| " {'model_id': 8,\n", | |
| " 'params': {'l1_ratio': 0.7927927927927928,\n", | |
| " 'average': False,\n", | |
| " 'alpha': 0.12563166024741207},\n", | |
| " 'partial_fit_calls': 9,\n", | |
| " 'score': 0.66776},\n", | |
| " {'model_id': 36,\n", | |
| " 'params': {'l1_ratio': 0.1931931931931932,\n", | |
| " 'average': False,\n", | |
| " 'alpha': 0.08890965989529158},\n", | |
| " 'partial_fit_calls': 9,\n", | |
| " 'score': 0.66755},\n", | |
| " {'model_id': 24,\n", | |
| " 'params': {'l1_ratio': 0.9159159159159159,\n", | |
| " 'average': False,\n", | |
| " 'alpha': 0.18891927762076663},\n", | |
| " 'partial_fit_calls': 27,\n", | |
| " 'score': 0.66742}]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "history[-5:]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{0, 1, 3, 9, 27}" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "{v['partial_fit_calls'] for v in history}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment