Forked from bfarzin/prototype do_the_magic-inital_bs_test.ipynb
Last active: March 8, 2019 04:34
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Find max model / batch_size combo for GPU RAM\n",
"\n",
"Binary-search for the largest batch size that fits on the card: each probe trains just one batch (via `StopAfterNBatches`), so out-of-memory attempts fail fast and cheaply."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"import os\n",
"#os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from fastai.vision import *\n",
"from fastai.callbacks.mem import *\n",
"from fastai.callbacks.misc import StopAfterNBatches\n",
"from ipyexperiments import *\n",
"from ipyexperiments.utils.mem import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch._C.Generator at 0x7f9fd18cdaf0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seed = 42\n",
"\n",
"# python RNG\n",
"random.seed(seed)\n",
"\n",
"# pytorch RNGs\n",
"import torch\n",
"torch.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n",
"if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)\n",
"\n",
"# numpy RNG\n",
"import numpy as np\n",
"np.random.seed(seed)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.PETS)\n",
"path_img = path/'images'\n",
"fnames = get_image_files(path_img)\n",
"pat = r'/([^/]+)_\\d+.jpg$'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"res_models = [models.resnet18,models.resnet34,models.resnet50,models.resnet101,models.resnet152]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"bs = 64\n",
"img_size = 64\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs).normalize(imagenet_stats)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
| "def try_run(bs):\n", | |
| " arch = res_models[0]\n", | |
| " learn = cnn_learner(data, arch)\n", | |
| " learn.unfreeze()\n", | |
| " learn.data.batch_size = bs\n", | |
| " with progress_disabled(learn) as learn:\n", | |
| " learn.fit(1)\n", | |
| " del learn; gc.collect()" | |
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
| "def test_from_max_bs(max_bs:int=1024,r_factor:float=0.80,verbose:bool=True,memtrace=None)->int:\n", | |
| " ''' Binary search for max batch size with given learner'''\n", | |
| " if memtrace is None: memtrace = GPUMemTrace()\n", | |
| " test_dict = dict()\n", | |
| "\n", | |
| " low,hi = 2,max_bs\n", | |
| " \n", | |
| " #first test low, if that does not fit, return\n", | |
| " try:\n", | |
| " try_run(bs=low)\n", | |
| " out = low #this could be our best if the next loop fails\n", | |
| " except RuntimeError as e:\n", | |
| " if verbose: print(e,f'➜ tried bs:{bs}')\n", | |
| " return None,test_dict #can't fit min\n", | |
| " \n", | |
| " bs = hi\n", | |
| " while (hi-low) > 10: # larger margin is fine while experimenting - ends much faster\n", | |
| " #learn.purge()\n", | |
| " try:\n", | |
| " try_run(bs=bs)\n", | |
| " out,low = bs,bs\n", | |
| " except RuntimeError as e:\n", | |
| " if verbose: print(e,f'tried bs:{bs}')\n", | |
| " hi = bs #bs is the new high value, keep cutting till we fail again.\n", | |
| "\n", | |
| " bs = (low + hi) // 2 \n", | |
| " if bs == hi: bs = low\n", | |
| " \n", | |
| " print(f'➜ trying {bs}, lo:{low},hi:{hi}')\n", | |
| " if verbose: memtrace.report(f'bs:{bs}')\n", | |
| " test_dict[bs]=tuple(memtrace.data())\n", | |
| " \n", | |
| " return out,test_dict" | |
]
},
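{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal, CPU-only sketch of the same bisection, with the GPU trial swapped for a hypothetical `fits(bs)` predicate (here just `bs <= limit`), to show how `low`/`hi` converge without touching the card. It only illustrates the search logic above and is not part of the experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def search_max_bs(fits, max_bs=1024, margin=10):\n",
"    ''' Same bisection as test_from_max_bs, driven by an arbitrary fits(bs) predicate. '''\n",
"    low, hi = 2, max_bs\n",
"    if not fits(low): return None       # even the minimum does not fit\n",
"    out, bs = low, hi\n",
"    while (hi - low) > margin:\n",
"        if fits(bs): out, low = bs, bs  # bs works - it becomes the new lower bound\n",
"        else: hi = bs                   # bs failed - it becomes the new upper bound\n",
"        bs = (low + hi) // 2\n",
"        if bs == hi: bs = low\n",
"    return out\n",
"\n",
"# with a made-up limit of 493 images per batch it probes 1000, 501, 251, 376, ... and settles on 493\n",
"search_max_bs(lambda bs: bs <= 493, max_bs=1000)"
]
},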
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def do_the_magic(max_bs=32,verbose=True,memtrace=None):\n",
"    defaults.extra_callbacks = [StopAfterNBatches(n_batches=1)]\n",
"    out, bs_test_data = test_from_max_bs(max_bs=max_bs,verbose=verbose,memtrace=memtrace)\n",
"    defaults.extra_callbacks = None\n",
"    return out, bs_test_data"
]
},
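{
"cell_type": "markdown",
"metadata": {},
"source": [
"`do_the_magic` switches the quick-probe behaviour on globally via `defaults.extra_callbacks`, so every learner created inside `try_run` stops after a single batch, and then restores normal training by resetting the default. For reference, a per-learner sketch of the same idea, assuming fastai v1's `Learner.callbacks` list (this form is not used in the notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: attach StopAfterNBatches to one learner only, instead of switching it on globally\n",
"learn = cnn_learner(data, res_models[0])\n",
"learn.callbacks.append(StopAfterNBatches(n_batches=1))\n",
"learn.fit(1)  # processes a single batch and then stops the fit\n",
"del learn; gc.collect()"
]
},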
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import fastai\n",
"import fastprogress\n",
"\n",
| "class progress_disabled():\n", | |
| " ''' Context manager to disable the progress update bar and Recorder print'''\n", | |
| " def __init__(self,learner:Learner):\n", | |
| " self.learn = learner\n", | |
| " def __enter__(self):\n", | |
| " #silence progress bar\n", | |
| " fastprogress.fastprogress.NO_BAR = True\n", | |
| " fastai.basic_train.master_bar, fastai.basic_train.progress_bar = fastprogress.force_console_behavior()\n", | |
| " self.learn.callback_fns[0] = partial(Recorder,add_time=True) #,silent=True) #silence recorder\n", | |
| " \n", | |
| " return self.learn \n", | |
| " def __exit__(self,type,value,traceback):\n", | |
| " fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar,progress_bar\n", | |
| " self.learn.callback_fns[0] = partial(Recorder,add_time=True)\n", | |
| " " | |
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
| "def my_experiment(try_bs):\n", | |
| "\n", | |
| " free_mem = 1000\n", | |
| " x1 = gpu_mem_leave_free_mbs(free_mem)\n", | |
| "\n", | |
| " got_bs,tested_bs_data = do_the_magic(max_bs=try_bs,verbose=True)\n", | |
| " print(f\"\\n\\n*** Started with bs={try_bs}, got bs={got_bs} ***\\n\\n\\n\")" | |
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, GeForce GTX 1070 Ti (8119 RAM)\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.000\n",
"・ CPU: 0 0 1,897 MB |\n",
"・ GPU: 0 0 427 MB |\n"
]
}
],
"source": [
"exp = IPyExperimentsPytorch(exp_enable=False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.571518 4.885113 00:04 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.42 GiB already allocated; 30.56 MiB free; 1.75 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 970 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 4.00 MiB (GPU 0; 7.93 GiB total capacity; 7.44 GiB already allocated; 4.56 MiB free; 12.83 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 992 4 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.141698 5.137645 00:09 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 112 884 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.220157 4.747365 00:10 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 112 886 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.133040 4.167121 00:11 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 112 886 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"0 4.215500 5.591762 00:10 \n",
"➜ trying 485, lo:469,hi:501\n",
"△Used Peaked MB: 112 886 (bs:485)\n",
"epoch train_loss valid_loss time \n",
"0 4.240120 4.564884 00:10 \n",
"➜ trying 493, lo:485,hi:501\n",
"△Used Peaked MB: 112 886 (bs:493)\n",
"epoch train_loss valid_loss time \n",
"0 4.081361 4.604852 00:10 \n",
"➜ trying 497, lo:493,hi:501\n",
"△Used Peaked MB: 112 886 (bs:497)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=493 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:26.084\n",
"・ CPU: 0 0 2,185 MB |\n",
"・ GPU: 112 7,578 539 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.256937 5.645082 00:03 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.37 GiB already allocated; 34.56 MiB free; 1.78 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 966 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 125.25 MiB (GPU 0; 7.93 GiB total capacity; 6.87 GiB already allocated; 34.56 MiB free; 398.27 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 966 32 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.323106 4.774740 00:08 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 298 700 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.291206 4.536068 00:10 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 298 700 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.250433 4.245210 00:12 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 298 700 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 7.93 GiB total capacity; 7.43 GiB already allocated; 10.56 MiB free; 24.59 MiB cached) tried bs:469\n",
"➜ trying 453, lo:438,hi:469\n",
"△Used Peaked MB: 990 8 (bs:453)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 7.93 GiB total capacity; 7.40 GiB already allocated; 6.56 MiB free; 32.97 MiB cached) tried bs:453\n",
"➜ trying 445, lo:438,hi:453\n",
"△Used Peaked MB: 994 4 (bs:445)\n",
"epoch train_loss valid_loss time \n",
"0 4.253715 4.268240 00:11 \n",
"➜ trying 449, lo:445,hi:453\n",
"△Used Peaked MB: 298 700 (bs:449)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=445 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:21.425\n",
"・ CPU: 0 0 2,246 MB |\n",
"・ GPU: 298 7,280 837 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.621640 4.942792 00:03 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.36 GiB already allocated; 6.56 MiB free; 83.15 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 994 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 125.25 MiB (GPU 0; 7.93 GiB total capacity; 6.77 GiB already allocated; 118.56 MiB free; 455.40 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 756 238 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.253004 5.322557 00:07 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 0 994 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.266721 4.913308 00:09 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 0 994 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.189319 4.424547 00:11 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 0 994 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"0 4.273203 4.379632 00:09 \n",
"➜ trying 485, lo:469,hi:501\n",
"△Used Peaked MB: 0 994 (bs:485)\n",
"epoch train_loss valid_loss time \n",
"0 4.198289 4.679458 00:10 \n",
"➜ trying 493, lo:485,hi:501\n",
"△Used Peaked MB: 0 994 (bs:493)\n",
"epoch train_loss valid_loss time \n",
"0 4.297161 4.539411 00:09 \n",
"➜ trying 497, lo:493,hi:501\n",
"△Used Peaked MB: 0 994 (bs:497)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=493 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:20.136\n",
"・ CPU: 0 0 2,247 MB |\n",
"・ GPU: 0 7,276 837 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
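{
"cell_type": "markdown",
"metadata": {},
"source": [
"The heading asks for a model / batch-size combo, yet `try_run` above only ever probes `res_models[0]` (resnet18). Below is a sketch of how the same machinery could sweep every listed architecture: `try_run` is redefined to read the architecture from a global so that `do_the_magic` needs no changes. This restructuring is an assumption about how one might extend the notebook and is not executed here."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: find a workable batch size for each architecture in res_models\n",
"arch_to_test = res_models[0]\n",
"\n",
"def try_run(bs):\n",
"    learn = cnn_learner(data, arch_to_test)  # picks up the global set by the loop below\n",
"    learn.unfreeze()\n",
"    learn.data.batch_size = bs\n",
"    with progress_disabled(learn) as learn:\n",
"        learn.fit(1)\n",
"    del learn; gc.collect()\n",
"\n",
"results = {}\n",
"for arch_to_test in res_models:\n",
"    got_bs, _ = do_the_magic(max_bs=512)\n",
"    results[arch_to_test.__name__] = got_bs\n",
"results"
]
},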
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}