{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Find max model / batch_size combo for GPU Ram"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"import os\n",
"#os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from fastai.vision import *\n",
"from fastai.callbacks.mem import *\n",
"from fastai.callbacks.misc import StopAfterNBatches\n",
"from ipyexperiments import *\n",
"from ipyexperiments.utils.mem import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch._C.Generator at 0x7f9fd18cdaf0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seed = 42\n",
"\n",
"# python RNG\n",
"random.seed(seed)\n",
"\n",
"# pytorch RNGs\n",
"import torch\n",
"torch.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n",
"if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)\n",
"\n",
"# numpy RNG\n",
"import numpy as np\n",
"np.random.seed(seed)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.PETS)\n",
"path_img = path/'images'\n",
"fnames = get_image_files(path_img)\n",
"pat = r'/([^/]+)_\\d+.jpg$'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"res_models = [models.resnet18,models.resnet34,models.resnet50,models.resnet101,models.resnet152]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"bs = 64\n",
"img_size = 64\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs).normalize(imagenet_stats)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def try_run(bs):\n",
" arch = res_models[0]\n",
" learn = cnn_learner(data, arch)\n",
" learn.unfreeze()\n",
" learn.data.batch_size = bs\n",
" with progress_disabled(learn) as learn:\n",
" learn.fit(1)\n",
" del learn; gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def test_from_max_bs(max_bs:int=1024,r_factor:float=0.80,verbose:bool=True,memtrace=None)->int:\n",
" ''' Binary search for max batch size with given learner'''\n",
" if memtrace is None: memtrace = GPUMemTrace()\n",
" test_dict = dict()\n",
"\n",
" low,hi = 2,max_bs\n",
" \n",
" #first test low, if that does not fit, return\n",
" try:\n",
" try_run(bs=low)\n",
" out = low #this could be our best if the next loop fails\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'➜ tried bs:{bs}')\n",
" return None,test_dict #can't fit min\n",
" \n",
" bs = hi\n",
" while (hi-low) > 10: # larger margin is fine while experimenting - ends much faster\n",
" #learn.purge()\n",
" try:\n",
" try_run(bs=bs)\n",
" out,low = bs,bs\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'tried bs:{bs}')\n",
" hi = bs #bs is the new high value, keep cutting till we fail again.\n",
"\n",
" bs = (low + hi) // 2 \n",
" if bs == hi: bs = low\n",
" \n",
" print(f'➜ trying {bs}, lo:{low},hi:{hi}')\n",
" if verbose: memtrace.report(f'bs:{bs}')\n",
" test_dict[bs]=tuple(memtrace.data())\n",
" \n",
" return out,test_dict"
]
},
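{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same bisection works without the fastai machinery. The sketch below is illustrative only and is not executed here: it probes a plain PyTorch model with a single forward/backward pass per candidate batch size and treats a CUDA out-of-memory RuntimeError as a failed probe. The names `fits`, `cleanup`, `max_bs` and `model_fn` are hypothetical and not used elsewhere in this notebook."
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"import gc\n",
"import torch\n",
"import torch.nn.functional as F\n",
"\n",
"def fits(model_fn, bs, img_size=64, device='cuda'):\n",
"    # one forward/backward pass at batch size `bs`; True if it fits in GPU RAM\n",
"    try:\n",
"        model = model_fn().to(device)\n",
"        x = torch.randn(bs, 3, img_size, img_size, device=device)\n",
"        y = torch.randint(0, 10, (bs,), device=device)\n",
"        F.cross_entropy(model(x), y).backward()\n",
"        return True\n",
"    except RuntimeError as e:\n",
"        if 'out of memory' not in str(e): raise\n",
"        return False\n",
"\n",
"def cleanup():\n",
"    # release the previous probe's tensors before trying the next batch size\n",
"    gc.collect()\n",
"    torch.cuda.empty_cache()\n",
"\n",
"def max_bs(model_fn, lo=2, hi=1024, margin=10):\n",
"    # same bisection as test_from_max_bs above, minus fastai/ipyexperiments\n",
"    ok = fits(model_fn, lo); cleanup()\n",
"    if not ok: return None  # even the minimal bs does not fit\n",
"    best, bs = lo, hi\n",
"    while (hi - lo) > margin:\n",
"        ok = fits(model_fn, bs); cleanup()\n",
"        if ok: best = lo = bs\n",
"        else:  hi = bs\n",
"        bs = (lo + hi) // 2\n",
"        if bs == hi: bs = lo\n",
"    return best\n",
"\n",
"# e.g. max_bs(lambda: models.resnet18(num_classes=10))"
]
},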
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def do_the_magic(max_bs=32,verbose=True,memtrace=None):\n",
" defaults.extra_callbacks = [StopAfterNBatches(n_batches=1)]\n",
" out, bs_test_data = test_from_max_bs(max_bs=max_bs,verbose=verbose,memtrace=memtrace)\n",
" defaults.extra_callbacks = None\n",
" return out, bs_test_data"
]
},
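{
"cell_type": "markdown",
"metadata": {},
"source": [
"Side note: if `test_from_max_bs` raised something other than the RuntimeError it handles, `defaults.extra_callbacks` would be left set and every later `fit` would keep stopping after one batch. A try/finally variant (a sketch, not used in the runs below) restores it unconditionally:"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"def do_the_magic_safe(max_bs=32,verbose=True,memtrace=None):\n",
"    # same as do_the_magic, but restores defaults.extra_callbacks even on error\n",
"    saved = getattr(defaults, 'extra_callbacks', None)\n",
"    defaults.extra_callbacks = [StopAfterNBatches(n_batches=1)]\n",
"    try:\n",
"        return test_from_max_bs(max_bs=max_bs,verbose=verbose,memtrace=memtrace)\n",
"    finally:\n",
"        defaults.extra_callbacks = saved"
]
},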
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import fastai\n",
"import fastprogress\n",
"\n",
"class progress_disabled():\n",
" ''' Context manager to disable the progress update bar and Recorder print'''\n",
" def __init__(self,learner:Learner):\n",
" self.learn = learner\n",
" def __enter__(self):\n",
" #silence progress bar\n",
" fastprogress.fastprogress.NO_BAR = True\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = fastprogress.force_console_behavior()\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True) #,silent=True) #silence recorder\n",
" \n",
" return self.learn \n",
" def __exit__(self,type,value,traceback):\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar,progress_bar\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def my_experiment(try_bs):\n",
"\n",
" free_mem = 1000\n",
" x1 = gpu_mem_leave_free_mbs(free_mem)\n",
"\n",
" got_bs,tested_bs_data = do_the_magic(max_bs=try_bs,verbose=True)\n",
" print(f\"\\n\\n*** Started with bs={try_bs}, got bs={got_bs} ***\\n\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, GeForce GTX 1070 Ti (8119 RAM)\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.000\n",
"・ CPU: 0 0 1,897 MB |\n",
"・ GPU: 0 0 427 MB |\n"
]
}
],
"source": [
"exp = IPyExperimentsPytorch(exp_enable=False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.571518 4.885113 00:04 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.42 GiB already allocated; 30.56 MiB free; 1.75 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 970 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 4.00 MiB (GPU 0; 7.93 GiB total capacity; 7.44 GiB already allocated; 4.56 MiB free; 12.83 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 992 4 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.141698 5.137645 00:09 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 112 884 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.220157 4.747365 00:10 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 112 886 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.133040 4.167121 00:11 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 112 886 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"0 4.215500 5.591762 00:10 \n",
"➜ trying 485, lo:469,hi:501\n",
"△Used Peaked MB: 112 886 (bs:485)\n",
"epoch train_loss valid_loss time \n",
"0 4.240120 4.564884 00:10 \n",
"➜ trying 493, lo:485,hi:501\n",
"△Used Peaked MB: 112 886 (bs:493)\n",
"epoch train_loss valid_loss time \n",
"0 4.081361 4.604852 00:10 \n",
"➜ trying 497, lo:493,hi:501\n",
"△Used Peaked MB: 112 886 (bs:497)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=493 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:26.084\n",
"・ CPU: 0 0 2,185 MB |\n",
"・ GPU: 112 7,578 539 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.256937 5.645082 00:03 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.37 GiB already allocated; 34.56 MiB free; 1.78 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 966 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 125.25 MiB (GPU 0; 7.93 GiB total capacity; 6.87 GiB already allocated; 34.56 MiB free; 398.27 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 966 32 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.323106 4.774740 00:08 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 298 700 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.291206 4.536068 00:10 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 298 700 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.250433 4.245210 00:12 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 298 700 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 7.93 GiB total capacity; 7.43 GiB already allocated; 10.56 MiB free; 24.59 MiB cached) tried bs:469\n",
"➜ trying 453, lo:438,hi:469\n",
"△Used Peaked MB: 990 8 (bs:453)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 7.93 GiB total capacity; 7.40 GiB already allocated; 6.56 MiB free; 32.97 MiB cached) tried bs:453\n",
"➜ trying 445, lo:438,hi:453\n",
"△Used Peaked MB: 994 4 (bs:445)\n",
"epoch train_loss valid_loss time \n",
"0 4.253715 4.268240 00:11 \n",
"➜ trying 449, lo:445,hi:453\n",
"△Used Peaked MB: 298 700 (bs:449)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=445 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:21.425\n",
"・ CPU: 0 0 2,246 MB |\n",
"・ GPU: 298 7,280 837 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch train_loss valid_loss time \n",
"0 3.621640 4.942792 00:03 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.50 MiB (GPU 0; 7.93 GiB total capacity; 7.36 GiB already allocated; 6.56 MiB free; 83.15 MiB cached) tried bs:1000\n",
"➜ trying 501, lo:2,hi:1000\n",
"△Used Peaked MB: 994 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 125.25 MiB (GPU 0; 7.93 GiB total capacity; 6.77 GiB already allocated; 118.56 MiB free; 455.40 MiB cached) tried bs:501\n",
"➜ trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 756 238 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"0 4.253004 5.322557 00:07 \n",
"➜ trying 376, lo:251,hi:501\n",
"△Used Peaked MB: 0 994 (bs:376)\n",
"epoch train_loss valid_loss time \n",
"0 4.266721 4.913308 00:09 \n",
"➜ trying 438, lo:376,hi:501\n",
"△Used Peaked MB: 0 994 (bs:438)\n",
"epoch train_loss valid_loss time \n",
"0 4.189319 4.424547 00:11 \n",
"➜ trying 469, lo:438,hi:501\n",
"△Used Peaked MB: 0 994 (bs:469)\n",
"epoch train_loss valid_loss time \n",
"0 4.273203 4.379632 00:09 \n",
"➜ trying 485, lo:469,hi:501\n",
"△Used Peaked MB: 0 994 (bs:485)\n",
"epoch train_loss valid_loss time \n",
"0 4.198289 4.679458 00:10 \n",
"➜ trying 493, lo:485,hi:501\n",
"△Used Peaked MB: 0 994 (bs:493)\n",
"epoch train_loss valid_loss time \n",
"0 4.297161 4.539411 00:09 \n",
"➜ trying 497, lo:493,hi:501\n",
"△Used Peaked MB: 0 994 (bs:497)\n",
"\n",
"\n",
"*** Started with bs=1000, got bs=493 ***\n",
"\n",
"\n",
"\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:01:20.136\n",
"・ CPU: 0 0 2,247 MB |\n",
"・ GPU: 0 7,276 837 MB |\n"
]
}
],
"source": [
"my_experiment(try_bs=1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}