{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Find max model / batch_size combo for GPU Ram"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"import os\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from fastai.vision import *\n",
"from fastai.callbacks.mem import *\n",
"from ipyexperiments import *\n",
"from ipyexperiments.utils.mem import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"seed = 42\n",
"\n",
"# python RNG\n",
"random.seed(seed)\n",
"\n",
"# pytorch RNGs\n",
"import torch\n",
"torch.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n",
"if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)\n",
"\n",
"# numpy RNG\n",
"import numpy as np\n",
"np.random.seed(seed)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.PETS)\n",
"path_img = path/'images'\n",
"fnames = get_image_files(path_img)\n",
"pat = r'/([^/]+)_\\d+.jpg$'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from fastai.callback import Callback\n",
"\n",
"class FitNBatch(Callback):\n",
" def __init__(self, n_batch:int=2): \n",
" self.stop,self.n_batch = False,n_batch\n",
" \n",
" def on_batch_end(self, iteration, **kwargs):\n",
" if iteration >= self.n_batch: return {'stop_epoch': True, 'stop_training': True}"
]
},
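{
"cell_type": "markdown",
"metadata": {},
"source": [
"`FitNBatch` can also be attached per-call rather than via `defaults.extra_callbacks`. A minimal sketch, kept as a raw cell since `learn` is not defined yet at this point in the notebook; fastai v1's `fit` accepts a `callbacks=` list:"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"# probe only ~2 batches of one epoch without touching global defaults\n",
"learn.fit(1, callbacks=[FitNBatch(n_batch=2)])"
]
},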
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def test_from_max_bs(learn,max_bs:int=1024,r_factor:float=0.80,verbose:bool=True,memtrace=None)->int:\n",
" ''' Binary search for max batch size with given learner'''\n",
" if memtrace is None: memtrace = GPUMemTrace()\n",
" test_dict = dict()\n",
"\n",
" low,hi = 2,max_bs\n",
" \n",
" #first test low, if that does not fit, return\n",
" try:\n",
" learn.data.batch_size = low\n",
" learn.fit(1)\n",
" out = low #this could be our best if the next loop fails\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'tried bs:{bs}')\n",
" return None,test_dict #can't fit min\n",
" \n",
" bs = hi\n",
" while (hi-low) > 1:\n",
" try:\n",
" learn.data.batch_size = bs\n",
" learn.fit(1)\n",
" out,low = bs,bs\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'tried bs:{bs}')\n",
" hi = bs #bs is the new high value, keep cutting till we fail again.\n",
"\n",
" bs = (low + hi) // 2 \n",
" if bs == hi: bs = low\n",
" \n",
" print(f'trying {bs}, lo:{low},hi:{hi}')\n",
" if verbose: memtrace.report(f'bs:{bs}')\n",
" test_dict[bs]=tuple(memtrace.data())\n",
" \n",
" return out,test_dict"
]
},
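{
"cell_type": "markdown",
"metadata": {},
"source": [
"To see the search logic in isolation, here is a CPU-only sketch that replaces `learn.fit` with a fake fit that raises `RuntimeError` above a hypothetical memory limit. The limit of 800 is made up; it only illustrates how `low`/`hi` converge."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def fake_fit(bs, limit=800):\n",
"    '''Stand-in for learn.fit: pretend any batch size above `limit` OOMs.'''\n",
"    if bs > limit: raise RuntimeError('CUDA out of memory (simulated)')\n",
"\n",
"def toy_search(max_bs=4000):\n",
"    low,hi = 2,max_bs\n",
"    fake_fit(low)   #assume the minimum fits\n",
"    out,bs = low,hi\n",
"    while (hi-low) > 1:\n",
"        try:\n",
"            fake_fit(bs)\n",
"            out,low = bs,bs\n",
"        except RuntimeError:\n",
"            hi = bs\n",
"        bs = (low + hi) // 2\n",
"        if bs == hi: bs = low\n",
"        print(f'trying {bs}, lo:{low},hi:{hi}')\n",
"    return out\n",
"\n",
"toy_search()  #converges to 800, the largest batch size that fits"
]
},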
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def do_the_magic(learn,max_bs=32,verbose=True,memtrace=None):\n",
" defaults.extra_callbacks = [FitNBatch()]\n",
" bs_test_data = test_from_max_bs(learn,max_bs=max_bs,verbose=verbose,memtrace=memtrace)\n",
" defaults.extra_callbacks = None\n",
" return bs_test_data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"res_models = [models.resnet18,models.resnet34,models.resnet50,models.resnet101,models.resnet152]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"bs = 64"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import fastai\n",
"import fastprogress\n",
"\n",
"class progress_diabled():\n",
" ''' Context manager to disable the progress update bar and Recorder print'''\n",
" def __init__(self,learner:Learner):\n",
" self.learn = learner\n",
" def __enter__(self):\n",
" #silence progress bar\n",
" fastprogress.fastprogress.NO_BAR = True\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = fastprogress.force_console_behavior()\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True) #,silent=True) #silence recorder\n",
" \n",
" return self.learn \n",
" def __exit__(self,type,value,traceback):\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar,progress_bar\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True)\n",
" "
]
},
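{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch of how the whole `res_models` list could be swept to build a model/batch-size table, left as a raw cell since each architecture needs a fresh learner and a long run; the `results` dict name is illustrative, and `data` is assumed to be built as in the experiment cells below:"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"results = {}\n",
"for arch in res_models:\n",
"    learn = create_cnn(data, arch)\n",
"    learn.unfreeze()\n",
"    with progress_disabled(learn) as learn:\n",
"        results[arch.__name__],_ = do_the_magic(learn,max_bs=4000)\n",
"    del learn; gc.collect(); torch.cuda.empty_cache()\n",
"results"
]
},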
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,261 109,691 128,856 MB 1.76% \n",
"GPU: 577 11,617 12,194 MB 4.73% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 3.839389 2.447133 00:02 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 11.91 GiB total capacity; 10.73 GiB already allocated; 595.06 MiB free; 3.81 MiB cached) tried bs:4000\n",
"trying 2001, lo:2,hi:4000\n",
"△Used Peaked MB: 1,348 466 (bs:2001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 500.25 MiB (GPU 0; 11.91 GiB total capacity; 10.82 GiB already allocated; 501.06 MiB free; 3.80 MiB cached) tried bs:2001\n",
"trying 1001, lo:2,hi:2001\n",
"△Used Peaked MB: 254 1,560 (bs:1001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 7.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:1001\n",
"trying 501, lo:2,hi:1001\n",
"△Used Peaked MB: 1,940 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 23.50 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:501\n",
"trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 1,940 0 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 11.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:251\n",
"trying 126, lo:2,hi:251\n",
"△Used Peaked MB: 1,940 0 (bs:126)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 6.00 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:126\n",
"trying 64, lo:2,hi:126\n",
"△Used Peaked MB: 1,940 0 (bs:64)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 3.00 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:64\n",
"trying 33, lo:2,hi:64\n",
"△Used Peaked MB: 1,940 0 (bs:33)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 8.25 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:33\n",
"trying 17, lo:2,hi:33\n",
"△Used Peaked MB: 1,940 0 (bs:17)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 4.25 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.25 MiB cached) tried bs:17\n",
"trying 9, lo:2,hi:17\n",
"△Used Peaked MB: 1,940 0 (bs:9)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 2.62 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 5.34 MiB cached) tried bs:9\n",
"trying 5, lo:2,hi:9\n",
"△Used Peaked MB: 1,940 0 (bs:5)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 2.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 7.56 MiB cached) tried bs:5\n",
"trying 3, lo:2,hi:5\n",
"△Used Peaked MB: 1,940 0 (bs:3)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 6.81 MiB cached) tried bs:3\n",
"trying 2, lo:2,hi:3\n",
"△Used Peaked MB: 1,940 0 (bs:2)\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:02:34.515\n",
"・ CPU: 2 2 2,385 MB |\n",
"・ GPU: 9,834 1,780 10,411 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:02:34 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 133 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 124 89 MB ( 71.65%)\n",
"GPU: 9,834 9,818 MB ( 99.84%)\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,296 109,667 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arch = res_models[0]\n",
"max_mem = 2000\n",
"img_size = 64\n",
"\n",
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=4000,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,667 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 3.830230 3.242904 00:02 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 11.91 GiB total capacity; 10.71 GiB already allocated; 619.06 MiB free; 9.31 MiB cached) tried bs:4000\n",
"trying 2001, lo:2,hi:4000\n",
"△Used Peaked MB: 1,330 460 (bs:2001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 500.25 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 23.06 MiB free; 9.30 MiB cached) tried bs:2001\n",
"trying 1001, lo:2,hi:2001\n",
"△Used Peaked MB: 738 1,188 (bs:1001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.62 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 21.06 MiB free; 9.31 MiB cached) tried bs:1001\n",
"trying 501, lo:2,hi:1001\n",
"△Used Peaked MB: 1,928 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 23.50 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 21.06 MiB free; 9.31 MiB cached) tried bs:501\n",
"trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 1,928 0 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 11.88 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 9.06 MiB free; 9.31 MiB cached) tried bs:251\n",
"trying 126, lo:2,hi:251\n",
"△Used Peaked MB: 1,940 0 (bs:126)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 6.00 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 15.19 MiB cached) tried bs:126\n",
"trying 64, lo:2,hi:126\n",
"△Used Peaked MB: 1,940 6 (bs:64)\n",
"epoch train_loss valid_loss time \n",
"1 4.269319 3.768133 00:07 \n",
"trying 95, lo:64,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:95)\n",
"epoch train_loss valid_loss time \n",
"1 3.999418 3.658793 00:08 \n",
"trying 110, lo:95,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:110)\n",
"epoch train_loss valid_loss time \n",
"1 3.807487 3.473353 00:09 \n",
"trying 118, lo:110,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:118)\n",
"epoch train_loss valid_loss time \n",
"1 3.646755 3.263668 00:10 \n",
"trying 122, lo:118,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:122)\n",
"epoch train_loss valid_loss time \n",
"1 3.469121 3.124530 00:10 \n",
"trying 124, lo:122,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:124)\n",
"epoch train_loss valid_loss time \n",
"1 3.436059 2.996242 00:10 \n",
"trying 125, lo:124,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:125)\n",
"epoch train_loss valid_loss time \n",
"1 3.171064 2.925839 00:11 \n",
"trying 125, lo:125,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:125)\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:03:38.286\n",
"・ CPU: 0 3 2,386 MB |\n",
"・ GPU: 9,794 1,804 10,387 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:03:38 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 55 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 89 89 MB ( 99.74%)\n",
"GPU: 9,794 9,794 MB (100.00%)\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,660 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=4000,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,660 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 4.184824 5.174956 00:01 \n",
"epoch train_loss valid_loss time \n",
"1 4.089300 3.800783 00:28 \n",
"trying 800, lo:800,hi:800\n",
"△Used Peaked MB: 142 1,686 (bs:800)\n"
]
},
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:37.295\n",
"・ CPU: 0 114 2,388 MB |\n",
"・ GPU: 9,794 1,686 10,387 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:00:37 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 55 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 90 90 MB ( 99.23%)\n",
"GPU: 9,794 9,794 MB (100.00%)\n"
]
}
],
"source": [
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=800,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
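{
"cell_type": "markdown",
"metadata": {},
"source": [
"The surviving `tested_bs_data` dict maps each probed batch size to the `GPUMemTrace` readings taken at that point, so the search can be visualized. A rough sketch, assuming each value is a `(delta_used, delta_peaked)` pair of MBs as reported by `memtrace.data()`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"bss = sorted(tested_bs_data.keys())\n",
"used = [tested_bs_data[b][0] for b in bss]\n",
"peaked = [tested_bs_data[b][1] for b in bss]\n",
"plt.plot(bss, used, label='delta used MB')\n",
"plt.plot(bss, peaked, label='delta peaked MB')\n",
"plt.xlabel('batch size'); plt.ylabel('MB'); plt.legend();"
]
},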
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7 fasta.ai1 DEV",
"language": "python",
"name": "fastai1_dev"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}