Skip to content

Instantly share code, notes, and snippets.

@PiotrCzapla
Last active December 9, 2022 14:22
Show Gist options
  • Save PiotrCzapla/90c446929d2d699a8fdce65bc8a95f38 to your computer and use it in GitHub Desktop.
Save PiotrCzapla/90c446929d2d699a8fdce65bc8a95f38 to your computer and use it in GitHub Desktop.
Clean up a memory leak after unhandled exception in jupyter notebooks.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "65ec7d4d-f3e0-4d08-8258-02d70e79befc",
"metadata": {},
"outputs": [],
"source": [
"!pip install -Uqq fastai fastcore kaggle\n",
"!pip install -Uqq 'timm>=0.6.2.dev0' pynvml \n",
"!pip install -Uq objgraph\n",
"!pip install -Uq fastkaggle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a67420a1",
"metadata": {},
"outputs": [],
"source": [
"interactive=False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c0539d9a-0007-4073-908e-88b0163dbaf8",
"metadata": {},
"outputs": [],
"source": [
"from fastkaggle import *"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "52c62e9f-81e2-4212-9e75-bc9666586040",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fri Dec 9 13:47:56 2022 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 515.65.01 Driver Version: 515.65.01 CUDA Version: 11.7 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|===============================+======================+======================|\n",
"| 0 NVIDIA GeForce ... Off | 00000000:06:00.0 Off | N/A |\n",
"| 18% 41C P0 43W / 250W | 0MiB / 11264MiB | 0% Default |\n",
"| | | N/A |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=============================================================================|\n",
"| No running processes found |\n",
"+-----------------------------------------------------------------------------+\n"
]
}
],
"source": [
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "473d8626-0d50-4acd-bada-25dcd2c6ba2d",
"metadata": {},
"outputs": [],
"source": [
"import kaggle\n",
"import timm"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "00ebb94d-c325-4772-814a-6deb01a30626",
"metadata": {},
"outputs": [],
"source": [
"comp = 'paddy-disease-classification'\n",
"path = setup_comp(comp, install='fastai \"timm>=0.6.2.dev0\"')\n",
"from fastai.vision.all import *\n",
"set_seed(42)\n",
"\n",
"tst_files = get_image_files(path/'test_images').sorted()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "56b32cf8-82e1-4214-b9ee-abf0f912d0ba",
"metadata": {},
"outputs": [],
"source": [
"trn_path = path/'train_images_min'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "79d01af4",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"for v in (path/'train_images').glob('*'):\n",
" dst = trn_path.absolute()/v.name\n",
" dst.mkdir(exist_ok=True, parents=True)\n",
" for p in list(sorted(v.glob('*')))[:20]:\n",
" f = dst/p.name\n",
" if not f.exists(): \n",
" f.symlink_to(p.absolute())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e39a463d-ead3-4698-b5c4-8975d867c7cd",
"metadata": {},
"outputs": [],
"source": [
"def broken_error_rate(inp, targ, axis=-1):\n",
" return 1/0\n",
"\n",
"def train(arch, accum=1, raise_exception=False, cbs=[]):\n",
" epochs=1; bs=96; size=192; val_seed=42\n",
" dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.3, item_tfms=Resize(480, method='squish'),\n",
" splitter=RandomSplitter(0.3, seed=val_seed),\n",
" batch_tfms=aug_transforms(size=size, min_scale=0.75), bs=bs//accum)\n",
" cbs = cbs + [GradientAccumulation(bs)]\n",
" metrics = broken_error_rate if raise_exception else error_rate\n",
" learn = vision_learner(dls, arch, metrics=metrics, cbs=cbs).to_fp16()\n",
" lr = 0.01\n",
" learn.unfreeze()\n",
" learn.fit_one_cycle(epochs, lr)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "06857628",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Counter()\n",
"Reserved mem in use: 0 MB\n"
]
}
],
"source": [
"import sys,gc\n",
"import objgraph\n",
"def print_tensor_count(message=\"\"):\n",
" gc.collect()\n",
" torch.cuda.empty_cache()\n",
" if message:\n",
" print(message)\n",
" print(collections.Counter(v.device.type if v.numel() else 'empty' for v in objgraph.by_type('torch.Tensor')))\n",
" print(\"Reserved mem in use:\", torch.cuda.memory_stats().get('reserved_bytes.all.current',0)//1024**2, \"MB\")\n",
"print_tensor_count()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8664bf5b",
"metadata": {},
"outputs": [],
"source": [
"def train_safe(arch='convnext_large', accum=4,**kwargs): \n",
" def execute():\n",
" print(f'Using {arch}, with accum={accum}')\n",
" try: \n",
" train(arch, accum=accum, **kwargs)\n",
" return True\n",
" except Exception as e:\n",
" print('Exception', e)\n",
" if not execute():\n",
" gc.collect()\n",
" torch.cuda.empty_cache()\n",
" gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7826a643",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"to ensure we don't have any tensors yet\n",
"Counter()\n",
"Reserved mem in use: 0 MB\n"
]
}
],
"source": [
"print_tensor_count(\"to ensure we don't have any tensors yet\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "1fc4c2ff",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using convnext_large, with accum=4\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>3.980382</td>\n",
" <td>11.090388</td>\n",
" <td>0.900000</td>\n",
" <td>00:02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"train_safe() # this should pass"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d590a5cb",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Do we have any tensors hanging around?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 4 MB\n"
]
}
],
"source": [
"print_tensor_count(\"Do we have any tensors hanging around?\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d0a71a8e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using convnext_large, with accum=4\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>3.679147</td>\n",
" <td>3.564103</td>\n",
" <td>0.933333</td>\n",
" <td>00:02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"train_safe() # and after another pass on large model"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "eebde136",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is it still 7 after another pass?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 6 MB\n"
]
}
],
"source": [
"print_tensor_count(\"Is it still 7 after another pass?\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "887b66fb",
"metadata": {},
"outputs": [],
"source": [
"# lets raise some exceptions"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a83908ff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using convnext_large, with accum=4\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <progress value='0' class='' max='1' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/1 00:00&lt;?]\n",
" </div>\n",
" \n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>broken_error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>\n",
"\n",
" <div>\n",
" <progress value='0' class='' max='3' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/3 00:00&lt;?]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exception Exception occured in `Recorder` when calling event `after_batch`:\n",
"\tdivision by zero\n"
]
}
],
"source": [
"train_safe(raise_exception=True) "
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c5ef5e48",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is it still 7 after another pass?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 4 MB\n"
]
}
],
"source": [
"print_tensor_count(\"Is it still 7 after another pass?\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "75d51d6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"806"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum([v.numel() for v in objgraph.by_type('torch.Tensor')]) # are they still small"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "e671584c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using convnext_large, with accum=4\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>3.905663</td>\n",
" <td>3.984396</td>\n",
" <td>0.850000</td>\n",
" <td>00:02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"train_safe() # let see if we can run a model that bearly fits in our memory"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7c2bcd40",
"metadata": {},
"outputs": [],
"source": [
"# And here is what happens if we don't handle exceptions "
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "a9653192",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is it still 7 after another pass?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 6 MB\n",
"|===========================================================================|\n",
"| PyTorch CUDA memory summary, device ID 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n",
"|===========================================================================|\n",
"| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocated memory | 3584 B | 6745 MB | 414539 MB | 414539 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Active memory | 3584 B | 6745 MB | 414539 MB | 414539 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved memory | 6144 KB | 7338 MB | 26026 MB | 26020 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable memory | 6140 KB | 545942 KB | 174387 MB | 174381 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocations | 7 | 1957 | 59399 | 59392 |\n",
"|---------------------------------------------------------------------------|\n",
"| Active allocs | 7 | 1957 | 59399 | 59392 |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved segments | 3 | 349 | 1203 | 1200 |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable allocs | 8 | 289 | 38714 | 38706 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize allocations | 0 | 0 | 0 | 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize GPU segments | 0 | 0 | 0 | 0 |\n",
"|===========================================================================|\n",
"\n"
]
}
],
"source": [
"print_tensor_count(\"Is it still 7 after another pass?\")\n",
"print(torch.cuda.memory_summary(abbreviated=True))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "1dc9fe4c",
"metadata": {},
"outputs": [],
"source": [
"from contextlib import contextmanager\n",
"\n",
"@contextmanager\n",
"def nbdev_allow_exception(*args, **kwds):\n",
" try:\n",
" yield None\n",
" except Exception as e:\n",
" if interactive: raise\n",
" else:\n",
" print(\"Simulating unhandled exception!\")\n",
" sys.last_value=e\n",
" sys.last_traceback=e.__traceback__\n",
" sys.last_type=type(e)\n",
" sys.excepthook(sys.last_type, e, e.__traceback__)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "44a4770e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/pczapla/.local/share/miniconda/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
" warnings.warn(\n",
"/home/pczapla/.local/share/miniconda/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ConvNeXt_Large_Weights.IMAGENET1K_V1`. You can also use `weights=ConvNeXt_Large_Weights.DEFAULT` to get the most up-to-date weights.\n",
" warnings.warn(msg)\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <progress value='0' class='' max='1' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/1 00:00&lt;?]\n",
" </div>\n",
" \n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>broken_error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>\n",
"\n",
" <div>\n",
" <progress value='0' class='' max='3' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/3 00:00&lt;?]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Simulating unhandled exception!\n"
]
},
{
"ename": "ZeroDivisionError",
"evalue": "Exception occured in `Recorder` when calling event `after_batch`:\n\tdivision by zero",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn [24], line 6\u001b[0m, in \u001b[0;36mnbdev_allow_exception\u001b[0;34m(*args, **kwds)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;129m@contextmanager\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnbdev_allow_exception\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds):\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m interactive: \u001b[38;5;28;01mraise\u001b[39;00m\n",
"Cell \u001b[0;32mIn [25], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m nbdev_allow_exception(): train(convnext_large, accum\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m, raise_exception\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"Cell \u001b[0;32mIn [9], line 14\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(arch, accum, raise_exception, cbs)\u001b[0m\n\u001b[1;32m 12\u001b[0m lr \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.01\u001b[39m\n\u001b[1;32m 13\u001b[0m learn\u001b[38;5;241m.\u001b[39munfreeze()\n\u001b[0;32m---> 14\u001b[0m \u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_one_cycle\u001b[49m\u001b[43m(\u001b[49m\u001b[43mepochs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/schedule.py:119\u001b[0m, in \u001b[0;36mfit_one_cycle\u001b[0;34m(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)\u001b[0m\n\u001b[1;32m 116\u001b[0m lr_max \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([h[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m h \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mhypers])\n\u001b[1;32m 117\u001b[0m scheds \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m: combined_cos(pct_start, lr_max\u001b[38;5;241m/\u001b[39mdiv, lr_max, lr_max\u001b[38;5;241m/\u001b[39mdiv_final),\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmom\u001b[39m\u001b[38;5;124m'\u001b[39m: combined_cos(pct_start, \u001b[38;5;241m*\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmoms \u001b[38;5;28;01mif\u001b[39;00m moms \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m moms))}\n\u001b[0;32m--> 119\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcbs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mParamScheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscheds\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mL\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcbs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreset_opt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_opt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwd\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_epoch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart_epoch\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:256\u001b[0m, in \u001b[0;36mLearner.fit\u001b[0;34m(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mset_hypers(lr\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlr \u001b[38;5;28;01mif\u001b[39;00m lr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m lr)\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_epoch \u001b[38;5;241m=\u001b[39m n_epoch\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_fit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfit\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelFitException\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_end_cleanup\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:245\u001b[0m, in \u001b[0;36mLearner._do_fit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_epoch):\n\u001b[1;32m 244\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mepoch\u001b[38;5;241m=\u001b[39mepoch\n\u001b[0;32m--> 245\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mepoch\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelEpochException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:240\u001b[0m, in \u001b[0;36mLearner._do_epoch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_do_epoch\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_do_epoch_train()\n\u001b[0;32m--> 240\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_epoch_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:236\u001b[0m, in \u001b[0;36mLearner._do_epoch_validate\u001b[0;34m(self, ds_idx, dl)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dl \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m: dl \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdls[ds_idx]\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl \u001b[38;5;241m=\u001b[39m dl\n\u001b[0;32m--> 236\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad(): \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mall_batches\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvalidate\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelValidException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:199\u001b[0m, in \u001b[0;36mLearner.all_batches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mall_batches\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_iter \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl)\n\u001b[0;32m--> 199\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m o \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl): \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mone_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:227\u001b[0m, in \u001b[0;36mLearner.one_batch\u001b[0;34m(self, i, b)\u001b[0m\n\u001b[1;32m 225\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_device(b)\n\u001b[1;32m 226\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split(b)\n\u001b[0;32m--> 227\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_one_batch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbatch\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelBatchException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:195\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); f()\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 195\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mafter_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mevent_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m; final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:171\u001b[0m, in \u001b[0;36mLearner.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[0;32m--> 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, event_name): \u001b[43mL\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_one\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/foundation.py:156\u001b[0m, in \u001b[0;36mL.map\u001b[0;34m(self, f, gen, *args, **kwargs)\u001b[0m\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmap\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, \u001b[38;5;241m*\u001b[39margs, gen\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new(\u001b[43mmap_ex\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgen\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgen\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/basics.py:840\u001b[0m, in \u001b[0;36mmap_ex\u001b[0;34m(iterable, f, gen, *args, **kwargs)\u001b[0m\n\u001b[1;32m 838\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmap\u001b[39m(g, iterable)\n\u001b[1;32m 839\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m gen: \u001b[38;5;28;01mreturn\u001b[39;00m res\n\u001b[0;32m--> 840\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mres\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/basics.py:825\u001b[0m, in \u001b[0;36mbind.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(v,_Arg): kwargs[k] \u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mpop(v\u001b[38;5;241m.\u001b[39mi)\n\u001b[1;32m 824\u001b[0m fargs \u001b[38;5;241m=\u001b[39m [args[x\u001b[38;5;241m.\u001b[39mi] \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(x, _Arg) \u001b[38;5;28;01melse\u001b[39;00m x \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpargs] \u001b[38;5;241m+\u001b[39m args[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmaxi\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m:]\n\u001b[0;32m--> 825\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:175\u001b[0m, in \u001b[0;36mLearner._call_one\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_call_one\u001b[39m(\u001b[38;5;28mself\u001b[39m, event_name):\n\u001b[1;32m 174\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(event, event_name): \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmissing \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 175\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m cb \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcbs\u001b[38;5;241m.\u001b[39msorted(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124morder\u001b[39m\u001b[38;5;124m'\u001b[39m): \u001b[43mcb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/core.py:62\u001b[0m, in \u001b[0;36mCallback.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: res \u001b[38;5;241m=\u001b[39m getcallable(\u001b[38;5;28mself\u001b[39m, event_name)()\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m---> 62\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e: \u001b[38;5;28;01mraise\u001b[39;00m modify_exception(e, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mException occured in `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` when calling event `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, replace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m event_name\u001b[38;5;241m==\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_fit\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;66;03m#Reset self.run to True at each end of fit\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/core.py:60\u001b[0m, in \u001b[0;36mCallback.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 58\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun \u001b[38;5;129;01mand\u001b[39;00m _run: \n\u001b[0;32m---> 60\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: res \u001b[38;5;241m=\u001b[39m \u001b[43mgetcallable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e: \u001b[38;5;28;01mraise\u001b[39;00m modify_exception(e, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mException occured in `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` when calling event `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, replace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:551\u001b[0m, in \u001b[0;36mRecorder.after_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39myb) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m: \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 550\u001b[0m mets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_mets \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_valid_mets\n\u001b[0;32m--> 551\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m met \u001b[38;5;129;01min\u001b[39;00m mets: \u001b[43mmet\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maccumulate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining: \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 553\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlrs\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mhypers[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:473\u001b[0m, in \u001b[0;36mAvgMetric.accumulate\u001b[0;34m(self, learn)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21maccumulate\u001b[39m(\u001b[38;5;28mself\u001b[39m, learn):\n\u001b[1;32m 472\u001b[0m bs \u001b[38;5;241m=\u001b[39m find_bs(learn\u001b[38;5;241m.\u001b[39myb)\n\u001b[0;32m--> 473\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtotal \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learn\u001b[38;5;241m.\u001b[39mto_detach(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpred\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43myb\u001b[49m\u001b[43m)\u001b[49m)\u001b[38;5;241m*\u001b[39mbs\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcount \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m bs\n",
"Cell \u001b[0;32mIn [9], line 2\u001b[0m, in \u001b[0;36mbroken_error_rate\u001b[0;34m(inp, targ, axis)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbroken_error_rate\u001b[39m(inp, targ, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241;43m1\u001b[39;49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\n",
"\u001b[0;31mZeroDivisionError\u001b[0m: Exception occured in `Recorder` when calling event `after_batch`:\n\tdivision by zero"
]
}
],
"source": [
"with nbdev_allow_exception(): train(convnext_large, accum=4, raise_exception=True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "2ad597f5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This time it won't be just 7 tensors left alive:\n",
"Counter({'cuda': 370, 'cpu': 10})\n",
"Reserved mem in use: 1832 MB\n",
"|===========================================================================|\n",
"| PyTorch CUDA memory summary, device ID 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n",
"|===========================================================================|\n",
"| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocated memory | 1525 MB | 6745 MB | 510936 MB | 509411 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Active memory | 1525 MB | 6745 MB | 510936 MB | 509411 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved memory | 1832 MB | 7338 MB | 31738 MB | 29906 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable memory | 313710 KB | 563634 KB | 215766 MB | 215460 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocations | 712 | 1957 | 72524 | 71812 |\n",
"|---------------------------------------------------------------------------|\n",
"| Active allocs | 712 | 1957 | 72524 | 71812 |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved segments | 111 | 349 | 1469 | 1358 |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable allocs | 124 | 289 | 47634 | 47510 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize allocations | 0 | 0 | 0 | 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize GPU segments | 0 | 0 | 0 | 0 |\n",
"|===========================================================================|\n",
"\n"
]
}
],
"source": [
"print_tensor_count(\"This time it won't be just 7 tensors left alive:\")\n",
"print(torch.cuda.memory_summary(abbreviated=True))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "b01524d2",
"metadata": {},
"outputs": [],
"source": [
"# let's try our clean up function\n",
"\n",
"sys.last_traceback.tb_next = None\n",
"gc.collect()\n",
"torch.cuda.empty_cache()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "09e866cc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Are we back to low number?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 10 MB\n",
"|===========================================================================|\n",
"| PyTorch CUDA memory summary, device ID 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n",
"|===========================================================================|\n",
"| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocated memory | 4608 B | 6745 MB | 510936 MB | 510936 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Active memory | 4608 B | 6745 MB | 510936 MB | 510936 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved memory | 10240 KB | 7338 MB | 31738 MB | 31728 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable memory | 10235 KB | 563634 KB | 216108 MB | 216098 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocations | 7 | 1957 | 72524 | 72517 |\n",
"|---------------------------------------------------------------------------|\n",
"| Active allocs | 7 | 1957 | 72524 | 72517 |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved segments | 5 | 349 | 1469 | 1464 |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable allocs | 10 | 289 | 47742 | 47732 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize allocations | 0 | 0 | 0 | 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize GPU segments | 0 | 0 | 0 | 0 |\n",
"|===========================================================================|\n",
"\n"
]
}
],
"source": [
"print_tensor_count(\"Are we back to low number?\")\n",
"print(torch.cuda.memory_summary(abbreviated=True))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "b250a522",
"metadata": {},
"outputs": [],
"source": [
"# ok so this seems useful, let's see if we can make a callback out of it"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e183e632",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"class CleanUpUnhandledException(Callback):\n",
" \"Clean up traceback stored in sys.last_traceback freeing memory\"\n",
" order=MixedPrecision.order\n",
" def _mem(self): \n",
" return torch.cuda.memory_stats().get('reserved_bytes.all.current',0)//1024**2\n",
" def before_fit(self):\n",
" if hasattr(sys, 'last_traceback'):\n",
" print(\"sys.last_traceback detected recovering.\")\n",
" pre=self._mem()\n",
" sys.last_traceback.tb_next = None\n",
" gc.collect()\n",
" if torch.cuda.is_available(): \n",
" torch.cuda.empty_cache()\n",
" print(\"Recovered:\", pre - self._mem(), \"MB\")\n",
" del sys.last_traceback\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "11dac031",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <progress value='0' class='' max='1' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/1 00:00&lt;?]\n",
" </div>\n",
" \n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>broken_error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>\n",
"\n",
" <div>\n",
" <progress value='0' class='' max='3' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/3 00:00&lt;?]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Simulating unhandled exception!\n"
]
},
{
"ename": "ZeroDivisionError",
"evalue": "Exception occured in `Recorder` when calling event `after_batch`:\n\tdivision by zero",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn [24], line 6\u001b[0m, in \u001b[0;36mnbdev_allow_exception\u001b[0;34m(*args, **kwds)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;129m@contextmanager\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnbdev_allow_exception\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds):\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m interactive: \u001b[38;5;28;01mraise\u001b[39;00m\n",
"Cell \u001b[0;32mIn [31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m nbdev_allow_exception(): train(convnext_large, accum\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m, raise_exception\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"Cell \u001b[0;32mIn [9], line 14\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(arch, accum, raise_exception, cbs)\u001b[0m\n\u001b[1;32m 12\u001b[0m lr \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.01\u001b[39m\n\u001b[1;32m 13\u001b[0m learn\u001b[38;5;241m.\u001b[39munfreeze()\n\u001b[0;32m---> 14\u001b[0m \u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_one_cycle\u001b[49m\u001b[43m(\u001b[49m\u001b[43mepochs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/schedule.py:119\u001b[0m, in \u001b[0;36mfit_one_cycle\u001b[0;34m(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)\u001b[0m\n\u001b[1;32m 116\u001b[0m lr_max \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([h[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m h \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mhypers])\n\u001b[1;32m 117\u001b[0m scheds \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m: combined_cos(pct_start, lr_max\u001b[38;5;241m/\u001b[39mdiv, lr_max, lr_max\u001b[38;5;241m/\u001b[39mdiv_final),\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmom\u001b[39m\u001b[38;5;124m'\u001b[39m: combined_cos(pct_start, \u001b[38;5;241m*\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmoms \u001b[38;5;28;01mif\u001b[39;00m moms \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m moms))}\n\u001b[0;32m--> 119\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcbs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mParamScheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscheds\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mL\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcbs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreset_opt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_opt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwd\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_epoch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart_epoch\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:256\u001b[0m, in \u001b[0;36mLearner.fit\u001b[0;34m(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mset_hypers(lr\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlr \u001b[38;5;28;01mif\u001b[39;00m lr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m lr)\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_epoch \u001b[38;5;241m=\u001b[39m n_epoch\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_fit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfit\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelFitException\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_end_cleanup\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:245\u001b[0m, in \u001b[0;36mLearner._do_fit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_epoch):\n\u001b[1;32m 244\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mepoch\u001b[38;5;241m=\u001b[39mepoch\n\u001b[0;32m--> 245\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mepoch\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelEpochException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:240\u001b[0m, in \u001b[0;36mLearner._do_epoch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_do_epoch\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_do_epoch_train()\n\u001b[0;32m--> 240\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_epoch_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:236\u001b[0m, in \u001b[0;36mLearner._do_epoch_validate\u001b[0;34m(self, ds_idx, dl)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dl \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m: dl \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdls[ds_idx]\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl \u001b[38;5;241m=\u001b[39m dl\n\u001b[0;32m--> 236\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad(): \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mall_batches\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvalidate\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelValidException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:193\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_with_events\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, event_type, ex, final\u001b[38;5;241m=\u001b[39mnoop):\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:199\u001b[0m, in \u001b[0;36mLearner.all_batches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mall_batches\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_iter \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl)\n\u001b[0;32m--> 199\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m o \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdl): \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mone_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:227\u001b[0m, in \u001b[0;36mLearner.one_batch\u001b[0;34m(self, i, b)\u001b[0m\n\u001b[1;32m 225\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_device(b)\n\u001b[1;32m 226\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split(b)\n\u001b[0;32m--> 227\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_with_events\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_one_batch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbatch\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCancelBatchException\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:195\u001b[0m, in \u001b[0;36mLearner._with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbefore_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m); f()\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ex: \u001b[38;5;28mself\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_cancel_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 195\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mafter_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mevent_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m; final()\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:171\u001b[0m, in \u001b[0;36mLearner.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[0;32m--> 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, event_name): \u001b[43mL\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_one\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/foundation.py:156\u001b[0m, in \u001b[0;36mL.map\u001b[0;34m(self, f, gen, *args, **kwargs)\u001b[0m\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmap\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, \u001b[38;5;241m*\u001b[39margs, gen\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new(\u001b[43mmap_ex\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgen\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgen\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/basics.py:840\u001b[0m, in \u001b[0;36mmap_ex\u001b[0;34m(iterable, f, gen, *args, **kwargs)\u001b[0m\n\u001b[1;32m 838\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmap\u001b[39m(g, iterable)\n\u001b[1;32m 839\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m gen: \u001b[38;5;28;01mreturn\u001b[39;00m res\n\u001b[0;32m--> 840\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mres\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/share/miniconda/lib/python3.9/site-packages/fastcore/basics.py:825\u001b[0m, in \u001b[0;36mbind.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(v,_Arg): kwargs[k] \u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mpop(v\u001b[38;5;241m.\u001b[39mi)\n\u001b[1;32m 824\u001b[0m fargs \u001b[38;5;241m=\u001b[39m [args[x\u001b[38;5;241m.\u001b[39mi] \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(x, _Arg) \u001b[38;5;28;01melse\u001b[39;00m x \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpargs] \u001b[38;5;241m+\u001b[39m args[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmaxi\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m:]\n\u001b[0;32m--> 825\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:175\u001b[0m, in \u001b[0;36mLearner._call_one\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_call_one\u001b[39m(\u001b[38;5;28mself\u001b[39m, event_name):\n\u001b[1;32m 174\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(event, event_name): \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmissing \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 175\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m cb \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcbs\u001b[38;5;241m.\u001b[39msorted(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124morder\u001b[39m\u001b[38;5;124m'\u001b[39m): \u001b[43mcb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/core.py:62\u001b[0m, in \u001b[0;36mCallback.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: res \u001b[38;5;241m=\u001b[39m getcallable(\u001b[38;5;28mself\u001b[39m, event_name)()\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m---> 62\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e: \u001b[38;5;28;01mraise\u001b[39;00m modify_exception(e, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mException occured in `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` when calling event `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, replace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m event_name\u001b[38;5;241m==\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mafter_fit\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;66;03m#Reset self.run to True at each end of fit\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
"File \u001b[0;32m~/workspace/fastai/fastai/callback/core.py:60\u001b[0m, in \u001b[0;36mCallback.__call__\u001b[0;34m(self, event_name)\u001b[0m\n\u001b[1;32m 58\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun \u001b[38;5;129;01mand\u001b[39;00m _run: \n\u001b[0;32m---> 60\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: res \u001b[38;5;241m=\u001b[39m \u001b[43mgetcallable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevent_name\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e: \u001b[38;5;28;01mraise\u001b[39;00m modify_exception(e, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mException occured in `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` when calling event `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevent_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, replace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:551\u001b[0m, in \u001b[0;36mRecorder.after_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39myb) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m: \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 550\u001b[0m mets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_mets \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_valid_mets\n\u001b[0;32m--> 551\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m met \u001b[38;5;129;01min\u001b[39;00m mets: \u001b[43mmet\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maccumulate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining: \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 553\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlrs\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt\u001b[38;5;241m.\u001b[39mhypers[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"File \u001b[0;32m~/workspace/fastai/fastai/learner.py:473\u001b[0m, in \u001b[0;36mAvgMetric.accumulate\u001b[0;34m(self, learn)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21maccumulate\u001b[39m(\u001b[38;5;28mself\u001b[39m, learn):\n\u001b[1;32m 472\u001b[0m bs \u001b[38;5;241m=\u001b[39m find_bs(learn\u001b[38;5;241m.\u001b[39myb)\n\u001b[0;32m--> 473\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtotal \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learn\u001b[38;5;241m.\u001b[39mto_detach(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpred\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43myb\u001b[49m\u001b[43m)\u001b[49m)\u001b[38;5;241m*\u001b[39mbs\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcount \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m bs\n",
"Cell \u001b[0;32mIn [9], line 2\u001b[0m, in \u001b[0;36mbroken_error_rate\u001b[0;34m(inp, targ, axis)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbroken_error_rate\u001b[39m(inp, targ, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241;43m1\u001b[39;49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\n",
"\u001b[0;31mZeroDivisionError\u001b[0m: Exception occured in `Recorder` when calling event `after_batch`:\n\tdivision by zero"
]
}
],
"source": [
"with nbdev_allow_exception(): train(convnext_large, accum=4, raise_exception=True)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "a00f4e19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sys.last_traceback detected recovering.\n",
"Recovered: 4712 MB\n"
]
},
{
"data": {
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>error_rate</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>3.974687</td>\n",
" <td>3.260266</td>\n",
" <td>0.950000</td>\n",
" <td>00:02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"train(convnext_large, accum=4, cbs=[CleanUpUnhandledException] )"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "343bf9c2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Are already on 7?\n",
"Counter({'cuda': 9})\n",
"Reserved mem in use: 8 MB\n",
"|===========================================================================|\n",
"| PyTorch CUDA memory summary, device ID 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n",
"|===========================================================================|\n",
"| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocated memory | 3584 B | 6745 MB | 711922 MB | 711922 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Active memory | 3584 B | 6745 MB | 711922 MB | 711922 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved memory | 8192 KB | 7338 MB | 42198 MB | 42190 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable memory | 8188 KB | 594380 KB | 303215 MB | 303207 MB |\n",
"|---------------------------------------------------------------------------|\n",
"| Allocations | 7 | 1957 | 100235 | 100228 |\n",
"|---------------------------------------------------------------------------|\n",
"| Active allocs | 7 | 1957 | 100235 | 100228 |\n",
"|---------------------------------------------------------------------------|\n",
"| GPU reserved segments | 4 | 349 | 1950 | 1946 |\n",
"|---------------------------------------------------------------------------|\n",
"| Non-releasable allocs | 11 | 289 | 67200 | 67189 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize allocations | 0 | 0 | 0 | 0 |\n",
"|---------------------------------------------------------------------------|\n",
"| Oversize GPU segments | 0 | 0 | 0 | 0 |\n",
"|===========================================================================|\n",
"\n"
]
}
],
"source": [
"print_tensor_count(\"Are already on 7?\")\n",
"print(torch.cuda.memory_summary(abbreviated=True))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "864f1ca4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment