Last active: February 28, 2024 19:21
Early Stopping PyTorch
# MIT License
#
# Copyright (c) 2018 Stefano Nardo https://gist.github.com/stefanonardo
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import torch


class EarlyStopping(object):
    """Signals when a monitored metric has stopped improving."""

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None
        self.num_bad_epochs = 0
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)

        # patience == 0 disables early stopping: step() always returns False.
        if patience == 0:
            self.is_better = lambda a, b: True
            self.step = lambda a: False

    def step(self, metrics):
        if self.best is None:
            self.best = metrics
            return False

        # Stop immediately if the metric has become NaN.
        if torch.isnan(metrics):
            return True

        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
        else:
            self.num_bad_epochs += 1

        if self.num_bad_epochs >= self.patience:
            return True

        return False

    def _init_is_better(self, mode, min_delta, percentage):
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if not percentage:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - min_delta
            if mode == 'max':
                self.is_better = lambda a, best: a > best + min_delta
        else:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - (
                    best * min_delta / 100)
            if mode == 'max':
                self.is_better = lambda a, best: a > best + (
                    best * min_delta / 100)
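For reference, a minimal sketch of how the mode, min_delta and percentage options interact; the metric values below are made up for illustration and are not part of the gist:

import torch

# mode='min' with percentage=True: a new loss only counts as an improvement
# if it is at least min_delta percent below the current best.
es = EarlyStopping(mode='min', min_delta=1, patience=2, percentage=True)

for loss in [1.00, 0.995, 0.993]:  # illustrative validation losses
    print(loss, es.step(torch.tensor(loss)))
# 1.00  -> False  (first value becomes the best)
# 0.995 -> False  (bad epoch 1: not a full 1% below 1.00)
# 0.993 -> True   (bad epoch 2: patience of 2 reached, time to stop)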
Thanks for your script, Stefano; it is easy to use and adds little overhead.
For usage, here is an example:

...
es = EarlyStopping(patience=5)

num_epochs = 100
for epoch in range(num_epochs):
    train_one_epoch(model, data_loader)    # train the model for one epoch on the training set
    metric = eval(model, data_loader_dev)  # evaluation on the dev set (i.e., holdout from training)
    if es.step(metric):
        break  # early stopping criterion is met, we can stop now
...
I followed the above example and got the following error:
train_log loss:
training (min: 0.087, max: 0.087, cur: 0.087)
train_accuracy:
training (min: 0.084, max: 0.084, cur: 0.084)
test (min: 0.547, max: 0.547, cur: 0.547)
test (min: 0.342, max: 0.342, cur: 0.342)
1 100
1 200
1 300
1 400
1 500
1 600
1 700
1 800
1 900
1 1000
{'train_log loss': 0.08505982160568237, 'train_accuracy': 0.08596525341272354}
1 100
1 200
1 300
1 400
1 500
1 600
1 700
1 800
1 900
1 1000
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-47-46b97a2ae203> in <module>
72
73
---> 74 train_model(model = combined_model, criterion = criterion, optimizer = optimizer, num_epochs=500)
<ipython-input-47-46b97a2ae203> in train_model(model, criterion, optimizer, num_epochs)
52 torch.save(combined_model.state_dict()
53 , 'D:\\CIS inspection images 0318\\self_build\\combined_model_1.pt')
---> 54 if es.step(epoch_acc):
55 break
56
<ipython-input-44-a6e50955f13f> in step(self, metrics)
18 return False
19
---> 20 if np.isnan(metrics):
21 return True
22
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\tensor.py in __array__(self, dtype)
484 def __array__(self, dtype=None):
485 if dtype is None:
--> 486 return self.numpy()
487 else:
488 return self.numpy().astype(dtype, copy=False)
TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
Here is the model loop:
es = EarlyStopping(patience=10)

def train_model(model, criterion, optimizer, num_epochs=10):
    liveloss = PlotLosses(series_fmt={'training': '{}', 'test': 'test_{}'})
    max_accuracy = 0
    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for b, (image, label, policy, categorical_data) in enumerate(dataloaders[phase]):
                image = image.cuda()
                label = label.cuda()
                # numerical_data = numerical_data.cuda()
                categorical_data = categorical_data.cuda()

                outputs = model(image, categorical_data)
                loss = criterion(outputs, label)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.detach() * image.size(0)
                running_corrects += torch.sum(preds == label.data)

                b += 1
                if b % print_interval == 0:
                    print(epoch, b)
                if b == max_trn_batch:
                    break

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)

            prefix = ''
            if phase == 'test':
                prefix = 'test_'
                if epoch_acc > max_accuracy:
                    max_accuracy = epoch_acc
                    torch.save(combined_model.state_dict(),
                               'D:\\CIS inspection images 0318\\self_build\\combined_model_1.pt')
                if es.step(epoch_acc):
                    break
            else:
                prefix = 'train_'

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()
            print(logs)

        liveloss.update(logs)
        liveloss.draw()
        print(max_accuracy)
        scheduler.step(loss)
Try replacing np.isnan with torch.isnan
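For context, a minimal sketch of the difference, assuming a CUDA device is available (the tensor below is illustrative):

import numpy as np
import torch

metric = torch.tensor(0.85, device='cuda')  # e.g. an accuracy living on the GPU

# np.isnan(metric) implicitly calls metric.numpy(), which is not supported for
# CUDA tensors and raises the TypeError shown in the traceback above.
torch.isnan(metric)             # works directly on the GPU tensor
np.isnan(metric.cpu().numpy())  # or copy the tensor to host memory first
np.isnan(metric.item())         # or extract the Python scalar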
Perfect! Thank you.
Thank you for this script!
Hi @stefanonardo, how should I credit you and what is the license of the script?
Hi @jakobamb, thank you for asking. I added a license.
How can I use it in my code? Thanks very much.