Last active June 24, 2021 23:51
timm config for training an nfnet, load with --config arg, override batch size, lr for your number of GPUs/dist nodes
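For example, a minimal launch sketch, assuming one of the configs below is saved as config.yaml (a hypothetical filename) and timm's distributed_train.sh helper is used on the same 4-GPU setup as the original run:

./distributed_train.sh 4 /imagenet --config config.yaml

Individual CLI flags such as -b/--batch-size (per GPU) and --lr override the values loaded from the config file.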
aa: rand-m6-n5-inc1-mstd1.0
amp: false
apex_amp: false
aug_splits: 0
batch_size: 256
bn_eps: null
bn_momentum: null
bn_tf: false
channels_last: false
checkpoint_hist: 10
clip_grad: 0.02
clip_mode: agc
color_jitter: 0.4
cooldown_epochs: 10
crop_pct: 0.94
cutmix: 0.0
cutmix_minmax: null
data: /data/imagenet/
data_dir: /imagenet/
dataset: ''
decay_epochs: 1.0
decay_rate: 0.988
dist_bn: reduce
drop: 0.4
drop_block: null
drop_connect: null
drop_path: 0.25
epochs: 656
eval_metric: top1
gp: fast
hflip: 0.5
img_size: 320
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd: false
local_rank: 0
log_interval: 50
lr: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 1.0e-05
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: efficientnet_v2m
model_ema: true
model_ema_decay: 0.999975
model_ema_force_cpu: false
momentum: 0.9
native_amp: true
no_aug: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: fusedsgd
opt_betas: null
opt_eps: 0.001
output: ''
patience_epochs: 10
pin_mem: false
pretrained: false
ratio:
- 0.67
- 1.5
recount: 3
recovery_interval: 0
remode: pixel
reprob: 0.5
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
seed: 42
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size_multiplier: 1
vflip: 0.0
warmup_epochs: 10
warmup_lr: 1.0e-06
weight_decay: 7.0e-06
workers: 5
---
aa: rand-m6-n4-inc1-mstd1.0
amp: false
apex_amp: false
aug_splits: 0
batch_size: 256
bn_eps: null
bn_momentum: null
bn_tf: false
channels_last: false
checkpoint_hist: 10
clip_grad: 0.015
clip_mode: agc
color_jitter: 0.4
cooldown_epochs: 10
crop_pct: 0.94
cutmix: 0.0
cutmix_minmax: null
data: /data/imagenet/
data_dir: /imagenet/
dataset: ''
decay_epochs: 1.0
decay_rate: 0.988
dist_bn: reduce
drop: 0.375
drop_block: null
drop_connect: null
drop_path: 0.25
epochs: 656
eval_metric: top1
gp: fast
hflip: 0.5
img_size: 256
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd: false
local_rank: 0
log_interval: 50
lr: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 1.0e-05
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: eca_nfnet_l1
model_ema: true
model_ema_decay: 0.999975
model_ema_force_cpu: false
momentum: 0.9
native_amp: true
no_aug: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: fusedsgd
opt_betas: null
opt_eps: 0.001
output: ''
patience_epochs: 10
pin_mem: false
pretrained: false
ratio:
- 0.67
- 1.5
recount: 3
recovery_interval: 0
remode: pixel
reprob: 0.5
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
seed: 42
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size_multiplier: 1
vflip: 0.0
warmup_epochs: 10
warmup_lr: 1.0e-06
weight_decay: 7.0e-06
workers: 5
Hi Ross, I have seen various sets of hparams for certain of your models. I know these sets come from your empirical, experimental skill and good judgment. Could you share or write up any notes on your experience? I am sure it would help the community save resources.
cheers
Linh
This was used on 4x 32GB V100.
If you need to change the batch size or the number of GPUs, rescale the LR linearly:

old_global_batch_size = 4 * 256  # 4 GPUs x 256 per-GPU batch = 1024
new_lr = 0.5 * new_global_batch_size / old_global_batch_size
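For instance, dropping to 2 GPUs at the same per-GPU batch size of 256 halves the global batch to 512, so new_lr = 0.5 * 512 / 1024 = 0.25, and the launch (config filename again hypothetical) becomes:

./distributed_train.sh 2 /imagenet --config config.yaml -b 256 --lr 0.25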