Skip to content

Instantly share code, notes, and snippets.

@rwightman
Last active January 9, 2025 19:34
Show Gist options
  • Save rwightman/fb37c339efd2334177ff99a8083ebbc4 to your computer and use it in GitHub Desktop.
Save rwightman/fb37c339efd2334177ff99a8083ebbc4 to your computer and use it in GitHub Desktop.
Searching for Better Vit Baselines Hparams

ViT 'Searching For Better Baselines' Hparams

The included YAML files are timm train script configs for training the timm SBB ViT exploration models.

See

Note the number of GPUs: it needs to be taken into consideration for global batch size equivalence and LR scaling.

Also note that some models have lr set to a non-null value; that LR is used directly if set. Otherwise, training falls back to lr_base, and the rate actually used is calculated based on lr_base_size and a sqrt scaling according to the global batch size.

To easily use a .yaml file, pass it via the --config argument of the timm train.py script, e.g.: train.py --config hparams.yaml --data-dir /where/my/data ... <other arg overrides>

aa: rand-m8-inc1-mstd1.0-n4
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: null
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_base_patch16_rope_reg1_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
# Nested mapping: img_size below is a child of model_kwargs. Left at column 0
# it would make model_kwargs null and duplicate the top-level img_size key.
model_kwargs:
  img_size: 256
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: null
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_reg1_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.06
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 0.8
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 60
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: null
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: 0.75
local_rank: 0
log_interval: 50
log_wandb: false
lr: 0.0001
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_reg4_gap_256
model_ema: true
model_ema_decay: 0.999
model_ema_force_cpu: false
model_ema_warmup: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: adamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: true
pretrained_path: vit_betw-in12k-8.pth
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: null
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.01
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0.0
aug_splits: 0
batch_size: 256
bce_loss: true
bce_pos_weight: null
bce_sum: true
bce_target_thresh: 0.2
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: 0.2
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/tfds/
dataset: tfds/imagenet12k
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 150
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: 0.05
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: 0.1
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0001
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_reg4_gap_256
model_ema: true
model_ema_decay: 0.99993
model_ema_force_cpu: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 11821
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.2
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.0
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: rkrr
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 3
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.2
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_reg4_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: null
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_rope_reg4_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8
aa: rand-m6-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/f/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: null
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_little_patch16_reg4_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_ema_warmup: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: null
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.06
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_medium_patch16_reg1_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.06
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_medium_patch16_reg4_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: adamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.07
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: null
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_medium_patch16_rope_reg1_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 0.8
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 60
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: null
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: 0.75
local_rank: 0
log_interval: 50
log_wandb: false
lr: 0.0001
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_mediumd_patch16_reg4_gap_256
model_ema: true
model_ema_decay: 0.999
model_ema_force_cpu: false
model_ema_warmup: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: adamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: true
pretrained_path: vit_md_patch16_reg4-in12k-8.pth
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: null
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.01
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0.0
aug_splits: 0
batch_size: 256
bce_loss: true
bce_pos_weight: null
bce_sum: true
bce_target_thresh: 0.2
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: 0.2
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/tfds/
dataset: tfds/imagenet12k
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 150
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: 0.05
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: 0.1
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0001
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.2
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_mediumd_patch16_reg4_gap_256
model_ema: true
model_ema_decay: 0.99993
model_ema_force_cpu: false
model_ema_warmup: false
# Nested mapping: fix_init is a child of model_kwargs, not a top-level key.
# Without the indent, model_kwargs parses as null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 11821
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.2
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.0
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: rkrr
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 3
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.2
worker_seeding: all
workers: 8
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.3
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: null
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_mediumd_patch16_rope_reg1_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8
---
# timm train script config for vit_pwee_patch16_reg1_gap_256
# (ViT "Searching for Better Baselines" hparams).
# Usage: train.py --config <this file> --data-dir /where/my/data ...
# The leading document marker keeps this config's keys separate from any
# config that precedes it in the same stream.
aa: rand-m6-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
# NOTE(review): presumably the per-GPU batch size; the global batch size (and
# therefore the derived LR) also depends on the number of GPUs used - confirm.
batch_size: 256
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
# Machine-specific dataset location; override with --data-dir.
data_dir: /data/f/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 450
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: null
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
# lr is null, so the rate is not used directly: it falls back to lr_base and
# is computed from lr_base_size with sqrt scaling by the global batch size.
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_pwee_patch16_reg1_gap_256
model_ema: true
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_ema_warmup: true
# fix_init is an entry of the model_kwargs mapping and must stay indented
# beneath it; at column 0 it parses as an unrelated top-level key and leaves
# model_kwargs null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: null
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.06
worker_seeding: all
workers: 8
---
# timm train script config for vit_wee_patch16_reg1_gap_256
# (ViT "Searching for Better Baselines" hparams).
# Usage: train.py --config <this file> --data-dir /where/my/data ...
# The leading document marker keeps this config's keys separate from any
# config that precedes it in the same stream.
aa: rand-m6-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
# NOTE(review): presumably the per-GPU batch size; the global batch size (and
# therefore the derived LR) also depends on the number of GPUs used - confirm.
batch_size: 256
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
color_jitter_prob: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
# Machine-specific dataset location; override with --data-dir.
data_dir: /data/f/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 450
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: null
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
# lr is null, so the rate is not used directly: it falls back to lr_base and
# is computed from lr_base_size with sqrt scaling by the global batch size.
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_wee_patch16_reg1_gap_256
model_ema: true
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_ema_warmup: true
# fix_init is an entry of the model_kwargs mapping and must stay indented
# beneath it; at column 0 it parses as an unrelated top-level key and leaves
# model_kwargs null.
model_kwargs:
  fix_init: true
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.08
- 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: null
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.06
worker_seeding: all
workers: 8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment