Hparams for tiny test timm
models. All are pretty similar; vit3 extended the schedule from the default 1600 epochs to 1800. All are based on the MobileNetV4 template w/ AdamW and a reduced beta1, grinding out the training. Some of the smallest models had a bit less AA magnitude (m3); slightly higher-capacity ones were increased to m5 or m6.
Last active
October 29, 2024 16:14
-
-
Save rwightman/9ba8efc39a546426e99055720d2f705f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
aa: rand-m3-inc1-mstd101
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0
aug_splits: 0
batch_size: 1024
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: true
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: 0.4
color_jitter_prob: null
cooldown_epochs: 0
crop_pct: null
cutmix: 0.0
cutmix_minmax: null
data: /data/imagenet/
data_dir: null
dataset: ''
dataset_download: false
decay_epochs: 90
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.3
drop_block: null
drop_connect: null
drop_path: 0.05
epoch_repeats: 0.0
epochs: 1600
eval_metric: top1
experiment: ''
fast_norm: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: 0.05
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: null
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: 0.00167
lr_base: 0.1
lr_base_scale: ''
lr_base_size: 256
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 0.0
mixup: 0.0
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: test_convnext3
model_ema: true
model_ema_decay: 0.9999
model_ema_force_cpu: false
model_ema_warmup: true
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: adamw
opt_betas: null
opt_eps: null
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: false
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.8
- 1.0
sched: cosine
sched_on_updates: true
seed: 42
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: rkrr
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 0.0
warmup_prefix: false
weight_decay: 0.05
worker_seeding: all
workers: 8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
aa: rand-m5-inc1-mstd101
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0
aug_splits: 0
batch_size: 1024
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: true
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: 0.4
color_jitter_prob: null
cooldown_epochs: 0
crop_pct: null
cutmix: 0.0
cutmix_minmax: null
data: /data/imagenet/
data_dir: null
dataset: ''
dataset_download: false
decay_epochs: 90
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.3
drop_block: null
drop_connect: null
drop_path: 0.05
epoch_repeats: 0.0
epochs: 1600
eval_metric: top1
experiment: ''
fast_norm: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: 0.05
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: null
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: 0.00167
lr_base: 0.1
lr_base_scale: ''
lr_base_size: 256
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 0.0
mixup: 0.0
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: test_vit2
model_ema: true
model_ema_decay: 0.9999
model_ema_force_cpu: false
model_ema_warmup: true
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: adamw
opt_betas: null
opt_eps: null
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: false
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.8
- 1.0
sched: cosine
sched_on_updates: true
seed: 42
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: rkrr
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 0.0
warmup_prefix: false
weight_decay: 0.05
worker_seeding: all
workers: 8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
aa: rand-m6-inc1-mstd101
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 0
aug_splits: 0
batch_size: 1024
bce_loss: false
bce_pos_weight: null
bce_sum: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: true
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: 0.4
color_jitter_prob: null
cooldown_epochs: 0
crop_pct: 0.9
cutmix: 0.0
cutmix_minmax: null
data: /data/imagenet/
data_dir: null
dataset: ''
dataset_download: false
decay_epochs: 90
decay_milestones:
- 90
- 180
- 270
decay_rate: 0.1
device: cuda
device_modules: null
dist_bn: reduce
drop: 0.3
drop_block: null
drop_connect: null
drop_path: 0.05
epoch_repeats: 0.0
epochs: 1800
eval_metric: top1
experiment: ''
fast_norm: false
fuser: ''
gaussian_blur_prob: null
gp: null
grad_accum_steps: 1
grad_checkpointing: false
grayscale_prob: 0.05
head_init_bias: null
head_init_scale: 0.0
hflip: 0.5
img_size: null
in_chans: null
initial_checkpoint: ''
input_img_mode: null
input_key: null
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: 0.00167
lr_base: 0.1
lr_base_scale: ''
lr_base_size: 256
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 0.0
mixup: 0.0
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: test_vit3
model_ema: true
model_ema_decay: 0.9999
model_ema_force_cpu: false
model_ema_warmup: true
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: 1000
opt: adamw
opt_betas: null
opt_eps: null
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: false
pretrained: false
pretrained_path: null
ratio:
- 0.75
- 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
- 0.8
- 1.0
sched: cosine
sched_on_updates: true
seed: 42
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
target_key: null
torchcompile: inductor
torchscript: false
train_crop_mode: rkrr
train_interpolation: random
train_num_samples: null
train_split: train
tta: 0
use_multi_epochs_loader: false
val_num_samples: null
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 0.0
warmup_prefix: false
weight_decay: 0.05
worker_seeding: all
workers: 8
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment