Kohya-SS CLI help
usage: sdxl_train_network.py [-h] [--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
[--console_log_file CONSOLE_LOG_FILE] [--console_log_simple] [--v2]
[--v_parameterization]
[--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH]
[--tokenizer_cache_dir TOKENIZER_CACHE_DIR]
[--train_data_dir TRAIN_DATA_DIR] [--cache_info] [--shuffle_caption]
[--caption_separator CAPTION_SEPARATOR]
[--caption_extension CAPTION_EXTENSION]
[--caption_extention CAPTION_EXTENTION] [--keep_tokens KEEP_TOKENS]
[--keep_tokens_separator KEEP_TOKENS_SEPARATOR]
[--secondary_separator SECONDARY_SEPARATOR] [--enable_wildcard]
[--caption_prefix CAPTION_PREFIX] [--caption_suffix CAPTION_SUFFIX]
[--color_aug] [--flip_aug]
[--face_crop_aug_range FACE_CROP_AUG_RANGE] [--random_crop]
[--debug_dataset] [--resolution RESOLUTION] [--cache_latents]
[--vae_batch_size VAE_BATCH_SIZE] [--cache_latents_to_disk]
[--enable_bucket] [--min_bucket_reso MIN_BUCKET_RESO]
[--max_bucket_reso MAX_BUCKET_RESO]
[--bucket_reso_steps BUCKET_RESO_STEPS] [--bucket_no_upscale]
[--token_warmup_min TOKEN_WARMUP_MIN]
[--token_warmup_step TOKEN_WARMUP_STEP] [--alpha_mask]
[--dataset_class DATASET_CLASS]
[--caption_dropout_rate CAPTION_DROPOUT_RATE]
[--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS]
[--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE]
[--reg_data_dir REG_DATA_DIR] [--in_json IN_JSON]
[--dataset_repeats DATASET_REPEATS] [--output_dir OUTPUT_DIR]
[--output_name OUTPUT_NAME]
[--huggingface_repo_id HUGGINGFACE_REPO_ID]
[--huggingface_repo_type HUGGINGFACE_REPO_TYPE]
[--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO]
[--huggingface_token HUGGINGFACE_TOKEN]
[--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY]
[--save_state_to_huggingface] [--resume_from_huggingface]
[--async_upload] [--save_precision {None,float,fp16,bf16}]
[--save_every_n_epochs SAVE_EVERY_N_EPOCHS]
[--save_every_n_steps SAVE_EVERY_N_STEPS]
[--save_n_epoch_ratio SAVE_N_EPOCH_RATIO]
[--save_last_n_epochs SAVE_LAST_N_EPOCHS]
[--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE]
[--save_last_n_steps SAVE_LAST_N_STEPS]
[--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE] [--save_state]
[--save_state_on_train_end] [--resume RESUME]
[--train_batch_size TRAIN_BATCH_SIZE]
[--max_token_length {None,150,225}] [--mem_eff_attn]
[--torch_compile]
[--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}]
[--xformers] [--sdpa] [--vae VAE]
[--max_train_steps MAX_TRAIN_STEPS]
[--max_train_epochs MAX_TRAIN_EPOCHS]
[--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS]
[--persistent_data_loader_workers] [--seed SEED]
[--gradient_checkpointing]
[--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS]
[--mixed_precision {no,fp16,bf16}] [--full_fp16] [--full_bf16]
[--fp8_base] [--ddp_timeout DDP_TIMEOUT]
[--ddp_gradient_as_bucket_view] [--ddp_static_graph]
[--clip_skip CLIP_SKIP] [--logging_dir LOGGING_DIR]
[--log_with {tensorboard,wandb,all}] [--log_prefix LOG_PREFIX]
[--log_tracker_name LOG_TRACKER_NAME]
[--wandb_run_name WANDB_RUN_NAME]
[--log_tracker_config LOG_TRACKER_CONFIG]
[--wandb_api_key WANDB_API_KEY] [--log_config]
[--noise_offset NOISE_OFFSET] [--noise_offset_random_strength]
[--multires_noise_iterations MULTIRES_NOISE_ITERATIONS]
[--ip_noise_gamma IP_NOISE_GAMMA]
[--ip_noise_gamma_random_strength]
[--multires_noise_discount MULTIRES_NOISE_DISCOUNT]
[--adaptive_noise_scale ADAPTIVE_NOISE_SCALE] [--zero_terminal_snr]
[--min_timestep MIN_TIMESTEP] [--max_timestep MAX_TIMESTEP]
[--loss_type {l1,l2,huber,smooth_l1}]
[--huber_schedule {constant,exponential,snr}] [--huber_c HUBER_C]
[--lowram] [--highvram]
[--sample_every_n_steps SAMPLE_EVERY_N_STEPS] [--sample_at_first]
[--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS]
[--sample_prompts SAMPLE_PROMPTS]
[--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}]
[--config_file CONFIG_FILE] [--output_config]
[--metadata_title METADATA_TITLE] [--metadata_author METADATA_AUTHOR]
[--metadata_description METADATA_DESCRIPTION]
[--metadata_license METADATA_LICENSE]
[--metadata_tags METADATA_TAGS]
[--prior_loss_weight PRIOR_LOSS_WEIGHT]
[--conditioning_data_dir CONDITIONING_DATA_DIR] [--masked_loss]
[--deepspeed] [--zero_stage {0,1,2,3}]
[--offload_optimizer_device {None,cpu,nvme}]
[--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH]
[--offload_param_device {None,cpu,nvme}]
[--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH]
[--zero3_init_flag] [--zero3_save_16bit_model]
[--fp16_master_weights_and_gradients]
[--optimizer_type OPTIMIZER_TYPE] [--use_8bit_adam]
[--use_lion_optimizer] [--learning_rate LEARNING_RATE]
[--max_grad_norm MAX_GRAD_NORM]
[--optimizer_args [OPTIMIZER_ARGS ...]]
[--lr_scheduler_type LR_SCHEDULER_TYPE]
[--lr_scheduler_args [LR_SCHEDULER_ARGS ...]]
[--lr_scheduler LR_SCHEDULER] [--lr_warmup_steps LR_WARMUP_STEPS]
[--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES]
[--lr_scheduler_power LR_SCHEDULER_POWER] [--fused_backward_pass]
[--dataset_config DATASET_CONFIG] [--min_snr_gamma MIN_SNR_GAMMA]
[--scale_v_pred_loss_like_noise_pred]
[--v_pred_like_loss V_PRED_LIKE_LOSS] [--debiased_estimation_loss]
[--weighted_captions] [--no_metadata]
[--save_model_as {None,ckpt,pt,safetensors}] [--unet_lr UNET_LR]
[--text_encoder_lr TEXT_ENCODER_LR]
[--network_weights NETWORK_WEIGHTS]
[--network_module NETWORK_MODULE] [--network_dim NETWORK_DIM]
[--network_alpha NETWORK_ALPHA] [--network_dropout NETWORK_DROPOUT]
[--network_args [NETWORK_ARGS ...]] [--network_train_unet_only]
[--network_train_text_encoder_only]
[--training_comment TRAINING_COMMENT] [--dim_from_weights]
[--scale_weight_norms SCALE_WEIGHT_NORMS]
[--base_weights [BASE_WEIGHTS ...]]
[--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]]
[--no_half_vae] [--skip_until_initial_step]
[--initial_epoch INITIAL_EPOCH] [--initial_step INITIAL_STEP]
[--cache_text_encoder_outputs]
[--cache_text_encoder_outputs_to_disk]
[--disable_mmap_load_safetensors]
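
For orientation, a minimal SDXL LoRA run might look like the following. This is an illustrative sketch only: the model path, data paths, network module and numeric values are placeholders chosen by the editor, not recommendations taken from the help text itself (many setups also wrap the call in `accelerate launch`).

python sdxl_train_network.py \
  --pretrained_model_name_or_path /models/sd_xl_base_1.0.safetensors \
  --train_data_dir /data/train --output_dir /output --output_name my_lora \
  --network_module networks.lora --network_dim 32 --network_alpha 16 \
  --resolution 1024,1024 --train_batch_size 1 \
  --learning_rate 1e-4 --lr_scheduler cosine --max_train_steps 2000 \
  --mixed_precision bf16 --save_precision bf16 --save_model_as safetensors \
  --cache_latents --gradient_checkpointing --sdpa
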
options:
-h, --help show this help message and exit
--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level
--console_log_file CONSOLE_LOG_FILE
Log to a file instead of stderr
--console_log_simple Simple log output
--v2 load Stable Diffusion v2.0 model
--v_parameterization enable v-parameterization training
--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH
pretrained model to train, directory to Diffusers model or StableDiffusion
checkpoint
--tokenizer_cache_dir TOKENIZER_CACHE_DIR
directory for caching Tokenizer (for offline training)
--train_data_dir TRAIN_DATA_DIR
directory for train images
--cache_info cache meta information (caption and image size) for faster dataset
loading. only available for DreamBooth
--shuffle_caption shuffle separated caption
--caption_separator CAPTION_SEPARATOR
separator for caption
--caption_extension CAPTION_EXTENSION
extension of caption files
--caption_extention CAPTION_EXTENTION
extension of caption files (backward compatibility)
--keep_tokens KEEP_TOKENS
keep the leading N tokens when shuffling caption tokens (a token means a
comma-separated string)
--keep_tokens_separator KEEP_TOKENS_SEPARATOR
A custom separator to divide the caption into fixed and flexible parts.
Tokens before this separator will not be shuffled. If not specified,
'--keep_tokens' will be used to determine the fixed number of tokens.
--secondary_separator SECONDARY_SEPARATOR
a secondary separator for the caption. This separator is replaced with
caption_separator after caption dropping/shuffling
--enable_wildcard enable wildcard for caption (e.g. '{image|picture|rendition}')
--caption_prefix CAPTION_PREFIX
prefix for caption text
--caption_suffix CAPTION_SUFFIX
suffix for caption text
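
To see how the separators above fit together (an editor's illustration; the tags are invented): with --keep_tokens_separator '|||' and --secondary_separator ';;;', a caption file line such as

my_character, 1girl ||| standing, outdoors;;;blue sky;;;daytime, smiling

would, as the descriptions read, keep everything before '|||' fixed (never shuffled or dropped), allow the rest to be shuffled or dropped, treat 'outdoors;;;blue sky;;;daytime' as a single unit during shuffling/dropping, and then replace ';;;' with the normal caption separator.
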
--color_aug enable weak color augmentation
--flip_aug enable horizontal flip augmentation
--face_crop_aug_range FACE_CROP_AUG_RANGE
enable face-centered crop augmentation and its range (e.g. 2.0,4.0)
--random_crop enable random crop (for style training in face-centered crop augmentation)
--debug_dataset show images for debugging (do not train)
--resolution RESOLUTION
resolution in training ('size' or 'width,height')
--cache_latents cache latents to main memory to reduce VRAM usage (augmentations must be
disabled)
--vae_batch_size VAE_BATCH_SIZE
batch size for caching latents
--cache_latents_to_disk
cache latents to disk to reduce VRAM usage (augmentations must be
disabled)
--enable_bucket enable buckets for multi aspect ratio training
--min_bucket_reso MIN_BUCKET_RESO
minimum resolution for buckets
--max_bucket_reso MAX_BUCKET_RESO
maximum resolution for buckets
--bucket_reso_steps BUCKET_RESO_STEPS
steps of resolution for buckets, divisible by 8 is recommended
--bucket_no_upscale make bucket for each image without upscaling
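
A short illustrative combination of the bucketing flags (the numbers are placeholders, not tuned recommendations; note the hint above that --bucket_reso_steps divisible by 8 is recommended):

  --enable_bucket --min_bucket_reso 256 --max_bucket_reso 2048 \
  --bucket_reso_steps 64 --bucket_no_upscale
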
--token_warmup_min TOKEN_WARMUP_MIN
start learning at N tags (token means comma separated strings)
--token_warmup_step TOKEN_WARMUP_STEP
tag length reaches maximum on N steps (or N*max_train_steps if N<1)
--alpha_mask use alpha channel as mask for training
--dataset_class DATASET_CLASS
dataset class for arbitrary dataset (package.module.Class)
--caption_dropout_rate CAPTION_DROPOUT_RATE
Rate of caption dropout (0.0~1.0)
--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS
Dropout all captions every N epochs
--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE
Rate of dropout for comma-separated tokens (0.0~1.0)
--reg_data_dir REG_DATA_DIR
directory for regularization images
--in_json IN_JSON json metadata for dataset
--dataset_repeats DATASET_REPEATS
repeat dataset when training with captions
--output_dir OUTPUT_DIR
directory to output trained model
--output_name OUTPUT_NAME
base name of trained model file
--huggingface_repo_id HUGGINGFACE_REPO_ID
huggingface repo name to upload
--huggingface_repo_type HUGGINGFACE_REPO_TYPE
huggingface repo type to upload
--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO
huggingface model path to upload files
--huggingface_token HUGGINGFACE_TOKEN
huggingface token
--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY
huggingface repository visibility ('public' for public, 'private' or None
for private)
--save_state_to_huggingface
save state to huggingface
--resume_from_huggingface
resume from huggingface (ex: --resume
{repo_id}/{path_in_repo}:{revision}:{repo_type})
--async_upload upload to huggingface asynchronously
--save_precision {None,float,fp16,bf16}
precision in saving
--save_every_n_epochs SAVE_EVERY_N_EPOCHS
save checkpoint every N epochs
--save_every_n_steps SAVE_EVERY_N_STEPS
save checkpoint every N steps
--save_n_epoch_ratio SAVE_N_EPOCH_RATIO
save checkpoints at an epoch ratio of N (for example 5 means at least 5
files are saved in total)
--save_last_n_epochs SAVE_LAST_N_EPOCHS
save last N checkpoints when saving every N epochs (remove older
checkpoints)
--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE
save last N checkpoints of state (overrides the value of
--save_last_n_epochs)
--save_last_n_steps SAVE_LAST_N_STEPS
save checkpoints until N steps elapsed (remove older checkpoints if N
steps elapsed)
--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE
save states until N steps elapsed (remove older states if N steps elapsed,
overrides --save_last_n_steps)
--save_state save training state additionally (including optimizer states etc.) when
saving model
--save_state_on_train_end
save training state (including optimizer states etc.) on train end
--resume RESUME saved state to resume training
--train_batch_size TRAIN_BATCH_SIZE
batch size for training
--max_token_length {None,150,225}
max token length of the text encoder (None means the default of 75; otherwise 150 or 225)
--mem_eff_attn use memory efficient attention for CrossAttention
--torch_compile use torch.compile (requires PyTorch 2.0)
--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}
dynamo backend type (default is inductor)
--xformers use xformers for CrossAttention
--sdpa use sdpa for CrossAttention (requires PyTorch 2.0)
--vae VAE path to checkpoint of vae to replace
--max_train_steps MAX_TRAIN_STEPS
training steps
--max_train_epochs MAX_TRAIN_EPOCHS
training epochs (overrides max_train_steps)
--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS
max num workers for DataLoader (lower values use less main RAM, start
epochs faster, but load data more slowly)
--persistent_data_loader_workers
persistent DataLoader workers (useful for reducing the time gap between
epochs, but may use more memory)
--seed SEED random seed for training
--gradient_checkpointing
enable gradient checkpointing
--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS
Number of update steps to accumulate before performing a backward/update
pass
--mixed_precision {no,fp16,bf16}
use mixed precision
--full_fp16 fp16 training including gradients
--full_bf16 bf16 training including gradients
--fp8_base use fp8 for base model
--ddp_timeout DDP_TIMEOUT
DDP timeout (in minutes; None uses accelerate's default)
--ddp_gradient_as_bucket_view
enable gradient_as_bucket_view for DDP
--ddp_static_graph enable static_graph for DDP
--clip_skip CLIP_SKIP
use output of nth layer from back of text encoder (n>=1)
--logging_dir LOGGING_DIR
enable logging and output TensorBoard log to this directory
--log_with {tensorboard,wandb,all}
what logging tool(s) to use (if 'all', TensorBoard and WandB are both
used)
--log_prefix LOG_PREFIX
add prefix for each log directory
--log_tracker_name LOG_TRACKER_NAME
name of tracker to use for logging, default is script-specific default
name
--wandb_run_name WANDB_RUN_NAME
The name of the specific wandb session
--log_tracker_config LOG_TRACKER_CONFIG
path to tracker config file to use for logging
--wandb_api_key WANDB_API_KEY
specify WandB API key to log in before starting training (optional).
--log_config log training configuration
--noise_offset NOISE_OFFSET
enable noise offset with this value (if enabled, around 0.1 is
recommended)
--noise_offset_random_strength
use random strength between 0~noise_offset for noise offset.
--multires_noise_iterations MULTIRES_NOISE_ITERATIONS
enable multires noise with this number of iterations (if enabled, around
6-10 is recommended)
--ip_noise_gamma IP_NOISE_GAMMA
enable input perturbation noise. used for regularization. recommended
value: around 0.1 (from arxiv.org/abs/2301.11706)
--ip_noise_gamma_random_strength
Use random strength between 0~ip_noise_gamma for input perturbation noise.
--multires_noise_discount MULTIRES_NOISE_DISCOUNT
set discount value for multires noise (has no effect without
--multires_noise_iterations)
--adaptive_noise_scale ADAPTIVE_NOISE_SCALE
add `latent mean absolute value * this value` to noise_offset (disabled if
None, default)
--zero_terminal_snr fix noise scheduler betas to enforce zero terminal SNR
--min_timestep MIN_TIMESTEP
set minimum time step for U-Net training (0~999, default is 0)
--max_timestep MAX_TIMESTEP
set maximum time step for U-Net training (1~1000, default is 1000)
--loss_type {l1,l2,huber,smooth_l1}
The type of loss function to use (L1, L2, Huber, or smooth L1), default is
L2
--huber_schedule {constant,exponential,snr}
The scheduling method for Huber loss (constant, exponential, or SNR-
based). Only used when loss_type is 'huber' or 'smooth_l1'. default is snr
--huber_c HUBER_C The huber loss parameter. Only used if one of the huber loss modes (huber
or smooth l1) is selected with loss_type. default is 0.1
--lowram enable low RAM optimization. e.g. load models to VRAM instead of RAM (for
machines which have bigger VRAM than RAM such as Colab and Kaggle)
--highvram disable low VRAM optimization. e.g. do not clear CUDA cache after each
latent caching (for machines which have bigger VRAM)
--sample_every_n_steps SAMPLE_EVERY_N_STEPS
generate sample images every N steps
--sample_at_first generate sample images before training
--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS
generate sample images every N epochs (overwrites n_steps)
--sample_prompts SAMPLE_PROMPTS
file for prompts to generate sample images
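
The layout of the --sample_prompts file is not described in this help output. In the sd-scripts family it is commonly one prompt per line with optional inline switches such as --n (negative prompt), --w / --h (width/height), --d (seed), --l (CFG scale) and --s (steps); treat the following invented line as a sketch and verify the syntax against your version:

a photo of a cat sitting on a bench --n low quality, blurry --w 1024 --h 1024 --d 1234 --l 7.0 --s 28
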
--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}
sampler (scheduler) type for sample images
--config_file CONFIG_FILE
use a .toml file instead of command-line args to pass hyperparameters
--output_config output the command line args to the given .toml file
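
As the editor reads these two descriptions, --output_config writes the current arguments to the path given by --config_file, which can then be reused on later runs. A hypothetical round trip (my_run.toml is a placeholder name):

# first run: record the arguments into a .toml
python sdxl_train_network.py --config_file my_run.toml --output_config --learning_rate 1e-4 --train_data_dir /data/train
# later runs: read the hyperparameters back from that file
python sdxl_train_network.py --config_file my_run.toml
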
--metadata_title METADATA_TITLE
title for model metadata (default is output_name)
--metadata_author METADATA_AUTHOR
author name for model metadata
--metadata_description METADATA_DESCRIPTION
description for model metadata
--metadata_license METADATA_LICENSE
license for model metadata
--metadata_tags METADATA_TAGS
tags for model metadata, separated by comma
--prior_loss_weight PRIOR_LOSS_WEIGHT
loss weight for regularization images
--conditioning_data_dir CONDITIONING_DATA_DIR
conditioning data directory
--masked_loss apply mask for calculating loss. conditioning_data_dir is required for
dataset.
--deepspeed enable deepspeed training
--zero_stage {0,1,2,3}
Possible options are 0,1,2,3.
--offload_optimizer_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and
3.
--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--offload_param_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stage 3.
--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--zero3_init_flag Flag to indicate whether to enable `deepspeed.zero.Init` for constructing
massive models. Only applicable with ZeRO Stage-3.
--zero3_save_16bit_model
Flag to indicate whether to save 16-bit model. Only applicable with ZeRO
Stage-3.
--fp16_master_weights_and_gradients
fp16_master_weights_and_gradients requires the optimizer to support keeping
fp16 master weights and gradients while keeping the optimizer states in fp32.
--optimizer_type OPTIMIZER_TYPE
Optimizer to use: AdamW (default), AdamW8bit, PagedAdamW, PagedAdamW8bit,
PagedAdamW32bit, Lion8bit, PagedLion8bit, Lion, SGDNesterov,
SGDNesterov8bit, DAdaptation(DAdaptAdamPreprint), DAdaptAdaGrad,
DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptLion, DAdaptSGD, AdaFactor
--use_8bit_adam use 8bit AdamW optimizer (requires bitsandbytes)
--use_lion_optimizer use Lion optimizer (requires lion-pytorch)
--learning_rate LEARNING_RATE
learning rate
--max_grad_norm MAX_GRAD_NORM
Max gradient norm, 0 for no clipping
--optimizer_args [OPTIMIZER_ARGS ...]
additional arguments for optimizer (like "weight_decay=0.01
betas=0.9,0.999 ...")
--lr_scheduler_type LR_SCHEDULER_TYPE
custom scheduler module
--lr_scheduler_args [LR_SCHEDULER_ARGS ...]
additional arguments for scheduler (like "T_max=100")
--lr_scheduler LR_SCHEDULER
scheduler to use for learning rate: linear, cosine,
cosine_with_restarts, polynomial, constant (default),
constant_with_warmup, adafactor
--lr_warmup_steps LR_WARMUP_STEPS
Number of steps for the warmup in the lr scheduler (default is 0)
--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES
Number of restarts for cosine scheduler with restarts
--lr_scheduler_power LR_SCHEDULER_POWER
Polynomial power for polynomial scheduler
--fused_backward_pass
Combines backward pass and optimizer step to reduce VRAM usage. Only
available in SDXL
--dataset_config DATASET_CONFIG
config file for detailed dataset settings
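
A minimal sketch of what a --dataset_config file might contain. The section and key names here ([general], [[datasets]], [[datasets.subsets]], image_dir, num_repeats) reflect the editor's recollection of the sd-scripts dataset config schema and are not spelled out in this help text, so check the repository's dataset config documentation before relying on them:

[general]
caption_extension = ".txt"
shuffle_caption = true
keep_tokens = 1

[[datasets]]
resolution = 1024
batch_size = 2
enable_bucket = true

[[datasets.subsets]]
image_dir = "/data/train/my_subject"
num_repeats = 10
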
--min_snr_gamma MIN_SNR_GAMMA
gamma for reducing the weight of high loss timesteps. Lower numbers have
stronger effect. 5 is recommended by paper.
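
For context (not stated in this help text): as the editor understands it, this flag applies Min-SNR loss weighting, scaling each timestep's loss by roughly

weight(t) = min(SNR(t), gamma) / SNR(t)

so timesteps whose signal-to-noise ratio exceeds gamma are down-weighted; gamma = 5 is the value the description above attributes to the paper.
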
--scale_v_pred_loss_like_noise_pred
scale v-prediction loss like noise prediction loss
--v_pred_like_loss V_PRED_LIKE_LOSS
add v-prediction like loss multiplied by this value
--debiased_estimation_loss
debiased estimation loss
--weighted_captions Enable weighted captions in the standard style (token:1.3). No commas
inside parens, or shuffle/dropout may break the decoder.
--no_metadata do not save metadata in output model
--save_model_as {None,ckpt,pt,safetensors}
format to save the model (default is .safetensors)
--unet_lr UNET_LR learning rate for U-Net
--text_encoder_lr TEXT_ENCODER_LR
learning rate for Text Encoder
--network_weights NETWORK_WEIGHTS
pretrained weights for network
--network_module NETWORK_MODULE
network module to train
--network_dim NETWORK_DIM
network dimensions (depends on each network)
--network_alpha NETWORK_ALPHA
alpha for LoRA weight scaling, default 1 (same as network_dim for same
behavior as old version)
--network_dropout NETWORK_DROPOUT
Drops neurons out of training every step (0 or None is default behavior
(no dropout), 1 would drop all neurons)
--network_args [NETWORK_ARGS ...]
additional arguments for network (key=value)
--network_train_unet_only
train only the U-Net part
--network_train_text_encoder_only
train only the Text Encoder part
--training_comment TRAINING_COMMENT
arbitrary comment string stored in metadata
--dim_from_weights automatically determine dim (rank) from network_weights
--scale_weight_norms SCALE_WEIGHT_NORMS
Scale the weight of each key pair to help prevent overtraining via exploding
gradients. (1 is a good starting point)
--base_weights [BASE_WEIGHTS ...]
network weights to merge into the model before training
--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]
multiplier for network weights to merge into the model before training
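
Two illustrative ways to combine the network-weight options above (paths are placeholders; networks.lora is assumed here as the usual LoRA module name in this repository):

# continue training an existing LoRA, inferring its rank from the file
  --network_module networks.lora --network_weights /loras/my_lora.safetensors --dim_from_weights

# or merge another network into the base model before training starts
  --base_weights /loras/style_lora.safetensors --base_weights_multiplier 0.5
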
--no_half_vae do not use fp16/bf16 VAE in mixed precision (use float VAE)
--skip_until_initial_step
skip training until initial_step is reached
--initial_epoch INITIAL_EPOCH
initial epoch number, 1 means first epoch (same as not specifying). NOTE:
initial_epoch/step doesn't affect the lr scheduler, which means the lr
scheduler will start from 0 without `--resume`.
--initial_step INITIAL_STEP
initial step number including all epochs, 0 means first step (same as not
specifying). overwrites initial_epoch.
--cache_text_encoder_outputs
cache text encoder outputs
--cache_text_encoder_outputs_to_disk
cache text encoder outputs to disk
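
Finally, a sketch of the save/resume round trip enabled by --save_state and --resume (paths and names are placeholders; the exact name of the state directory is whatever the first run writes under --output_dir):

# first run: also save full training state (optimizer state etc.) at each checkpoint
python sdxl_train_network.py --output_dir /output --output_name my_lora --save_every_n_epochs 1 --save_state
# later run: pick up from the saved state directory
python sdxl_train_network.py --output_dir /output --output_name my_lora --resume /output/my_lora-000002-state
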