Skip to content

Instantly share code, notes, and snippets.

@Jaid
Last active August 17, 2024 10:34
Show Gist options
  • Save Jaid/85d2b63289c65168ba06e113b94857e0 to your computer and use it in GitHub Desktop.
Save Jaid/85d2b63289c65168ba06e113b94857e0 to your computer and use it in GitHub Desktop.
Kohya-SS CLI help
usage: sdxl_train_network.py [-h] [--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
[--console_log_file CONSOLE_LOG_FILE] [--console_log_simple] [--v2]
[--v_parameterization]
[--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH]
[--tokenizer_cache_dir TOKENIZER_CACHE_DIR]
[--train_data_dir TRAIN_DATA_DIR] [--cache_info] [--shuffle_caption]
[--caption_separator CAPTION_SEPARATOR]
[--caption_extension CAPTION_EXTENSION]
[--caption_extention CAPTION_EXTENTION] [--keep_tokens KEEP_TOKENS]
[--keep_tokens_separator KEEP_TOKENS_SEPARATOR]
[--secondary_separator SECONDARY_SEPARATOR] [--enable_wildcard]
[--caption_prefix CAPTION_PREFIX] [--caption_suffix CAPTION_SUFFIX]
[--color_aug] [--flip_aug]
[--face_crop_aug_range FACE_CROP_AUG_RANGE] [--random_crop]
[--debug_dataset] [--resolution RESOLUTION] [--cache_latents]
[--vae_batch_size VAE_BATCH_SIZE] [--cache_latents_to_disk]
[--enable_bucket] [--min_bucket_reso MIN_BUCKET_RESO]
[--max_bucket_reso MAX_BUCKET_RESO]
[--bucket_reso_steps BUCKET_RESO_STEPS] [--bucket_no_upscale]
[--token_warmup_min TOKEN_WARMUP_MIN]
[--token_warmup_step TOKEN_WARMUP_STEP] [--alpha_mask]
[--dataset_class DATASET_CLASS]
[--caption_dropout_rate CAPTION_DROPOUT_RATE]
[--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS]
[--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE]
[--reg_data_dir REG_DATA_DIR] [--in_json IN_JSON]
[--dataset_repeats DATASET_REPEATS] [--output_dir OUTPUT_DIR]
[--output_name OUTPUT_NAME]
[--huggingface_repo_id HUGGINGFACE_REPO_ID]
[--huggingface_repo_type HUGGINGFACE_REPO_TYPE]
[--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO]
[--huggingface_token HUGGINGFACE_TOKEN]
[--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY]
[--save_state_to_huggingface] [--resume_from_huggingface]
[--async_upload] [--save_precision {None,float,fp16,bf16}]
[--save_every_n_epochs SAVE_EVERY_N_EPOCHS]
[--save_every_n_steps SAVE_EVERY_N_STEPS]
[--save_n_epoch_ratio SAVE_N_EPOCH_RATIO]
[--save_last_n_epochs SAVE_LAST_N_EPOCHS]
[--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE]
[--save_last_n_steps SAVE_LAST_N_STEPS]
[--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE] [--save_state]
[--save_state_on_train_end] [--resume RESUME]
[--train_batch_size TRAIN_BATCH_SIZE]
[--max_token_length {None,150,225}] [--mem_eff_attn]
[--torch_compile]
[--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}]
[--xformers] [--sdpa] [--vae VAE]
[--max_train_steps MAX_TRAIN_STEPS]
[--max_train_epochs MAX_TRAIN_EPOCHS]
[--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS]
[--persistent_data_loader_workers] [--seed SEED]
[--gradient_checkpointing]
[--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS]
[--mixed_precision {no,fp16,bf16}] [--full_fp16] [--full_bf16]
[--fp8_base] [--ddp_timeout DDP_TIMEOUT]
[--ddp_gradient_as_bucket_view] [--ddp_static_graph]
[--clip_skip CLIP_SKIP] [--logging_dir LOGGING_DIR]
[--log_with {tensorboard,wandb,all}] [--log_prefix LOG_PREFIX]
[--log_tracker_name LOG_TRACKER_NAME]
[--wandb_run_name WANDB_RUN_NAME]
[--log_tracker_config LOG_TRACKER_CONFIG]
[--wandb_api_key WANDB_API_KEY] [--log_config]
[--noise_offset NOISE_OFFSET] [--noise_offset_random_strength]
[--multires_noise_iterations MULTIRES_NOISE_ITERATIONS]
[--ip_noise_gamma IP_NOISE_GAMMA]
[--ip_noise_gamma_random_strength]
[--multires_noise_discount MULTIRES_NOISE_DISCOUNT]
[--adaptive_noise_scale ADAPTIVE_NOISE_SCALE] [--zero_terminal_snr]
[--min_timestep MIN_TIMESTEP] [--max_timestep MAX_TIMESTEP]
[--loss_type {l1,l2,huber,smooth_l1}]
[--huber_schedule {constant,exponential,snr}] [--huber_c HUBER_C]
[--lowram] [--highvram]
[--sample_every_n_steps SAMPLE_EVERY_N_STEPS] [--sample_at_first]
[--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS]
[--sample_prompts SAMPLE_PROMPTS]
[--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}]
[--config_file CONFIG_FILE] [--output_config]
[--metadata_title METADATA_TITLE] [--metadata_author METADATA_AUTHOR]
[--metadata_description METADATA_DESCRIPTION]
[--metadata_license METADATA_LICENSE]
[--metadata_tags METADATA_TAGS]
[--prior_loss_weight PRIOR_LOSS_WEIGHT]
[--conditioning_data_dir CONDITIONING_DATA_DIR] [--masked_loss]
[--deepspeed] [--zero_stage {0,1,2,3}]
[--offload_optimizer_device {None,cpu,nvme}]
[--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH]
[--offload_param_device {None,cpu,nvme}]
[--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH]
[--zero3_init_flag] [--zero3_save_16bit_model]
[--fp16_master_weights_and_gradients]
[--optimizer_type OPTIMIZER_TYPE] [--use_8bit_adam]
[--use_lion_optimizer] [--learning_rate LEARNING_RATE]
[--max_grad_norm MAX_GRAD_NORM]
[--optimizer_args [OPTIMIZER_ARGS ...]]
[--lr_scheduler_type LR_SCHEDULER_TYPE]
[--lr_scheduler_args [LR_SCHEDULER_ARGS ...]]
[--lr_scheduler LR_SCHEDULER] [--lr_warmup_steps LR_WARMUP_STEPS]
[--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES]
[--lr_scheduler_power LR_SCHEDULER_POWER] [--fused_backward_pass]
[--dataset_config DATASET_CONFIG] [--min_snr_gamma MIN_SNR_GAMMA]
[--scale_v_pred_loss_like_noise_pred]
[--v_pred_like_loss V_PRED_LIKE_LOSS] [--debiased_estimation_loss]
[--weighted_captions] [--no_metadata]
[--save_model_as {None,ckpt,pt,safetensors}] [--unet_lr UNET_LR]
[--text_encoder_lr TEXT_ENCODER_LR]
[--network_weights NETWORK_WEIGHTS]
[--network_module NETWORK_MODULE] [--network_dim NETWORK_DIM]
[--network_alpha NETWORK_ALPHA] [--network_dropout NETWORK_DROPOUT]
[--network_args [NETWORK_ARGS ...]] [--network_train_unet_only]
[--network_train_text_encoder_only]
[--training_comment TRAINING_COMMENT] [--dim_from_weights]
[--scale_weight_norms SCALE_WEIGHT_NORMS]
[--base_weights [BASE_WEIGHTS ...]]
[--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]]
[--no_half_vae] [--skip_until_initial_step]
[--initial_epoch INITIAL_EPOCH] [--initial_step INITIAL_STEP]
[--cache_text_encoder_outputs]
[--cache_text_encoder_outputs_to_disk]
[--disable_mmap_load_safetensors]
options:
-h, --help show this help message and exit
--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level
--console_log_file CONSOLE_LOG_FILE
Log to a file instead of stderr
--console_log_simple Simple log output
--v2 load Stable Diffusion v2.0 model
--v_parameterization enable v-parameterization training
--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH
pretrained model to train, directory to Diffusers model or StableDiffusion
checkpoint
--tokenizer_cache_dir TOKENIZER_CACHE_DIR
directory for caching Tokenizer (for offline training)
--train_data_dir TRAIN_DATA_DIR
directory for train images
--cache_info cache meta information (caption and image size) for faster dataset
loading. only available for DreamBooth
--shuffle_caption shuffle separated caption
--caption_separator CAPTION_SEPARATOR
separator for caption
--caption_extension CAPTION_EXTENSION
extension of caption files
--caption_extention CAPTION_EXTENTION
extension of caption files (backward compatibility)
--keep_tokens KEEP_TOKENS
keep the leading N tokens when shuffling caption tokens (token means comma
separated strings)
--keep_tokens_separator KEEP_TOKENS_SEPARATOR
A custom separator to divide the caption into fixed and flexible parts.
Tokens before this separator will not be shuffled. If not specified,
'--keep_tokens' will be used to determine the fixed number of tokens.
--secondary_separator SECONDARY_SEPARATOR
a secondary separator for caption. This separator is replaced with
caption_separator after dropping/shuffling caption
--enable_wildcard enable wildcard for caption (e.g. '{image|picture|rendition}')
--caption_prefix CAPTION_PREFIX
prefix for caption text
--caption_suffix CAPTION_SUFFIX
suffix for caption text
--color_aug enable weak color augmentation
--flip_aug enable horizontal flip augmentation
--face_crop_aug_range FACE_CROP_AUG_RANGE
enable face-centered crop augmentation and its range (e.g. 2.0,4.0)
--random_crop enable random crop (for style training in face-centered crop augmentation)
--debug_dataset show images for debugging (do not train)
--resolution RESOLUTION
resolution in training ('size' or 'width,height')
--cache_latents cache latents to main memory to reduce VRAM usage (augmentations must be
disabled)
--vae_batch_size VAE_BATCH_SIZE
batch size for caching latents
--cache_latents_to_disk
cache latents to disk to reduce VRAM usage (augmentations must be
disabled)
--enable_bucket enable buckets for multi aspect ratio training
--min_bucket_reso MIN_BUCKET_RESO
minimum resolution for buckets
--max_bucket_reso MAX_BUCKET_RESO
maximum resolution for buckets
--bucket_reso_steps BUCKET_RESO_STEPS
steps of resolution for buckets, divisible by 8 is recommended
--bucket_no_upscale make bucket for each image without upscaling
--token_warmup_min TOKEN_WARMUP_MIN
start learning at N tags (token means comma separated strings)
--token_warmup_step TOKEN_WARMUP_STEP
tag length reaches maximum on N steps (or N*max_train_steps if N<1)
--alpha_mask use alpha channel as mask for training
--dataset_class DATASET_CLASS
dataset class for arbitrary dataset (package.module.Class)
--caption_dropout_rate CAPTION_DROPOUT_RATE
Rate of caption dropout (0.0~1.0)
--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS
Dropout all captions every N epochs
--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE
Rate of dropout for comma separated tokens (0.0~1.0)
--reg_data_dir REG_DATA_DIR
directory for regularization images
--in_json IN_JSON json metadata for dataset
--dataset_repeats DATASET_REPEATS
repeat dataset when training with captions
--output_dir OUTPUT_DIR
directory to output trained model
--output_name OUTPUT_NAME
base name of trained model file
--huggingface_repo_id HUGGINGFACE_REPO_ID
huggingface repo name to upload
--huggingface_repo_type HUGGINGFACE_REPO_TYPE
huggingface repo type to upload
--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO
huggingface model path to upload files
--huggingface_token HUGGINGFACE_TOKEN
huggingface token
--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY
huggingface repository visibility ('public' for public, 'private' or None
for private)
--save_state_to_huggingface
save state to huggingface
--resume_from_huggingface
resume from huggingface (ex: --resume
{repo_id}/{path_in_repo}:{revision}:{repo_type})
--async_upload upload to huggingface asynchronously
--save_precision {None,float,fp16,bf16}
precision in saving
--save_every_n_epochs SAVE_EVERY_N_EPOCHS
save checkpoint every N epochs
--save_every_n_steps SAVE_EVERY_N_STEPS
save checkpoint every N steps
--save_n_epoch_ratio SAVE_N_EPOCH_RATIO
save checkpoint N epoch ratio (for example 5 means save at least 5 files
total)
--save_last_n_epochs SAVE_LAST_N_EPOCHS
save last N checkpoints when saving every N epochs (remove older
checkpoints)
--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE
save last N checkpoints of state (overrides the value of
--save_last_n_epochs)
--save_last_n_steps SAVE_LAST_N_STEPS
save checkpoints until N steps elapsed (remove older checkpoints if N
steps elapsed)
--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE
save states until N steps elapsed (remove older states if N steps elapsed,
overrides --save_last_n_steps)
--save_state save training state additionally (including optimizer states etc.) when
saving model
--save_state_on_train_end
save training state (including optimizer states etc.) on train end
--resume RESUME saved state to resume training
--train_batch_size TRAIN_BATCH_SIZE
batch size for training
--max_token_length {None,150,225}
max token length of text encoder (default is 75; 150 or 225 can be selected)
--mem_eff_attn use memory efficient attention for CrossAttention
--torch_compile use torch.compile (requires PyTorch 2.0)
--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}
dynamo backend type (default is inductor)
--xformers use xformers for CrossAttention
--sdpa use sdpa for CrossAttention (requires PyTorch 2.0)
--vae VAE path to checkpoint of vae to replace
--max_train_steps MAX_TRAIN_STEPS
training steps
--max_train_epochs MAX_TRAIN_EPOCHS
training epochs (overrides max_train_steps)
--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS
max num workers for DataLoader (lower is less main RAM usage, faster epoch
start and slower data loading)
--persistent_data_loader_workers
persistent DataLoader workers (useful for reducing the time gap between epochs,
but may use more memory)
--seed SEED random seed for training
--gradient_checkpointing
enable gradient checkpointing
--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS
Number of update steps to accumulate before performing a backward/update
pass
--mixed_precision {no,fp16,bf16}
use mixed precision
--full_fp16 fp16 training including gradients
--full_bf16 bf16 training including gradients
--fp8_base use fp8 for base model
--ddp_timeout DDP_TIMEOUT
DDP timeout (min, None for default of accelerate)
--ddp_gradient_as_bucket_view
enable gradient_as_bucket_view for DDP
--ddp_static_graph enable static_graph for DDP
--clip_skip CLIP_SKIP
use output of nth layer from back of text encoder (n>=1)
--logging_dir LOGGING_DIR
enable logging and output TensorBoard log to this directory
--log_with {tensorboard,wandb,all}
what logging tool(s) to use (if 'all', TensorBoard and WandB are both
used)
--log_prefix LOG_PREFIX
add prefix for each log directory
--log_tracker_name LOG_TRACKER_NAME
name of tracker to use for logging, default is script-specific default
name
--wandb_run_name WANDB_RUN_NAME
The name of the specific wandb session
--log_tracker_config LOG_TRACKER_CONFIG
path to tracker config file to use for logging
--wandb_api_key WANDB_API_KEY
specify WandB API key to log in before starting training (optional).
--log_config log training configuration
--noise_offset NOISE_OFFSET
enable noise offset with this value (if enabled, around 0.1 is
recommended)
--noise_offset_random_strength
use random strength between 0~noise_offset for noise offset.
--multires_noise_iterations MULTIRES_NOISE_ITERATIONS
enable multires noise with this number of iterations (if enabled, around
6-10 is recommended)
--ip_noise_gamma IP_NOISE_GAMMA
enable input perturbation noise. used for regularization. recommended
value: around 0.1 (from arxiv.org/abs/2301.11706)
--ip_noise_gamma_random_strength
Use random strength between 0~ip_noise_gamma for input perturbation noise.
--multires_noise_discount MULTIRES_NOISE_DISCOUNT
set discount value for multires noise (has no effect without
--multires_noise_iterations)
--adaptive_noise_scale ADAPTIVE_NOISE_SCALE
add `latent mean absolute value * this value` to noise_offset (disabled if
None, default)
--zero_terminal_snr fix noise scheduler betas to enforce zero terminal SNR
--min_timestep MIN_TIMESTEP
set minimum time step for U-Net training (0~999, default is 0)
--max_timestep MAX_TIMESTEP
set maximum time step for U-Net training (1~1000, default is 1000)
--loss_type {l1,l2,huber,smooth_l1}
The type of loss function to use (L1, L2, Huber, or smooth L1), default is
L2
--huber_schedule {constant,exponential,snr}
The scheduling method for Huber loss (constant, exponential, or SNR-
based). Only used when loss_type is 'huber' or 'smooth_l1'. default is snr
--huber_c HUBER_C The huber loss parameter. Only used if one of the huber loss modes (huber
or smooth l1) is selected with loss_type. default is 0.1
--lowram enable low RAM optimization. e.g. load models to VRAM instead of RAM (for
machines which have bigger VRAM than RAM such as Colab and Kaggle)
--highvram disable low VRAM optimization. e.g. do not clear CUDA cache after each
latent caching (for machines which have bigger VRAM)
--sample_every_n_steps SAMPLE_EVERY_N_STEPS
generate sample images every N steps
--sample_at_first generate sample images before training
--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS
generate sample images every N epochs (overwrites n_steps)
--sample_prompts SAMPLE_PROMPTS
file for prompts to generate sample images
--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}
sampler (scheduler) type for sample images
--config_file CONFIG_FILE
use a .toml file instead of args to pass hyperparameters
--output_config output command line args to given .toml file
--metadata_title METADATA_TITLE
title for model metadata (default is output_name)
--metadata_author METADATA_AUTHOR
author name for model metadata
--metadata_description METADATA_DESCRIPTION
description for model metadata
--metadata_license METADATA_LICENSE
license for model metadata
--metadata_tags METADATA_TAGS
tags for model metadata, separated by comma
--prior_loss_weight PRIOR_LOSS_WEIGHT
loss weight for regularization images
--conditioning_data_dir CONDITIONING_DATA_DIR
conditioning data directory
--masked_loss apply mask for calculating loss. conditioning_data_dir is required for
dataset.
--deepspeed enable deepspeed training
--zero_stage {0,1,2,3}
Possible options are 0,1,2,3.
--offload_optimizer_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and
3.
--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--offload_param_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stage 3.
--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--zero3_init_flag Flag to indicate whether to enable `deepspeed.zero.Init` for constructing
massive models. Only applicable with ZeRO Stage-3.
--zero3_save_16bit_model
Flag to indicate whether to save 16-bit model. Only applicable with ZeRO
Stage-3.
--fp16_master_weights_and_gradients
fp16_master_and_gradients requires optimizer to support keeping fp16
master and gradients while keeping the optimizer states in fp32.
--optimizer_type OPTIMIZER_TYPE
Optimizer to use: AdamW (default), AdamW8bit, PagedAdamW, PagedAdamW8bit,
PagedAdamW32bit, Lion8bit, PagedLion8bit, Lion, SGDNesterov,
SGDNesterov8bit, DAdaptation(DAdaptAdamPreprint), DAdaptAdaGrad,
DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptLion, DAdaptSGD, AdaFactor
--use_8bit_adam use 8bit AdamW optimizer (requires bitsandbytes)
--use_lion_optimizer use Lion optimizer (requires lion-pytorch)
--learning_rate LEARNING_RATE
learning rate
--max_grad_norm MAX_GRAD_NORM
Max gradient norm, 0 for no clipping
--optimizer_args [OPTIMIZER_ARGS ...]
additional arguments for optimizer (like "weight_decay=0.01
betas=0.9,0.999 ...")
--lr_scheduler_type LR_SCHEDULER_TYPE
custom scheduler module
--lr_scheduler_args [LR_SCHEDULER_ARGS ...]
additional arguments for scheduler (like "T_max=100")
--lr_scheduler LR_SCHEDULER
scheduler to use for learning rate: linear, cosine,
cosine_with_restarts, polynomial, constant (default),
constant_with_warmup, adafactor
--lr_warmup_steps LR_WARMUP_STEPS
Number of steps for the warmup in the lr scheduler (default is 0)
--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES
Number of restarts for cosine scheduler with restarts
--lr_scheduler_power LR_SCHEDULER_POWER
Polynomial power for polynomial scheduler
--fused_backward_pass
Combines backward pass and optimizer step to reduce VRAM usage. Only
available in SDXL
--dataset_config DATASET_CONFIG
config file for detail settings
--min_snr_gamma MIN_SNR_GAMMA
gamma for reducing the weight of high loss timesteps. Lower numbers have
stronger effect. 5 is recommended by paper.
--scale_v_pred_loss_like_noise_pred
scale v-prediction loss like noise prediction loss
--v_pred_like_loss V_PRED_LIKE_LOSS
add v-prediction like loss multiplied by this value
--debiased_estimation_loss
debiased estimation loss
--weighted_captions Enable weighted captions in the standard style (token:1.3). No commas
inside parens, or shuffle/dropout may break the decoder.
--no_metadata do not save metadata in output model
--save_model_as {None,ckpt,pt,safetensors}
format to save the model (default is .safetensors)
--unet_lr UNET_LR learning rate for U-Net
--text_encoder_lr TEXT_ENCODER_LR
learning rate for Text Encoder
--network_weights NETWORK_WEIGHTS
pretrained weights for network
--network_module NETWORK_MODULE
network module to train
--network_dim NETWORK_DIM
network dimensions (depends on each network)
--network_alpha NETWORK_ALPHA
alpha for LoRA weight scaling, default 1 (same as network_dim for same
behavior as old version)
--network_dropout NETWORK_DROPOUT
Drops neurons out of training every step (0 or None is default behavior
(no dropout), 1 would drop all neurons)
--network_args [NETWORK_ARGS ...]
additional arguments for network (key=value)
--network_train_unet_only
only training U-Net part
--network_train_text_encoder_only
only training Text Encoder part
--training_comment TRAINING_COMMENT
arbitrary comment string stored in metadata
--dim_from_weights automatically determine dim (rank) from network_weights
--scale_weight_norms SCALE_WEIGHT_NORMS
Scale the weight of each key pair to help prevent overtraining via exploding
gradients. (1 is a good starting point)
--base_weights [BASE_WEIGHTS ...]
network weights to merge into the model before training
--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]
multiplier for network weights to merge into the model before training
--no_half_vae do not use fp16/bf16 VAE in mixed precision (use float VAE)
--skip_until_initial_step
skip training until initial_step is reached
--initial_epoch INITIAL_EPOCH
initial epoch number, 1 means first epoch (same as not specifying). NOTE:
initial_epoch/step doesn't affect the lr scheduler, which means the lr
scheduler will start from 0 without `--resume`.
--initial_step INITIAL_STEP
initial step number including all epochs, 0 means first step (same as not
specifying). overwrites initial_epoch.
--cache_text_encoder_outputs
cache text encoder outputs
--cache_text_encoder_outputs_to_disk
cache text encoder outputs to disk
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment