Created May 28, 2023 18:42
This config produces an `Error invalid device ordinal at line 359 in file /mnt/d/training_area/bitsandbytes/csrc/pythonInterface.c` error on an A6000 single-GPU run with axolotl.
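For reference, "invalid device ordinal" generally means a CUDA device index was requested that the process cannot see. A minimal standalone sanity check (not part of axolotl; just a sketch assuming PyTorch is installed) to confirm which GPUs are visible before launching:

```python
# Standalone check of GPU visibility; "invalid device ordinal" usually means
# a device index outside this list was requested (for example via local_rank
# or CUDA_VISIBLE_DEVICES settings).
import os
import torch

print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("visible device count =", torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print(f"  cuda:{i} -> {torch.cuda.get_device_name(i)}")
```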
# Base model used for training
base_model: huggyllama/llama-7b
# Configuration to load for the base model
base_model_config: huggyllama/llama-7b
# Model class used for training
model_type: LlamaForCausalLM
# Tokenizer class used for tokenizing the text data
tokenizer_type: LlamaTokenizer
# Set to true to load the model in 8-bit precision
load_in_8bit: false
# Set to true to load the model in 4-bit precision
load_in_4bit: true
# Set to true to enforce strict loading of the base model's configuration
strict: false
# Set to true to push the dataset to the Hugging Face Hub during training
push_dataset_to_hub:
# Datasets used for training
datasets:
  - path: teknium/GPT4-LLM-Cleaned  # Path to the dataset on the Hugging Face Hub
    type: alpaca  # Prompt format of the dataset
# Path where the prepared dataset is stored
dataset_prepared_path: last_run_prepared
# Size of the validation set as a fraction of the total dataset
val_set_size: 0.02
# Adapter type used during training
adapter: qlora
# Directory containing an existing LoRA model to load
lora_model_dir:
# Maximum sequence length for training examples
sequence_len: 256
# Maximum packed sequence length (if using packed sequences)
max_packed_sequence_len:
# Rank of LoRA's low-rank matrices; higher values increase expressiveness and memory usage
lora_r: 64
# LoRA alpha hyperparameter; scales the adapter updates relative to the base weights
lora_alpha: 32
# LoRA dropout rate during training
lora_dropout: 0.0
# Modules in the base model to target with LoRA adapters (if left blank, defaults apply)
lora_target_modules:
# Set to true to target all linear layers with LoRA adapters
lora_target_linear: true
# Set to true to use fan-in/fan-out initialization for LoRA adapters
lora_fan_in_fan_out:
# Weights & Biases project to log training information to (if left blank, no logging is performed)
wandb_project:
# Set to log gradients and parameters to Weights & Biases during training
wandb_watch:
# Weights & Biases run ID to resume logging from (if left blank, a new run is created)
wandb_run_id:
# Set to log the trained model to Weights & Biases after training
wandb_log_model:
# Directory where trained models and other output files are saved
output_dir: ./qlora-out
batch_size: 2  # Effective batch size per optimizer step
micro_batch_size: 1  # Examples processed per device per forward/backward pass
num_epochs: 3  # Number of passes through the entire dataset
optimizer: paged_adamw_32bit  # Optimizer used during training
torchdistx_path:
lr_scheduler: cosine  # Learning rate scheduler
learning_rate: 0.0002  # Initial learning rate for the optimizer
train_on_inputs: false
group_by_length: false
bf16: false
fp16: true  # Set to false to disable fp16 precision during training
tf32: false
gradient_checkpointing: true  # Set to false to disable gradient checkpointing
early_stopping_patience:
resume_from_checkpoint:
local_rank: 0
logging_steps: 1  # How often logging occurs during training
xformers_attention:
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10  # Number of warmup steps before the learning rate reaches its initial value
eval_steps: 50  # How often evaluation occurs during training
save_steps:
debug:
device_map: auto
deepspeed:
weight_decay: 0.0  # Weight decay hyperparameter for the optimizer
fsdp:
fsdp_config:
special_tokens:  # Special tokens used by the tokenizer
  bos_token: "<s>"  # Beginning-of-sequence token
  eos_token: "</s>"  # End-of-sequence token
  unk_token: "<unk>"  # Unknown token
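For context, here is a rough sketch of what the quantization and LoRA settings above correspond to in plain transformers/peft/bitsandbytes code. This is an illustrative approximation, not the exact code path axolotl executes; the `target_modules` list is an assumption, since axolotl resolves the linear layers itself when `lora_target_linear` is true.

```python
# Approximate equivalent of load_in_4bit + qlora adapter settings from the config above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load_in_4bit: true
    bnb_4bit_compute_dtype=torch.float16,  # fp16: true
)

model = AutoModelForCausalLM.from_pretrained(
    "huggyllama/llama-7b",                 # base_model
    quantization_config=bnb_config,
    device_map="auto",                     # device_map: auto
)
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")  # tokenizer_type: LlamaTokenizer

lora_config = LoraConfig(
    r=64,             # lora_r
    lora_alpha=32,    # lora_alpha
    lora_dropout=0.0, # lora_dropout
    # Assumed module names for LLaMA attention projections; axolotl derives
    # the actual list of linear layers when lora_target_linear is true.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```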