ncoop57 · September 8, 2024 16:18
diff --git a/tinyprograms-lora-8b.yml b/tinyprograms-lora-8b.yml
 # LoRA fine-tuning configuration for NousResearch/Meta-Llama-3-8B.
 # NOTE(review): key names look like Axolotl's config schema - confirm the consumer.
 # Keys written without a value parse as null, i.e. they are left unset.

 # Base model and how it is loaded.
 base_model: NousResearch/Meta-Llama-3-8B
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer

 load_in_8bit: true
 load_in_4bit: false
 strict: false

 # Training data.
 # `datasets` used to be defined twice. Duplicate mapping keys are invalid YAML
 # and most parsers silently keep only the last definition, so the earlier entry
 # is preserved below as a comment and the effective value is unchanged.
 # NOTE(review): confirm which dataset entry is actually intended.
 # datasets:
 #  - path: answerdotai/tiny_programs_haiku3_critiques
 #    type: alpaca
 datasets:
  - path: repo
    type:
      system_prompt: ""
      field_instruction: requirements
      field_output: code
      format: "[INST] {instruction} [/INST]"
      no_input_format: "[INST] {instruction} [/INST]"
 dataset_prepared_path:
 val_set_size: 0.05
 output_dir: ./outputs/lora-out

 # Sequence length and packing.
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
 pad_to_sequence_len: true

 # LoRA adapter settings.
 adapter: lora
 lora_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_linear: true
 lora_fan_in_fan_out:
 lora_modules_to_save:
  - embed_tokens
  - lm_head

 # wandb_* settings are all left unset.
 wandb_project:
 wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:

 # Batch size, epochs, optimizer and learning-rate schedule.
 gradient_accumulation_steps: 4
 micro_batch_size: 2
 num_epochs: 4
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002

 # Loss masking, batching and numeric precision.
 train_on_inputs: false
 group_by_length: false
 bf16: auto
 fp16:
 tf32: false

 # Checkpointing, logging and attention implementation.
 gradient_checkpointing: true
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
 xformers_attention:
 flash_attention: true
 s2_attention:

 # Warmup, evaluation and save cadence; distributed options are unset.
 warmup_steps: 10
 evals_per_epoch: 4
 eval_table_size:
 eval_max_new_tokens: 128
 saves_per_epoch: 1
 debug:
 deepspeed:
 weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
   # Quoted so the angle brackets and pipes are always read as a literal string.
   pad_token: '<|end_of_text|>'
 # LoRA fine-tuning configuration for NousResearch/Meta-Llama-3-8B.
 # NOTE(review): key names look like Axolotl's config schema - confirm the consumer.
 # Keys written without a value parse as null, i.e. they are left unset.

 # Base model and how it is loaded.
 base_model: NousResearch/Meta-Llama-3-8B
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer

 load_in_8bit: true
 load_in_4bit: false
 strict: false

 # Training data.
 # `datasets` used to be defined twice. Duplicate mapping keys are invalid YAML
 # and most parsers silently keep only the last definition, so the earlier entry
 # is preserved below as a comment and the effective value is unchanged.
 # NOTE(review): confirm which dataset entry is actually intended.
 # datasets:
 #  - path: answerdotai/tiny_programs_haiku3_critiques
 #    type: alpaca
 datasets:
  - path: repo
    type:
      system_prompt: ""
      field_instruction: requirements
      field_output: code
      format: "[INST] {instruction} [/INST]"
      no_input_format: "[INST] {instruction} [/INST]"
 dataset_prepared_path:
 val_set_size: 0.05
 output_dir: ./outputs/lora-out

 # Sequence length and packing.
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
 pad_to_sequence_len: true

 # LoRA adapter settings.
 adapter: lora
 lora_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_linear: true
 lora_fan_in_fan_out:
 lora_modules_to_save:
  - embed_tokens
  - lm_head

 # wandb_* settings are all left unset.
 wandb_project:
 wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:

 # Batch size, epochs, optimizer and learning-rate schedule.
 gradient_accumulation_steps: 4
 micro_batch_size: 2
 num_epochs: 4
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002

 # Loss masking, batching and numeric precision.
 train_on_inputs: false
 group_by_length: false
 bf16: auto
 fp16:
 tf32: false

 # Checkpointing, logging and attention implementation.
 gradient_checkpointing: true
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
 xformers_attention:
 flash_attention: true
 s2_attention:

 # Warmup, evaluation and save cadence; distributed options are unset.
 warmup_steps: 10
 evals_per_epoch: 4
 eval_table_size:
 eval_max_new_tokens: 128
 saves_per_epoch: 1
 debug:
 deepspeed:
 weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
   # Quoted so the angle brackets and pipes are always read as a literal string.
   pad_token: '<|end_of_text|>'
No results found