example_axolotl_config_for_dpo_on_tw_chatbot_arena_data
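# Axolotl config: DPO fine-tuning of yentinglin/Llama-3-Taiwan-8B-Instruct
# on the aigrant/tw_chatbot_arena preference dataset, with the result pushed
# to the Hub as yentinglin/Llama-3-Taiwan-8B-Instruct-DPO.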
base_model: yentinglin/Llama-3-Taiwan-8B-Instruct
load_in_8bit: false
load_in_4bit: false
strict: false
hub_model_id: yentinglin/Llama-3-Taiwan-8B-Instruct-DPO
hub_strategy: end
wandb_name: 8b dpo
dataset_processes: 1
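# Train with the DPO objective; the dataset below supplies chosen/rejected
# preference pairs in the Argilla chat format, rendered with the Llama 3
# chat template.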
rl: dpo
datasets:
  - path: aigrant/tw_chatbot_arena
    name: argilla
    split: train
    type: llama3.argilla_chat
chat_template: llama3
dataset_prepared_path: last_run_prepared
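# No held-out split; in-training evaluation is disabled (see evals_per_epoch: 0 below).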
val_set_size: 0
output_dir: ./output/tw-llm/8b/dpo/
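# Each sample is padded to the full 8k-token sequence length.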
sequence_len: 8000
pad_to_sequence_len: true
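# Per-GPU batch of 1 with no accumulation, so the global batch size equals the GPU count.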
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 4
optimizer: adamw_8bit
lr_scheduler: cosine
learning_rate: 5e-5
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
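# Recompute activations to save memory; use_reentrant: false selects PyTorch's
# newer non-reentrant checkpointing path.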
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true
warmup_steps: 100
evals_per_epoch: 0
eval_table_size:
saves_per_epoch: 1
save_total_limit: 100
save_steps:
debug:
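# DeepSpeed ZeRO stage 3 in bf16: shards parameters, gradients, and optimizer
# state across GPUs.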
deepspeed: deepspeed_configs/zero3_bf16.json
weight_decay: 0.05
fsdp:
fsdp_config:
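
# Usage (a minimal sketch, assuming this file is saved as dpo.yml and Axolotl
# is installed with DeepSpeed support; CLI entry points can vary between
# Axolotl versions):
#   python -m axolotl.cli.preprocess dpo.yml
#   accelerate launch -m axolotl.cli.train dpo.yml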