This was a full fine-tune of llama-2-13b-hf using the dataset https://huggingface.co/datasets/jondurbin/airoboros-gpt4-2.0
Convert the JSONL (newline-delimited JSON strings) into the conversational format that FastChat expects:
```python
import re
import json
import uuid

inputs = [json.loads(line) for line in open("instructions.jsonl").readlines()]

def split_response(instruction, response):
    # Single-turn row: just one human/gpt pair.
    if '</s>' not in response:
        return [
            {
                "from": "human",
                "value": instruction,
            },
            {
                "from": "gpt",
                "value": response,
            },
        ]

    # Multi-turn row: turns are separated by '</s>', and each follow-up
    # turn is expected to look like "USER: ... ASSISTANT: ...".
    parts = response.split('</s>')
    user = [instruction]
    assistant = []
    for idx in range(len(parts)):
        part = parts[idx]
        if idx == 0:
            assistant.append(part)
            continue
        match = re.match(r'^\s*USER:(.*?)ASSISTANT:(.*)\s*$', part, re.DOTALL)
        if not match:
            return None
        user.append(match.group(1).strip())
        assistant.append(match.group(2).strip())
    conv = []
    for idx in range(len(user)):
        conv.append({
            "from": "human",
            "value": user[idx],
        })
        conv.append({
            "from": "gpt",
            "value": assistant[idx]
        })
    return conv

conversations = []
for row in inputs:
    conversation = split_response(row['instruction'], row['response'])
    if not conversation:
        print("Bad format, skipping...")
        continue
    conversations.append({
        "id": str(uuid.uuid4()),
        "conversations": conversation,
    })

with open("as_conversations.json", "w") as outfile:
    outfile.write(json.dumps(conversations, indent=2))
```
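For reference, here is a sketch with made-up content showing the multi-turn shape the script expects and what split_response returns for it (actual dataset rows differ, of course):

```python
# Hypothetical multi-turn row: follow-up turns live inside the response,
# separated by '</s>' and written as "USER: ... ASSISTANT: ...".
row = {
    "instruction": "Name a prime number below 10.",
    "response": "7</s>USER: And one below 5? ASSISTANT: 3",
}

# split_response(row['instruction'], row['response']) yields:
# [
#     {"from": "human", "value": "Name a prime number below 10."},
#     {"from": "gpt", "value": "7"},
#     {"from": "human", "value": "And one below 5?"},
#     {"from": "gpt", "value": "3"},
# ]
```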
The fine-tune was done with FastChat (and flash attention 2.0.1). Two small changes to FastChat are needed: use the airoboros conversation template instead of vicuna, and save the model with trainer.save_model:
```diff
diff --git a/fastchat/train/train.py b/fastchat/train/train.py
index 0fa855e..948bac3 100644
--- a/fastchat/train/train.py
+++ b/fastchat/train/train.py
@@ -82,7 +82,7 @@ def preprocess(
     sources,
     tokenizer: transformers.PreTrainedTokenizer,
 ) -> Dict:
-    conv = get_conversation_template("vicuna")
+    conv = get_conversation_template("airoboros_v1")
     roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
 
     # Apply prompt templates
@@ -272,7 +272,8 @@ def train():
         trainer.train()
     model.config.use_cache = True
     trainer.save_state()
-    safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir)
+    trainer.save_model(output_dir=training_args.output_dir)
+    #safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir)
 
 
 if __name__ == "__main__":
```
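As a quick sanity check (a sketch, not part of the original recipe), you can render a prompt with the patched template from inside the FastChat checkout to confirm the airoboros_v1 name resolves and uses the USER:/ASSISTANT: roles the data was converted for:

```python
# Sketch: render a prompt with the same template name the patch uses.
from fastchat.model.model_adapter import get_conversation_template

conv = get_conversation_template("airoboros_v1")
conv.append_message(conv.roles[0], "Name a prime number below 10.")
conv.append_message(conv.roles[1], None)
print(conv.get_prompt())  # system prompt followed by "USER: ... ASSISTANT:"
```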
This was done with 4x 80GB A100 GPUs, using the DeepSpeed config below (saved as deepspeed.json):
```json
{
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "bf16": {
    "enabled": true
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu"
    },
    "contiguous_gradients": true,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "allgather_bucket_size": 5e8
  }
}
```
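A rough back-of-the-envelope (assuming standard Adam-style optimizer state; these are estimates, not measured numbers) shows why ZeRO stage 2 plus CPU optimizer offload fits a 13B full fine-tune on 4x 80GB cards:

```python
# Rough per-GPU memory estimate for a 13B-parameter full fine-tune under
# ZeRO stage 2 with bf16 and CPU optimizer offload (illustrative only;
# ignores activations, buffers, and fragmentation).
params = 13e9
gpus = 4

weights_gb = params * 2 / 1e9        # bf16 weights, replicated per GPU: ~26 GB
grads_gb = params * 2 / gpus / 1e9   # bf16 gradients, sharded by ZeRO-2: ~6.5 GB
optim_gb = params * 12 / gpus / 1e9  # fp32 master weights + Adam moments,
                                     # sharded and offloaded to CPU RAM: ~39 GB/rank
print(f"{weights_gb:.1f} {grads_gb:.1f} {optim_gb:.1f}")
```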
The training command:

```bash
export WANDB_API_KEY=[redacted]
export BASE_MODEL=llama-2-13b-hf
export WANDB_PROJECT=airoboros-l2-13b-gpt4-m2.0
export PYTHONPATH=FastChat

deepspeed --num_nodes=1 --num_gpus=4 FastChat/fastchat/train/train_mem.py \
  --model_name_or_path $BASE_MODEL \
  --data_path as_conversations.json \
  --output_dir $WANDB_PROJECT \
  --num_train_epochs 3 \
  --evaluation_strategy "no" \
  --save_steps 100 \
  --save_total_limit 1 \
  --learning_rate 2e-5 \
  --weight_decay 0. \
  --warmup_ratio 0.04 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --bf16 \
  --per_device_train_batch_size 8 \
  --deepspeed deepspeed.json \
  --model_max_length 4096 \
  --gradient_checkpointing True \
  --lazy_preprocess True
```
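One detail worth spelling out (an observation about the flags above, not a logged value): with gradient_accumulation_steps left on "auto" in deepspeed.json, the effective batch size per optimizer step is at least the per-device batch times the GPU count.

```python
# Effective batch size implied by the launch flags, assuming DeepSpeed
# resolves "auto" gradient accumulation to 1.
per_device_train_batch_size = 8
num_gpus = 4
gradient_accumulation_steps = 1  # "auto" in deepspeed.json

effective_batch = per_device_train_batch_size * num_gpus * gradient_accumulation_steps
print(effective_batch)  # 32 sequences, each up to --model_max_length 4096 tokens
```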