Skip to content

Instantly share code, notes, and snippets.

View kykim0's full-sized avatar

kykim0

View GitHub Profile
@kykim0
kykim0 / main.py
Created September 1, 2024 07:40
Custom reward model for PPO trainer
"""PPO v2 trainer."""
import logging
import random
from accelerate import PartialState
from datasets import load_dataset
import torch
from transformers import (
AutoModelForCausalLM,
@kykim0
kykim0 / main.py
Last active August 27, 2024 04:05
Custom reward function
import shutil
from accelerate import PartialState
from datasets import load_dataset
import torch
from transformers import (
AutoModelForCausalLM,
AutoModelForSequenceClassification,
AutoTokenizer,
HfArgumentParser,
@kykim0
kykim0 / conversation.py
Last active April 4, 2025 12:31
Llama3 custom
"""
Conversation prompt templates.
We kindly request that you import fastchat instead of copying this file if you wish to use it.
If you have any changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
"""
import base64
import dataclasses
from enum import auto, IntEnum
@kykim0
kykim0 / configs.yaml
Last active May 14, 2024 09:25
AlpacaEval custom model config
gemma-7b-sft:
prompt_template: "gemma-7b-sft/prompt.txt"
fn_completions: "huggingface_local_completions"
completions_kwargs:
model_name: "kykim0/gemma-7b-ultrachat-sft"
model_kwargs:
torch_dtype: 'bfloat16'
max_new_tokens: 512
temperature: 0.7
top_p: 1.0
class RewardTrainer(Trainer):
r"""
The RewardTrainer can be used to train your custom Reward Model. It is a subclass of the
`transformers.Trainer` class and inherits all of its attributes and methods. It is recommended to use
an `AutoModelForSequenceClassification` as the reward model. The reward model should be trained on a dataset
of paired examples, where each example is a tuple of two sequences. The reward model should be trained to
predict which example in the pair is more relevant to the task at hand.
The reward trainer expects a very specific format for the dataset. The dataset should contain at least four entries
if you don't use the default `RewardDataCollatorWithPadding` data collator. The entries should be named
@kykim0
kykim0 / reward_modeling.py
Last active February 27, 2024 08:08
reward modeling
# Reward modeling on preference data.
from collections import defaultdict
import logging
import os
from random import sample
import sys
from alignment import (
DataArguments,
@kykim0
kykim0 / eval_reward.py
Created February 14, 2024 09:00
run_ppo.py
"""Process inference output files."""
from collections import defaultdict
import csv
import glob
import json
import os
from fastchat.llm_judge.common import load_questions
from fastchat.model import get_conversation_template
@kykim0
kykim0 / eval_reward.py
Last active February 6, 2024 06:56
eval_reward.py
"""Process inference output files."""
from collections import defaultdict
import csv
import glob
import json
import os
from fastchat.llm_judge.common import load_questions
from fastchat.model import get_conversation_template
@kykim0
kykim0 / fastchat_eval.py
Created January 31, 2024 02:34
PPOTrainer with periodic eval
import json
import os
import random
import time
import shortuuid
import torch
from tqdm import tqdm
from fastchat.llm_judge.common import load_questions, temperature_config