Skip to content

Instantly share code, notes, and snippets.

View kykim0's full-sized avatar

kykim0

View GitHub Profile
@kykim0
kykim0 / main.py
Created September 1, 2024 07:40
Custom reward model for PPO trainer
"""PPO v2 trainer."""
import logging
import random
from accelerate import PartialState
from datasets import load_dataset
import torch
from transformers import (
AutoModelForCausalLM,
@kykim0
kykim0 / main.py
Last active August 27, 2024 04:05
Custom reward function
import shutil
from accelerate import PartialState
from datasets import load_dataset
import torch
from transformers import (
AutoModelForCausalLM,
AutoModelForSequenceClassification,
AutoTokenizer,
HfArgumentParser,
@kykim0
kykim0 / conversation.py
Last active April 4, 2025 12:31
Llama3 custom
"""
Conversation prompt templates.
We kindly request that you import fastchat instead of copying this file if you wish to use it.
If you have any changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
"""
import base64
import dataclasses
from enum import auto, IntEnum
@kykim0
kykim0 / configs.yaml
Last active May 14, 2024 09:25
AlpacaEval custom model config
gemma-7b-sft:
prompt_template: "gemma-7b-sft/prompt.txt"
fn_completions: "huggingface_local_completions"
completions_kwargs:
model_name: "kykim0/gemma-7b-ultrachat-sft"
model_kwargs:
torch_dtype: 'bfloat16'
max_new_tokens: 512
temperature: 0.7
top_p: 1.0
class RewardTrainer(Trainer):
r"""
The RewardTrainer can be used to train your custom Reward Model. It is a subclass of the
`transformers.Trainer` class and inherits all of its attributes and methods. It is recommended to use
an `AutoModelForSequenceClassification` as the reward model. The reward model should be trained on a dataset
of paired examples, where each example is a tuple of two sequences. The reward model should be trained to
predict which example in the pair is more relevant to the task at hand.
The reward trainer expects a very specific format for the dataset. The dataset should contain at least four entries
if you don't use the default `RewardDataCollatorWithPadding` data collator. The entries should be named
@kykim0
kykim0 / reward_modeling.py
Last active February 27, 2024 08:08
reward modeling
# Reward modeling on preference data.
from collections import defaultdict
import logging
import os
from random import sample
import sys
from alignment import (
DataArguments,
@kykim0
kykim0 / eval_reward.py
Created February 14, 2024 09:00
run_ppo.py
"""Process inference output files."""
from collections import defaultdict
import csv
import glob
import json
import os
from fastchat.llm_judge.common import load_questions
from fastchat.model import get_conversation_template
@kykim0
kykim0 / eval_reward.py
Last active February 6, 2024 06:56
eval_reward.py
"""Process inference output files."""
from collections import defaultdict
import csv
import glob
import json
import os
from fastchat.llm_judge.common import load_questions
from fastchat.model import get_conversation_template
@kykim0
kykim0 / fastchat_eval.py
Created January 31, 2024 02:34
PPOTrainer with periodic eval
import json
import os
import random
import time
import shortuuid
import torch
from tqdm import tqdm
from fastchat.llm_judge.common import load_questions, temperature_config