😃

Costa Huang vwxyzjn

😃

vwxyzjn / cross_entropy_loss_and_logprob.py

Last active July 7, 2025 15:43

	import torch

	# Fix the global RNG seed so the random tensor below is reproducible.
	torch.manual_seed(42)

	B, T, D = 1, 4, 10  # batch size, sequence length, vocab size
	# Uniform random values in [0, 1) with shape (B, T, D); gradients are tracked.
	tensor = torch.rand(B, T, D, requires_grad=True)
	# One integer label per (batch, position) entry, shape (B, T). Built directly
	# as int64 rather than creating a float tensor and casting it afterwards.
	labels = torch.tensor([[1, 2, 3, 4]], dtype=torch.long)

	# Visual separator in the console output.
	print("=" * 100)

vwxyzjn / convert.py

Created June 23, 2025 16:06

	"""
	# Convert HF to TorchTitan DCP
	python convert.py hf_to_dcp --input-path meta-llama/Meta-Llama-3.1-8B --output-path ./torchtitan_model

	# Convert TorchTitan DCP to HF (works with any checkpoint structure)
	python convert.py dcp_to_hf --input-path ./torchtitan_model --output-path ./hf_model

	# Model structure
	If you run the following code, you will get the model structure.

vwxyzjn / req.txt

Created June 16, 2025 15:21

vwxyzjn / gradio_vllm.py

Created May 1, 2025 14:15

	# SPDX-License-Identifier: Apache-2.0
	"""Example for starting a Gradio OpenAI Chatbot Webserver
	Start vLLM API server:
	vllm serve allenai/OLMo-2-0425-1B-Instruct

	Start Gradio OpenAI Chatbot Webserver:
	python x1.py -m allenai/OLMo-2-0425-1B-Instruct --model-url http://ceres-cs-aus-441:8000/v1

	Note that `pip install --upgrade gradio` is needed to run this example.
	More details: https://github.com/gradio-app/gradio

vwxyzjn / olmo1b_grpo_sweep.sh

Created May 1, 2025 13:58


	for seed in 1 2; do
	for lr in 5e-7 7e-7 9e-7; do
	python update_command_args.py scripts/train/olmo2/grpo_7b.sh \
	--priority urgent \
	--workspace ai2/olmo-instruct \
	--exp_name 0423_grpo_seed_${seed}_lr_${lr} \
	--model_name_or_path allenai/OLMo-2-0425-1B-DPO \
	--model_revision main \
	--tokenizer_name_or_path allenai/OLMo-2-1124-7B-DPO \

vwxyzjn / releasemd.py

Created April 29, 2025 20:44

	TEMPLATE = """
	---
	license: apache-2.0
	language:
	- en
	datasets:
	- {{dataset}}
	base_model:
	- {{base_model}}
	pipeline_tag: text-generation

vwxyzjn / temp_sampling.py

Created April 7, 2025 21:31

	import numpy as np


	def stable_softmax(x, axis=None):
	    """Compute a numerically stable softmax (after ``scipy.special.softmax``).

	    The maximum along ``axis`` is subtracted before exponentiating, so every
	    exponent is <= 0 and ``np.exp`` cannot overflow. The shift cancels out in
	    the final ratio.

	    Args:
	        x: Array-like input values.
	        axis: Axis (or axes) to normalize over; ``None`` normalizes over
	            all elements.

	    Returns:
	        Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
	    """
	    shifted = x - np.max(x, axis=axis, keepdims=True)
	    weights = np.exp(shifted)
	    normalizer = np.sum(weights, axis=axis, keepdims=True)
	    return weights / normalizer

	def get_prob(arr: np.ndarray, temp: float) -> np.ndarray:

vwxyzjn / drgrpo.py

Created April 1, 2025 12:43

	# Copyright 2024 AllenAI. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,

vwxyzjn / benchmark.py

Created March 20, 2025 12:09

	from collections import deque
	import queue
	import time
	import numpy as np
	import ray
	from vllm import SamplingParams, LLM
	import wandb
	from open_instruct.dataset_transformation import TokenizerConfig, get_cached_dataset_rlvr
	from open_instruct.vllm_utils3 import create_vllm_engines
	from transformers import HfArgumentParser

vwxyzjn / sft_vs_reinforce.py

Last active February 21, 2025 04:35

	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch
	import torch.nn.functional as F

	# Load the pretrained SmolLM2-135M causal language model and its tokenizer.
	model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")
	tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
	# Register "<PAD>" as the tokenizer's padding token.
	# NOTE(review): if "<PAD>" is a new vocabulary entry, the model's embedding
	# table may need resizing; no resize is visible in this excerpt. Confirm.
	tokenizer.add_special_tokens({"pad_token": "<PAD>"})
	# Select CUDA when available, otherwise CPU...
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	# ...but the next line unconditionally overrides that choice, so the model
	# always runs on CPU. NOTE(review): looks like a debugging override; confirm
	# whether it is intentional before removing either assignment.
	device = torch.device("cpu")
	model.to(device)