Skip to content

Instantly share code, notes, and snippets.

View vwxyzjn's full-sized avatar
😃

Costa Huang vwxyzjn

😃
View GitHub Profile
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import torch.nn.functional as F
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device = torch.device("cpu")
model.to(device)
import argparse
import numpy as np
p = 100 # padding token id
o = 1 # observation (prompt / input ids)
a = 2 # action (response ids)
queries = [
[p, p, o, o, o],
@vwxyzjn
vwxyzjn / kl1.py
Last active January 31, 2025 17:31
import torch
import torch.nn as nn
import torch.optim as optim
# Create target distribution (fixed)
target_logits = torch.randn(10)
target_log_probs = torch.log_softmax(target_logits, dim=0)
# Create learnable distribution
learnable_logits = nn.Parameter(torch.rand_like(target_logits)) # Initialize randomly
{
"name": "material-ui-nextjs-ts",
"version": "5.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "material-ui-nextjs-ts",
"version": "5.0.0",
"dependencies": {
# Taken and modified from https://github.com/huggingface/trl
# Copyright 2024 The AllenAI Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
import json
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi, snapshot_download
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/olmo/modeling_olmo.py
# Copyright 2024 The vLLM team.
# Copyright 2024 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
python scripts/submit_finetune_job.py \
--cluster ai2/augusta-google-1 \
--priority high \
--workspace ai2/tulu-3-dev \
--num_nodes 4 \
--image costah/open_instruct_ppo_ray_ninja \
--default_beaker_config configs/beaker_configs/default_finetune_multinode.yaml \
--config configs/train_configs/sft/tulu3_8b_preview_mix_v3.9.yaml \
--exp_name olmo1124_finetune
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/olmo/modeling_olmo.py
# Copyright 2024 The vLLM team.
# Copyright 2024 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/olmo/modeling_olmo.py
# Copyright 2024 The vLLM team.
# Copyright 2024 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.