title | tags | |||
---|---|---|---|---|
The Best Cloud GPU Providers for Artificial Intelligence & Machine Learning |
|
import torch | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib import colormaps | |
from matplotlib.animation import FuncAnimation | |
from mpl_toolkits.mplot3d import Axes3D | |
from transformers import GPT2LMHeadModel, GPT2Tokenizer | |
from sklearn.decomposition import PCA | |
# === CONFIG === |
# the "verifiers" repository is a clean implementation of templated GRPO reinforcement learning training environments | |
# this is a generic set of "install from scratch" commands complete with a deepspeed z3 config that i have been using when i spin up nodes | |
# it will run on the gsm8k example w/ default batch size & generation size (8), and the 8th GPU is used for vllm generations | |
# qwen 14b full finetuning will run on this configuration too without LoRA or CUDA OOM, at least for the gsm8k task's context sizes + generation lengths | |
# hyperparameters are controlled by `verifiers/utils/config_utils.py`; i have been preferring extreme grad clipping (between 0.001 and 0.01) and low beta (under 0.01) | |
# NOTE FEB 27: examples have moved into `verifiers/examples` not `/examples` | |
cd /root | |
mkdir boom |
# train_grpo.py | |
from typing import * | |
import re | |
import torch | |
from datasets import load_dataset, Dataset, load_from_disk | |
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments | |
from peft import LoraConfig | |
from trl import GRPOConfig, GRPOTrainer, TrlParser | |
from dataclasses import dataclass, field |
import tqdm | |
import numpy as np | |
import torch | |
import torch.distributed as dist | |
import transformers | |
def extract_xml_answer(text: str) -> str:
    """Return the contents of the last ``<final_answer>`` element in *text*.

    The text following the last ``<final_answer>`` opening tag is taken,
    truncated at the first ``</final_answer>`` closing tag that follows,
    and stripped of surrounding whitespace.

    Missing tags are tolerated: without an opening tag the search starts
    at the beginning of *text*, and without a closing tag it runs to the
    end, so untagged input is returned simply stripped.
    """
    # rpartition yields ("", "", text) when the tag is absent, which keeps
    # the whole input -- the same result as ``split(tag)[-1]``.
    _, _, after_open = text.rpartition("<final_answer>")
    # partition yields (text, "", "") when the tag is absent, which keeps
    # everything -- the same result as ``split(tag)[0]``.
    answer, _, _ = after_open.partition("</final_answer>")
    return answer.strip()
# train_grpo.py | |
# | |
# See https://github.com/willccbb/verifiers for ongoing developments | |
# | |
""" | |
citation: | |
@misc{brown2025grpodemo, | |
title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models}, | |
author={Brown, William}, |
# Works on OS X, with conda installed. | |
# Create conda environment for PyTorch Geometric | |
echo "Creating pyg environment" | |
conda create -n pyg python=3.6 | |
echo "Activate pyg Env" | |
source activate pyg | |
# PyTorch Conda Installation |
# MongoDB 3.2.x Replica Sets on AWS EC2

A MongoDB replica set provides a mechanism for running a reliable database service. The basic replica set consists of three servers: a primary, a secondary and an arbitrator (called an *arbiter* in MongoDB's documentation). The primary and secondary both hold a copy of the data. The arbitrator is normally a low-spec server which just monitors the other servers and helps with the failover process. In production, there can be more than three servers.
To set up MongoDB as a replica set on Amazon Web Services EC2, you first need to set up a security group that allows SSH on port 22 and MongoDB on port 27017. You then need to create three servers. Select Ubuntu 14.04 LTS x64, with a micro instance (or bigger, depending on your database size; ideally you should have enough memory to match your database size) for the primary and secondary, and a nano instance for the arbitrator.
## Adjust the File System on Each Server

The operating system by default will update the last access time on a file. In a high data throughput database application