Skip to content

Instantly share code, notes, and snippets.

View LiuHao-THU's full-sized avatar
🌴
On vacation

LiuHao-THU

🌴
On vacation
  • Tsinghua University
  • Shanghai China
View GitHub Profile
@infoslack
infoslack / grpo_demo.py
Created January 27, 2025 17:59
Group Relative Policy Optimization (GRPO) implementation
# This implementation is based on the DeepSeek-R1 paper:
# https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf
#
# Install dependencies: pip install torch transformers
# Run the demo:         python grpo_demo.py
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
@ozancaglayan
ozancaglayan / weightedsampler.py
Created October 31, 2017 17:37
WeightedBatchSampler for PyTorch
class WeightedBatchSampler(Sampler):
def __init__(self, n_elems, batch_size,
initial_p=None, epoch_p_reset=False):
self.n_elems = n_elems
self.batch_size = batch_size
self.epoch_p_reset = epoch_p_reset
self.n_batches = math.ceil(self.n_elems / self.batch_size)
if initial_p is None: