Skip to content

Instantly share code, notes, and snippets.

View Nyx2022's full-sized avatar

Nyx2025 Nyx2022

View GitHub Profile
@Nyx2022
Nyx2022 / r1.py
Created August 10, 2025 01:58 — forked from vgel/r1.py
Script to run DeepSeek-R1 with a min-thinking-tokens parameter, replacing </think> with a random continuation string to extend the model's chain of thought.
import argparse
import random
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache
import torch
parser = argparse.ArgumentParser()
parser.add_argument("question", type=str)
parser.add_argument(
import base64
import re
import xml.dom.minidom
import json
import uuid
import struct
import string
import random
import hashlib
import time