Skip to content

Instantly share code, notes, and snippets.

@NickyDark1
NickyDark1 / grpo_demo.py
Created February 7, 2025 14:24 — forked from cgpeter96/grpo_demo.py
A GRPO modification for DeepSpeed on multi-GPU, from https://gist.github.com/willccbb/4676755236bb08cab5f4e54a0475d6fb
# train_grpo.py
from typing import *
import re
import torch
from datasets import load_dataset, Dataset, load_from_disk
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer, TrlParser
from dataclasses import dataclass, field
@NickyDark1
NickyDark1 / grpo_demo.py
Created January 30, 2025 23:29 — forked from infoslack/grpo_demo.py
Group Relative Policy Optimization (GRPO) implementation
# This implementation is based on the paper: https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf
#
# pip install torch transformers
# python grpo_demo.py
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
@NickyDark1
NickyDark1 / GPT4all-langchain-demo.ipynb
Created January 28, 2025 21:01 — forked from abodacs/GPT4all-langchain-demo.ipynb
Example of running GPT4all local LLM via langchain in a Jupyter notebook (Python)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#pip install git+https://github.com/huggingface/transformers.git
import datetime
import sys
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0)
sampling_rate = pipe.feature_extractor.sampling_rate
@NickyDark1
NickyDark1 / whisper-static-cache.ipynb
Created January 28, 2025 20:59 — forked from abodacs/whisper-static-cache.ipynb
example of whisper static cache
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@NickyDark1
NickyDark1 / grpo_demo.py
Created January 26, 2025 21:25 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
import re
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# Load and prep dataset
SYSTEM_PROMPT = """
ContractAddress TokenName TokenSymbol holder count transfer count # of times appears in list notes
0x420b595d8b648971b3bfcf46e66544c384860536 VenmoCash VMO 1 6 2
0xdeeb40536e94be7226b77fb89d7d3cd65a82fb85 Zoom Protocol ZOM 1 9 2
0xe670848d54788997942ecf938cd23b09550bae73 TARO TARO 1 4 2
0xf28fec34928a1dc19b650104ae082665b66f720e ETH/BTC Long-Only Alpha XTF.SWCEBL 1 4 2
0x030385efc63ebda6021d9098b1fcc422547d83d3 Tacos @ Taconomics.io $TACO 2 5 2
0x03bb9bbf0423e44370e88ec5fc31eecf4e2b4ac2 STVKE.Network STV 2 9 2
0x05e850909664a3cf926ca4777c3ec1577d36ec18 OnFlow Flow 2 8 2
0x06ca771a689d6d5f5e435be2ef1d1ffc6bdb3b4c Wing Token WING 2 8 2
0x08a958bdc9e0beb0c3ee2ec6e9c0013f14ce66e5 Harold Returns KEKW 2 6 2
@NickyDark1
NickyDark1 / sft_trainer.py
Created October 19, 2023 00:19 — forked from lewtun/sft_trainer.py
Fine-tuning Mistral 7B with TRL & DeepSpeed ZeRO-3
# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-V1.0. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
#
# Usage:
# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
# - Install deepspeed: `pip install deepspeed==0.9.5`
# - Install TRL from main: `pip install git+https://github.com/huggingface/trl.git`
# - Clone the repo: `git clone https://github.com/huggingface/trl.git`
# - Copy this Gist into trl/examples/scripts
# - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py
@NickyDark1
NickyDark1 / finetune_sft_trl.py
Created August 30, 2023 06:07 — forked from younesbelkada/finetune_sft_trl.py
Benchmarking SFT trainer with 8bit models
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@NickyDark1
NickyDark1 / finetune_mpt30b_guanaco.py
Created August 30, 2023 06:04 — forked from younesbelkada/finetune_mpt30b_guanaco.py
Fine tune MPT-30B on Guanaco dataset and turn it into a chatbot - read the docstrings to install the correct versions of the required libraries.
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software