Thomas Wolf (thomwolf) · GitHub Gists
@thomwolf
thomwolf / gpt-2-main-forward.py
Created August 9, 2019 09:25
Main forward pass for GPT-2
import torch

def forward(self, input_ids):
    # Build position ids [0, 1, ..., seq_len - 1], one row per sequence in the batch
    position_ids = torch.arange(0, input_ids.size(-1), dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    # Sum token embeddings (wte) and learned position embeddings (wpe)
    hidden_states = self.wte(input_ids) + self.wpe(position_ids)
    hidden_states = self.drop(hidden_states)
    # Run the stack of Transformer blocks
    for block in self.h:
        hidden_states = block(hidden_states)
    # Final layer norm
    hidden_states = self.ln_f(hidden_states)
    return hidden_states
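This forward pass assumes a module exposing wte, wpe, drop, h, and ln_f. A minimal sketch of such a container, with GPT-2-small hyperparameters as defaults and identity stand-ins for the Transformer blocks (the real blocks, attention plus MLP, are not shown in the gist):

import torch.nn as nn

class GPT2Model(nn.Module):
    """Hypothetical container matching the attributes the forward pass uses."""
    def __init__(self, vocab_size=50257, n_positions=1024, n_embd=768, n_layer=12, dropout=0.1):
        super().__init__()
        self.wte = nn.Embedding(vocab_size, n_embd)   # token embeddings
        self.wpe = nn.Embedding(n_positions, n_embd)  # position embeddings
        self.drop = nn.Dropout(dropout)
        # Stand-ins for the n_layer Transformer blocks of the real model
        self.h = nn.ModuleList([nn.Identity() for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)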
@thomwolf
thomwolf / loading-weights-gpt-2.py
Last active February 8, 2023 19:01
Loading TensorFlow weights in a PyTorch model
import os
import re
import numpy as np
import tensorflow as tf

model = MyPyTorchGPT2()  # the uninitialized PyTorch model we created earlier

# Retrieve weights from the TF checkpoint
tf_path = os.path.abspath(gpt2_checkpoint_path)
init_vars = tf.train.list_variables(tf_path)
tf_vars = []
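The preview cuts off before the weights are actually read. A plausible continuation using only tf.train.load_variable, following the tf_vars name from the snippet:

# Read every variable in the checkpoint into a NumPy array
for name, shape in init_vars:
    print(f"Loading TF weight {name} with shape {shape}")
    array = tf.train.load_variable(tf_path, name)
    tf_vars.append((name, array.squeeze()))

Each (name, array) pair then has to be matched to the corresponding PyTorch parameter, typically by walking the module tree with the '/'-separated TF variable name; that mapping is checkpoint-specific and not shown here.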
@thomwolf
thomwolf / comparing-hidden-states.py
Created August 9, 2019 11:18
Compare the hidden-states of the TensorFlow and PyTorch models
import numpy as np

# Get the TensorFlow and PyTorch hidden-states as NumPy arrays
# (assumption: tf_hidden_states is the output tensor fetched from the TF graph)
tensorflow_hidden_states = sess.run(tf_hidden_states, feed_dict=feed_dict)
pytorch_hidden_states = pytorch_model(inputs)
pytorch_hidden_states = pytorch_hidden_states.cpu().detach().numpy()

# Compute the maximum absolute difference between the hidden-states.
# It should be below 1e-3, and is typically around 1e-5 to 1e-6.
max_absolute_diff = np.amax(np.abs(tensorflow_hidden_states - pytorch_hidden_states))
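A quick way to turn this comparison into a pass/fail check, using the threshold from the comment above:

assert max_absolute_diff < 1e-3, f"Conversion diverges: max diff {max_absolute_diff:.2e}"
print(f"Max absolute difference between TF and PyTorch hidden-states: {max_absolute_diff:.2e}")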
@thomwolf
thomwolf / knowledge_distilation.py
Last active July 12, 2022 22:21
Knowledge Distillation
import torch
import torch.nn as nn
from torch.optim import Optimizer

# KL divergence between the softened student and teacher distributions
KD_loss = nn.KLDivLoss(reduction='batchmean')

def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()    # the teacher is frozen for the step
    student.train()   # only the student is being trained
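The preview ends here. A plausible completion of the step, assuming both models return logits directly; the temperature-scaled log-softmax/softmax pairing is the standard distillation recipe, not necessarily the gist's exact code:

import torch.nn.functional as F

def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()
    student.train()
    with torch.no_grad():
        logits_t = teacher(inputs)   # teacher logits, no gradient
    logits_s = student(inputs)       # student logits
    # Soften both distributions with the temperature, then match them with KL
    loss = KD_loss(F.log_softmax(logits_s / temperature, dim=-1),
                   F.softmax(logits_t / temperature, dim=-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()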
@thomwolf
thomwolf / loading_wikipedia.py
Last active January 12, 2025 13:34
Load the full English Wikipedia dataset with the HuggingFace datasets library (formerly nlp)
import os
import psutil
import timeit
from datasets import load_dataset

mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20  # resident set size, in MB
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")

s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
    batch = wiki[i:i + batch_size]  # (preview truncated here; loop body is a plausible reconstruction)
"""
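The string s is a statement for timeit; a sketch of how the gist presumably benchmarks batched iteration over the memory-mapped dataset (number=1 and globals=globals() are assumptions):

elapsed = timeit.timeit(stmt=s, number=1, globals=globals())
print(f"Time to iterate over the dataset in batches of 1000: {elapsed:.1f} sec")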
@thomwolf
thomwolf / fast_speech_text_speech.py
Last active January 14, 2025 12:13
speech to text to speech
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from openai import OpenAI
import time
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from dataclasses import dataclass
from typing import Optional
import random
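The preview stops at the imports. A hypothetical sketch (not from the gist) of the chat round-trip the OpenAI import suggests, pointed at a local LM Studio/Ollama server; the base_url, api_key, and model name are assumptions:

client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")  # local server; key is unused

def chat(transcribed_text: str) -> str:
    # Send the Whisper-transcribed speech to the local model and return its reply
    response = client.chat.completions.create(
        model="local-model",  # assumption: the local server maps or ignores this name
        messages=[{"role": "user", "content": transcribed_text}],
    )
    return response.choices[0].message.content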