Thomas Wolf (thomwolf) · GitHub Gists
@thomwolf
thomwolf / gpt-2-main-forward.py
Created August 9, 2019 09:25
Main forward pass for GPT-2
import torch

def forward(self, input_ids):
    # Build position ids [0, 1, ..., seq_len - 1], one row per sequence in the batch
    position_ids = torch.arange(0, input_ids.size(-1), dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    # Sum token embeddings (wte) and learned position embeddings (wpe)
    hidden_states = self.wte(input_ids) + self.wpe(position_ids)
    hidden_states = self.drop(hidden_states)
    # Run the stack of Transformer blocks
    for block in self.h:
        hidden_states = block(hidden_states)
    # Final layer norm
    hidden_states = self.ln_f(hidden_states)
    return hidden_states
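This forward pass assumes a module exposing wte, wpe, drop, h, and ln_f. A minimal sketch of such a container, with GPT-2-small hyperparameters as defaults and identity stand-ins for the Transformer blocks (the real blocks, attention plus MLP, are not shown in the gist):

import torch.nn as nn

class GPT2Model(nn.Module):
    """Hypothetical container matching the attributes the forward pass uses."""
    def __init__(self, vocab_size=50257, n_positions=1024, n_embd=768, n_layer=12, dropout=0.1):
        super().__init__()
        self.wte = nn.Embedding(vocab_size, n_embd)   # token embeddings
        self.wpe = nn.Embedding(n_positions, n_embd)  # position embeddings
        self.drop = nn.Dropout(dropout)
        # Stand-ins for the n_layer Transformer blocks of the real model
        self.h = nn.ModuleList([nn.Identity() for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)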
@thomwolf
thomwolf / loading-weights-gpt-2.py
Last active February 8, 2023 19:01
Loading TensorFlow weights in a PyTorch model
import os
import re
import numpy as np
import tensorflow as tf

model = MyPyTorchGPT2()  # the uninitialized PyTorch model we created earlier

# Retrieve weights from the TF checkpoint
tf_path = os.path.abspath(gpt2_checkpoint_path)
init_vars = tf.train.list_variables(tf_path)
tf_vars = []
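The preview cuts off before the weights are actually read. A plausible continuation using only tf.train.load_variable, following the tf_vars name from the snippet:

# Read every variable in the checkpoint into a NumPy array
for name, shape in init_vars:
    print(f"Loading TF weight {name} with shape {shape}")
    array = tf.train.load_variable(tf_path, name)
    tf_vars.append((name, array.squeeze()))

Each (name, array) pair then has to be matched to the corresponding PyTorch parameter, typically by walking the module tree with the '/'-separated TF variable name; that mapping is checkpoint-specific and not shown here.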
@thomwolf
thomwolf / comparing-hidden-states.py
Created August 9, 2019 11:18
Compare the hidden-states of the TensorFlow and PyTorch models
import numpy as np

# Get the TensorFlow and PyTorch hidden-states as NumPy arrays
# (assumption: tf_hidden_states is the output tensor fetched from the TF graph)
tensorflow_hidden_states = sess.run(tf_hidden_states, feed_dict=feed_dict)
pytorch_hidden_states = pytorch_model(inputs)
pytorch_hidden_states = pytorch_hidden_states.cpu().detach().numpy()

# Compute the maximum absolute difference between the hidden-states.
# It should be below 1e-3, and is typically around 1e-5 to 1e-6.
max_absolute_diff = np.amax(np.abs(tensorflow_hidden_states - pytorch_hidden_states))
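A quick way to turn this comparison into a pass/fail check, using the threshold from the comment above:

assert max_absolute_diff < 1e-3, f"Conversion diverges: max diff {max_absolute_diff:.2e}"
print(f"Max absolute difference between TF and PyTorch hidden-states: {max_absolute_diff:.2e}")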
@thomwolf
thomwolf / knowledge_distilation.py
Last active July 12, 2022 22:21
Knowledge Distillation
import torch
import torch.nn as nn
from torch.optim import Optimizer

# KL divergence between the softened student and teacher distributions
KD_loss = nn.KLDivLoss(reduction='batchmean')

def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()    # the teacher is frozen for the step
    student.train()   # only the student is being trained
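The preview ends here. A plausible completion of the step, assuming both models return logits directly; the temperature-scaled log-softmax/softmax pairing is the standard distillation recipe, not necessarily the gist's exact code:

import torch.nn.functional as F

def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()
    student.train()
    with torch.no_grad():
        logits_t = teacher(inputs)   # teacher logits, no gradient
    logits_s = student(inputs)       # student logits
    # Soften both distributions with the temperature, then match them with KL
    loss = KD_loss(F.log_softmax(logits_s / temperature, dim=-1),
                   F.softmax(logits_t / temperature, dim=-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()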
@thomwolf
thomwolf / loading_wikipedia.py
Last active January 12, 2025 13:34
Load the full English Wikipedia dataset with the HuggingFace datasets library (formerly nlp)
import os
import psutil
import timeit
from datasets import load_dataset

mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20  # resident set size, in MB
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")

s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
    batch = wiki[i:i + batch_size]  # (preview truncated here; loop body is a plausible reconstruction)
"""
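The string s is a statement for timeit; a sketch of how the gist presumably benchmarks batched iteration over the memory-mapped dataset (number=1 and globals=globals() are assumptions):

elapsed = timeit.timeit(stmt=s, number=1, globals=globals())
print(f"Time to iterate over the dataset in batches of 1000: {elapsed:.1f} sec")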
@thomwolf
thomwolf / fast_speech_text_speech.py
Last active January 14, 2025 12:13
speech to text to speech
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from openai import OpenAI
import time
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from dataclasses import dataclass
from typing import Optional
import random
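The preview stops at the imports. A hypothetical sketch (not from the gist) of the chat round-trip the OpenAI import suggests, pointed at a local LM Studio/Ollama server; the base_url, api_key, and model name are assumptions:

client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")  # local server; key is unused

def chat(transcribed_text: str) -> str:
    # Send the Whisper-transcribed speech to the local model and return its reply
    response = client.chat.completions.create(
        model="local-model",  # assumption: the local server maps or ignores this name
        messages=[{"role": "user", "content": transcribed_text}],
    )
    return response.choices[0].message.content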