Skip to content

Instantly share code, notes, and snippets.

View eustlb's full-sized avatar

eustlb

  • Hugging Face
  • Paris, France
View GitHub Profile
@eustlb
eustlb / benchmark_tdt_loss_kernel.py
Created April 16, 2026 13:49
reproduce benchmarks
"""
Benchmark TDT loss: PyTorch vs CUDA kernel vs NeMo (Numba).
Sweeps over batch sizes and sequence lengths, measuring speed and peak memory.
Usage:
/raid/eustache/venvs/pr-44171/bin/python benchmark_tdt_loss.py
/home/eustache_lebihan/pr-44171/NeMo/.venv/bin/python benchmark_tdt_loss.py --nemo-worker
"""
@eustlb
eustlb / tdt_expected_loss_value.py
Last active April 16, 2026 13:32
reproduce expected value for tdt loss using NeMo
"""
Generate the expected TDT loss reference value using NeMo's GPU kernel.
Runs nvidia/parakeet-tdt-0.6b-v3 in eval mode on 2 LibriSpeech samples
with sigma=0 and computes the HF-style mean reduction (per-sample loss
divided by target length, then averaged across the batch).
NeMo commit: 16f469b122 (v2.8.0rc0)
https://github.com/NVIDIA/NeMo/tree/16f469b122
@eustlb
eustlb / cohere_asr.py
Last active March 26, 2026 19:02
convert cohere asr tokenizer
"""
Convert a CohereAsr SentencePiece tokenizer (.model) to a HuggingFace fast tokenizer.
Downloads the tokenizer files from CohereLabs/cohere-transcribe-03-2026 on HuggingFace Hub.
"""
import json
from pathlib import Path
from huggingface_hub import snapshot_download
@eustlb
eustlb / reproduce_outputs_test_integration_longform.py
Created February 20, 2026 15:18
Reproduce expected outputs for test_integration_longform in transformers/tests/models/mimi/test_modeling_mimi.py
# Reproduce expected outputs for test_integration_longform in
# transformers/tests/models/mimi/test_modeling_mimi.py
#
# This uses the original moshi codebase (https://github.com/kyutai-labs/moshi)
# to generate reference values.
#
# Installation:
# git clone https://github.com/kyutai-labs/moshi.git
# uv pip install -e moshi/moshi/
# uv pip install librosa
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Reproduce expected outputs for each VoxtralRealtime HF integration test.
Uses vLLM offline inference (as in run_eval.py) to generate reference
transcriptions for every @slow integration test in
test_modeling_voxtral_realtime.py, then saves them to a JSON file.
"""
import json
import json
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import jiwer
import torch
from datasets import Audio, load_dataset
from transformers import (
VoxtralRealtimeForConditionalGeneration,
from transformers import LasrTokenizer
from transformers.tokenization_utils_sentencepiece import SentencePieceExtractor
from huggingface_hub import hf_hub_download
import sentencepiece
from datasets import load_dataset
from tqdm import tqdm
path = hf_hub_download(repo_id='wuketest/lasr_test', filename='spiece.model')
# vocab_ids, vocab_scores, merges = SentencePieceExtractor(path).extract()
@eustlb
eustlb / convert_proc.py
Last active December 5, 2025 19:47
Lasr Tokenizer: transformers vs sentencepiece
from transformers import LasrTokenizer, LasrFeatureExtractor, LasrProcessor
from transformers.tokenization_utils_sentencepiece import SentencePieceExtractor
from huggingface_hub import hf_hub_download
import sentencepiece
from datasets import load_dataset
from tqdm import tqdm
import unicodedata
import re
path = hf_hub_download(repo_id='wuketest/lasr_test', filename='spiece.model')
from transformers import AutoProcessor, HiggsAudioForConditionalGeneration
model_id = "eustlb/higgs-v2"
processor = AutoProcessor.from_pretrained(model_id, device_map="cuda")
processor.tokenizer.pad_token = processor.tokenizer.eos_token
model = HiggsAudioForConditionalGeneration.from_pretrained(model_id, device_map="cuda")
# single speaker smart voice
conversation = [
{
from transformers import AutoProcessor
from transformers.models.mllama.image_processing_mllama import convert_aspect_ratios_to_ids
# Load the chat template from file
with open("/Users/eustachelebihan/dev/add-higgs-v2/tmp/chat_template.jinja", "r") as f:
chat_template = f.read()
# Load expected outputs for comparison
with open("/Users/eustachelebihan/dev/add-higgs-v2/expected/single_speaker_with_smart_voice.txt", "r") as f:
expected_single_speaker_with_smart_voice = f.read()