Skip to content

Instantly share code, notes, and snippets.

View eustlb's full-sized avatar

eustlb

  • Hugging Face
  • Paris, France
View GitHub Profile
@eustlb
eustlb / reproducer_kyutai_speech_to_text_generate.py
Created June 21, 2025 07:04
reproducer for Kyutai stt Transformers integration, test `test_generation`
# ------ install moshi ------
# git clone https://github.com/kyutai-labs/moshi.git
# cd moshi && git checkout 0395bd6c9a95e899c397a68c75f300f3b5409b2c
# uv pip install -e .
# ----------------------------
import torch
from moshi import run_inference
args = {
@eustlb
eustlb / reproducer_kyutai_speech_to_text_generate_batched.py
Created June 21, 2025 07:03
reproducer for Kyutai stt Transformers integration, test `test_generation_batched`
# ------ install moshi ------
# git clone https://github.com/kyutai-labs/moshi.git
# cd moshi && git checkout 0395bd6c9a95e899c397a68c75f300f3b5409b2c
# uv pip install -e .
# ----------------------------
import torch
from moshi import run_inference
args = {
@eustlb
eustlb / csm_test_1b_model_integration_generate_batched.py
Last active April 23, 2025 09:39
Reproducer for CSM Transformers integration
# TEST GREEDY FLOAT 32
# make sure to clone git@github.com:eustlb/csm.git and checkout compare-trfms
import sys
sys.path.insert(0, "./csm")
from generator import load_csm_1b, Segment
from datasets import load_dataset, Audio
from huggingface_hub import hf_hub_download
@eustlb
eustlb / csm_test_1b_model_integration_generate_no_audio.py
Last active April 23, 2025 09:38
Reproducer for CSM Transformers integration
# TEST GREEDY FLOAT 32
# make sure to clone git@github.com:eustlb/csm.git and checkout compare-trfms
import sys
sys.path.insert(0, "./csm")
from generator import load_csm_1b, Segment
from huggingface_hub import hf_hub_download
import torch
import torchaudio
@eustlb
eustlb / csm_test_1b_model_integration_generate_multiple_audio.py
Created April 23, 2025 08:43
Reproducer for CSM Transformers integration
# TEST GREEDY FLOAT 32
# make sure to clone git@github.com:eustlb/csm.git and checkout compare-trfms
import sys
sys.path.insert(0, "./csm")
from generator import load_csm_1b, Segment
from datasets import load_dataset, Audio
from huggingface_hub import hf_hub_download
import torch
@eustlb
eustlb / csm_test_1b_model_integration_generate.py
Last active April 23, 2025 08:42
Reproducer for CSM Transformers integration
# TEST GREEDY FLOAT 32
# make sure to clone git@github.com:eustlb/csm.git and checkout compare-trfms
import sys
sys.path.insert(0, "./csm")
from generator import load_csm_1b, Segment
from datasets import load_dataset, Audio
from huggingface_hub import hf_hub_download
import torch
@eustlb
eustlb / infer_whisper.py
Last active March 31, 2025 12:44
infer whisper
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
from datasets import load_dataset
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
from datasets import load_dataset, Audio
from transformers import (
CsmForConditionalGeneration,
TrainingArguments,
CsmProcessor,
Trainer
)
processor = CsmProcessor.from_pretrained("eustlb/csm-1b")
model = CsmForConditionalGeneration.from_pretrained("eustlb/csm-1b")
@eustlb
eustlb / hub_to_s3.py
Created February 26, 2025 15:55
HF hub to s3 bucket
from typing import IO
from datatrove.io import get_datafolder
from datatrove.executor import SlurmPipelineExecutor
from datatrove.pipeline.readers import ParquetReader
from datatrove.pipeline.writers import ParquetWriter
from datatrove.utils.typeshelper import StatHints
class ParquetReaderInMemory(ParquetReader):
@eustlb
eustlb / .gitignore
Last active February 25, 2025 15:07
Benchmark seq vs batched - Style TTS2
traces-*/