Skip to content

Instantly share code, notes, and snippets.

View eustlb's full-sized avatar

eustlb

  • Hugging Face
  • Paris, France
View GitHub Profile
import os
import time
import pickle
from tqdm import tqdm
import torch
from transformers import WhisperForConditionalGeneration
def benchmark_gen(
@eustlb
eustlb / benchmark_parler_static_cache_compile.py
Last active August 7, 2024 11:34
Benchmark parler-tts static cache and compilation.
from dataclasses import dataclass, asdict, field
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import torch._dynamo.config
import torch._inductor.config
import json
import time
import os
@eustlb
eustlb / test_parler.py
Last active August 2, 2024 12:18
Test generation of a parler tts branch for different combinations of parameters.
from dataclasses import dataclass, asdict
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import torch._dynamo.config
import torch._inductor.config
import numpy as np
import time
import os
@eustlb
eustlb / reproduce_bug_generation.py
Last active July 23, 2024 16:56
Reproduce generation error on dev branch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import torch
# Surface dynamo graph breaks and recompilations in the logs while reproducing the bug.
torch._logging.set_logs(graph_breaks=True, recompiles=True)
# Fix the RNG seed so the reproduction is deterministic.
torch.manual_seed(0)
# Index of the CUDA device to run on.
CUDA_DEVICE = 0
torch_device = f"cuda:{CUDA_DEVICE}"
@eustlb
eustlb / reproduce_bug_compile.py
Created July 23, 2024 16:30
Reproduce a bug happening with torch 2.3.1 and compile.
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import torch
# Fix the RNG seed so the reproduction is deterministic.
torch.manual_seed(0)
# Index of the CUDA device to run on.
CUDA_DEVICE = 0
torch_device = f"cuda:{CUDA_DEVICE}"
import os
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
# caching allows ~50% compilation time reduction
# see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma
# Directory containing this script; used to anchor the inductor cache path below.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
# Point TorchInductor's compilation cache at a local "tmp" dir so compiled
# artifacts persist across runs (see the caching note above).
os.environ["TORCHINDUCTOR_CACHE_DIR"] = os.path.join(CURRENT_DIR, "tmp")
@eustlb
eustlb / reproducer_bug_jenny.py
Created July 29, 2024 09:45
Reproduce generation bug for a long prompt with the Jenny model (Parler-TTS).
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
# Hub checkpoint for the Jenny fine-tune of Parler-TTS mini.
model_name = "ylacombe/parler-tts-mini-jenny-30H"
# Target CUDA device for inference.
torch_device = "cuda:0"
# Load/run the model in bfloat16.
torch_dtype = torch.bfloat16
# Attention implementation to request from the model loader ("eager", i.e. the
# non-fused reference implementation).
attn_implementation = "eager"
@eustlb
eustlb / reproduce_bug_description.py
Created September 5, 2024 14:00
Reproduces a bug that occurs when the description is changed while using a compiled model
import os
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
# caching allows ~50% compilation time reduction
# see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma
# Directory containing this script; used to anchor the inductor cache path below.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
# Point TorchInductor's compilation cache at a local "tmp" dir so compiled
# artifacts persist across runs (see the caching note above).
os.environ["TORCHINDUCTOR_CACHE_DIR"] = os.path.join(CURRENT_DIR, "tmp")
@eustlb
eustlb / test_compile_parler.py
Created September 5, 2024 16:38
Test compile on ParlerTTS + streaming
import os
import torch
import time
from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
from transformers import AutoTokenizer
from threading import Thread
# caching allows ~50% compilation time reduction
# see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
@eustlb
eustlb / benchmark_parler_streaming.py
Last active September 19, 2024 09:45
Benchmark ParlerTTS + streaming time to first audio.
import os
import torch
import time
from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
from transformers import AutoTokenizer
from threading import Thread
# caching allows ~50% compilation time reduction
# see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma