Skip to content

Instantly share code, notes, and snippets.

@rubenvarela
Created November 9, 2023 00:58
Show Gist options
  • Save rubenvarela/e7d64544a6438b76397d07c270350141 to your computer and use it in GitHub Desktop.
Save rubenvarela/e7d64544a6438b76397d07c270350141 to your computer and use it in GitHub Desktop.
Correr Whisper en Perro Negro como benchmark
git+https://github.com/openai/whisper.git
transformers
pytube
pandas
import whisper
from pytube import YouTube
from pathlib import Path
import logging
import time
import pandas as pd
from textwrap import TextWrapper
# Emit INFO-level records so the downloaded-file size logged by get_text is shown.
logging.basicConfig(level=logging.INFO)
# Load the Whisper "base" model once at module import; get_text reuses this
# module-level instance for every transcription run.
model = whisper.load_model(name="base")
def _safe_file_name(title: str) -> str:
    """Return *title* with characters that are unsafe in file names replaced by ``_``."""
    unsafe = '<>:"/\\|?*'
    cleaned = "".join(
        "_" if ch in unsafe or ord(ch) < 32 else ch for ch in title
    ).strip()
    # Fall back to a fixed stem so an empty title never yields the bare name ".mp3".
    return cleaned or "audio"


def get_text(url: str = None, *, runs: int = 10):
    """Download a video's audio track and benchmark Whisper transcription on it.

    The audio is downloaded into the current directory only when a file named
    after the video title does not already exist there. The module-level
    ``model`` then transcribes that file ``runs`` times; the variance and mean
    of the per-run durations and the average loop time are printed to stdout.

    Args:
        url: Address of the YouTube video. A falsy value (``None`` or ``""``)
            short-circuits the whole function.
        runs: Number of transcription passes to time. Defaults to 10.

    Returns:
        The stripped ``'text'`` entry of the last transcription result, or
        ``False`` when no URL was supplied.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
        RuntimeError: If the video exposes no audio-only stream.
    """
    if not url:
        return False
    if runs < 1:
        raise ValueError("runs must be at least 1")

    yt = YouTube(url)
    # Video titles may contain path separators or other reserved characters,
    # which would make the rename below fail; sanitize before using as a name.
    final_file_name = f"{_safe_file_name(yt.title)}.mp3"

    if not Path(final_file_name).exists():
        video = yt.streams.filter(only_audio=True).first()
        if video is None:
            # first() yields None when the filtered query is empty.
            raise RuntimeError(f"No audio-only stream available for {url}")
        out_file = video.download(output_path=".")
        file_ref = Path(out_file)
        logging.info('Size of audio file in Bytes: %s', file_ref.stat().st_size)
        # Only the name changes here; the downloaded container is not re-encoded.
        file_ref.rename(final_file_name)

    durations = []
    # perf_counter is monotonic, so the measurements cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    for _ in range(runs):
        start_iter = time.perf_counter()
        result = model.transcribe(final_file_name)
        durations.append(time.perf_counter() - start_iter)
    end = time.perf_counter()

    # Variance and mean of the per-run durations (sample variance, so a single
    # run reports NaN).
    timings = pd.Series(durations)
    print("")
    print(f"Var: {timings.var()}")
    print(f"Mean: {timings.mean()}")
    print(f"Time: {(end - start) / runs}")
    return result['text'].strip()
if __name__ == "__main__":
    # Script entry point: benchmark the hard-coded video, then print its
    # transcript wrapped to 120 columns.
    transcript = get_text("https://www.youtube.com/watch?v=a6tgD_CsYTQ")
    print(TextWrapper(width=120).fill(transcript))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment