Created
August 14, 2023 14:54
-
-
Save micseydel/835441109f69af51a876c151d0f359db to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import json | |
import whisper | |
from time import perf_counter, ctime | |
# dependency: https://github.com/openai/whisper#setup | |
def log(s) -> None:
    """Print *s* to stdout, prefixed with the current local time in brackets."""
    timestamp = ctime()
    print(f"[{timestamp}] {s}")
def get_segments(model, path: str) -> list:
    """Return the transcript segments for the audio file at *path*.

    The full transcription result is cached next to the audio file as
    ``<path without extension>.json``. When that file exists it is loaded
    and *model* is not used; otherwise ``model.transcribe`` is called and
    its result is saved before the segments are returned.

    Args:
        model: Object exposing ``transcribe(path, fp16=..., language=...)``
            (the loaded Whisper model in this script).
        path: Path to the audio file.

    Returns:
        The value stored under the result's ``"segments"`` key.

    Raises:
        Exception: Whatever writing the cache file raised; it is logged
            together with the result and then re-raised unchanged.
    """
    result_path = os.path.splitext(path)[0] + ".json"

    if os.path.exists(result_path):
        with open(result_path) as f:
            result = json.load(f)
    else:
        result = model.transcribe(path, fp16=False, language='English')

        # Write to a temporary sibling and rename it into place, so a failed
        # or interrupted dump never leaves a truncated .json that a later run
        # would load as if it were a valid cache entry.
        tmp_path = result_path + ".tmp"
        try:
            with open(tmp_path, 'w') as f:
                json.dump(result, f)
            os.replace(tmp_path, result_path)
        except Exception:
            log("Failed to dump result to disk:")
            log(result)
            # Best-effort cleanup of the partial file; the original error is
            # the one worth reporting, so a failed removal is ignored.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

    # The plain transcript appears to be available too, under 'text'.
    return result["segments"]
def float_to_hour_minute_second(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS``.

    The value is converted with ``int()`` first, so any fractional part is
    dropped. Hours are not wrapped at 24 and may use more than two digits.
    """
    whole_seconds = int(seconds)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
def segments_to_markdown(segments, audio_path: str) -> str:
    """Render transcript *segments* as a markdown note for *audio_path*.

    The note has a front-matter block tagging it ``audionote``, a
    ``# Segments`` bullet list with one timestamped entry per segment, and an
    ``# Audio`` section holding an ``audio-player`` fenced block that links
    the audio file by its base name and lists one bookmark per segment.

    Each segment is read through its ``"start"`` and ``"text"`` keys.
    """
    # Bullet list: "- \[HH:MM:SS\] text"
    segment_lines = []
    for segment in segments:
        stamp = float_to_hour_minute_second(segment['start'])
        segment_lines.append(f"- \\[{stamp}\\] {segment['text']}")

    audio_name = os.path.basename(audio_path)

    # Player bookmarks: "HH:MM:SS --- text"
    bookmark_lines = []
    for segment in segments:
        stamp = float_to_hour_minute_second(segment["start"])
        bookmark_lines.append(f"{stamp} --- {segment['text']}")

    note_lines = [
        "---",
        "tags:",
        "- audionote",
        "---",
        "# Segments",
        "\n".join(segment_lines),
        "# Audio",
        "```audio-player",
        f"[[{audio_name}]]",
        "\n".join(bookmark_lines),
        "```",
    ]
    return "\n".join(note_lines) + "\n"
def files_to_transcribe(audio_file_dir):
    """Yield the path of every entry in *audio_file_dir* that looks like audio.

    An entry qualifies when its extension, compared case-insensitively, is
    one of .mp3, .wav, .mp4, .aac or .m4a. Every other entry is reported
    through ``log`` and skipped. Yielded paths are *audio_file_dir* joined
    with the entry name.
    """
    audio_extensions = {".mp3", ".wav", ".mp4", ".aac", ".m4a"}
    for entry in os.listdir(audio_file_dir):
        suffix = os.path.splitext(entry)[1].lower()
        if suffix not in audio_extensions:
            log(f"Skipping file {entry} with ext {suffix}")
            continue
        yield os.path.join(audio_file_dir, entry)
def main():
    """Transcribe every audio file in the directory given as ``sys.argv[1]``.

    For each audio file found by ``files_to_transcribe`` a markdown note with
    the same base name and a ``.md`` extension is written next to it. Files
    whose note already exists are skipped. Exits with a usage message when
    no directory argument is supplied.
    """
    # Validate the arguments before the (slow) model load so a missing
    # argument fails immediately instead of after loading the large model.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} AUDIO_DIR")
    audio_file_dir = sys.argv[1]

    log("Loading the Whisper model...")
    model = whisper.load_model("large")

    log(f"Using dir {audio_file_dir}")

    files = list(files_to_transcribe(audio_file_dir))
    for path_no, path in enumerate(files, 1):
        markdown_path = os.path.splitext(path)[0] + ".md"
        if os.path.exists(markdown_path):
            log(f"File {markdown_path} already exists, skipping")
            # Actually skip: without this the file was transcribed again and
            # the existing note overwritten despite the message.
            continue

        log(f"Transcribing path #{path_no}/{len(files)} - {path}")
        t0 = perf_counter()
        segments = get_segments(model, path)
        t1 = perf_counter()
        log(f"Done, took {float_to_hour_minute_second(t1 - t0)}")

        log("Generating markdown")
        markdown = segments_to_markdown(segments, path)
        # Transcripts may contain non-ASCII characters; do not depend on the
        # platform's default encoding when writing the note.
        with open(markdown_path, "w", encoding="utf-8") as f:
            f.write(markdown)
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment