# Source: GitHub Gist 835441109f69af51a876c151d0f359db by @micseydel,
# created August 14, 2023 14:54.
import os
import sys
import json
import whisper
from time import perf_counter, ctime
# dependency: https://github.com/openai/whisper#setup
def log(s) -> None:
    """Print *s* to stdout, prefixed with the current time in square brackets."""
    timestamp = ctime()
    print(f"[{timestamp}] {s}")
def get_segments(model, path: str) -> list:
    """Return the transcript segments for the audio file at *path*.

    The full transcription result is cached next to the audio file, under the
    same name with a ``.json`` extension. When that cache file exists it is
    loaded and *model* is not used; otherwise ``model.transcribe`` is called
    and its result is saved for later runs.

    Args:
        model: Object exposing ``transcribe(path, fp16=..., language=...)``,
            such as a loaded Whisper model. Only used on a cache miss.
        path: Path to the audio file.

    Returns:
        The value stored under the ``"segments"`` key of the result.

    Raises:
        Exception: Any error raised while saving the result to disk is
            re-raised after the result has been logged.
    """
    result_path = os.path.splitext(path)[0] + ".json"
    if os.path.exists(result_path):
        with open(result_path, encoding="utf-8") as f:
            result = json.load(f)
    else:
        result = model.transcribe(path, fp16=False, language='English')
        # Write to a temporary sibling and rename it into place, so a failed
        # or interrupted dump never leaves a truncated cache file that the
        # next run would try (and fail) to load.
        tmp_path = result_path + ".tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(result, f)
            os.replace(tmp_path, result_path)
        except Exception:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # Nothing was written, or it is already gone.
            # Transcription is expensive: log the result so it is not lost.
            log("Failed to dump result to disk:")
            log(result)
            raise
    # The plain transcript text appears to be available too (under "text"?).
    return result["segments"]
def float_to_hour_minute_second(seconds: float) -> str:
    """Format a duration given in seconds as ``HH:MM:SS``.

    The value is converted with ``int()`` first, so any fractional part is
    dropped. Each field is zero-padded to at least two digits, and the hours
    field is not wrapped at 24.
    """
    total_minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
def segments_to_markdown(segments, audio_path: str) -> str:
    """Render transcript segments as a Markdown note.

    The note consists of front matter carrying the ``audionote`` tag, a
    ``# Segments`` bullet list with one timestamped line per segment, and an
    ``# Audio`` section holding an ``audio-player`` fenced block that links
    the audio file by name and lists one ``HH:MM:SS --- text`` bookmark per
    segment.

    Args:
        segments: Iterable of mappings with ``"start"`` (seconds) and
            ``"text"`` entries.
        audio_path: Path to the audio file; only its final component is used.

    Returns:
        The Markdown document as a single string.
    """
    segment_lines = []
    for segment in segments:
        stamp = float_to_hour_minute_second(segment['start'])
        segment_lines.append(f"- \\[{stamp}\\] {segment['text']}")
    text_segments = "\n".join(segment_lines)

    _, audio_name = os.path.split(audio_path)

    bookmark_lines = []
    for segment in segments:
        stamp = float_to_hour_minute_second(segment["start"])
        bookmark_lines.append(f"{stamp} --- {segment['text']}")
    formatted_bookmarks = "\n".join(bookmark_lines)

    return f"""\
---
tags:
- audionote
---
# Segments
{text_segments}
# Audio
```audio-player
[[{audio_name}]]
{formatted_bookmarks}
```
"""
def files_to_transcribe(audio_file_dir):
    """Yield the paths of the audio files directly inside *audio_file_dir*.

    Entries are visited in sorted name order so that runs (and the progress
    numbering built on top of them) are reproducible; ``os.listdir`` itself
    returns entries in arbitrary order. An entry is yielded when its
    lower-cased extension is a supported audio extension and it is a regular
    file; every other entry is logged and skipped.

    Args:
        audio_file_dir: Directory to scan (not recursive).

    Yields:
        ``audio_file_dir`` joined with each matching file name.
    """
    audio_extensions = {".mp3", ".wav", ".mp4", ".aac", ".m4a"}
    for in_file in sorted(os.listdir(audio_file_dir)):
        ext = os.path.splitext(in_file)[1].lower()
        if ext not in audio_extensions:
            log(f"Skipping file {in_file} with ext {ext}")
            continue
        path = os.path.join(audio_file_dir, in_file)
        if not os.path.isfile(path):
            # A directory can carry an audio-looking name; it cannot be
            # transcribed, so do not hand it to the caller.
            log(f"Skipping {in_file}: not a regular file")
            continue
        yield path
def main():
    """Transcribe every audio file in the directory given as ``sys.argv[1]``.

    For each audio file found by ``files_to_transcribe``, a Markdown note is
    written next to it under the same name with a ``.md`` extension. Audio
    files whose note already exists are skipped.

    Raises:
        SystemExit: If no directory argument was supplied.
    """
    # Validate the arguments before the (slow) model load so that a missing
    # argument fails immediately.
    if len(sys.argv) < 2:
        sys.exit("usage: pass the directory containing the audio files as the first argument")
    audio_file_dir = sys.argv[1]

    log("Loading the Whisper model...")
    model = whisper.load_model("large")
    log(f"Using dir {audio_file_dir}")

    files = list(files_to_transcribe(audio_file_dir))
    for path_no, path in enumerate(files, 1):
        markdown_path = os.path.splitext(path)[0] + ".md"
        if os.path.exists(markdown_path):
            log(f"File {markdown_path} already exists, skipping")
            # Actually skip: without this the note would be regenerated and
            # overwritten despite the message above.
            continue

        log(f"Transcribing path #{path_no}/{len(files)} - {path}")
        t0 = perf_counter()
        segments = get_segments(model, path)
        t1 = perf_counter()
        log(f"Done, took {float_to_hour_minute_second(t1 - t0)}")

        log("Generating markdown")
        markdown = segments_to_markdown(segments, path)
        # Transcripts may contain non-ASCII text; do not depend on the
        # platform's default encoding.
        with open(markdown_path, "w", encoding="utf-8") as f:
            f.write(markdown)
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# (End of gist. The GitHub page footer that followed here is not part of the script.)