Skip to content

Instantly share code, notes, and snippets.

@eusoubrasileiro
Last active October 24, 2024 11:15
Show Gist options
  • Save eusoubrasileiro/2ab02c75586751c580a7424a8f8a75ec to your computer and use it in GitHub Desktop.
Save eusoubrasileiro/2ab02c75586751c580a7424a8f8a75ec to your computer and use it in GitHub Desktop.
Transcribe pregações peregrinos after downloading from soundcloud
# pip install scdl
# this downloads all tracks from that user to the path specified
# -a download all
# -c skip and continue those already downloaded
# scdl -l https://soundcloud.com/ipperegrinos -a -c --path /home/andre/music/ipperegrinos
%cd /mnt/Data/ipperegrinos
import subprocess
import pathlib
from pathlib import Path
import whisper
def process_audio(input_file: Path, output_file: Path):
    """
    Clean up one audio file with FFmpeg and write the result to ``output_file``.

    The FFmpeg filter chain applies loudness normalization (``loudnorm``),
    noise reduction (``afftdn``) and a 150 Hz high-pass filter. The output is
    mono, sampled at 16 kHz, with a 64k audio bit rate. No separate WAV
    conversion happens here: FFmpeg writes directly to ``output_file``.

    The parent directory of ``output_file`` is created when missing. If FFmpeg
    exits with a non-zero status, the error is printed, any partial output
    created by this call is removed (so a later ``output_file.exists()`` check
    does not mistake it for a finished file), and the function returns.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the processed audio file to write.

    Returns:
        None, both on success and on an FFmpeg failure.
    """
    # FFmpeg does not create missing output directories by itself.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # Remember whether the target was already there, so that a failure never
    # deletes a file this call did not create.
    existed_before = output_file.exists()

    ffmpeg_command = [
        'ffmpeg', '-i', str(input_file),
        '-vn',  # Exclude the video stream (cover art)
        '-af', 'loudnorm, afftdn=nf=-25, highpass=f=150',
        '-ar', '16000',  # Set audio sample rate to 16kHz - microphone maximum 16kHz
        '-ac', '1',  # Set audio channels to mono
        '-b:a', '64k',  # enough bit rate
        str(output_file),
    ]

    try:
        subprocess.run(ffmpeg_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {input_file}: {e}")
        # Drop a truncated/partial result left behind by the failed run.
        if not existed_before and output_file.exists():
            output_file.unlink()
def save_transcription(model, audiopath, txtpath):
    """
    Transcribe ``audiopath`` with ``model`` and save the text to ``txtpath``.

    Args:
        model: Object exposing ``transcribe(path, language=..., without_timestamps=...)``
            that returns a mapping with a ``"text"`` entry (the script passes
            the model returned by ``whisper.load_model``).
        audiopath: ``pathlib.Path`` of the audio file to transcribe.
        txtpath: ``pathlib.Path`` of the UTF-8 text file to write; its parent
            directory is created when missing.

    Returns:
        The full result object returned by ``model.transcribe``.
    """
    # Create the destination folder up front so that a missing directory is
    # detected before the slow transcription rather than after it.
    txtpath.parent.mkdir(parents=True, exist_ok=True)

    # The transcription is requested in Portuguese with without_timestamps=True.
    # NOTE(review): the original comment wondered whether the default
    # (timestamped) behavior gives better quality, with stamps removed
    # afterwards - confirm which mode is actually wanted.
    result = model.transcribe(str(audiopath.absolute()), language="pt", without_timestamps=True)
    transcription = result["text"]
    txtpath.write_text(transcription, encoding="utf-8")
    return result
# Load the Whisper model; download_root points at the current working directory.
model = whisper.load_model("medium", download_root=str(pathlib.Path.cwd()))

# Walk the MP3 files of the current directory in a stable (sorted) order.
# Each step is skipped when its output already exists, so the loop can be
# re-run after an interruption.
for path in sorted(pathlib.Path.cwd().glob('*.mp3')):
    propath = path.parent / 'processed' / path.name
    txtpath = path.parent / 'text' / (path.stem + '.txt')

    # Neither FFmpeg nor open() creates missing output folders.
    propath.parent.mkdir(exist_ok=True)
    txtpath.parent.mkdir(exist_ok=True)

    if not propath.exists():
        process_audio(path, propath)
    if not propath.exists():
        # process_audio reports FFmpeg failures by printing and returning,
        # so verify the processed file exists instead of transcribing a
        # missing file.
        print(f"Skipping transcription of {path.name}: processed audio not available")
        continue
    if not txtpath.exists():
        tr = save_transcription(model, propath, txtpath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment