Last active
October 24, 2024 11:15
-
-
Save eusoubrasileiro/2ab02c75586751c580a7424a8f8a75ec to your computer and use it in GitHub Desktop.
Transcribe pregações (sermons) from Peregrinos after downloading them from SoundCloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install scdl
# The scdl command below downloads all tracks from that user to the path specified:
#   -a  download all tracks
#   -c  continue, skipping tracks that were already downloaded
# scdl -l https://soundcloud.com/ipperegrinos -a -c --path /home/andre/music/ipperegrinos
%cd /mnt/Data/ipperegrinos | |
import subprocess | |
import pathlib | |
from pathlib import Path | |
import whisper | |
def process_audio(input_file: Path, output_file: Path) -> bool:
    """
    Clean up an audio file with FFmpeg before it is handed to Whisper.

    The audio goes through loudness normalization (``loudnorm``), FFT-based
    noise reduction (``afftdn``) and a 150 Hz high-pass filter, and is written
    as 16 kHz mono at 64 kbit/s. The container/codec is chosen by FFmpeg from
    the extension of ``output_file``.

    Args:
        input_file: Audio file to read.
        output_file: Destination file. Its parent directory is created when
            missing.

    Returns:
        True when FFmpeg finished successfully, False when it exited with an
        error (the error is printed and any partial output is removed).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # FFmpeg does not create missing directories; without this the very first
    # run fails because the output folder does not exist yet.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Remember whether the target was already there so that a failure never
    # deletes a file this call did not create.
    existed_before = output_file.exists()

    ffmpeg_command = [
        'ffmpeg', '-i', str(input_file),
        '-vn',  # Exclude the video stream (cover art)
        '-af', 'loudnorm, afftdn=nf=-25, highpass=f=150',
        '-ar', '16000',  # Set audio sample rate to 16kHz - microphone maximum 16kHz
        '-ac', '1',  # Set audio channels to mono
        '-b:a', '64k',  # enough bit rate
        str(output_file)
    ]

    try:
        subprocess.run(ffmpeg_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {input_file}: {e}")
        # A truncated output would otherwise look like a finished file to any
        # caller that only checks for its existence.
        if not existed_before and output_file.exists():
            output_file.unlink()
        return False
    return True
def save_transcription(model, audiopath: Path, txtpath: Path) -> dict:
    """
    Transcribe a Portuguese audio file and save the plain text to disk.

    Args:
        model: Object exposing ``transcribe(path, **options)`` that returns a
            mapping with a ``"text"`` entry (here: a loaded Whisper model).
        audiopath: Audio file to transcribe; passed to the model as an
            absolute path string.
        txtpath: Text file to write (UTF-8). Its parent directory is created
            when missing.

    Returns:
        The full result object returned by ``model.transcribe``.
    """
    # Create the output folder *before* transcribing: a missing directory
    # would otherwise only surface after the whole (slow) transcription ran,
    # throwing the result away.
    txtpath.parent.mkdir(parents=True, exist_ok=True)

    # Timestamps are disabled (without_timestamps=True).
    # NOTE(review): the original author suspected the default (with
    # timestamps) might give better quality - unverified.
    result = model.transcribe(
        str(audiopath.absolute()),
        language="pt",
        without_timestamps=True,
    )

    txtpath.write_text(result["text"], encoding="utf-8")
    return result
# Load the Whisper model once; the weights are downloaded to (and reused
# from) the current working directory.
model = whisper.load_model("medium", download_root=str(pathlib.Path.cwd()))

# Layout: <cwd>/*.mp3 (downloads), <cwd>/processed/*.mp3 (cleaned audio),
# <cwd>/text/*.txt (transcriptions).
base_dir = pathlib.Path.cwd()
processed_dir = base_dir / 'processed'
text_dir = base_dir / 'text'
# Neither FFmpeg nor open() creates missing folders, so make them up front.
processed_dir.mkdir(exist_ok=True)
text_dir.mkdir(exist_ok=True)

# sorted() gives a reproducible processing order across runs.
for path in sorted(base_dir.glob('*.mp3')):
    propath = processed_dir / path.name
    txtpath = text_dir / (path.stem + '.txt')
    # Both steps are skipped when their output already exists, so the script
    # can be re-run after new downloads or an interruption.
    if not propath.exists():
        process_audio(path, propath)
    if not propath.exists():
        # Processing failed; do not crash the whole batch by feeding a
        # missing file to Whisper.
        print(f"Skipping transcription of {path.name}: no processed audio")
        continue
    if not txtpath.exists():
        tr = save_transcription(model, propath, txtpath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment