eusoubrasileiro · October 24, 2024 11:15
diff --git a/whispertranscribe.py b/whispertranscribe.py
 # pip install scdl
 # this downloads all tracks from that user to the path specified
 # -a download all
 # -c skip and continue those already downloaded 
 # scdl -l https://soundcloud.com/ipperegrinos -a -c --path /home/andre/music/ipperegrinos
 %cd /mnt/Data/ipperegrinos

 import subprocess
 import pathlib 
 from pathlib import Path
 import whisper

 def process_audio(input_file: Path, output_file: Path):
     """
     Clean up an audio file with FFmpeg so that it is easier to transcribe.

     The audio is loudness-normalized (``loudnorm``), denoised (``afftdn``
     with a -25 dB noise floor) and high-pass filtered at 150 Hz, then
     written as 16 kHz mono at 64 kbit/s. The output format is whatever
     FFmpeg infers from the extension of ``output_file`` (no explicit
     format or codec is requested here).

     Args:
         input_file: Audio file to read.
         output_file: Destination file. Its parent directory is created
             when it does not exist yet.

     Returns:
         None. When FFmpeg exits with a non-zero status the error is
         printed and a partially written ``output_file`` is removed, so
         callers can use ``output_file.exists()`` to detect success.
     """
     # FFmpeg does not create missing directories for its output.
     output_file.parent.mkdir(parents=True, exist_ok=True)
     # Remember whether the target was already there so that a failed run
     # never deletes a file this call did not create.
     existed_before = output_file.exists()

     ffmpeg_command = [
         'ffmpeg', '-i', str(input_file),
         '-vn',  # Exclude the video stream (cover art)
         '-af', 'loudnorm, afftdn=nf=-25, highpass=f=150',
         '-ar', '16000',  # Resample to 16 kHz
         '-ac', '1',  # Downmix to mono
         '-b:a', '64k',  # Audio bit rate
         str(output_file),
     ]
     try:
         subprocess.run(ffmpeg_command, check=True)
     except subprocess.CalledProcessError as e:
         print(f"Error processing {input_file}: {e}")
         # A half-written file would otherwise look like a finished result
         # to a later `output_file.exists()` check.
         if not existed_before and output_file.exists():
             output_file.unlink()
         return

 def save_transcription(model, audiopath, txtpath):
     """
     Transcribe a Portuguese audio file and write the text to disk.

     Args:
         model: Object exposing ``transcribe(path, language=...,
             without_timestamps=...)`` that returns a mapping with a
             ``"text"`` entry (e.g. a loaded Whisper model).
         audiopath: ``pathlib.Path`` of the audio file to transcribe.
         txtpath: ``pathlib.Path`` of the UTF-8 text file to write. Its
             parent directory is created when it does not exist yet.

     Returns:
         The complete result returned by ``model.transcribe``.
     """
     # Create the output folder up front: failing on a missing directory
     # only after the (slow) transcription would throw that work away.
     txtpath.parent.mkdir(parents=True, exist_ok=True)

     # NOTE: timestamps are disabled here. Whether the default behavior
     # (timestamps enabled, stripped afterwards) gives better quality is
     # still an open question.
     result = model.transcribe(
         str(audiopath.absolute()), language="pt", without_timestamps=True
     )
     with txtpath.open('w', encoding="utf-8") as f:
         f.write(result["text"])
     return result

 # Load the Whisper model; its weights are downloaded into (and reused
 # from) the current working directory.
 model = whisper.load_model("medium", download_root=str(pathlib.Path.cwd()))

 # For every MP3 in the working directory: clean the audio into
 # ./processed/<name>.mp3, then transcribe it into ./text/<stem>.txt.
 # Files whose outputs already exist are skipped, so the loop can be
 # re-run after an interruption.
 for path in pathlib.Path.cwd().glob('*.mp3'):
     propath = path.parent / 'processed' / path.name
     txtpath = path.parent / 'text' / (path.stem + '.txt')
     # Neither FFmpeg nor open() creates missing output folders.
     propath.parent.mkdir(exist_ok=True)
     txtpath.parent.mkdir(exist_ok=True)
     if not propath.exists():
         process_audio(path, propath)
     if not propath.exists():
         # Audio processing failed; there is nothing to transcribe.
         print(f"Skipping transcription of {path}: no processed audio")
         continue
     if not txtpath.exists():
         # `tr` keeps the last full result around for interactive inspection.
         tr = save_transcription(model, propath, txtpath)
	# pip install scdl
	# this downloads all tracks from that user to the path specified
	# -a download all
	# -c skip and continue those already downloaded
	# scdl -l https://soundcloud.com/ipperegrinos -a -c --path /home/andre/music/ipperegrinos
	%cd /mnt/Data/ipperegrinos

	import subprocess
	import pathlib
	from pathlib import Path
	import whisper

	def process_audio(input_file: Path, output_file: Path):
	    """
	    Clean up an audio file with FFmpeg so that it is easier to transcribe.

	    The audio is loudness-normalized (``loudnorm``), denoised (``afftdn``
	    with a -25 dB noise floor) and high-pass filtered at 150 Hz, then
	    written as 16 kHz mono at 64 kbit/s. The output format is whatever
	    FFmpeg infers from the extension of ``output_file`` (no explicit
	    format or codec is requested here).

	    Args:
	        input_file: Audio file to read.
	        output_file: Destination file. Its parent directory is created
	            when it does not exist yet.

	    Returns:
	        None. When FFmpeg exits with a non-zero status the error is
	        printed and a partially written ``output_file`` is removed, so
	        callers can use ``output_file.exists()`` to detect success.
	    """
	    # FFmpeg does not create missing directories for its output.
	    output_file.parent.mkdir(parents=True, exist_ok=True)
	    # Remember whether the target was already there so that a failed run
	    # never deletes a file this call did not create.
	    existed_before = output_file.exists()

	    ffmpeg_command = [
	        'ffmpeg', '-i', str(input_file),
	        '-vn',  # Exclude the video stream (cover art)
	        '-af', 'loudnorm, afftdn=nf=-25, highpass=f=150',
	        '-ar', '16000',  # Resample to 16 kHz
	        '-ac', '1',  # Downmix to mono
	        '-b:a', '64k',  # Audio bit rate
	        str(output_file),
	    ]
	    try:
	        subprocess.run(ffmpeg_command, check=True)
	    except subprocess.CalledProcessError as e:
	        print(f"Error processing {input_file}: {e}")
	        # A half-written file would otherwise look like a finished result
	        # to a later `output_file.exists()` check.
	        if not existed_before and output_file.exists():
	            output_file.unlink()
	        return

	def save_transcription(model, audiopath, txtpath):
	    """
	    Transcribe a Portuguese audio file and write the text to disk.

	    Args:
	        model: Object exposing ``transcribe(path, language=...,
	            without_timestamps=...)`` that returns a mapping with a
	            ``"text"`` entry (e.g. a loaded Whisper model).
	        audiopath: ``pathlib.Path`` of the audio file to transcribe.
	        txtpath: ``pathlib.Path`` of the UTF-8 text file to write. Its
	            parent directory is created when it does not exist yet.

	    Returns:
	        The complete result returned by ``model.transcribe``.
	    """
	    # Create the output folder up front: failing on a missing directory
	    # only after the (slow) transcription would throw that work away.
	    txtpath.parent.mkdir(parents=True, exist_ok=True)

	    # NOTE: timestamps are disabled here. Whether the default behavior
	    # (timestamps enabled, stripped afterwards) gives better quality is
	    # still an open question.
	    result = model.transcribe(
	        str(audiopath.absolute()), language="pt", without_timestamps=True
	    )
	    with txtpath.open('w', encoding="utf-8") as f:
	        f.write(result["text"])
	    return result

	# Load the Whisper model; its weights are downloaded into (and reused
	# from) the current working directory.
	model = whisper.load_model("medium", download_root=str(pathlib.Path.cwd()))

	# For every MP3 in the working directory: clean the audio into
	# ./processed/<name>.mp3, then transcribe it into ./text/<stem>.txt.
	# Files whose outputs already exist are skipped, so the loop can be
	# re-run after an interruption.
	for path in pathlib.Path.cwd().glob('*.mp3'):
	    propath = path.parent / 'processed' / path.name
	    txtpath = path.parent / 'text' / (path.stem + '.txt')
	    # Neither FFmpeg nor open() creates missing output folders.
	    propath.parent.mkdir(exist_ok=True)
	    txtpath.parent.mkdir(exist_ok=True)
	    if not propath.exists():
	        process_audio(path, propath)
	    if not propath.exists():
	        # Audio processing failed; there is nothing to transcribe.
	        print(f"Skipping transcription of {path}: no processed audio")
	        continue
	    if not txtpath.exists():
	        # `tr` keeps the last full result around for interactive inspection.
	        tr = save_transcription(model, propath, txtpath)