Skip to content

Instantly share code, notes, and snippets.

@tivrfoa
Created July 13, 2025 22:47
Show Gist options
  • Save tivrfoa/a62366cb0c0322c81cef29163f3ad080 to your computer and use it in GitHub Desktop.
Save tivrfoa/a62366cb0c0322c81cef29163f3ad080 to your computer and use it in GitHub Desktop.
Transcribing Audio Files using Python speech_recognition and ffmpeg, by ChatGPT
import speech_recognition as sr
import sys
import os
import subprocess
import tempfile
def convert_to_wav(input_file):
    """Convert input audio to a temporary WAV file using ffmpeg.

    The output is written as mono, 16 kHz audio to a freshly created
    temporary ``.wav`` file.

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        The path of the temporary WAV file. The caller is responsible for
        deleting it once it is no longer needed.

    Raises:
        SystemExit: With exit code 1 if ffmpeg cannot be started or exits
            with a non-zero status. The temporary file is removed first, so
            a failed conversion leaves nothing behind.
    """
    # delete=False: the file must outlive this handle so ffmpeg can write to
    # it by name; it is closed right away so ffmpeg can reopen it.
    tmp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp_wav.close()
    command = [
        "ffmpeg", "-y",  # overwrite the (empty) temp file that already exists
        "-i", input_file,
        "-ac", "1",  # mono
        "-ar", "16000",  # 16kHz sample rate
        tmp_wav.name,
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError) as err:
        # The temp file was created with delete=False, so it has to be
        # removed by hand or every failed conversion would leak one file.
        try:
            os.remove(tmp_wav.name)
        except OSError:
            pass
        if isinstance(err, FileNotFoundError):
            # subprocess raises this when the ffmpeg executable is missing.
            print("❌ ffmpeg was not found. Install it and make sure it is on PATH.")
        else:
            print("❌ ffmpeg failed to convert the audio file.")
        sys.exit(1)
    return tmp_wav.name
def main():
    """Transcribe the audio file named on the command line and print the text.

    Reads the audio path from ``sys.argv[1]``. Files whose extension is not
    ``.wav`` are first converted to a temporary WAV file via
    ``convert_to_wav``. The audio is then sent to the Google recognizer
    exposed by ``speech_recognition`` and the transcript is printed.

    Raises:
        SystemExit: With exit code 1 when no audio file argument is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 speech1.py <audio_file>")
        sys.exit(1)

    input_path = sys.argv[1]
    file_ext = os.path.splitext(input_path)[1].lower()

    # Only a file produced by the conversion step is ours to delete; a WAV
    # supplied by the user must never be removed.
    cleanup_needed = file_ext != ".wav"
    if cleanup_needed:
        print(f"🔄 Converting {file_ext} to .wav...")
        input_path = convert_to_wav(input_path)

    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(input_path) as source:
            audio = recognizer.record(source)

        print("📝 Transcribing...")
        try:
            print(recognizer.recognize_google(audio))
        except sr.UnknownValueError:
            print("😕 Could not understand the audio.")
        except sr.RequestError as e:
            print(f"🔌 API error: {e}")
    finally:
        # Clean up the temp file even when reading or recognition raises.
        if cleanup_needed:
            os.remove(input_path)


if __name__ == "__main__":
    main()
@tivrfoa
Copy link
Author

tivrfoa commented Jul 13, 2025

It works with many languages, e.g.:

recognizer.recognize_google(audio, language="zh-CN")

https://cloud.google.com/speech-to-text/docs/speech-to-text-supported-languages

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment