Skip to content

Instantly share code, notes, and snippets.

@cdpath
Created December 28, 2024 11:01
Show Gist options
  • Save cdpath/3d7d57e19f54c9717c7932934ed30c4e to your computer and use it in GitHub Desktop.
Save cdpath/3d7d57e19f54c9717c7932934ed30c4e to your computer and use it in GitHub Desktop.
音频文件转简体中文:ffmpeg + whisper.cpp + opencc
#!/bin/bash
#
# Transcribe every audio file in a directory to Simplified Chinese text.
#
# Per-file pipeline:
#   ffmpeg      -> 16 kHz, mono, 16-bit PCM WAV (skipped for *.wav inputs)
#   whisper-cli -> transcription with language "zh"
#   opencc      -> conversion using the t2s.json configuration
#
# Usage: <script> <input_directory>
#
# For an input "<dir>/<stem>.<ext>" the script writes "<dir>/<stem>.wav"
# (unless the input already ends in .wav) and "<dir>/<stem>.txt". Existing
# WAV and TXT files are reused/skipped, so the script can be re-run.
set -euo pipefail

# Location of the whisper.cpp ggml model passed to whisper-cli via -m.
readonly model_path="$HOME/Downloads/ggml-model-whisper-base.bin"

# Scratch file holding the raw whisper-cli output. It lives outside the input
# directory so it is never picked up by the file glob, and it is global so the
# EXIT trap can still see it after main() has returned.
tmp_transcript=""

# Remove the scratch file on every exit path (success, error, set -e abort).
cleanup() {
  if [[ -n "$tmp_transcript" ]]; then
    rm -f -- "$tmp_transcript"
  fi
}
trap cleanup EXIT

#######################################
# Convert (if needed), transcribe and post-process a single file.
# Globals:   model_path (read), tmp_transcript (read; file is overwritten)
# Arguments: $1 - path of the file, in the form "<input_dir>/<name>"
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 if the file was processed or deliberately skipped,
#            1 if ffmpeg, whisper-cli or opencc failed
#######################################
process_file() {
  local file=$1
  local dir name stem ext base_name wav_file output_txt

  # Split on the file name only, so that dots in directory names
  # (e.g. "my.dir/clip") cannot be mistaken for an extension separator.
  dir="${file%/*}"
  name="${file##*/}"
  if [[ "$name" == *.* ]]; then
    ext="${name##*.}"
    stem="${name%.*}"
  else
    ext=""
    stem="$name"
  fi
  base_name="${dir}/${stem}"

  # .txt is this script's own output format; never feed it to ffmpeg.
  if [[ "$ext" == "txt" ]]; then
    return 0
  fi

  # Check if target WAV file exists
  if [[ "$ext" != "wav" ]]; then
    wav_file="${base_name}.wav"
    if [[ ! -f "$wav_file" ]]; then
      echo "Converting $file to WAV format..."
      # -nostdin: do not let ffmpeg read interactive commands from stdin.
      if ! ffmpeg -nostdin -i "$file" -ar 16000 -ac 1 -c:a pcm_s16le "$wav_file"; then
        echo "Error: Failed to convert $file to WAV format." >&2
        # The WAV did not exist before this attempt, so anything present now
        # is a partial file that a later run would wrongly reuse.
        rm -f -- "$wav_file"
        return 1
      fi
    else
      echo "WAV file $wav_file already exists. Skipping conversion."
    fi
  else
    wav_file="$file"
  fi

  # Check if transcription already exists
  output_txt="${base_name}.txt"
  if [[ -f "$output_txt" ]]; then
    echo "Transcription $output_txt already exists. Skipping transcription."
    return 0
  fi

  # Transcribe using whisper-cli; its stdout is captured in the scratch file.
  echo "Transcribing $wav_file..."
  if ! whisper-cli -f "$wav_file" -m "$model_path" -l zh > "$tmp_transcript"; then
    echo "Error: Whisper transcription failed for $wav_file." >&2
    return 1
  fi

  # Convert transcription to simplified Chinese
  echo "Converting transcription to simplified Chinese..."
  if ! opencc -i "$tmp_transcript" -o "$output_txt" -c t2s.json; then
    echo "Error: Failed to convert transcription for $wav_file to simplified Chinese." >&2
    # Drop a possibly incomplete result so the next run retries this file
    # instead of treating it as already transcribed.
    rm -f -- "$output_txt"
    return 1
  fi

  echo "Transcription saved as $output_txt"
}

#######################################
# Entry point: validate the environment, then process each regular file
# directly inside the input directory (no recursion).
# Arguments: $1 - input directory
# Returns:   exits 1 on usage/environment errors; otherwise 0, even when
#            individual files failed (failures are reported on stderr)
#######################################
main() {
  # ${1:-} keeps `set -u` from aborting before the usage text is shown.
  local input_dir="${1:-}"
  local tool file
  local failures=0

  if [[ -z "$input_dir" ]]; then
    echo "Usage: $0 <input_directory>" >&2
    exit 1
  fi
  if [[ ! -d "$input_dir" ]]; then
    echo "Error: $input_dir is not a directory." >&2
    exit 1
  fi

  # Ensure the required tools are installed
  for tool in ffmpeg whisper-cli opencc; do
    if ! command -v "$tool" &> /dev/null; then
      echo "Error: $tool is not installed. Please install it first." >&2
      exit 1
    fi
  done

  # Ensure the model file exists
  if [[ ! -f "$model_path" ]]; then
    echo "Error: Model file not found at $model_path" >&2
    echo "Download at https://ggml.ggerganov.com/" >&2
    exit 1
  fi

  tmp_transcript=$(mktemp "${TMPDIR:-/tmp}/whisper-transcript.XXXXXX")

  # Process each file in the directory
  for file in "$input_dir"/*; do
    # Skip if not a file (also covers the unexpanded glob of an empty dir)
    [[ -f "$file" ]] || continue
    if ! process_file "$file"; then
      failures=$((failures + 1))
    fi
  done

  if (( failures > 0 )); then
    echo "Warning: $failures file(s) could not be processed." >&2
  fi
  echo "All files processed!"
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment