cdpath · December 28, 2024 11:01
diff --git a/convert_audio.sh b/convert_audio.sh
 #!/bin/bash
 #
 # Batch-transcribe the audio files in a directory.
 #
 # Usage: convert_audio.sh <input_directory>
 #
 # For every regular file directly inside <input_directory>:
 #   1. If it is not already a .wav file, convert it with ffmpeg to a
 #      16 kHz, mono, pcm_s16le WAV stored next to the source file.
 #   2. Run whisper-cli on the WAV (language: zh) and capture its stdout.
 #   3. Run opencc with the t2s.json config to produce <name>.txt.
 #
 # Existing .wav and .txt outputs are reused, so the script can be re-run.
 # A file that fails a step is reported on stderr and skipped; the remaining
 # files are still processed.
 #
 # Requires ffmpeg, whisper-cli and opencc on PATH, and the model file at
 # $HOME/Downloads/ggml-model-whisper-base.bin.
 set -euo pipefail

 # ${1:-} keeps `set -u` from aborting with "unbound variable" before the
 # usage message can be shown when no argument is given.
 if [ -z "${1:-}" ]; then
     echo "Usage: $0 <input_directory>" >&2
     exit 1
 fi

 input_dir="$1"
 model_path="$HOME/Downloads/ggml-model-whisper-base.bin"

 if [ ! -d "$input_dir" ]; then
     echo "Error: $input_dir is not a directory." >&2
     exit 1
 fi

 # Ensure the required tools are installed
 for tool in ffmpeg whisper-cli opencc; do
     if ! command -v "$tool" > /dev/null 2>&1; then
         echo "Error: $tool is not installed. Please install it first." >&2
         exit 1
     fi
 done

 # Ensure the model file exists
 if [ ! -f "$model_path" ]; then
     echo "Error: Model file not found at $model_path" >&2
     echo "Download at https://ggml.ggerganov.com/" >&2
     exit 1
 fi

 # Path of the in-flight intermediate transcript, if any. The EXIT trap
 # removes it so an aborted run does not leave it behind.
 tmp_txt=""
 cleanup() {
     if [ -n "$tmp_txt" ]; then
         rm -f -- "$tmp_txt"
     fi
 }
 trap cleanup EXIT

 # Process each file in the directory
 for file in "$input_dir"/*; do
     # Skip anything that is not a regular file (this also covers the
     # unexpanded glob pattern left behind by an empty directory).
     [ -f "$file" ] || continue

     # Split the path into directory, stem and extension. The extension is
     # taken from the file name only, so a dot in a directory name (for
     # example "./audio/clip") is never mistaken for an extension separator.
     dir_name="${file%/*}"
     file_name="${file##*/}"
     case "$file_name" in
         *.*)
             ext="${file_name##*.}"
             stem="${file_name%.*}"
             ;;
         *)
             ext=""
             stem="$file_name"
             ;;
     esac
     base_name="$dir_name/$stem"

     # Check if target WAV file exists
     if [ "$ext" != "wav" ]; then
         wav_file="${base_name}.wav"
         if [ ! -f "$wav_file" ]; then
             echo "Converting $file to WAV format..."
             if ! ffmpeg -i "$file" -ar 16000 -ac 1 -c:a pcm_s16le "$wav_file"; then
                 echo "Error: Failed to convert $file to WAV format." >&2
                 # Drop any partial output so the next run retries the
                 # conversion instead of treating it as already done.
                 rm -f -- "$wav_file"
                 continue
             fi
         else
             echo "WAV file $wav_file already exists. Skipping conversion."
         fi
     else
         wav_file="$file"
     fi

     # Check if transcription already exists
     output_txt="${base_name}.txt"
     if [ -f "$output_txt" ]; then
         echo "Transcription $output_txt already exists. Skipping transcription."
         continue
     fi

     # Transcribe using whisper-cli; its stdout is the raw transcript
     echo "Transcribing $wav_file..."
     tmp_txt="${base_name}.tmp.txt"
     if ! whisper-cli -f "$wav_file" -m "$model_path" -l zh > "$tmp_txt"; then
         echo "Error: Whisper transcription failed for $wav_file." >&2
         rm -f -- "$tmp_txt"
         tmp_txt=""
         continue
     fi

     # Convert transcription to simplified Chinese
     echo "Converting transcription to simplified Chinese..."
     if ! opencc -i "$tmp_txt" -o "$output_txt" -c t2s.json; then
         echo "Error: Failed to convert transcription for $wav_file to simplified Chinese." >&2
         # Remove the temporary file and any partial result; a leftover
         # .txt would make the next run skip this file.
         rm -f -- "$tmp_txt" "$output_txt"
         tmp_txt=""
         continue
     fi

     # Clean up temporary transcription file
     rm -f -- "$tmp_txt"
     tmp_txt=""
     echo "Transcription saved as $output_txt"
 done

 echo "All files processed!"
	#!/bin/bash
	#
	# Batch-transcribe the audio files in a directory.
	#
	# Usage: convert_audio.sh <input_directory>
	#
	# Each regular file directly inside <input_directory> is converted to a
	# 16 kHz mono pcm_s16le WAV with ffmpeg (unless it already is a .wav),
	# transcribed with whisper-cli (language: zh), and the transcript is passed
	# through opencc (t2s.json) to produce <name>.txt beside the source file.
	#
	# Existing .wav and .txt outputs are reused. A file that fails a step is
	# reported on stderr and skipped; processing continues with the next file.
	#
	# Requires ffmpeg, whisper-cli and opencc on PATH, and the model file at
	# $HOME/Downloads/ggml-model-whisper-base.bin.
	set -euo pipefail

	# With `set -u`, a bare "$1" aborts with "unbound variable" when the
	# argument is missing; ${1:-} lets the usage message be printed instead.
	if [ -z "${1:-}" ]; then
	    echo "Usage: $0 <input_directory>" >&2
	    exit 1
	fi

	input_dir="$1"
	model_path="$HOME/Downloads/ggml-model-whisper-base.bin"

	if [ ! -d "$input_dir" ]; then
	    echo "Error: $input_dir is not a directory." >&2
	    exit 1
	fi

	# Ensure the required tools are installed
	for tool in ffmpeg whisper-cli opencc; do
	    if ! command -v "$tool" > /dev/null 2>&1; then
	        echo "Error: $tool is not installed. Please install it first." >&2
	        exit 1
	    fi
	done

	# Ensure the model file exists
	if [ ! -f "$model_path" ]; then
	    echo "Error: Model file not found at $model_path" >&2
	    echo "Download at https://ggml.ggerganov.com/" >&2
	    exit 1
	fi

	# Path of the in-flight intermediate transcript, if any. The EXIT trap
	# removes it so an aborted run does not leave it behind.
	tmp_txt=""
	cleanup() {
	    if [ -n "$tmp_txt" ]; then
	        rm -f -- "$tmp_txt"
	    fi
	}
	trap cleanup EXIT

	# Process each file in the directory
	for file in "$input_dir"/*; do
	    # Skip anything that is not a regular file (this also covers the
	    # unexpanded glob pattern left behind by an empty directory).
	    [ -f "$file" ] || continue

	    # Derive stem and extension from the file name alone, so that a dot
	    # in the directory part of the path is not treated as an extension.
	    dir_name="${file%/*}"
	    file_name="${file##*/}"
	    case "$file_name" in
	        *.*)
	            ext="${file_name##*.}"
	            stem="${file_name%.*}"
	            ;;
	        *)
	            ext=""
	            stem="$file_name"
	            ;;
	    esac
	    base_name="$dir_name/$stem"

	    # Check if target WAV file exists
	    if [ "$ext" != "wav" ]; then
	        wav_file="${base_name}.wav"
	        if [ ! -f "$wav_file" ]; then
	            echo "Converting $file to WAV format..."
	            if ! ffmpeg -i "$file" -ar 16000 -ac 1 -c:a pcm_s16le "$wav_file"; then
	                echo "Error: Failed to convert $file to WAV format." >&2
	                # A partial WAV would be mistaken for a finished one on
	                # the next run, so remove it.
	                rm -f -- "$wav_file"
	                continue
	            fi
	        else
	            echo "WAV file $wav_file already exists. Skipping conversion."
	        fi
	    else
	        wav_file="$file"
	    fi

	    # Check if transcription already exists
	    output_txt="${base_name}.txt"
	    if [ -f "$output_txt" ]; then
	        echo "Transcription $output_txt already exists. Skipping transcription."
	        continue
	    fi

	    # Transcribe using whisper-cli; its stdout is the raw transcript
	    echo "Transcribing $wav_file..."
	    tmp_txt="${base_name}.tmp.txt"
	    if ! whisper-cli -f "$wav_file" -m "$model_path" -l zh > "$tmp_txt"; then
	        echo "Error: Whisper transcription failed for $wav_file." >&2
	        rm -f -- "$tmp_txt"
	        tmp_txt=""
	        continue
	    fi

	    # Convert transcription to simplified Chinese
	    echo "Converting transcription to simplified Chinese..."
	    if ! opencc -i "$tmp_txt" -o "$output_txt" -c t2s.json; then
	        echo "Error: Failed to convert transcription for $wav_file to simplified Chinese." >&2
	        # Remove the temporary file and any partial result; a leftover
	        # .txt would make the next run skip this file.
	        rm -f -- "$tmp_txt" "$output_txt"
	        tmp_txt=""
	        continue
	    fi

	    # Clean up temporary transcription file
	    rm -f -- "$tmp_txt"
	    tmp_txt=""
	    echo "Transcription saved as $output_txt"
	done

	echo "All files processed!"