Skip to content

Instantly share code, notes, and snippets.

@stzsch
Created April 11, 2023 14:14
Show Gist options
  • Save stzsch/5328e5bfa9808c6a099cb8105d61d814 to your computer and use it in GitHub Desktop.
Save stzsch/5328e5bfa9808c6a099cb8105d61d814 to your computer and use it in GitHub Desktop.
Split audio extracted with yt-dlp using YouTube timestamps saved as CSV
#!/bin/bash
#######################################
# Sanitize a string so it can be used as a filename.
# Every character that is not matched by the set [a-zA-Z0-9_.-] is replaced
# with an underscore.
# Arguments: $1 - the raw string (e.g. a track title)
# Outputs:   the sanitized string on stdout, followed by a newline
#######################################
sanitize_filename() {
  local filename="$1"
  # Replace invalid characters with underscore
  filename="${filename//[^a-zA-Z0-9_.-]/_}"
  # printf rather than echo: echo would interpret results such as "-n" or
  # "-e" as its own options and print nothing useful.
  printf '%s\n' "$filename"
}
#######################################
# Make sure a path is free to be written to.
# If the path already exists it is deleted, either unconditionally or after
# the user confirms with "y"/"Y" on stdin.
# Arguments:
#   $1 - path to check
#   $2 - "true" to delete an existing file without asking; any other value
#        makes the function prompt the user
# Outputs:   "Skipping file ..." on stdout when the user declines
# Returns:   0 if the path does not exist or was removed,
#            1 if the user declined,
#            rm's non-zero status if the removal failed
#######################################
check_file_exists() {
  local filename="$1"
  local always_override="$2"
  local overwrite=""

  # Nothing in the way: the caller may write the file.
  [[ -e "$filename" ]] || return 0

  if [[ "$always_override" != "true" ]]; then
    # -r keeps backslashes in the answer literal. If read hits EOF the answer
    # stays empty, which counts as "no".
    read -r -p "File '$filename' already exists. Do you want to override it? (y/n): " overwrite
    if [[ "$overwrite" != "y" && "$overwrite" != "Y" ]]; then
      echo "Skipping file '$filename'."
      return 1
    fi
  fi

  # "--" stops option parsing so a name starting with "-" is treated as a
  # file. rm's status is the function's status, so a failed removal is not
  # reported to the caller as success.
  rm -- "$filename"
}
# ---------------------------------------------------------------------------
# Command-line validation.
# Expects: <input_audio_file> <input_csv_file> [--override]
# Sets the globals audio_file, csv_file and always_override ("true"/"false").
# Every failure prints a diagnostic on stderr and exits with status 1.
# ---------------------------------------------------------------------------

# Check if input files are provided
if [[ "$#" -lt 2 ]]; then
  echo "Usage: $0 <input_audio_file> <input_csv_file> [--override]" >&2
  exit 1
fi

# Input audio file and CSV file
audio_file="$1"
csv_file="$2"

# Record whether --override was given as the third argument
always_override="false"
if [[ "$#" -ge 3 && "$3" == "--override" ]]; then
  always_override="true"
fi

# Check if input audio file exists
if [[ ! -e "$audio_file" ]]; then
  echo "Error: Input audio file '$audio_file' does not exist." >&2
  exit 1
fi

# Check if input audio file is a valid audio file, based on the MIME type
# that file(1) detects from the file's content.
# NOTE(review): audio-only files in a video container (e.g. some .webm
# downloads) may be reported as video/*; confirm whether those should pass.
file_type=$(file -b --mime-type -- "$audio_file")
if [[ "$file_type" != audio/* ]]; then
  echo "Error: Input audio file '$audio_file' is not a valid audio file." >&2
  exit 1
fi

# Check if input CSV file exists
if [[ ! -e "$csv_file" ]]; then
  echo "Error: Input CSV file '$csv_file' does not exist." >&2
  exit 1
fi

# Check if input CSV file looks like a CSV file.
# file(1) does not always classify CSV content as text/csv; it can report
# text/plain for it, so plain text is accepted as well.
file_type=$(file -b --mime-type -- "$csv_file")
case "$file_type" in
  text/csv | text/plain | application/csv) ;;
  *)
    echo "Error: Input CSV file '$csv_file' is not a valid CSV file." >&2
    exit 1
    ;;
esac
# ---------------------------------------------------------------------------
# Split the audio file into one segment per CSV row.
# Each row is read as "<start_timestamp>,<title>". A segment runs from its own
# start timestamp to the start timestamp of the following row; the last
# segment runs to the end of the input.
# Output files are named "<NN>_<sanitized title>.<ext>", where NN is the
# 1-based row number and <ext> is taken from the input audio file name.
# ---------------------------------------------------------------------------

# Audio file extension
output_extension="${audio_file##*.}"

# Read CSV file into array.
# IFS= keeps every line exactly as written, and the "|| [[ -n ... ]]" test
# picks up a final line that has no trailing newline.
csv_lines=()
while IFS= read -r line || [[ -n "$line" ]]; do
  line="${line%$'\r'}"         # tolerate CRLF line endings
  [[ -n "$line" ]] || continue # blank lines carry no timestamp; ignore them
  csv_lines+=("$line")
done < "$csv_file"

total_lines=${#csv_lines[@]}

# Iterate over the csv_lines array
for ((i = 0; i < total_lines; i++)); do
  line="${csv_lines[i]}"

  # Field 1 is the start timestamp, field 2 the title. A line without any
  # comma is used as both.
  start_timestamp="${line%%,*}"
  if [[ "$line" == *,* ]]; then
    title="${line#*,}"
    title="${title%%,*}"
  else
    title="$line"
  fi

  # Sanitize the title to use as a filename
  sanitized_title=$(sanitize_filename "$title")

  # Prefix the title with the 1-based row number
  counter=$((i + 1))
  output_file=$(printf "%02d_%s.%s" "$counter" "$sanitized_title" "$output_extension")

  # Skip this segment if the output exists and may not be replaced.
  # Passing $always_override is what makes the --override flag take effect.
  if ! check_file_exists "$output_file" "$always_override"; then
    continue
  fi

  # Build the ffmpeg command as an array so every argument stays one word.
  ffmpeg_args=(-i "$audio_file" -ss "$start_timestamp")
  if ((i < total_lines - 1)); then
    # The segment ends where the next row starts.
    next_line="${csv_lines[i + 1]}"
    ffmpeg_args+=(-to "${next_line%%,*}")
  fi
  # For the last row no -to is given, so ffmpeg copies up to the end of input.
  ffmpeg_args+=(-c copy "$output_file")

  # Log the exact command, shell-quoted, before running it.
  printf 'ffmpeg'
  printf ' %q' "${ffmpeg_args[@]}"
  printf '\n'

  # Extract the audio segment using ffmpeg; only report success if it worked.
  if ffmpeg "${ffmpeg_args[@]}"; then
    echo "Segment $output_file created successfully."
  else
    echo "Error: ffmpeg failed to create segment '$output_file'." >&2
  fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment