Created
April 11, 2023 14:14
-
-
Save stzsch/5328e5bfa9808c6a099cb8105d61d814 to your computer and use it in GitHub Desktop.
Split audio extracted with yt-dlp into separate tracks, using YouTube timestamps saved as a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

#######################################
# Turn an arbitrary string into a safe file-name fragment.
# Every character that is not a letter, a digit, "_", "." or "-" is
# replaced by an underscore; the length of the string is preserved.
# Arguments: $1 - the string to sanitize
# Outputs:   the sanitized string followed by a newline, on stdout
#######################################
sanitize_filename() {
  local filename="$1"

  # Replace every character outside the allowed set with "_".
  filename="${filename//[^a-zA-Z0-9_.-]/_}"

  # printf instead of echo: a sanitized value such as "-n" or "-e" is still
  # made only of allowed characters, and echo would treat it as an option
  # and print nothing.
  printf '%s\n' "$filename"
}
#######################################
# Make sure a path is free to be written to.
# If the path does not exist nothing happens. If it exists it is deleted,
# either unconditionally or after the user confirms on stdin.
# Arguments:
#   $1 - path to check
#   $2 - "true" to delete an existing file without asking; any other
#        value makes the function prompt for confirmation
# Outputs:   a "Skipping file" notice on stdout when the user declines
# Returns:   0 if the path is free afterwards (absent or removed),
#            1 if the user declined or the file could not be removed
#######################################
check_file_exists() {
  local filename="$1"
  local always_override="$2"
  # Keep the answer local so it does not leak into the caller's scope.
  local overwrite=""

  # Nothing in the way: the caller may write the file.
  if [[ ! -e "$filename" ]]; then
    return 0
  fi

  if [[ "$always_override" != "true" ]]; then
    # -r keeps backslashes in the answer literal. At end-of-input the
    # answer stays empty, which is treated as "no".
    read -r -p "File '$filename' already exists. Do you want to override it? (y/n): " overwrite
    if [[ "$overwrite" != "y" && "$overwrite" != "Y" ]]; then
      echo "Skipping file '$filename'."
      return 1
    fi
  fi

  # "--" stops option parsing so a name starting with "-" is removed rather
  # than read as an rm option; a failed removal is reported to the caller.
  rm -- "$filename" || return 1
  return 0
}
# ---------------------------------------------------------------------------
# Argument parsing and input validation.
# Usage: <script> <input_audio_file> <input_csv_file> [--override]
# Sets: audio_file, csv_file, always_override ("true" or "false").
# Every failure prints a diagnostic on stderr and exits with status 1.
# ---------------------------------------------------------------------------

# Both input files are mandatory.
if [[ "$#" -lt 2 ]]; then
  echo "Usage: $0 <input_audio_file> <input_csv_file> [--override]" >&2
  exit 1
fi

# Input audio file and CSV file
audio_file="$1"
csv_file="$2"

# --override is honoured only as the third and last argument.
always_override="false"
if [[ "$#" -eq 3 && "$3" == "--override" ]]; then
  always_override="true"
fi

# The audio input must exist ...
if [[ ! -e "$audio_file" ]]; then
  echo "Error: Input audio file '$audio_file' does not exist." >&2
  exit 1
fi

# ... and be reported by file(1) with an audio/* MIME type.
# "--" keeps a path that starts with "-" from being parsed as an option.
file_type=$(file -b --mime-type -- "$audio_file")
if [[ ! "$file_type" =~ ^audio/ ]]; then
  echo "Error: Input audio file '$audio_file' is not a valid audio file." >&2
  exit 1
fi

# The CSV input must exist ...
if [[ ! -e "$csv_file" ]]; then
  echo "Error: Input CSV file '$csv_file' does not exist." >&2
  exit 1
fi

# ... and look like text to file(1).
# NOTE(review): text/plain is accepted next to text/csv on the assumption
# that file(1) reports some valid CSVs as text/plain (older releases, rows
# with differing comma counts) - confirm against the file(1) version in use.
file_type=$(file -b --mime-type -- "$csv_file")
case "$file_type" in
  text/csv | text/plain) ;;
  *)
    echo "Error: Input CSV file '$csv_file' is not a valid CSV file." >&2
    exit 1
    ;;
esac
# ---------------------------------------------------------------------------
# Split the audio file into one segment per CSV row.
# Each row is "<start_timestamp>,<title>". A segment runs from its own
# timestamp to the timestamp of the next row; the last segment runs to the
# end of the input. Output files are named "NN_<sanitized title>.<ext>",
# where NN is the 1-based row number.
# Reads: audio_file, csv_file, always_override.
# ---------------------------------------------------------------------------

# Counter for prefixing title with a number
counter=1

# Extension of the input (text after the last "."), reused for every segment.
output_extension="${audio_file##*.}"

# Read the CSV rows into an array. IFS= keeps each row verbatim, and the
# "|| [[ -n ... ]]" part keeps a final row that has no trailing newline.
csv_lines=()
while IFS= read -r line || [[ -n "$line" ]]; do
  # Drop a trailing carriage return so CRLF files do not leak "\r" into
  # titles or timestamps.
  line="${line%$'\r'}"
  # Blank rows carry no timestamp and would only produce a failing ffmpeg call.
  [[ -n "$line" ]] || continue
  csv_lines+=("$line")
done < "$csv_file"

total=${#csv_lines[@]}
for ((i = 0; i < total; i++)); do
  line="${csv_lines[i]}"
  # The row number doubles as the file-name prefix.
  counter=$((i + 1))

  # First field is the start timestamp, second field is the title. A row
  # without a comma yields the whole row for both, as cut(1) would.
  start_timestamp="${line%%,*}"
  title="${line#*,}"
  title="${title%%,*}"

  # Sanitize the title to use as a filename
  sanitized_title=$(sanitize_filename "$title")
  output_file=$(printf "%02d_%s.%s" "$counter" "$sanitized_title" "$output_extension")

  # Honour --override: pass the parsed flag rather than a fixed "false".
  if ! check_file_exists "$output_file" "$always_override"; then
    continue
  fi

  # Build the ffmpeg command as an array so every argument stays one word.
  ffmpeg_args=(-i "$audio_file" -ss "$start_timestamp")
  if ((i < total - 1)); then
    # The segment ends where the next row starts.
    next_line="${csv_lines[i + 1]}"
    ffmpeg_args+=(-to "${next_line%%,*}")
  fi
  # For the last row no -to is given, so ffmpeg copies up to the end of the
  # input; no need to scrape the duration out of ffmpeg's banner.
  ffmpeg_args+=(-c copy "$output_file")

  # Show the command, then run it and report the real outcome.
  echo ffmpeg "${ffmpeg_args[@]}"
  if ffmpeg "${ffmpeg_args[@]}"; then
    echo "Segment $output_file created successfully."
  else
    echo "Error: ffmpeg failed to create segment '$output_file'." >&2
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment