Skip to content

Instantly share code, notes, and snippets.

@Aldo-f
Created October 19, 2024 19:11
Show Gist options
  • Save Aldo-f/eefbc893dd7f2403953e7cd01e3c8848 to your computer and use it in GitHub Desktop.
Save Aldo-f/eefbc893dd7f2403953e7cd01e3c8848 to your computer and use it in GitHub Desktop.
Add extension to files where extension is missing
#!/bin/bash
#######################################
# Print the usage text for this script and terminate.
# Outputs: usage, parameter list and an example invocation on stdout
# Exits:   always, with status 1
#######################################
display_help() {
  # One array element per output line; empty elements produce blank lines.
  local -a help_lines=(
    "Usage: $0 <source_directory> [destination_directory] [-overwrite] [-start <start_index>] [-limit <max_files>]"
    ""
    "Parameters:"
    " source_directory Directory containing files without extensions (use '.' for current directory)"
    " destination_directory Optional: Directory to save files with added extensions (defaults to source_directory)"
    " -overwrite Optional flag to overwrite original files"
    " -start <index> Optional starting file index (default is 0)"
    " -limit <max_files> Optional maximum number of files to process (default is all)"
    ""
    "Example:"
    " $0 ./source_dir ./destination_dir -overwrite -start 5 -limit 10"
  )
  printf '%s\n' "${help_lines[@]}"
  exit 1
}
# Default values
overwrite="no"
start=0
limit=999999 # Process all files by default
# Start the directory settings out empty. The argument parser decides which
# positional argument is the source by testing whether source_dir is still
# empty, so a value inherited from the caller's environment would otherwise be
# mistaken for an argument that was already given.
source_dir=""
destination_dir=""
# Check if at least the source directory is provided
if [[ $# -lt 1 ]]; then
  display_help
fi
#######################################
# Parse command-line arguments into the script's option variables.
# Globals:
#   overwrite, start, limit, source_dir, destination_dir (written)
# Arguments:
#   the script's positional parameters ("$@")
# Returns:
#   0 on success. On invalid input it prints a message and calls
#   display_help; the `return 1` after each call only matters if
#   display_help does not terminate the script itself.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        # Without this arm the flag would be taken as a directory name.
        display_help
        return 1
        ;;
      -overwrite)
        overwrite="overwrite"
        shift
        ;;
      -start | -limit)
        # A missing value would make `shift 2` fail without shifting, so the
        # loop would never end; a non-numeric value would break the integer
        # comparisons made later on. Reject both up front.
        if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
          echo "Option $1 requires a non-negative integer value" >&2
          display_help
          return 1
        fi
        # Force base 10 so a value such as 010 is not read as octal.
        if [[ "$1" == "-start" ]]; then
          start=$((10#$2))
        else
          limit=$((10#$2))
        fi
        shift 2
        ;;
      *)
        # First positional argument is the source, second the destination.
        if [[ -z "$source_dir" ]]; then
          source_dir="$1"
        elif [[ -z "$destination_dir" ]]; then
          destination_dir="$1"
        else
          echo "Unexpected argument: $1"
          display_help
          return 1
        fi
        shift
        ;;
    esac
  done
  # Flags alone are not enough: an empty source would later be resolved to
  # the current directory and processed without the user having asked for it.
  if [[ -z "$source_dir" ]]; then
    echo "Missing source directory" >&2
    display_help
    return 1
  fi
}
parse_args "$@"
#######################################
# Turn source_dir into an absolute path, default destination_dir to it and
# make sure both directories are usable.
# Globals:
#   source_dir, destination_dir (read and written)
# Returns:
#   0 on success; 1 (with a message on stderr) when the source directory does
#   not exist or the destination directory cannot be created.
#######################################
resolve_directories() {
  # Get absolute source directory if it's relative
  if [[ "$source_dir" != /* ]]; then
    source_dir="$(pwd)/$source_dir"
  fi
  # Check the source before anything is created: when the destination
  # defaults to the source, mkdir -p below would otherwise create a
  # mistyped source directory and hide the mistake.
  if [[ ! -d "$source_dir" ]]; then
    echo "Source directory does not exist: $source_dir" >&2
    return 1
  fi
  # Set destination directory to the source directory if not provided
  if [[ -z "$destination_dir" ]]; then
    destination_dir="$source_dir"
  fi
  # Create the destination directory if it doesn't exist
  if ! mkdir -p -- "$destination_dir"; then
    echo "Cannot create destination directory: $destination_dir" >&2
    return 1
  fi
}
resolve_directories || exit 1
#######################################
# Map a file-type description to a file extension.
# Arguments:
#   $1 - description text, as printed by `file -b`
# Outputs:
#   the extension (without a leading dot) on stdout, or "unknown" when no
#   pattern matches
# Notes:
#   Patterns are matched case-sensitively from top to bottom and the first
#   match wins, so specific patterns must stay above general ones
#   (e.g. *HTML* above *ASCII*, *Matroska* above *Video*).
#######################################
get_extension_by_description() {
  local file_type="$1"
  case "$file_type" in
    *PDF*) echo "pdf" ;;
    *EPUB*) echo "epub" ;;
    *HTML*) echo "html" ;;
    *ASCII*) echo "txt" ;;
    *Zip*) echo "zip" ;;
    *RAR*) echo "rar" ;;
    *JPEG*) echo "jpg" ;;
    *PNG*) echo "png" ;;
    *DjVu*) echo "djvu" ;;
    *Mobipocket\ E-book*) echo "mobi" ;;
    *Microsoft\ Word*) echo "docx" ;;
    *OpenDocument\ Text*) echo "odt" ;;
    *Microsoft\ Excel*) echo "xlsx" ;;
    *OpenDocument\ Spreadsheet*) echo "ods" ;;
    # file(1) describes MP3 audio as "MPEG ADTS, layer III, ..." without the
    # string "MP3"; match that wording here, before the generic *MPEG* arm
    # would label it as MPEG video.
    *MP3* | *layer\ III*) echo "mp3" ;;
    *MPEG*) echo "mpg" ;;
    *Matroska*) echo "mkv" ;;
    *Video*) echo "mp4" ;;
    *Audio*) echo "wav" ;;
    *) echo "unknown" ;;
  esac
}
# Remember when the run began (seconds since the epoch)
start_time="$(date +%s)"
# Failure descriptions gathered during the run, and the file counter
errors=()
count=0
# Pick the verb for the banner: "Overwriting" when the overwrite flag is set,
# "Copying" in every other case
case "$overwrite" in
  overwrite) action="Overwriting" ;;
  *) action="Copying" ;;
esac
# Announce what is about to happen and where
echo "$action files from $source_dir to $destination_dir"
#######################################
# Copy or move every extension-less regular file of a directory to a
# destination directory, appending an extension detected with file(1).
# Globals:
#   count  (written)     - number of files copied/moved successfully
#   errors (appended to) - one description per file that could not be handled
# Arguments:
#   $1 - source directory
#   $2 - destination directory
#   $3 - "overwrite" to move the files; any other value copies them
#   $4 - number of eligible files to skip first (default 0)
#   $5 - maximum number of files to copy/move (default 999999)
# Returns:
#   1 if the source directory does not exist, 0 otherwise. Per-file failures
#   do not stop the run; they are recorded in errors.
#######################################
add_missing_extensions() {
  local src_dir="$1" dst_dir="$2" mode="$3"
  local skip="${4:-0}" max_files="${5:-999999}"
  local file filename extension file_type new_file
  local seen=0 # eligible files skipped so far because of the start index
  local -a transfer=(cp --)
  if [[ "$mode" == "overwrite" ]]; then
    transfer=(mv --)
  fi

  count=0
  if [[ ! -d "$src_dir" ]]; then
    echo "Source directory not found: $src_dir" >&2
    return 1
  fi

  for file in "$src_dir"/*; do
    # Regular files only. This also drops the literal pattern that an
    # unmatched glob leaves behind when the directory is empty.
    [[ -f "$file" ]] || continue

    # Look for a dot in the file name itself. Testing the whole path would
    # skip every file as soon as any directory in the path contains a dot,
    # which includes a source given as '.'.
    filename="${file##*/}"
    if [[ "$filename" == *.* ]]; then
      continue
    fi

    # Skip the first $skip eligible files. They are counted separately so
    # that count only ever reports files that were actually handled.
    if ((seen < skip)); then
      seen=$((seen + 1))
      continue
    fi

    # Stop once the requested number of files has been handled.
    if ((count >= max_files)); then
      break
    fi

    # First choice: file's own extension list ("jpeg/jpg/jpe"); keep the
    # first alternative.
    extension=$(file -b --extension -- "$file")
    extension="${extension%%/*}"

    # "???", an empty answer or anything that does not look like an
    # extension falls back to the description-based lookup.
    if [[ ! "$extension" =~ ^[[:alnum:]_+-]+$ ]]; then
      file_type=$(file -b -- "$file")
      extension=$(get_extension_by_description "$file_type")
    fi

    # Record files whose type could not be determined and move on.
    if [[ -z "$extension" || "$extension" == "unknown" ]]; then
      errors+=("$file (Type: $file_type)")
      continue
    fi

    new_file="$dst_dir/$filename.$extension"
    # A failed copy/move is recorded instead of being counted as a success.
    if ! "${transfer[@]}" "$file" "$new_file"; then
      errors+=("$file (Failed to write: $new_file)")
      continue
    fi
    count=$((count + 1))
  done
  return 0
}
add_missing_extensions "$source_dir" "$destination_dir" "$overwrite" "$start" "$limit"
# Stop the clock and derive the elapsed time in whole seconds
end_time="$(date +%s)"
duration=$((end_time - start_time))
# Report the file counter and the elapsed time
printf '%s\n' "Processed $count files in $duration seconds."
# Nothing more to print unless failures were recorded; otherwise list them,
# one per line, under a heading
[[ ${#errors[@]} -eq 0 ]] || {
  printf '%s\n' "The following files could not be processed:"
  for error in "${errors[@]}"; do
    echo "$error"
  done
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment