Created
June 8, 2022 15:50
-
-
Save undergroundwires/7b948b6d1e6e764869b9c067e21068c9 to your computer and use it in GitHub Desktop.
Fix encoding of files to target format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Re-encodes every *.$FILE_EXTENSION file below a directory to TARGET_ENCODING,
# replacing each file in place.
#
# Usage:    <script> [DIRECTORY]     (DIRECTORY defaults to the current one)
# Requires: bash 4+, find, file, iconv, mktemp. GNU `chmod --reference` is
#           used when available to keep the original permission bits.
# Exit:     0 if every file was converted (or needed no conversion),
#           non-zero if at least one file could not be processed.

readonly TARGET_ENCODING='UTF-8' # unicode
readonly FILE_EXTENSION='srt'
# readonly TARGET_ENCODING='UTF-16'

# Path of the temp file belonging to the conversion currently in flight.
# The EXIT trap removes it so an aborted run leaves nothing next to the
# subtitle files.
pending_temp_file=''

cleanup() {
  if [[ -n "$pending_temp_file" ]]; then
    rm -f -- "$pending_temp_file"
  fi
}
trap cleanup EXIT

#######################################
# Extracts the charset from a `file --mime` description.
# Arguments: $1 - description, e.g. "text/plain; charset=iso-8859-1"
# Outputs:   the charset ("iso-8859-1") on stdout
# Returns:   1 when the description carries no " charset=" part. file(1)
#            exits 0 even when it cannot open a file, so this check is what
#            catches its error text before it reaches iconv.
#######################################
charset_from_mime() {
  local mime=$1
  [[ "$mime" == *' charset='* ]] || return 1
  # Strip the longest prefix ending in " charset=", like a greedy regex.
  printf '%s\n' "${mime##* charset=}"
}

#######################################
# Converts one file to TARGET_ENCODING in place.
# Globals:   TARGET_ENCODING (read), pending_temp_file (written)
# Arguments: $1 - path of the file to convert
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success or when already in the target encoding, 1 on error
#######################################
convert_file() {
  local path=$1
  local mime_name file_format temp_file

  echo "Processing \"$path\"."
  if ! mime_name=$(file --brief --mime -- "$path"); then
    >&2 echo '❌ Could not read mime name.'
    return 1
  fi
  echo "Mime: \"$mime_name\"."
  if ! file_format=$(charset_from_mime "$mime_name"); then
    >&2 echo '❌ Could not extract file format.'
    return 1
  fi
  echo "File format: \"$file_format\"."

  # Charset names are case-insensitive; do not rewrite a file that is
  # already in the requested encoding.
  if [[ "${file_format,,}" == "${TARGET_ENCODING,,}" ]]; then
    echo "Already \"$TARGET_ENCODING\", nothing to do."
    return 0
  fi

  # Create the temp file beside the target so the final mv is a rename on
  # the same filesystem. The random suffix keeps it from matching
  # "*.$FILE_EXTENSION".
  if ! temp_file=$(mktemp -- "${path}.XXXXXX"); then
    >&2 echo '❌ Could not create temporary file.'
    return 1
  fi
  pending_temp_file=$temp_file

  if ! iconv \
    -f "$file_format" \
    -t "$TARGET_ENCODING" \
    -- "$path" >"$temp_file"; then
    >&2 echo '❌ Could not fix encoding.'
    rm -f -- "$temp_file"
    pending_temp_file=''
    return 1
  fi

  # mktemp creates the file with mode 0600; copy the original's mode so the
  # rename does not lock out other readers. Best effort on purpose:
  # --reference is a GNU extension and its absence must not fail the run.
  chmod --reference="$path" -- "$temp_file" 2>/dev/null || true

  if ! mv -f -- "$temp_file" "$path"; then
    >&2 echo '❌ Could not replace original file.'
    rm -f -- "$temp_file"
    pending_temp_file=''
    return 1
  fi
  pending_temp_file=''
}

#######################################
# Finds all matching files below a directory and converts each one.
# Globals:   FILE_EXTENSION (read)
# Arguments: $1 - directory to search (optional, default ".")
# Returns:   0 when no file failed, 1 otherwise
#######################################
main() {
  local search_dir=${1:-.}
  local -a files=()
  local path
  local failures=0

  # NUL-delimited so paths with spaces or newlines stay intact. The list is
  # collected before converting so replaced files cannot show up again in a
  # directory listing that is still being read.
  while IFS= read -r -d '' path; do
    files+=("$path")
  done < <(find "$search_dir" -type f -iname "*.$FILE_EXTENSION" -print0)

  for path in "${files[@]}"; do
    # A failing file is reported and counted; the others are still processed.
    convert_file "$path" || failures=$((failures + 1))
  done

  ((failures == 0))
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment