pirate · January 22, 2025 00:49 · maltokyo · Apr 16, 2023 · pabloab · Jul 22, 2023
diff --git a/strip_bad_filename_characters.sh b/strip_bad_filename_characters.sh
 #!/usr/bin/env bash
 # Recursively rename files and directories so that their names only contain plain ASCII characters.
 #
 # Usage: strip_bad_filename_characters.sh [folder]    (folder defaults to the current directory)
 #
 # By default it does a dry run, to actually move the files uncomment the `mv -vi ...` line.
 #
 # This is useful for cleaning up network shares that will be shared via SMB/NFS between Unix/macOS/Windows
 # where non-ASCII filenames can sometimes cause "file does not exist" errors when trying to access the files.
 #
 # Accented characters are replaced with their ASCII base letter, every other character outside
 # $allowed_characters is removed (spaces are allowed and are left untouched), e.g.:
 #    some_name_í.txt -> some_name_i.txt
 #    some_name_á.txt -> some_name_a.txt
 #    some_name_é.txt -> some_name_e.txt
 #    some_name_^.txt -> some_name_.txt
 #    some_name_🐞.txt -> some_name_.txt
 #    some_name_в.txt -> some_name_.txt
 #
 # Only the last component of each path is rewritten, and children are handled before their parent
 # directory, so every rename happens inside a directory that still exists under its current name.
 #
 # Limitation: paths are processed line by line, so names containing a newline are not supported.

 folder="${1:-.}"

 # Characters that may stay in a name, written as the inside of a bracket expression.
 # The same text is handed to grep (to find offenders) and to Python's re module (to strip them),
 # so it must mean the same thing in both: no backslash escapes, and the literal '-' stays last.
 allowed_characters="a-zA-Z0-9_. /@#~&\$+()'!-"

 # Python program: argv[1] is the allowed set, argv[2] the name to clean.
 # It is kept free of line breaks so that the indentation of this file can never leak into Python,
 # and the name is passed as an argument (not stdin) so undecodable bytes are dropped instead of crashing.
 normalize_cmd='import re, sys, unicodedata; '
 normalize_cmd+='folded = unicodedata.normalize("NFD", sys.argv[2]).encode("ascii", "ignore").decode("ascii"); '
 normalize_cmd+='print(re.sub("[^" + sys.argv[1] + "]", "", folded))'

 # Print NAME ($1) with accents folded to ASCII and all characters outside $allowed_characters removed.
 normalize_name() {
     python3 -c "$normalize_cmd" "$allowed_characters" "$1"
 }

 # Print every path below FOLDER ($1) whose last component contains a character that is not in
 # $allowed_characters, one per line, children before their parent directories.
 # Returns grep's status (1 when nothing needs renaming).
 list_bad_paths() {
     # find -depth lists the contents of a directory before the directory itself.
     # LC_ALL=C makes grep compare bytes: ranges such as a-z cannot match accented letters and
     # every byte of a multi-byte character counts as "not allowed".
     # The trailing [^/]*$ limits the match to the last path component.
     find "$1" -depth | LC_ALL=C grep "[^$allowed_characters][^/]*\$"
 }

 # Report (and, once the mv line is enabled, perform) the rename of every offending path below FOLDER ($1).
 rename_bad_paths() {
     local oldpath parent newname newpath

     # The list is read from fd 3 so that stdin stays available for the prompts of `mv -i`.
     while IFS= read -r -u 3 oldpath; do
         case "$oldpath" in
             */*) parent="${oldpath%/*}/" ;;
             *) parent="" ;;
         esac

         if ! newname=$(normalize_name "${oldpath##*/}"); then
             printf 'Skipping (could not normalize the name): %s\n' "$oldpath" >&2
             continue
         fi

         # A name made up entirely of removable characters leaves nothing to rename to.
         case "$newname" in
             "" | . | ..)
                 printf 'Skipping (no usable characters left in the name): %s\n' "$oldpath" >&2
                 continue
                 ;;
         esac

         newpath="$parent$newname"
         echo "From: $oldpath"
         echo "To:   $newpath"
         # mv -vi -- "$oldpath" "$newpath"
         echo "--------------------------------------------"
     done 3< <(list_bad_paths "$1")
 }

 rename_bad_paths "$folder"
	#!/usr/bin/env bash
	# Recursively rename files and directories so that their names only contain plain ASCII characters.
	#
	# Usage: strip_bad_filename_characters.sh [folder]    (folder defaults to the current directory)
	#
	# By default it does a dry run, to actually move the files uncomment the `mv -vi ...` line.
	#
	# This is useful for cleaning up network shares that will be shared via SMB/NFS between Unix/macOS/Windows
	# where non-ASCII filenames can sometimes cause "file does not exist" errors when trying to access the files.
	#
	# Accented characters are replaced with their ASCII base letter, every other character outside
	# $allowed_characters is removed (spaces are allowed and are left untouched), e.g.:
	# some_name_í.txt -> some_name_i.txt
	# some_name_á.txt -> some_name_a.txt
	# some_name_é.txt -> some_name_e.txt
	# some_name_^.txt -> some_name_.txt
	# some_name_🐞.txt -> some_name_.txt
	# some_name_в.txt -> some_name_.txt
	#
	# Only the last component of each path is rewritten, and children are handled before their parent
	# directory, so every rename happens inside a directory that still exists under its current name.
	#
	# Limitation: paths are processed line by line, so names containing a newline are not supported.

	folder="${1:-.}"

	# Characters that may stay in a name, written as the inside of a bracket expression.
	# The same text is handed to grep (to find offenders) and to Python's re module (to strip them),
	# so it must mean the same thing in both: no backslash escapes, and the literal '-' stays last.
	allowed_characters="a-zA-Z0-9_. /@#~&\$+()'!-"

	# Python program: argv[1] is the allowed set, argv[2] the name to clean.
	# It is kept free of line breaks so that the indentation of this file can never leak into Python,
	# and the name is passed as an argument (not stdin) so undecodable bytes are dropped instead of crashing.
	normalize_cmd='import re, sys, unicodedata; '
	normalize_cmd+='folded = unicodedata.normalize("NFD", sys.argv[2]).encode("ascii", "ignore").decode("ascii"); '
	normalize_cmd+='print(re.sub("[^" + sys.argv[1] + "]", "", folded))'

	# Print NAME ($1) with accents folded to ASCII and all characters outside $allowed_characters removed.
	normalize_name() {
		python3 -c "$normalize_cmd" "$allowed_characters" "$1"
	}

	# Print every path below FOLDER ($1) whose last component contains a character that is not in
	# $allowed_characters, one per line, children before their parent directories.
	# Returns grep's status (1 when nothing needs renaming).
	list_bad_paths() {
		# find -depth lists the contents of a directory before the directory itself.
		# LC_ALL=C makes grep compare bytes: ranges such as a-z cannot match accented letters and
		# every byte of a multi-byte character counts as "not allowed".
		# The trailing [^/]*$ limits the match to the last path component.
		find "$1" -depth | LC_ALL=C grep "[^$allowed_characters][^/]*\$"
	}

	# Report (and, once the mv line is enabled, perform) the rename of every offending path below FOLDER ($1).
	rename_bad_paths() {
		local oldpath parent newname newpath

		# The list is read from fd 3 so that stdin stays available for the prompts of `mv -i`.
		while IFS= read -r -u 3 oldpath; do
			case "$oldpath" in
				*/*) parent="${oldpath%/*}/" ;;
				*) parent="" ;;
			esac

			if ! newname=$(normalize_name "${oldpath##*/}"); then
				printf 'Skipping (could not normalize the name): %s\n' "$oldpath" >&2
				continue
			fi

			# A name made up entirely of removable characters leaves nothing to rename to.
			case "$newname" in
				"" | . | ..)
					printf 'Skipping (no usable characters left in the name): %s\n' "$oldpath" >&2
					continue
					;;
			esac

			newpath="$parent$newname"
			echo "From: $oldpath"
			echo "To: $newpath"
			# mv -vi -- "$oldpath" "$newpath"
			echo "--------------------------------------------"
		done 3< <(list_bad_paths "$1")
	}

	rename_bad_paths "$folder"