-
-
Save arlm/ca2143e4089e41fdd2c56dcdf2f44873 to your computer and use it in GitHub Desktop.
Bash script to change the encoding of source files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Convert every file with a given extension under a source tree to UTF-8,
# writing the converted copies under a separate destination root.
#
# I wrote this script to change the encoding of a large
# source code repository with inconsistent file encoding
#
# Usage: <script> SOURCE_DIR EXTENSION DESTINATION_ROOT
#   SOURCE_DIR        path searched recursively with find(1)
#   EXTENSION         file extension without the dot; matched
#                     case-insensitively as "*.EXTENSION"
#   DESTINATION_ROOT  directory receiving the converted copies; each file is
#                     written to DESTINATION_ROOT/<path as printed by find>
#
# Exit status: 0 when every matched file was converted, 1 when the source
# path is missing or at least one file failed, 2 on a usage error.

set -u

# Print the usage message to stderr and abort with a non-zero status.
usage() {
  echo "3 args needed SOURCE_DIR, FILE_MASK, DESTINATION_ROOT" >&2
  exit 2
}

# Print the character encoding of file $1 as reported by file(1).
detect_encoding() {
  local encoding
  # --mime-encoding prints only the charset, so no "charset=" prefix has to
  # be stripped, and it avoids the short -i flag whose meaning differs
  # between GNU and BSD/macOS builds of file(1).
  encoding=$(file -b --mime-encoding -- "$1") || return 1
  # The windows CP1252 encoded files show up as unknown-8bit
  if [[ "$encoding" == "unknown-8bit" ]]; then
    encoding='CP1252'
  fi
  printf '%s\n' "$encoding"
}

# Convert file $1 to UTF-8 and store it at "$2/$1", creating any missing
# parent directories. Returns non-zero if any step fails.
convert_file() {
  local source_file=$1
  local destination_root=$2
  local encoding destination

  if ! encoding=$(detect_encoding "$source_file"); then
    echo "could not detect the encoding of $source_file" >&2
    return 1
  fi

  destination="$destination_root/$source_file"
  # $destination always contains a slash, so stripping the last path
  # component yields its parent directory without spawning dirname(1).
  mkdir -p -- "${destination%/*}" || return 1

  echo "converting $source_file from $encoding to $destination"
  # Feeding the file through stdin keeps odd file names from being parsed
  # as iconv options.
  if ! iconv -f "$encoding" -t UTF-8 < "$source_file" > "$destination"; then
    echo "failed to convert $source_file from $encoding" >&2
    # Do not leave a truncated or partial file that looks like valid output.
    rm -f -- "$destination"
    return 1
  fi
  # echo ">> $destination is encoded: $(file -bi "$destination")"
}

main() {
  # Validate the argument count before touching any positional parameter.
  if [[ $# -ne 3 ]]; then
    usage
  fi

  local source_dir=$1
  local file_mask="*.$2"
  local destination_root=$3
  local -a source_files=()
  local source_file
  local failures=0

  if [[ ! -e "$source_dir" ]]; then
    echo "source path does not exist: $source_dir" >&2
    return 1
  fi

  # Collect the complete, NUL-delimited file list before converting anything:
  # NUL delimiters keep names with spaces, globs or newlines intact, and
  # finishing the scan first means files written below DESTINATION_ROOT are
  # never picked up by this run when it lives inside SOURCE_DIR.
  # Directories whose names match the mask are skipped.
  while IFS= read -r -d '' source_file; do
    source_files+=("$source_file")
  done < <(find "$source_dir" ! -type d -iname "$file_mask" -print0)

  # Nothing matched; also avoids expanding an empty array under `set -u`
  # on older bash versions.
  if [[ ${#source_files[@]} -eq 0 ]]; then
    return 0
  fi

  for source_file in "${source_files[@]}"; do
    convert_file "$source_file" "$destination_root" \
      || failures=$((failures + 1))
  done

  if [[ $failures -gt 0 ]]; then
    echo "$failures file(s) could not be converted" >&2
    return 1
  fi
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment