Created
September 29, 2019 09:14
-
-
Save cocobear/26c2f890981709284fee9047179b9706 to your computer and use it in GitHub Desktop.
批量转换非UTF8的文件到UTF8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Batch-convert every file under NON_UTF_FILE_DIR whose name matches
# PATTERN_FILE_NAME to UTF-8, in place.
#
# For each matching file the encoding is guessed with `chardetect`; files
# already reported as utf-8 are left untouched, everything else is re-encoded
# with `iconv` into "<file>.utf8" and then moved over the original.
#
# Requirements: bash, find (with -print0), awk, iconv, chardetect.
# Usage: run from the directory that contains NON_UTF_FILE_DIR.

set -e # Exit script immediately on first error.
#set -x # Print commands and their arguments as they are executed.

NON_UTF_FILE_DIR="wiki"
PATTERN_FILE_NAME="*.md"

# Fail early with a clear message: inside the detection pipeline below the
# exit status is awk's, so a missing chardetect would otherwise go unnoticed.
for tool in find awk iconv chardetect; do
  if ! command -v "$tool" > /dev/null; then
    echo "error: required command not found: $tool" >&2
    exit 1
  fi
done

# find runs inside a process substitution, where `set -e` cannot see its exit
# status, so check the start directory explicitly.
if [[ ! -d "$NON_UTF_FILE_DIR" ]]; then
  echo "error: directory not found: $NON_UTF_FILE_DIR" >&2
  exit 1
fi

# Paths arrive NUL-delimited on fd 3 so that names containing spaces, globs or
# newlines survive intact, and so that commands in the loop body cannot
# accidentally consume the list from stdin.
while IFS= read -r -d '' -u 3 file; do
  # Feed the file on stdin: the report then starts with a fixed label instead
  # of the path, so field 2 is the charset even when the path has spaces.
  CURRENT_CHARSET="$(chardetect < "$file" | awk '{print $2}')"

  case "$CURRENT_CHARSET" in
    utf-8)
      continue
      ;;
    "" | no | None)
      # No usable guess (e.g. an empty file): there is nothing to convert from.
      echo "warning: could not detect charset, skipping $file" >&2
      continue
      ;;
    GB2312)
      # GB18030 is a superset of GB2312; decoding with it also handles files
      # that contain characters outside plain GB2312.
      CURRENT_CHARSET="GB18030"
      ;;
  esac

  newname="$file.utf8"
  echo "converting file ($CURRENT_CHARSET) to utf-8 $file => $newname"
  #iconv -f ISO-8859-1 -t UTF-8 "$file" > "$newname"
  if ! iconv -f "$CURRENT_CHARSET" -t UTF-8 "$file" > "$newname"; then
    # Do not leave a truncated output file next to the untouched original.
    rm -f -- "$newname"
    echo "error: iconv failed for $file (from $CURRENT_CHARSET)" >&2
    exit 1
  fi
  mv -- "$newname" "$file"
done 3< <(find "$NON_UTF_FILE_DIR" -type f -name "$PATTERN_FILE_NAME" -print0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment