Skip to content

Instantly share code, notes, and snippets.

@jerrylususu
Created July 7, 2020 10:08
Show Gist options
  • Save jerrylususu/7678173a46354992f0ba7117bb61a262 to your computer and use it in GitHub Desktop.
Save jerrylususu/7678173a46354992f0ba7117bb61a262 to your computer and use it in GitHub Desktop.
# source:https://gitee.com/wkc/autogbktoutf8/blob/master/autogbktoutf8.py
# requires: chardet
# tested on py3, possibly working on py2
#coding=utf8
from __future__ import print_function
from __future__ import unicode_literals
import os
import os.path
import codecs
import chardet
from chardet.universaldetector import UniversalDetector
import argparse
# Command line: one positional root directory, plus "-v all" to print a
# line for every file examined (the default "none" prints conversions only).
parser = argparse.ArgumentParser(description='')
parser.add_argument('path', nargs=1)
parser.add_argument('-v', choices=('none', 'all'), default="none")
args = parser.parse_args()
path = args.path[0]

# Collect every file below the root directory. The joined path is appended
# directly so that `path` keeps holding the root given on the command line
# instead of being overwritten by the last file visited.
file_list = []
for root, dirs, files in os.walk(path):
    for name in files:
        file_list.append(os.path.join(root, name))

# Only files with one of these extensions are examined by check().
allow_file_type = ['.' + ext for ext in 'cpp|c|py|java|txt|cc|go|h'.split('|')]
# Distinct extensions actually present under the root (order is arbitrary).
all_file_type = list({os.path.splitext(found)[1] for found in file_list})

print("all file type:", all_file_type)
print("allow file type:", allow_file_type)
print("ignore file type:", list(set(all_file_type) - set(allow_file_type)))

# A single detector instance is shared by all files; check() resets it
# before feeding each new file.
detector = UniversalDetector()
def check(name):
    """Detect the encoding of file *name* and convert it to UTF-8 if needed.

    Files whose extension is not in ``allow_file_type`` are skipped. For the
    others, the shared module-level ``detector`` is reset and fed the file's
    raw bytes line by line until it reports it is done or the file ends. If
    the detected encoding is one of the listed Chinese legacy encodings, the
    file is rewritten in place through ``gbk_to_utf8``.

    Args:
        name: Path of the file to examine.

    Returns:
        None. Progress is reported on stdout; with ``-v all`` the skipped
        files and every detection result are printed as well.
    """
    if os.path.splitext(name)[1] not in allow_file_type:
        if args.v == 'all':
            print("ignore", name)
        return

    detector.reset()
    # Read raw bytes under a context manager so the handle is closed before
    # the same file is reopened for writing during conversion.
    with open(name, 'rb') as raw:
        for line in raw:
            detector.feed(line)
            if detector.done:
                break
    detector.close()

    if args.v == 'all':
        print(name, detector.result)

    # NOTE(review): every encoding matched here is decoded as GBK by
    # gbk_to_utf8, whatever label was detected -- confirm that this is
    # intended for Big5 / EUC-TW / HZ / ISO-2022-CN results.
    if detector.result['encoding'] in ['Big5', 'GB2312', 'GB18030', 'EUC-TW', 'HZ-GB-2312', 'ISO-2022-CN']:
        # "start <name> " and "success" end up on one line; if the conversion
        # raises, the caller's error message follows "start <name> " instead.
        print("start ", name, end=' ')
        gbk_to_utf8(name)
        print("success")
def gbk_to_utf8(name, encoding='gbk'):
    """Rewrite file *name* in place as UTF-8.

    The whole file is decoded before it is reopened for writing, so a
    decoding failure leaves the original contents untouched.

    Args:
        name: Path of the file to convert.
        encoding: Codec used to decode the existing contents. Defaults to
            ``'gbk'``, which is what callers passing only *name* get.

    Raises:
        UnicodeDecodeError: If the contents are not valid in *encoding*.
        LookupError: If *encoding* is not a codec known to Python.
    """
    # Context managers close (and therefore flush) both handles
    # deterministically instead of relying on garbage collection.
    with codecs.open(name, 'r', encoding) as src:
        data = src.read()
    with codecs.open(name, 'w', 'utf8') as dst:
        dst.write(data)
# Run the check/convert step over every collected file. This is a
# best-effort batch: a failure on one file is printed and the loop moves on
# to the next file.
for file_path in file_list:
    try:
        check(file_path)
    except Exception as err:
        print(err)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment