Created
May 30, 2019 06:29
-
-
Save yeiichi/04f1eb6b1fe634c49401bddb5700a556 to your computer and use it in GitHub Desktop.
Detects encoding of a txt or csv file and converts it to utf-8.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

import chardet
def build_output_name(input_file_name):
    """Return the name of the converted file for *input_file_name*.

    ``_conv`` is inserted between the stem and the extension, so
    ``data.txt`` becomes ``data_conv.txt``.  The split is done with
    ``os.path.splitext`` rather than fixed-width slicing, so extensions of
    any length (or no extension at all) are handled.
    """
    stem, extension = os.path.splitext(input_file_name)
    return stem + '_conv' + extension


def convert_to_utf8(input_file_name):
    """Detect the encoding of *input_file_name* and write a UTF-8 copy.

    Returns the name of the file that was written, or ``None`` when
    chardet could not determine an encoding (nothing is written then).

    Raises:
        FileNotFoundError: the input file does not exist.
        UnicodeDecodeError: the file cannot be decoded with the detected
            encoding.
    """
    with open(input_file_name, 'rb') as f_bin:
        detected_encoding = chardet.detect(f_bin.read())['encoding']

    # chardet reports None when it cannot make a guess; concatenating that
    # into the message (or passing it to open) would not do what we want.
    if detected_encoding is None:
        print(' Detected: None (encoding could not be determined)')
        return None
    print(' Detected: ' + detected_encoding)

    with open(input_file_name, encoding=detected_encoding) as f_in:
        read_str = f_in.read()

    output_file_name = build_output_name(input_file_name)
    # Explicit encoding: without it the platform default is used, which is
    # not necessarily UTF-8.
    with open(output_file_name, 'w', encoding='utf-8') as f_out:
        f_out.write(read_str)
    return output_file_name


def main():
    """Prompt for a file name, convert it, and report the result."""
    input_file_name = input('File name?(txt or csv) ')
    try:
        output_file_name = convert_to_utf8(input_file_name)
    except FileNotFoundError:
        print('FileNotFoundError')
    except UnicodeDecodeError:
        print('UnicodeDecodeError')
    else:
        if output_file_name is not None:
            print(" Created: '" + output_file_name + "'")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment