Created
April 15, 2014 19:44
-
-
Save nodtem66/10764705 to your computer and use it in GitHub Desktop.
Encoding Detector in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import os
import sys

from chardet.universaldetector import UniversalDetector
# Number of bytes handed to the detector per read. Reading fixed-size chunks
# (instead of iterating "lines") keeps memory bounded for binary files that
# contain no newline characters at all.
_CHUNK_SIZE = 64 * 1024


def collect_filenames(argv, directory):
    """Return the list of file names the script should inspect.

    argv      -- the full argument vector, program name included.
    directory -- directory to list when no explicit file names were given.

    When argv holds only the program name, the non-directory entries found
    directly inside `directory` are returned (sub-directories are not
    descended into). Otherwise every argument after the program name is
    returned unchanged.
    """
    if len(argv) != 1:
        return list(argv[1:])
    # Only the first triple produced by os.walk() is needed: it describes
    # `directory` itself.
    for _dirpath, _dirnames, filenames in os.walk(directory):
        return list(filenames)
    return []


def detect_encoding(path):
    """Feed the raw bytes of `path` to chardet and return its result dict.

    The file is opened in binary mode because the detector works on
    undecoded bytes, and it is closed again as soon as the detector is
    confident or the end of the file is reached.
    """
    detector = UniversalDetector()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b''):
            detector.feed(chunk)
            if detector.done:
                break
    # Read the `result` attribute after close() (the usage shown in the
    # chardet documentation) rather than relying on close()'s return value.
    detector.close()
    return detector.result


def format_report_line(result, filename):
    """Build one output line: confidence percentage, encoding, file name.

    result   -- mapping with a 'confidence' number and an 'encoding' entry.
    filename -- name printed in the last column.
    """
    percent = str(int(result['confidence'] * 100)) + '%'
    # The encoding can be None when nothing was detected; converting it to
    # text first keeps the width specifier from raising on Python 3.
    encoding = str(result['encoding'])
    return "{0:>4} {1:>6}\t{2}".format(percent, encoding, filename)


def main(argv=None):
    """Print the detected encoding of each requested regular file.

    argv -- argument vector to use; defaults to sys.argv.

    Names that are not regular files are skipped. A file that cannot be
    read is reported on stderr and does not stop the remaining files from
    being processed.
    """
    if argv is None:
        argv = sys.argv
    for filename in collect_filenames(argv, os.getcwd()):
        if not os.path.isfile(filename):
            continue
        try:
            result = detect_encoding(filename)
        except (IOError, OSError) as error:
            print("cannot read {0}: {1}".format(filename, error),
                  file=sys.stderr)
            continue
        print(format_report_line(result, filename))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Installation
pip install chardet
Usage
encode_detector.py
Detect the encoding of all files in the current working directory
encode_detector.py file1 file2 file3
Detect the encoding of the specific files given as arguments