Skip to content

Instantly share code, notes, and snippets.

@un33k
Created July 11, 2014 20:49
Show Gist options
  • Save un33k/eb788819f5bb42afbfc8 to your computer and use it in GitHub Desktop.
Save un33k/eb788819f5bb42afbfc8 to your computer and use it in GitHub Desktop.
Automatically open files with different encodings.
import codecs
# pip install chardet
# Path of the file to inspect: it is passed to get_file_encoding_type()
# and then reopened with the detected encoding.
file_location = './sometestfile.log'
from chardet.universaldetector import UniversalDetector
def get_file_encoding_type(filename):
    """
    Return a string describing the probable encoding of a file.

    The file is opened in binary mode and fed line by line to chardet's
    ``UniversalDetector``. Reading stops as soon as the detector reports
    that it is done, so a large file is not read to the end needlessly.

    :param filename: path of the file to inspect.
    :return: the detected encoding name, or ``None`` when the detector
        did not produce one.
    """
    detector = UniversalDetector()
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            # ``done`` is set once the detector has made up its mind;
            # there is no point in reading the rest of the file after that.
            if detector.done:
                break
    # close() finalises the prediction and populates ``result``.
    detector.close()
    # Normalise any falsy encoding value to None.
    return detector.result['encoding'] or None
# Detect the encoding of the target file, then print it line by line,
# reporting (rather than crashing on) data that cannot be decoded.
encoding = get_file_encoding_type(file_location)
if encoding is not None:
    with codecs.open(file_location, 'r', encoding) as f:
        # An explicit while/next loop is used instead of ``for line in f`` so
        # that a decode error on one read does not terminate the iteration.
        while True:
            try:
                # next(f) works on Python 2.6+ and Python 3;
                # the f.next() method exists on Python 2 only.
                line = next(f)
            except UnicodeDecodeError:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("skipped")
                continue
            except StopIteration:
                # End of file reached.
                break
            # check if string is found in line
            print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment