Created
December 20, 2021 08:51
-
-
Save ialexpovad/1bfd6144130b3f620de8241040bb87fa to your computer and use it in GitHub Desktop.
#python #encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encodingfile(Path, NameFile):
    """Return the detected encoding of a file as a string.

    The file is read in binary mode and fed line by line to chardet's
    ``UniversalDetector`` until the detector reports that it is done or
    the file is exhausted.

    :param Path: Directory in which the file is located.
    :param NameFile: File name including its extension (``namefile.txt``).
    :return: Value of the detector's ``result['encoding']`` entry, e.g.
        ``'utf-8'``. NOTE(review): per the chardet documentation this can be
        ``None`` when no encoding could be determined -- confirm callers
        handle that.
    """
    import os

    from chardet.universaldetector import UniversalDetector

    # os.path.join uses the platform's separator; the previous hard-coded
    # backslash only produced a valid path on Windows (and relied on the
    # invalid escape sequence '\{').
    file_path = os.path.join(Path, NameFile)

    enc = UniversalDetector()
    with open(file_path, 'rb') as flop:
        for line in flop:
            enc.feed(line)
            # Stop early once the detector is confident enough.
            if enc.done:
                break
    # close() finalises the detection and populates enc.result.
    enc.close()
    return enc.result['encoding']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def PredictEncoding(Path, Namefile, listraw = 10):
    """Guess a file's encoding from its first ``listraw`` lines.

    :param Path: Directory in which the file is located.
    :param Namefile: File name including its extension.
    :param listraw: Number of lines read from the start of the file and
        passed to the detector (default 10). Files shorter than this simply
        contribute fewer bytes.
    :return: Tuple ``(detection, rawdata)`` where ``detection`` is whatever
        ``chardet.detect`` returns for the sample and ``rawdata`` is the
        sampled bytes themselves.
    """
    import os

    import chardet

    # os.path.join uses the platform's separator; the previous hard-coded
    # backslash only produced a valid path on Windows (and relied on the
    # invalid escape sequence '\{').
    file_path = os.path.join(Path, Namefile)

    with open(file_path, 'rb') as f:
        # Join binary lines for the specified number of lines; readline()
        # returns b'' at end of file, so short files are handled naturally.
        rawdata = b''.join(f.readline() for _ in range(listraw))
    return chardet.detect(rawdata), rawdata
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment