Last active
August 29, 2015 13:56
-
-
Save edouard-lopez/9208054 to your computer and use it in GitHub Desktop.
Encoding error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python subtlex-parser.py | |
的: <type 'unicode'> | |
[[]] | |
Traceback (most recent call last): | |
File "subtlex-parser.py", line 32, in <module> | |
app = subtlexParser() # création de l'appli | |
File "subtlex-parser.py", line 17, in __init__ | |
self.convert() | |
File "subtlex-parser.py", line 29, in convert | |
outf.write(line) | |
UnicodeEncodeError: 'ascii' codec can't encode character u'\u7684' in position 0: ordinal not in range(128) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: UTF-8 -*- | |
# Module metadata.
__author__ = "Édouard Lopez"
__version__ = "v0.1"
__date__ = "2014-02-25"
# import sys, os.path | |
from cjklib import characterlookup | |
# Input word list, read line by line by SubtlexParser.convert().
subtlexIn = "subtlex.csv"
# Output file; SubtlexParser.convert() opens it in append mode.
subtlexOut = "subtlex-out.csv"
class SubtlexParser:
    """Copy the entries of the SUBTLEX word list to the output file.

    Instantiating the class runs the whole conversion immediately: the
    constructor builds a cjklib ``CharacterLookup`` and then calls
    ``convert()``.
    """

    def __init__(self):
        # 'T' is the locale argument handed to cjklib's CharacterLookup.
        self.cjk = characterlookup.CharacterLookup('T')
        self.convert()

    def convert(self):
        """Append every non-empty line of ``subtlexIn`` to ``subtlexOut``.

        Both files are handled as UTF-8 text. For each stripped entry the
        entry itself and the result of ``getCharacterVariants(entry, 'C')``
        are printed; only the entry is written to the output file, one per
        line. The output is opened in append mode, so repeated runs add to
        whatever the file already contains.
        """
        # Function-scope import keeps this block self-contained; io.open
        # accepts ``encoding`` on both Python 2 and Python 3.
        import io

        cjk = self.cjk
        # Decoding/encoding happens once at the file boundary. Writing a
        # unicode object to a file from the built-in open() made Python 2
        # fall back to the ASCII codec and raise UnicodeEncodeError.
        with io.open(subtlexIn, 'r', encoding='utf-8') as inf:
            with io.open(subtlexOut, 'a', encoding='utf-8') as outf:
                for raw in inf:
                    line = raw.strip()
                    if not line:
                        # Nothing to look up or write for a blank line.
                        continue
                    print(u'{0}: {1}'.format(line, type(line)))
                    # NOTE(review): the recorded run printed an empty result
                    # for the first entry -- confirm that 'C' is the intended
                    # variant type and that multi-character entries are
                    # accepted by getCharacterVariants.
                    trad = cjk.getCharacterVariants(line, 'C')
                    print(u'[{0}]'.format(trad))
                    # strip() removed the line terminator, so put one back;
                    # otherwise all entries run together in the output.
                    outf.write(line + u'\n')
if __name__ == "__main__":
    # Create the application; the constructor itself starts the conversion.
    app = SubtlexParser()
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
的 | |
我 | |
你 | |
是 | |
了 | |
不 | |
在 | |
他 | |
我们 | |
好 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment