Skip to content

Instantly share code, notes, and snippets.

@edouard-lopez
Last active August 29, 2015 13:56
Show Gist options
  • Save edouard-lopez/9208054 to your computer and use it in GitHub Desktop.
Save edouard-lopez/9208054 to your computer and use it in GitHub Desktop.
Encoding error
python subtlex-parser.py
的: <type 'unicode'>
[[]]
Traceback (most recent call last):
File "subtlex-parser.py", line 32, in <module>
app = subtlexParser() # création de l'appli
File "subtlex-parser.py", line 17, in __init__
self.convert()
File "subtlex-parser.py", line 29, in convert
outf.write(line)
UnicodeEncodeError: 'ascii' codec can't encode character u'\u7684' in position 0: ordinal not in range(128)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Read ``subtlex.csv`` line by line, query cjklib for character variants,
and append each line to ``subtlex-out.csv``.
"""
# Module metadata.
__author__ = "Édoaurd Lopez"
__version__ = "v0.1"
__date__ = "2014-02-25"
# Disabled import, kept from the original; nothing visible in this file uses it.
# import sys, os.path
from cjklib import characterlookup
# Path of the input file that is read line by line (decoded as UTF-8).
subtlexIn = 'subtlex.csv'
# Path of the output file; it is opened in append mode, so reruns add to it.
subtlexOut = 'subtlex-out.csv'
class SubtlexParser:
    """Copy the entries of ``subtlexIn`` to ``subtlexOut``, querying cjklib on the way.

    Instantiating the class runs the whole conversion immediately, because
    ``__init__`` calls :meth:`convert`.
    """

    def __init__(self):
        """Create the cjklib character lookup and run the conversion."""
        # 'T' is the locale handed to cjklib's CharacterLookup.
        # NOTE(review): presumably selects Traditional Chinese -- confirm
        # against the cjklib documentation.
        self.cjk = characterlookup.CharacterLookup('T')
        self.convert()

    def convert(self):
        """Append every line of ``subtlexIn`` to ``subtlexOut`` as UTF-8.

        For each input line the surrounding whitespace is stripped, the bytes
        are decoded as UTF-8, the entry and the cjklib variant lookup result
        are printed for debugging, and the entry is appended to the output
        file followed by a newline.

        Raises:
            IOError: if either file cannot be opened.
            UnicodeDecodeError: if an input line is not valid UTF-8.
        """
        cjk = self.cjk
        with open(subtlexIn, 'r') as inf:
            with open(subtlexOut, 'a') as outf:
                for raw_line in inf:
                    line = raw_line.strip().decode("utf-8")
                    print(u'{0}: {1}'.format(line, type(line)))
                    # NOTE(review): the result is named `trad`, yet variant
                    # type 'C' is requested, and the whole line (possibly
                    # several characters) is passed in -- confirm both against
                    # the getCharacterVariants() documentation.
                    trad = cjk.getCharacterVariants(line, 'C')
                    print(u'[{0}]'.format(trad))
                    # Encode explicitly: writing a unicode object to a
                    # byte-mode file makes Python 2 fall back to the ASCII
                    # codec, which raises UnicodeEncodeError on CJK text.
                    # The newline removed by strip() is restored so records
                    # stay on separate lines.
                    outf.write((line + u"\n").encode("utf-8"))
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    app = SubtlexParser()  # create the application; the constructor runs the conversion
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
我们
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment