Skip to content

Instantly share code, notes, and snippets.

@hideaki-t
Created September 1, 2011 12:44
Show Gist options
  • Save hideaki-t/1186089 to your computer and use it in GitHub Desktop.
Save hideaki-t/1186089 to your computer and use it in GitHub Desktop.
igo-python testing tool
# Dump the text content of an HTML document to stdout, encoded as UTF-8.
#
# The document is taken from the first command-line argument when one is
# given (it is handed to lxml.html.parse unchanged); otherwise it is read
# from standard input.
import lxml.html
import sys

if sys.version_info[0] < 3:
    import codecs

    # Python 2: stdout is a byte stream, so wrap it to encode unicode text.
    sys.stdout = codecs.lookup('utf-8').streamwriter(sys.stdout)
    # Python 2 stdin already yields undecoded bytes.
    raw_stdin = sys.stdin
else:
    import io

    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    # Give the parser the undecoded bytes, as the Python 2 branch does.
    # Text-mode stdin would decode with the locale encoding first, which
    # raises or garbles input whose encoding differs from the locale.
    raw_stdin = sys.stdin.buffer

target = sys.argv[1] if len(sys.argv) > 1 else raw_stdin
r = lxml.html.parse(target).getroot()
print(r.text_content())
import sys
import igo
# Force UTF-8 on the standard streams for both major Python versions and
# define ``u``, which converts a native string literal to a text string.
if sys.version_info[0] < 3:
    import codecs

    def u(s):
        """Decode a Python 2 byte-string literal into unicode text."""
        return s.decode('utf-8')

    sys.stdout = codecs.lookup('utf-8').streamwriter(sys.stdout)
    sys.stdin = codecs.lookup('utf-8').streamreader(sys.stdin)
else:
    import io

    # Python 3 string literals are already text, so ``u`` is a no-op.
    u = str
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    # Decode input as UTF-8 as well, matching the Python 2 branch; without
    # this, stdin is decoded with whatever the locale encoding happens to be.
    sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
def pp(sf, ft, st):
    """Write one result line to stdout: ``sf``, a tab, ``ft``, a newline.

    ``st`` is accepted (the caller passes the morpheme's start value) but
    is not part of the output.
    """
    row_format = u("%s\t%s\n")
    sys.stdout.write(row_format % (sf, ft))
# Run every line of standard input through the tagger and print each
# resulting morpheme via pp().
# NOTE(review): 'ipadic' is presumably the dictionary location expected by
# igo's Tagger, resolved relative to the working directory -- confirm.
tagger = igo.tagger.Tagger('ipadic')
for line in sys.stdin:
    morphemes = tagger.parse(line)
    for morpheme in morphemes:
        pp(morpheme.surface, morpheme.feature, morpheme.start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment