Skip to content

Instantly share code, notes, and snippets.

@yono
Created May 21, 2010 12:21
Show Gist options
  • Save yono/408767 to your computer and use it in GitHub Desktop.
Save yono/408767 to your computer and use it in GitHub Desktop.
Yahoo日本語形態素解析APIを使って文をわかち書きするスクリプト
from xml.parsers.expat import ParserCreate
import yahoowakati
class Wakati(object):
    """Collect the text of every ``Surface`` element of an XML document.

    The XML is fetched through ``yahoowakati.get_xml`` and fed to an expat
    parser. Character data seen while inside a ``Surface`` element is
    appended, in document order, to ``self.words``.
    """

    def __init__(self):
        # True only while the parser is inside a <Surface> element. This is
        # per-instance state; it used to be a module-level global, which let
        # one instance's callbacks switch off collection in another.
        self._in_surface = False
        self.words = []
        self.p = self._new_parser()
        # An expat parser can only parse a single document, so remember
        # whether self.p has already been consumed (see parse_text).
        self._parser_used = False

    def _new_parser(self):
        """Return a fresh expat parser wired to this instance's handlers."""
        parser = ParserCreate()
        # Ask expat to buffer text so an element's content is not split
        # across several CharacterDataHandler calls when avoidable.
        parser.buffer_text = True
        parser.StartElementHandler = self.start_element
        parser.EndElementHandler = self.end_element
        parser.CharacterDataHandler = self.char_data
        return parser

    def start_element(self, name, attrs):
        """Expat callback: start collecting only when a ``Surface`` opens."""
        self._in_surface = (name == "Surface")

    def end_element(self, name):
        """Expat callback: any closing tag stops collection."""
        self._in_surface = False

    def char_data(self, data):
        """Expat callback: keep *data* if it belongs to a ``Surface``."""
        if self._in_surface:
            self.words.append(data)

    def parse_text(self, text):
        """Fetch the XML for *text* and append its surfaces to ``words``.

        Words accumulate across calls. A new parser is created for every
        call after the first, because a finished expat parser rejects
        further input.
        """
        if self._parser_used:
            self.p = self._new_parser()
            self._in_surface = False
        self._parser_used = True
        self.p.ParseFile(yahoowakati.get_xml(text))

    def get_words(self):
        """Return the list of collected words (the live list, not a copy)."""
        return self.words
if __name__ == "__main__":
    # Usage (the sample sentence and the output the original author listed):
    #   % python wakati.py '今日はとてもいい天気です'
    #   今日
    #   とても
    #   いい
    #   天気
    #   です
    import sys

    if len(sys.argv) < 2:
        # A missing argument previously surfaced as a bare IndexError;
        # exit with a usage message on stderr instead.
        sys.exit("Usage: python wakati.py TEXT")

    text = sys.argv[1]
    w = Wakati()
    w.parse_text(text)
    words = w.get_words()
    for word in words:
        # Parenthesised single-argument form prints the same on Python 2
        # and is also valid Python 3 syntax.
        print(word)
def get_xml(text, appid="Yahoo API Application ID",
            url='http://jlp.yahooapis.jp/DAService/V1/parse'):
    """POST *text* to the parse endpoint and return the open response.

    Args:
        text: Sentence sent as the ``sentence`` form field.
        appid: Value sent as the ``appid`` form field. The default is the
            placeholder string from the original code; presumably it has
            to be replaced with a real application ID -- confirm with the
            service documentation.
        url: Endpoint the form is posted to.

    Returns:
        The file-like response object returned by ``urlopen``. It is not
        read or closed here; that is left to the caller.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import urllib`` being present, and so it works on both major
    # Python versions (the functions moved in Python 3).
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlencode, urlopen

    postdata = {
        'appid': appid,
        'sentence': text,
    }
    params = urlencode(postdata)
    # Python 3's urlopen requires the POST body as bytes. On Python 2
    # ``bytes`` is ``str``, so the value is passed through untouched.
    if not isinstance(params, bytes):
        params = params.encode('ascii')
    return urlopen(url, params)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment