Skip to content

Instantly share code, notes, and snippets.

@akkijp
Last active January 14, 2016 00:33
Show Gist options
  • Save akkijp/c5ad188ce2581957b1a6 to your computer and use it in GitHub Desktop.
Save akkijp/c5ad188ce2581957b1a6 to your computer and use it in GitHub Desktop.
Yahoo APIを使って、文章の日本語形態素解析をおこなうスクリプト
# -*- coding: utf-8 -*-
# from https://github.com/IshitaTakeshi/NaiveBayes/blob/master/src/config.py
import os
from configparser import ConfigParser
class Config(object):
    """Expose the options of one INI-file section as instance attributes.

    Every option value in the section is evaluated as a Python expression,
    so ``appid = 'abc'`` becomes the string ``'abc'`` and ``count = 3``
    becomes the integer ``3``.  Option names are lower-cased by
    ``ConfigParser`` before they become attribute names.
    """

    def __init__(self, filename, section):
        """Load ``section`` from the INI file ``filename``.

        Args:
            filename: Path of the configuration file.
            section: Name of the section whose options become attributes.

        Raises:
            ValueError: If ``filename`` does not exist or cannot be read.
            configparser.NoSectionError: If ``section`` is not in the file.
        """
        if not os.path.exists(filename):
            raise ValueError("{} does not exist".format(filename))

        parser = ConfigParser()
        # ConfigParser.read() silently skips files it cannot open and
        # returns the list of files it actually parsed; an empty list means
        # nothing was loaded (e.g. the path is a directory or unreadable).
        if not parser.read(filename):
            raise ValueError("{} could not be read".format(filename))

        config = dict(parser.items(section))
        # NOTE(security): eval() executes arbitrary Python found in the
        # config file.  Only load files you trust; if every value is a plain
        # literal, ast.literal_eval would be the safe replacement.
        # A plain loop (not a comprehension) keeps eval's scope unchanged.
        for key, item in config.items():
            config[key] = eval(item)

        # Set params as attributes.
        self.__dict__ = config
# -*- coding: utf-8 -*-
# from https://github.com/IshitaTakeshi/NaiveBayes/blob/master/src/splitter.py
# need `pip3 install beautifulsoup4 lxml`
from pprint import pprint
from urllib.parse import urlencode
from urllib.request import urlopen
from bs4 import BeautifulSoup
from config import Config
# Load the [YAHOO] section of settings.cfg; this runs at import time.
config = Config('settings.cfg', 'YAHOO')
# Endpoint that split() sends its request to.
pageurl = "http://jlp.yahooapis.jp/MAService/V1/parse"
# Value sent as the `results` request parameter.
results = "ma"
# Value sent as the `filter` request parameter.
# NOTE(review): presumably '|'-separated part-of-speech codes defined by the
# Yahoo API -- confirm the meaning of each number against the API docs.
filter_ = "1|2|3|4|5|9|10"
def split(sentence):
    """Split ``sentence`` into words using the Yahoo morphological-analysis API.

    Sends ``sentence`` to ``pageurl`` together with the module-level
    ``config.appid``, ``results`` and ``filter_`` settings, then extracts
    the surface form of every word in the response.

    Args:
        sentence: The text to analyse.

    Returns:
        A list with the ``surface`` string of each ``word`` element found
        under ``ma_result.word_list`` in the response.

    Raises:
        urllib.error.URLError: If the request fails.
        AttributeError: If the response has no ``ma_result``/``word_list``
            element (for example when the service returns an error
            document).
    """
    params = urlencode({'appid': config.appid,
                        'results': results,
                        'filter': filter_,
                        'sentence': sentence})
    # Passing a bytes body makes urlopen issue a POST request.
    params = bytes(params, encoding='utf-8')

    # The context manager closes the HTTP response once the body is read.
    with urlopen(pageurl, params) as response:
        body = response.read()

    soup = BeautifulSoup(body, "lxml")
    # find_all("word") skips any whitespace text nodes between elements.
    # NOTE(review): assumes the layout ma_result > word_list > word > surface
    # -- confirm against the API response format.
    return [word.surface.string
            for word in soup.ma_result.word_list.find_all("word")]
if __name__ == '__main__':
    # Demo: analyse a sample sentence and pretty-print the result.
    s = "庭にわ二羽の鶏がいる"
    # Pass the sample sentence rather than an empty string.
    pprint(split(s))
[YAHOO]
# obtain from http://developer.yahoo.co.jp/webapi/jlp/ma/v1/parse.html
appid = 'your yahoo app id'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment