このスクリプトを動作させるには、2つのライブラリのインストールが必要です。
- beautifulsoup4
- lxml
pip3 install beautifulsoup4 lxml
python3 splitter.py
http://qiita.com/IshitaTakeshi/items/dcc8d9271fb14c0a6f58
https://github.com/IshitaTakeshi/NaiveBayes
このスクリプトを動作させるには、2つのライブラリのインストールが必要です。
pip3 install beautifulsoup4 lxml
python3 splitter.py
http://qiita.com/IshitaTakeshi/items/dcc8d9271fb14c0a6f58
https://github.com/IshitaTakeshi/NaiveBayes
# -*- coding: utf-8 -*- | |
# from https://github.com/IshitaTakeshi/NaiveBayes/blob/master/src/config.py | |
import os | |
from configparser import ConfigParser | |
class Config(object):
    """Expose one section of an INI-style config file as instance attributes.

    Every option in the section is parsed as a Python literal and stored
    under its option name, so a line such as ``appid = 'abc'`` is read back
    as ``config.appid == 'abc'``.  Option names are lower-cased by
    ``ConfigParser`` before they become attribute names.
    """

    def __init__(self, filename, section):
        """Load ``section`` from the config file at ``filename``.

        Args:
            filename: Path of the INI-style file to read.
            section: Name of the section whose options become attributes.

        Raises:
            ValueError: If ``filename`` does not exist, or an option value
                is not a plain Python literal.
            SyntaxError: If an option value is not valid Python syntax.
            configparser.NoSectionError: If ``section`` is not in the file.
        """
        # Imported here so the class does not rely on a new file-level import.
        import ast

        if not os.path.exists(filename):
            raise ValueError("{} does not exist".format(filename))

        parser = ConfigParser()
        parser.read(filename)

        # SECURITY NOTE: values were previously passed to eval(), which runs
        # arbitrary expressions found in the config file.  literal_eval()
        # accepts the same quoted strings, numbers, lists, tuples, dicts,
        # sets, booleans and None, but rejects calls, names and operators.
        params = {
            key: ast.literal_eval(raw)
            for key, raw in parser.items(section)
        }

        # set params as attributes
        self.__dict__.update(params)
# -*- coding: utf-8 -*- | |
# from https://github.com/IshitaTakeshi/NaiveBayes/blob/master/src/splitter.py | |
# need `pip3 install beautifulsoup4 lxml` | |
from pprint import pprint | |
from urllib.parse import urlencode | |
from urllib.request import urlopen | |
from bs4 import BeautifulSoup | |
from config import Config | |
# Request settings consumed by split() below.
# Endpoint that the encoded form parameters are POSTed to.
pageurl = "http://jlp.yahooapis.jp/MAService/V1/parse"
# Sent as the 'results' request parameter.
results = "ma"
# Sent as the 'filter' request parameter.
# NOTE(review): presumably a '|'-separated list of part-of-speech codes to
# keep -- confirm against the API documentation.
filter_ = "1|2|3|4|5|9|10"

# Credentials come from the [YAHOO] section of settings.cfg; split() reads
# config.appid from it.  This file is read as soon as the module is imported.
config = Config('settings.cfg', 'YAHOO')
def split(sentence):
    """Send ``sentence`` to the parse service and return its word surfaces.

    The sentence is POSTed to ``pageurl`` together with the application id
    from ``config`` and the module-level ``results`` / ``filter_`` settings.

    Args:
        sentence: Text to be split into words.

    Returns:
        A list with the text of the ``surface`` element of every ``word``
        element in the response, in document order.

    Raises:
        ValueError: If the response contains no ``ma_result`` element.
        urllib.error.URLError: If the request itself fails.
    """
    params = urlencode({
        'appid': config.appid,
        'results': results,
        'filter': filter_,
        'sentence': sentence,
    }).encode('utf-8')

    # Passing a data payload makes urlopen issue a POST; the context manager
    # closes the connection once the body has been read.
    with urlopen(pageurl, params) as response:
        body = response.read()

    # Previously the response was fetched and then dropped, so the function
    # always returned None.  This restores the parsing that had been
    # commented out.
    # NOTE(review): assumes the reply nests word/surface elements under
    # ma_result, as the commented-out code did -- confirm against the API.
    soup = BeautifulSoup(body, "lxml")
    ma_result = soup.ma_result
    if ma_result is None:
        raise ValueError("response contains no ma_result element")

    # find_all() visits only <word> tags, skipping whitespace text nodes
    # that plain iteration over word_list would also yield.
    return [word.surface.string for word in ma_result.find_all("word")]
if __name__ == '__main__':
    # Demo run: split a sample sentence and pretty-print the result.
    s = "庭にわ二羽の鶏がいる"
    # The sample sentence was defined but an empty string was sent instead.
    pprint(split(s))
[YAHOO] | |
# obtain from http://developer.yahoo.co.jp/webapi/jlp/ma/v1/parse.html | |
appid = 'your yahoo app id' |