Last active
April 9, 2016 08:23
-
-
Save ikegami-yukino/13d586338637e83ebbab to your computer and use it in GitHub Desktop.
Pure Python 版オンライン形態素解析ツール Rakuten MA ref: http://qiita.com/yukinoi/items/925bc238185aa2fad8a7
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage example for the pure-Python Rakuten MA port: load a model from disk,
# tokenize one sentence, then feed a corrected analysis back via train_one.
from rakutenma import RakutenMA

# NOTE(review): phi and c are presumably hyperparameters of the underlying
# learner -- confirm against the rakutenma documentation before changing them.
rma = RakutenMA(phi=1024, c=0.007812)
rma.load("model_ja.json")
# NOTE(review): 15 looks like the bit width used for feature hashing -- TODO confirm.
rma.hash_func = rma.create_hash_func(15)

# Print the analysis produced with the loaded model.
print(rma.tokenize("うらにわにはにわにわとりがいる"))

# Train on one sentence given as a list of [surface, tag] pairs and print
# whatever train_one returns.
print(rma.train_one(
    [["うらにわ","N-nc"],
     ["に","P-k"],
     ["は","P-rj"],
     ["にわ","N-n"],
     ["にわとり","N-nc"],
     ["が","P-k"],
     ["いる","V-c"]]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ time python rakutenma_benchmark.py
real 0m5.143s
user 0m5.026s
sys 0m0.041s
$ time node rakutenma_benchmark.js
real 0m2.386s
user 0m2.265s
sys 0m0.042s
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ time python rakutenma_benchmark.py
real 0m3.583s
user 0m3.573s
sys 0m0.009s
$ time node rakutenma_benchmark.js
real 0m1.852s
user 0m1.831s
sys 0m0.027s
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ time pypy3 test.py
real 0m1.908s
user 0m1.859s
sys 0m0.042s
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark script: on a fresh RakutenMA instance, tokenize one sentence and
// then train on its reference analysis, repeated 1000 times.
var RakutenMA = require('./rakutenma');
var rma = new RakutenMA();
// Use the feature set the library exposes as default_featset_ja.
rma.featset = RakutenMA.default_featset_ja;
for (var i = 0; i < 1000; i++) {
  // The return values are intentionally discarded; only the elapsed time matters.
  rma.tokenize("もう何も怖くない");
  // Reference analysis as [surface, tag] pairs.
  rma.train_one(
    [["もう","F"],
     ["何","D"],
     ["も","P-rj"],
     ["怖く","A-c"],
     ["ない","X"]]);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# Benchmark script: on a fresh RakutenMA instance, tokenize one sentence and
# then train on its reference analysis, repeated 1000 times.
from rakutenma import RakutenMA

rma = RakutenMA()
for i in range(1000):
    # The return values are intentionally discarded; only the elapsed time matters.
    rma.tokenize("もう何も怖くない")
    # Reference analysis as [surface, tag] pairs.
    rma.train_one(
        [["もう","F"],
         ["何","D"],
         ["も","P-rj"],
         ["怖く","A-c"],
         ["ない","X"]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment