Created
September 4, 2014 05:04
-
-
Save rimms/b0a192112df745adfa88 to your computer and use it in GitHub Desktop.
NN_with_idf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"converter" : { | |
"string_filter_types": {}, | |
"string_filter_rules":[], | |
"num_filter_types": {}, | |
"num_filter_rules": [], | |
"string_types": { | |
"unigram": { "method": "ngram", "char_num": "1" } | |
}, | |
"string_rules":[ | |
{"key" : "*", "type" : "unigram", "sample_weight":"bin", "global_weight" : "bin"} | |
], | |
"num_types": {}, | |
"num_rules": [] | |
}, | |
"parameter" : { | |
"hash_num" : 64 | |
}, | |
"method": "euclid_lsh" | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"converter" : { | |
"string_filter_types": {}, | |
"string_filter_rules":[], | |
"num_filter_types": {}, | |
"num_filter_rules": [], | |
"string_types": { | |
"unigram": { "method": "ngram", "char_num": "1" } | |
}, | |
"string_rules":[ | |
{"key" : "*", "type" : "unigram", "sample_weight":"bin", "global_weight" : "idf"} | |
], | |
"num_types": {}, | |
"num_rules": [] | |
}, | |
"parameter" : { | |
"hash_num" : 64 | |
}, | |
"method": "euclid_lsh" | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ jubanearest_neighbor -f bin.json & | |
$ ./nn_shogn.py | |
[id_with_score{id: 徳川12, score: 1.27169287205}, id_with_score{id: 足利10, score: 1.57138991356}, id_with_score{id: 足利12, score: 1.57138991356}, id_with_score{id: 北条11, score: 1.68489217758}, id_with_score{id: 足利02, score: 1.79433512688}, id_with_score{id: 足利07, score: 1.79433512688}, id_with_score{id: 北条06, score: 1.79433512688}, id_with_score{id: 徳川02, score: 1.84745168686}, id_with_score{id: 足利08, score: 1.84745168686}, id_with_score{id: 北条09, score: 1.84745168686}] | |
徳川02 慶喜 | |
[id_with_score{id: 足利03, score: 1.27169287205}, id_with_score{id: 足利13, score: 1.27169287205}, id_with_score{id: 足利14, score: 1.27169287205}, id_with_score{id: 足利15, score: 1.33331120014}, id_with_score{id: 足利04, score: 1.39412653446}, id_with_score{id: 足利06, score: 1.39412653446}, id_with_score{id: 足利09, score: 1.39412653446}, id_with_score{id: 足利11, score: 1.39412653446}, id_with_score{id: 足利07, score: 1.45410203934}, id_with_score{id: 徳川09, score: 1.57138991356}] | |
徳川09 義昭 | |
[id_with_score{id: 北条14, score: 1.01793670654}, id_with_score{id: 北条12, score: 1.20930838585}, id_with_score{id: 北条03, score: 1.27169287205}, id_with_score{id: 北条08, score: 1.27169287205}, id_with_score{id: 北条09, score: 1.27169287205}, id_with_score{id: 北条10, score: 1.27169287205}, id_with_score{id: 北条13, score: 1.27169287205}, id_with_score{id: 北条01, score: 1.33331120014}, id_with_score{id: 北条02, score: 1.33331120014}, id_with_score{id: 北条04, score: 1.33331120014}] | |
北条01 守時 | |
$ jubanearest_neighbor -f idf.json & | |
$ ./nn_shogn.py | |
[id_with_score{id: 徳川01, score: 0.0}, id_with_score{id: 徳川02, score: 0.0}, id_with_score{id: 徳川03, score: 0.0}, id_with_score{id: 徳川04, score: 0.0}, id_with_score{id: 徳川05, score: 0.0}, id_with_score{id: 徳川06, score: 0.0}, id_with_score{id: 徳川07, score: 0.0}, id_with_score{id: 徳川08, score: 0.0}, id_with_score{id: 徳川09, score: 0.0}, id_with_score{id: 徳川10, score: 0.0}] | |
徳川01 慶喜 | |
[id_with_score{id: 徳川01, score: 0.0}, id_with_score{id: 徳川02, score: 0.0}, id_with_score{id: 徳川03, score: 0.0}, id_with_score{id: 徳川04, score: 0.0}, id_with_score{id: 徳川05, score: 0.0}, id_with_score{id: 徳川06, score: 0.0}, id_with_score{id: 徳川07, score: 0.0}, id_with_score{id: 徳川08, score: 0.0}, id_with_score{id: 徳川09, score: 0.0}, id_with_score{id: 徳川10, score: 0.0}] | |
徳川01 義昭 | |
[id_with_score{id: 徳川01, score: 0.0}, id_with_score{id: 徳川02, score: 0.0}, id_with_score{id: 徳川03, score: 0.0}, id_with_score{id: 徳川04, score: 0.0}, id_with_score{id: 徳川05, score: 0.0}, id_with_score{id: 徳川06, score: 0.0}, id_with_score{id: 徳川07, score: 0.0}, id_with_score{id: 徳川08, score: 0.0}, id_with_score{id: 徳川09, score: 0.0}, id_with_score{id: 徳川10, score: 0.0}] | |
徳川01 守時 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# Connection settings handed to jubatus.NearestNeighbor() by the script
# entry point: server address, RPC port, and the name argument.
host, port, name = '127.0.0.1', 9199, 'test'
import sys | |
import json | |
import random | |
import jubatus | |
from jubatus.common import Datum | |
def train(client):
    """Store one row per known shogun through ``client.set_row``.

    Every row id is a family name followed by a two-digit ordinal, and the
    stored datum holds the shogun's given name under the ``name`` key.
    Rows are sent in the order listed below.
    """
    shogun_rows = (
        # Tokugawa
        (u'徳川01', u'家康'),
        (u'徳川02', u'秀忠'),
        (u'徳川03', u'家光'),
        (u'徳川04', u'家綱'),
        (u'徳川05', u'綱吉'),
        (u'徳川06', u'家宣'),
        (u'徳川07', u'家継'),
        (u'徳川08', u'吉宗'),
        (u'徳川09', u'家重'),
        (u'徳川10', u'家治'),
        (u'徳川11', u'家斉'),
        (u'徳川12', u'家慶'),
        (u'徳川13', u'家定'),
        (u'徳川14', u'家茂'),
        # Ashikaga
        (u'足利01', u'尊氏'),
        (u'足利02', u'義詮'),
        (u'足利03', u'義満'),
        (u'足利04', u'義持'),
        (u'足利05', u'義量'),
        (u'足利06', u'義教'),
        (u'足利07', u'義勝'),
        (u'足利08', u'義政'),
        (u'足利09', u'義尚'),
        (u'足利10', u'義稙'),
        (u'足利11', u'義澄'),
        (u'足利12', u'義稙'),
        (u'足利13', u'義晴'),
        (u'足利14', u'義輝'),
        (u'足利15', u'義栄'),
        # Hojo
        (u'北条01', u'時政'),
        (u'北条02', u'義時'),
        (u'北条03', u'泰時'),
        (u'北条04', u'経時'),
        (u'北条05', u'時頼'),
        (u'北条06', u'長時'),
        (u'北条07', u'政村'),
        (u'北条08', u'時宗'),
        (u'北条09', u'貞時'),
        (u'北条10', u'師時'),
        (u'北条11', u'宗宣'),
        (u'北条12', u'煕時'),
        (u'北条13', u'基時'),
        (u'北条14', u'高時'),
        (u'北条15', u'貞顕'),
    )
    for row_id, given_name in shogun_rows:
        client.set_row(row_id, Datum({'name': given_name}))
def predict(client):
    """Look up the nearest stored row for each query name and print it.

    For every query the raw neighbour list is printed first, followed by a
    line of the form ``<id of the nearest row> <query name>``.

    Args:
        client: object providing ``neighbor_row_from_datum(datum, size)``.
            Each returned item is read through its ``score`` and ``id``
            attributes.
    """
    # predict the last shogun
    data = [
        Datum({'name': u'慶喜'}),
        Datum({'name': u'義昭'}),
        Datum({'name': u'守時'}),
    ]
    for d in data:
        res = client.neighbor_row_from_datum(d, 10)
        # Call form prints the same text on Python 2 and Python 3.
        print(res)
        if not res:
            # No neighbours came back, so there is nothing to report.
            continue
        # neighbor_row_from_datum scores are distances (smaller = closer),
        # so the predicted row is the one with the minimum score; taking
        # the maximum would pick the farthest of the returned neighbours.
        nearest = min(res, key=lambda item: item.score)
        query_name = d.string_values[0][1]
        if not isinstance(query_name, str):
            # Python 2 unicode: encode explicitly before writing to stdout.
            query_name = query_name.encode('utf-8')
        sys.stdout.write(nearest.id)
        sys.stdout.write(' ')
        sys.stdout.write(query_name)
        sys.stdout.write('\n')
if __name__ == '__main__':
    # Build the nearest-neighbor client from the module-level settings.
    client = jubatus.NearestNeighbor(host, port, name)
    # Run the example: store the rows first, then query them.
    for step in (train, predict):
        step(client)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment