Last active
April 9, 2019 07:23
-
-
Save echan00/ed0d9029c7c75963ec08e1133e090fb6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a BERT + BiLSTM-CRF named-entity tagger on the People's Daily NER
# corpus, save it, reload it, and run it on two sample sentences.
import jieba  # kept from the original script; no longer used for model input (see below)
from kashgari.tasks.seq_labeling import BLSTMCRFModel
from kashgari.corpus import ChinaPeoplesDailyNerCorpus
from kashgari.embeddings import BERTEmbedding

# Second positional argument (100) is presumably the maximum sequence
# length -- TODO confirm against the installed Kashgari version.
embedding = BERTEmbedding('/home/eee/sentence-alignment-classification-model/model/multi_cased_L-12_H-768_A-12', 100)

train_x, train_y = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data('train')
validate_x, validate_y = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data('validate')
test_x, test_y = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data('test')

model = BLSTMCRFModel(embedding)
# NOTE(review): confirm that `validate_x` / `validate_y` are the keyword names
# accepted by this Kashgari version's fit(); if they are swallowed by **kwargs
# the validation split is silently ignored.
model.fit(train_x,
          train_y,
          validate_y=validate_y,
          validate_x=validate_x,
          epochs=200,
          batch_size=500)
model.save('./model')

new_model = BLSTMCRFModel.load_model('./model')

# The model must be fed tokens of the same granularity it was trained on.
# The People's Daily corpus is tokenised per character (verify by inspecting
# train_x[0]), so each sentence is split into characters with list(news)
# rather than into words with jieba.cut(news). Word-level jieba tokens were
# the input for the all-'O' predictions recorded below.

# EXAMPLE 1
news = "「DeepMind 击败人类职业玩家的方式与他们声称的 AI 使命,以及所声称的『正确』方式完全相反。」"
x = list(news)
print(x)
print(new_model.predict(x))
# Originally observed with x = list(jieba.cut(news)):
# >>> x
# ['「', 'DeepMind', ' ', '击败', '人类', '职业', '玩家', '的', '方式', '与', '他们', '声称', '的', ' ', 'AI', ' ', '使命', ',', '以及', '所', '声称', '的', '『', '正确', '』', '方式', '完全', '相反', '。', '」']
# >>> new_model.predict(x)
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

# EXAMPLE 2
news = "陈志衍是有个非常好的男孩子,他住在香港的九龙塘区,他今年二十三号生日。"
x = list(news)
print(x)
print(new_model.predict(x))
# Originally observed with x = list(jieba.cut(news)):
# >>> x
# ['陈志衍', '是', '有', '个', '非常', '好', '的', '男孩子', ',', '他', '住', '在', '香港', '的', '九龙塘', '区', ',', '他', '今年', '二十三', '号', '生日', '。']
# >>> new_model.predict(x)
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment