Last active
October 9, 2015 11:12
-
-
Save KitaitiMakoto/88a2361a849ef162ea3e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
require 'kuromoji' | |
# Sort priority per part-of-speech class: nouns ('名詞') come first, then
# adjectives ('形容詞'). Every other class shares the lowest priority.
WORD_CLASS_PRIORITY = { '名詞' => 0, '形容詞' => 1 }.freeze
OTHER_CLASS_PRIORITY = WORD_CLASS_PRIORITY.size

# A keyword candidate consists only of ASCII word characters, hiragana,
# katakana or kanji. The long-vowel mark "ー" (U+30FC) is listed explicitly:
# Unicode files it under the Common script, so \p{Katakana} alone rejects
# ordinary katakana words that contain it.
KEYWORD_PATTERN = /\A(?:\w|\p{Hiragana}|\p{Katakana}|\p{Han}|ー)+\z/

# Maps a token's feature string to its sort priority.
#
# @param characteristics [String] comma-separated features; the first field
#   is treated as the part-of-speech class
# @return [Integer] lower values sort earlier
def word_class_priority(characteristics)
  WORD_CLASS_PRIORITY.fetch(characteristics.split(',').first, OTHER_CLASS_PRIORITY)
end

# Picks the most relevant words out of a tokenized title.
#
# Tokens whose word contains anything outside KEYWORD_PATTERN, or whose
# second feature field is '数', are dropped. The rest are ordered nouns
# first, then adjectives, then everything else; tokens of equal priority keep
# their input order. (The previous comparator never returned 0 and answered
# -1 in both directions for unrelated classes, so its order was undefined.)
#
# @param tokens [Array<Array(String, String)>] [word, characteristics] pairs
#   NOTE(review): assumes this is the shape Kuromoji.tokenize(...).to_a
#   yields, as the original script relied on — confirm against the gem.
# @param limit [Integer] maximum number of words to return
# @return [Array<String>] at most +limit+ words, highest priority first
def extract_keywords(tokens, limit = 2)
  candidates = tokens.select do |word, characteristics|
    (KEYWORD_PATTERN =~ word) && characteristics.split(',')[1] != '数'
  end

  # sort_by is not guaranteed stable, so the position is part of the key.
  prioritized = candidates.each_with_index.sort_by do |(_word, characteristics), position|
    [word_class_priority(characteristics), position]
  end

  prioritized.take(limit).map { |(word, _characteristics), _position| word }
end

# For each input line (files named in ARGV, or stdin), print the selected
# keywords separated by tabs; a line with no keywords prints an empty line.
while (title = ARGF.gets)
  puts extract_keywords(Kuromoji.tokenize(title).to_a).join("\t")
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment