Created
January 8, 2017 14:06
-
-
Save tattyamm/2813c935111e986a801c3d51e48aaae4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Computes a sentiment score for +source+ by summing the scores of all
# dictionary entries whose word occurs in it.
#
# source     - the text to evaluate (String).
# dictionary - an Enumerable of hashes with a :word (String) and a
#              :score (Integer) key.
#
# A space inside an entry's word matches any run of characters (except
# newline), so multi-part entries match even when other text sits between
# the parts. All other characters of the word are matched literally.
#
# Each matching entry is reported on stdout via +p+ and counted once, no
# matter how often it occurs in +source+.
#
# Returns the summed score (0 when nothing matches).
def text2sentiment(source, dictionary)
  score = 0
  # TODO: source probably needs normalization (full-width vs. half-width
  # characters, digits in particular).
  # TODO: a word may occur several times in source; decide how to score that.
  dictionary.each do |dic|
    # Escape every part so regex metacharacters in dictionary words are
    # matched literally instead of being interpreted (or raising
    # RegexpError); only the spaces become ".*" wildcards.
    # (Simply dropping the spaces might also be good enough.)
    pattern = dic[:word].split(/ /).map { |part| Regexp.escape(part) }.join('.*')
    next unless source.match?(Regexp.new(pattern))

    p "score = #{dic[:score]}, match = #{pattern}"
    score += dic[:score]
  end
  score
end
# Appends the entries of one tab-separated polarity file to +dictionary+.
#
# Every line is chomped and split on tabs; the resulting fields are yielded
# to the block, which returns the entry hash to store or nil to skip the
# line. The file is read as UTF-8 explicitly so the result does not depend
# on the process locale.
#
# Open/read failures are reported on stdout and otherwise ignored; entries
# appended before the failure stay in +dictionary+.
def append_polarity_entries(dictionary, path)
  File.open(path, 'r:UTF-8') do |file|
    file.each_line do |line|
      entry = yield(line.chomp.split(/\t/))
      dictionary.push(entry) if entry
    end
  end
rescue SystemCallError, IOError => e
  puts e.message
end

# Loads the two polarity dictionaries from ./dic and returns them as one
# Array of { :score => Integer, :word => String } hashes.
#
# A file that cannot be opened or read only produces a message on stdout;
# the entries gathered from the other file are still returned.
def readDictionary()
  dictionary = []

  # Japanese Sentiment Polarity Dictionary (verbs/adjectives volume).
  # Line layout: label<TAB>word. Labels starting with "ポジ" score +1,
  # every other label scores -1.
  append_polarity_entries(dictionary, './dic/wago.121808.pn.txt') do |label, word|
    next unless word

    { :score => (label[0, 2] == "ポジ" ? 1 : -1), :word => word }
  end

  # Japanese Sentiment Polarity Dictionary (nouns volume).
  # Line layout: word<TAB>label[...]. "p" scores +1, "n" scores -1 and any
  # other label scores 0.
  append_polarity_entries(dictionary, './dic/pn.csv.m3.120408.trim') do |word, label|
    next unless word

    score = case label
            when "p" then 1
            when "n" then -1
            else 0
            end
    { :score => score, :word => word }
  end

  dictionary
end
# Demo driver: load the dictionaries, score a sample sentence and print the
# sentence followed by its score.
source = "テスト文字列をここに。"
dictionary = readDictionary()
score = text2sentiment(source, dictionary)
p source
p "result = #{score}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment