Created
September 3, 2012 03:13
-
-
Save tabris2012/3606494 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For every attribute (one entry of @vector, paired with the hash stored at the
# same index in chunkTemp), record each feasible chunk whose pattern matches
# line[0] of at least one of that attribute's lines. A chunk already present in
# the attribute's hash is left untouched.
#
# Each chunk is interpolated into a regular expression, so it is matched as a
# pattern, not as literal text.
# NOTE(review): if chunks are meant to be plain substrings, wrap them in
# Regexp.escape -- confirm against how chunkFeasible is built.
#
# The pattern depends only on the chunk, so it is compiled lazily once per
# chunk instead of once per (line, chunk) pair.
chunk_patterns = Hash.new { |cache, chunk| cache[chunk] = /#{chunk}/ }

@vector.each_with_index do |attr_lines, attr_index|
  collected = chunkTemp[attr_index]
  attr_lines.each do |line| # visit every line of this attribute
    chunkFeasible.each do |chunk, value|
      # Cheap hash lookup first: a chunk that is already recorded for this
      # attribute needs no regex match at all.
      next if collected.key?(chunk)

      # The line contains the chunk and it is not recorded yet, so add it.
      collected[chunk] = value if line[0] =~ chunk_patterns[chunk]
    end
  end
end
# Validate the contents of the newly built chunk lists.
# chunkUsage counts how many of the per-attribute hashes in chunkTemp contain
# each chunk. Chunks used three or more times add little to the estimation and
# are discarded.
chunkUsage = {}
chunkTemp.each do |attr_chunks|
  attr_chunks.each_key do |name|
    # A chunk seen for the first time starts at 1; later sightings increment it.
    chunkUsage[name] = chunkUsage.fetch(name, 0) + 1
  end
end
# Drop every chunk whose usage count in chunkUsage is greater than 2 (i.e. it
# was used three or more times): write its name to chunkSenseless, one per
# line, and remove it from every per-attribute hash in chunkTemp.
chunkUsage.each do |name, occurrences|
  next unless occurrences > 2 # keep chunks used at most twice

  chunkSenseless.write "#{name}\n"
  chunkTemp.each { |attr_chunks| attr_chunks.delete(name) } # remove the unwanted chunk
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment