Last active
December 15, 2015 10:09
-
-
Save mitmul/5243518 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "gnuplot"
require "narray"
# Draws one line series per entry of +y+ on a single gnuplot chart.
#
# x - collection of x-axis values shared by every series (only #size is
#     used here before it is handed to Gnuplot::DataSet).
# y - Hash-like collection yielding [title, values] pairs.
#
# A series whose length differs from x.size is skipped silently.
def draw_chart(x, y)
  Gnuplot.open do |gp|
    Gnuplot::Plot.new(gp) do |plot|
      y.each do |name, value|
        # Only series that pair up one-to-one with the x values are plotted.
        next unless x.size == value.size

        plot.data << Gnuplot::DataSet.new([x, value]) do |ds|
          ds.with = "lines"
          ds.title = name
        end
      end
    end
  end
end
# Returns one sample from the standard normal distribution using the
# Box-Muller transform on two uniform draws.
def nrand
  # Kernel#rand returns a value in [0, 1). Taking 1.0 - rand moves the range
  # to (0, 1], so Math.log never sees 0 and the result is always finite.
  radius = Math.sqrt(-2 * Math.log(1.0 - rand))
  radius * Math.cos(2 * Math::PI * rand)
end
# Simulates one trial of an action that succeeds with probability +rate+.
#
# Returns 0 on success and 1 on failure.
def result(rate)
  # Kernel#rand is in [0, 1); the strict comparison makes a rate of 0 never
  # succeed and a rate of 1 always succeed.
  rand < rate ? 0 : 1
end
# Picks an action given the probability +a_rate+ of selecting action A.
#
# Returns 0 for action A and 1 for action B.
def choose(a_rate)
  # Kernel#rand is in [0, 1); the strict comparison means a probability of 0
  # never selects A and a probability of 1 always selects A.
  rand < a_rate ? 0 : 1
end
# Runs one episode of a two-action linear reward-penalty learning automaton.
#
# try   - number of steps in the episode.
# exp   - unused here; kept so existing callers keep working.
# a     - success probability of action A (action 0).
# b     - success probability of action B (action 1).
# alpha - learning rate applied on every step.
#
# Returns an Array of length +try+ whose t-th entry is the fraction of the
# first t + 1 steps on which the better action was chosen.
def automaton_RP(try, exp, a, b, alpha)
  # Success probability of each action.
  a_rate = a
  b_rate = b
  # Optimal action (A wins ties).
  correct = a_rate >= b_rate ? 0 : 1
  # Probability of selecting action A; B is selected with the complement.
  a_choice_prob = 0.5
  # Number of steps on which the optimal action was chosen.
  correct_choice = 0
  # History of the optimal-action selection rate.
  path = []

  try.times do |t|
    choice = choose(a_choice_prob)
    correct_choice += 1 if choice == correct
    path << (correct_choice.to_f / (t + 1))

    # Success: 0, failure: 1.
    reward = result(choice == 0 ? a_rate : b_rate)

    # Update the selection probability. "A chosen and succeeded" and
    # "B chosen and failed" both shift probability toward A; the other two
    # outcomes shift it toward B. Each branch moves a_choice_prob a fraction
    # alpha of the way to 1 or to 0, so for alpha in [0, 1] it cannot leave
    # [0, 1].
    if choice == reward
      a_choice_prob += alpha * (1 - a_choice_prob)
    else
      a_choice_prob -= alpha * a_choice_prob
    end
  end

  path
end
# Runs one episode of a two-action linear reward-inaction learning automaton.
#
# try   - number of steps in the episode.
# exp   - unused here; kept so existing callers keep working.
# a     - success probability of action A (action 0).
# b     - success probability of action B (action 1).
# alpha - learning rate applied on successful steps.
#
# Returns an Array of length +try+ whose t-th entry is the fraction of the
# first t + 1 steps on which the better action was chosen.
def automaton_RI(try, exp, a, b, alpha)
  # Success probability of each action.
  a_rate = a
  b_rate = b
  # Optimal action (A wins ties).
  correct = a_rate >= b_rate ? 0 : 1
  # Probability of selecting action A; B is selected with the complement.
  a_choice_prob = 0.5
  # Number of steps on which the optimal action was chosen.
  correct_choice = 0
  # History of the optimal-action selection rate.
  path = []

  try.times do |t|
    choice = choose(a_choice_prob)
    correct_choice += 1 if choice == correct
    path << (correct_choice.to_f / (t + 1))

    # Success: 0, failure: 1.
    reward = result(choice == 0 ? a_rate : b_rate)

    # Reward-inaction: a failed step leaves the probabilities untouched.
    next unless reward == 0

    # On success, move probability toward whichever action was just chosen.
    if choice == 0
      a_choice_prob += alpha * (1 - a_choice_prob)
    else
      a_choice_prob -= alpha * a_choice_prob
    end
  end

  path
end
# Runs the experiment: averages the optimal-action selection rate of the RP
# and RI automata over many independent episodes and plots both curves.
def exp
  try = 500     # steps per episode
  runs = 2000   # number of episodes averaged

  # Accumulators for the per-step sums; each automaton gets its own vector.
  rp_sum = NVector.float(try)
  ri_sum = NVector.float(try)

  # Experiment: A succeeds with 0.8, B with 0.9, learning rate 0.1.
  runs.times do |i|
    rp_sum += NVector.to_na(automaton_RP(try, i, 0.8, 0.9, 0.1))
    ri_sum += NVector.to_na(automaton_RI(try, i, 0.8, 0.9, 0.1))
  end

  rp_mean = (rp_sum / runs.to_f).to_a
  ri_mean = (ri_sum / runs.to_f).to_a

  # The x axis is the step index 0 .. try - 1.
  draw_chart((0...try).to_a,
             { "Learning Automaton (RP)" => rp_mean,
               "Learning Automaton (RI)" => ri_mean })
end
# Run the experiment when the script is loaded.
exp
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment