Created March 26, 2013 08:59
| require "gnuplot" | |
| require "narray" | |
def draw_chart(x, y)
  Gnuplot.open do |gp|
    Gnuplot::Plot.new(gp) do |plot|
      y.each do |name, value|
        if x.size == value.size
          plot.data << Gnuplot::DataSet.new([x, value]) do |ds|
            ds.with = "lines"
            ds.title = name
          end
        end
      end
    end
  end
end
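# Bucket the values in prob_array[0] into a 100-bin histogram covering
# roughly [-5, 5) in steps of 0.1, then plot it.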
def make_hist(prob_array)
  hist = Array.new(100, 0)
  prob_array[0].each do |prob|
    hist[(prob * 10).to_i + 50] += 1
  end
  draw_chart(NArray[0..hist.size - 1].to_a, {"Normal Distribution!!!!" => hist})
end
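# Approximate a standard normal sample as the sum of twelve uniform [0, 1)
# variates minus 6 (Irwin-Hall / central-limit approximation).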
def nrand
  12.times.inject(0) { |a, i| a + rand } - 6.0
end
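# Pure greedy selection on a 10-armed bandit: always pick the arm with the
# highest current value estimate. Returns [reward per trial, fraction of
# optimal-arm selections per trial].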
def greedy(try, reward_array, exp)
  q_t_a = Array.new(10, 0.0)
  r_sum = Array.new(10, 0.0)
  result = 0.0
  choices = Array.new(10, 0)
  path = []
  # The truly optimal arm for this problem instance
  exp_reward_array = []
  (0..9).each do |i|
    exp_reward_array << reward_array[i][exp]
  end
  opt_choice = NVector[exp_reward_array].flatten.sort_index[-1]
  opt_select = 0
  opt_path = []
  try.times do |t|
    choice = NVector[q_t_a].flatten.sort_index[-1]
    opt_select += 1 if choice == opt_choice
    choices[choice] += 1
    reward = nrand + reward_array[choice][exp]
    r_sum[choice] += reward
    result += reward
    q_t_a[choice] = r_sum[choice] / choices[choice]
    path << reward
    # t is zero-based, so divide by t + 1 to avoid 0/0 on the first trial
    opt_path << opt_select.to_f / (t + 1)
  end
  [path, opt_path]
end
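# Epsilon-greedy selection: with probability epsilon pick a random arm,
# otherwise pick the arm with the highest estimate. Value estimates start at
# 5.0 (optimistic initial values), which encourages early exploration.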
def epsilon_greedy(try, reward_array, exp, epsilon)
  q_t_a = Array.new(10, 5.0)
  r_sum = Array.new(10, 0.0)
  result = 0.0
  choices = Array.new(10, 0)
  path = []
  # The truly optimal arm for this problem instance
  exp_reward_array = []
  (0..9).each do |i|
    exp_reward_array << reward_array[i][exp]
  end
  opt_choice = NVector[exp_reward_array].flatten.sort_index[-1]
  opt_select = 0
  opt_path = []
  try.times do |t|
    choice =
      if rand <= epsilon
        rand(10)
      else
        NVector[q_t_a].flatten.sort_index[-1]
      end
    opt_select += 1 if choice == opt_choice
    choices[choice] += 1
    reward = nrand + reward_array[choice][exp]
    r_sum[choice] += reward
    result += reward
    q_t_a[choice] = r_sum[choice] / choices[choice]
    path << reward
    opt_path << opt_select.to_f / (t + 1)
  end
  [path, opt_path]
end
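# Sample an index from a discrete probability distribution (an array of
# probabilities). Falls back to a uniformly random index if the drawn random
# number is not covered by the given probabilities.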
def get_occurrence_id(distribution)
  occur_id = rand(distribution.size)
  prob_sum = 0.0
  r = rand
  distribution.each_with_index do |d, i|
    if prob_sum <= r && r < prob_sum + d
      occur_id = i
      break
    end
    prob_sum += d
  end
  occur_id
end
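# Softmax (Boltzmann) selection: each arm a is chosen with probability
#   P(a) = exp(Q(a) / tau) / sum_b exp(Q(b) / tau)
# where tau is the temperature; small tau approaches greedy selection,
# large tau approaches uniform random selection.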
def softmax(try, reward_array, exp, tau)
  q_t_a = Array.new(10, 0.0)
  r_sum = Array.new(10, 0.0)
  result = 0.0
  choices = Array.new(10, 0)
  path = []
  # The truly optimal arm for this problem instance
  exp_reward_array = []
  (0..9).each do |i|
    exp_reward_array << reward_array[i][exp]
  end
  opt_choice = NVector[exp_reward_array].flatten.sort_index[-1]
  opt_select = 0
  opt_path = []
  choice_prob = []
  try.times do |t|
    # Selection probability of each action (Boltzmann distribution)
    denominator = q_t_a.inject(0) { |sum, qta| sum + Math.exp(qta / tau) }
    if denominator.infinite?
      # If the denominator overflows, zero out all probabilities so that
      # get_occurrence_id falls back to a uniform random choice
      q_t_a.each_with_index do |qta, i|
        choice_prob[i] = 0
      end
    else
      q_t_a.each_with_index do |qta, i|
        numerator = Math.exp(qta / tau)
        choice_prob[i] = numerator / denominator
      end
    end
    # Choose an action according to the selection probabilities
    choice = get_occurrence_id(choice_prob)
    opt_select += 1 if choice == opt_choice
    choices[choice] += 1
    reward = nrand + reward_array[choice][exp]
    r_sum[choice] += reward
    result += reward
    q_t_a[choice] = r_sum[choice] / choices[choice]
    path << reward
    opt_path << opt_select.to_f / (t + 1)
  end
  [path, opt_path]
end
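# Compare greedy (epsilon = 0) with epsilon-greedy (epsilon = 0.01, 0.1) over
# 2000 independent 10-armed bandit problems of 1000 trials each, then plot the
# averaged reward and optimal-selection curves.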
def greedy_experiment
  try = 1000
  exp = 2000
  # Generate the bandit problems: 10 arms, each with `exp` true mean rewards
  reward_array = []
  10.times do
    reward_array << exp.times.inject([]) { |a, r| a << nrand }
  end
  # Per-trial reward, accumulated over experiments (averaged below)
  greedy_path = ep_greedy_path = ep2_greedy_path = NVector.float(try)
  # Per-trial fraction of optimal-arm selections
  greedy_opt_path = ep_greedy_opt_path = ep2_greedy_opt_path = NVector.float(try)
  # Run the experiments
  exp.times do |i|
    greedy_result = epsilon_greedy(try, reward_array, i, 0.0)
    greedy_path += NVector.to_na(greedy_result[0])
    greedy_opt_path += NVector.to_na(greedy_result[1])
    ep_greedy_result = epsilon_greedy(try, reward_array, i, 0.01)
    ep_greedy_path += NVector.to_na(ep_greedy_result[0])
    ep_greedy_opt_path += NVector.to_na(ep_greedy_result[1])
    ep2_greedy_result = epsilon_greedy(try, reward_array, i, 0.1)
    ep2_greedy_path += NVector.to_na(ep2_greedy_result[0])
    ep2_greedy_opt_path += NVector.to_na(ep2_greedy_result[1])
  end
  greedy_path = (greedy_path / exp).to_a
  greedy_opt_path = (greedy_opt_path / exp).to_a
  ep_greedy_path = (ep_greedy_path / exp).to_a
  ep_greedy_opt_path = (ep_greedy_opt_path / exp).to_a
  ep2_greedy_path = (ep2_greedy_path / exp).to_a
  ep2_greedy_opt_path = (ep2_greedy_opt_path / exp).to_a
  draw_chart(NArray[0..try - 1].to_a, {"greedy" => greedy_path,
                                       "epsilon(=0.01) greedy" => ep_greedy_path,
                                       "epsilon(=0.1) greedy" => ep2_greedy_path})
  draw_chart(NArray[0..try - 1].to_a, {"greedy" => greedy_opt_path,
                                       "epsilon(=0.01) greedy" => ep_greedy_opt_path,
                                       "epsilon(=0.1) greedy" => ep2_greedy_opt_path})
end
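# Compare softmax selection at temperatures tau = 0.1, 0.5 and 1 under the
# same 2000-problem / 1000-trial setup.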
def softmax_experiment
  try = 1000
  exp = 2000
  # Generate the bandit problems: 10 arms, each with `exp` true mean rewards
  reward_array = []
  10.times do
    reward_array << exp.times.inject([]) { |a, r| a << nrand }
  end
  softmax_path_a = softmax_path_b = softmax_path_c = NVector.float(try)
  softmax_opt_path_a = softmax_opt_path_b = softmax_opt_path_c = NVector.float(try)
  # Run the experiments
  exp.times do |i|
    softmax_result_a = softmax(try, reward_array, i, 0.1)
    softmax_path_a += NVector.to_na(softmax_result_a[0])
    softmax_opt_path_a += NVector.to_na(softmax_result_a[1])
    softmax_result_b = softmax(try, reward_array, i, 0.5)
    softmax_path_b += NVector.to_na(softmax_result_b[0])
    softmax_opt_path_b += NVector.to_na(softmax_result_b[1])
    softmax_result_c = softmax(try, reward_array, i, 1)
    softmax_path_c += NVector.to_na(softmax_result_c[0])
    softmax_opt_path_c += NVector.to_na(softmax_result_c[1])
  end
  softmax_path_a = (softmax_path_a / exp).to_a
  softmax_opt_path_a = (softmax_opt_path_a / exp).to_a
  softmax_path_b = (softmax_path_b / exp).to_a
  softmax_opt_path_b = (softmax_opt_path_b / exp).to_a
  softmax_path_c = (softmax_path_c / exp).to_a
  softmax_opt_path_c = (softmax_opt_path_c / exp).to_a
  draw_chart(NArray[0..try - 1].to_a, {"softmax(tau=0.1)" => softmax_path_a,
                                       "softmax(tau=0.5)" => softmax_path_b,
                                       "softmax(tau=1)" => softmax_path_c})
  draw_chart(NArray[0..try - 1].to_a, {"softmax(tau=0.1)" => softmax_opt_path_a,
                                       "softmax(tau=0.5)" => softmax_opt_path_b,
                                       "softmax(tau=1)" => softmax_opt_path_c})
end
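# Run greedy, epsilon-greedy and softmax on the same set of problems and plot
# all six average-reward curves together.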
def greedy_softmax_experiment
  try = 1000
  exp = 2000
  # Generate the bandit problems: 10 arms, each with `exp` true mean rewards
  reward_array = []
  10.times do
    reward_array << exp.times.inject([]) { |a, r| a << nrand }
  end
  greedy_path = ep_greedy_path = ep2_greedy_path = NVector.float(try)
  greedy_opt_path = ep_greedy_opt_path = ep2_greedy_opt_path = NVector.float(try)
  softmax_path_a = softmax_path_b = softmax_path_c = NVector.float(try)
  softmax_opt_path_a = softmax_opt_path_b = softmax_opt_path_c = NVector.float(try)
  # Run the experiments
  exp.times do |i|
    greedy_result = epsilon_greedy(try, reward_array, i, 0.0)
    greedy_path += NVector.to_na(greedy_result[0])
    greedy_opt_path += NVector.to_na(greedy_result[1])
    ep_greedy_result = epsilon_greedy(try, reward_array, i, 0.01)
    ep_greedy_path += NVector.to_na(ep_greedy_result[0])
    ep_greedy_opt_path += NVector.to_na(ep_greedy_result[1])
    ep2_greedy_result = epsilon_greedy(try, reward_array, i, 0.1)
    ep2_greedy_path += NVector.to_na(ep2_greedy_result[0])
    ep2_greedy_opt_path += NVector.to_na(ep2_greedy_result[1])
    softmax_result_a = softmax(try, reward_array, i, 0.1)
    softmax_path_a += NVector.to_na(softmax_result_a[0])
    softmax_opt_path_a += NVector.to_na(softmax_result_a[1])
    softmax_result_b = softmax(try, reward_array, i, 0.5)
    softmax_path_b += NVector.to_na(softmax_result_b[0])
    softmax_opt_path_b += NVector.to_na(softmax_result_b[1])
    softmax_result_c = softmax(try, reward_array, i, 1)
    softmax_path_c += NVector.to_na(softmax_result_c[0])
    softmax_opt_path_c += NVector.to_na(softmax_result_c[1])
  end
  greedy_path = (greedy_path / exp).to_a
  greedy_opt_path = (greedy_opt_path / exp).to_a
  ep_greedy_path = (ep_greedy_path / exp).to_a
  ep_greedy_opt_path = (ep_greedy_opt_path / exp).to_a
  ep2_greedy_path = (ep2_greedy_path / exp).to_a
  ep2_greedy_opt_path = (ep2_greedy_opt_path / exp).to_a
  softmax_path_a = (softmax_path_a / exp).to_a
  softmax_opt_path_a = (softmax_opt_path_a / exp).to_a
  softmax_path_b = (softmax_path_b / exp).to_a
  softmax_opt_path_b = (softmax_opt_path_b / exp).to_a
  softmax_path_c = (softmax_path_c / exp).to_a
  softmax_opt_path_c = (softmax_opt_path_c / exp).to_a
  draw_chart(NArray[0..try - 1].to_a, {"greedy" => greedy_path,
                                       "epsilon(=0.01) greedy" => ep_greedy_path,
                                       "epsilon(=0.1) greedy" => ep2_greedy_path,
                                       "softmax(tau=0.1)" => softmax_path_a,
                                       "softmax(tau=0.5)" => softmax_path_b,
                                       "softmax(tau=1)" => softmax_path_c})
end
greedy_softmax_experiment
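# The individual comparisons defined above can be run the same way, e.g. by
# replacing the call above with:
#   greedy_experiment
#   softmax_experiment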