Created
February 4, 2020 20:22
-
-
Save demircancelebi/649607d0c9c9d485733f5d59134fe990 to your computer and use it in GitHub Desktop.
K-Armed Bandit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Experiment configuration: number of arms, pulls per run, and the epsilon
// values to run the simulation with.
const ARMS = 10;
const STEPS = 100000;
const epsilons = [0, 0.01, 0.1];

// Mean reward of each arm, drawn uniformly from [-3, 3).
const means = Array.from({ length: ARMS }, () => Math.random() * 6 - 3);

// Per-arm running statistics: summed reward and number of pulls.
// Declared with `let` because they are replaced with fresh arrays later on.
let rewards = Array.from({ length: ARMS }, () => 0);
let pulls = Array.from({ length: ARMS }, () => 0);
/**
 * Draws one reward from the given arm: the arm's mean plus uniform noise
 * in [-0.5, 0.5).
 *
 * @param {number} arm - Index into the module-level `means` array.
 * @returns {number} The sampled reward.
 */
function pullArm(arm) {
  const shifted = means[arm] + Math.random();
  return shifted - 0.5;
}
// Run one epsilon-greedy simulation per epsilon value. `means` is not
// modified here, so every run faces the same arms.
for (const eps of epsilons) {
  for (let step = 0; step < STEPS; step++) {
    // Explore (uniformly random arm) with probability eps; otherwise exploit
    // whichever arm chooseBest() currently ranks highest.
    const explore = Math.random() < eps;
    const arm = explore ? Math.floor(Math.random() * ARMS) : chooseBest();

    rewards[arm] += pullArm(arm);
    pulls[arm] += 1;
  }

  // Report the epsilon value followed by the total reward it collected.
  console.log(eps);
  // The explicit initial value keeps reduce() from throwing on an empty array.
  console.log(rewards.reduce((total, reward) => total + reward, 0));
  console.log("----");

  // Reset the per-arm statistics so the next epsilon starts from scratch.
  rewards = Array.from({ length: ARMS }, () => 0);
  pulls = Array.from({ length: ARMS }, () => 0);
}

// Print the true arm means for comparison with the totals above.
console.log(means);
/**
 * Picks the arm with the highest average reward observed so far.
 *
 * An arm that has never been pulled is given an estimate of 0. When several
 * arms share the highest estimate, the one with the lowest index wins.
 *
 * @param {number[]} [rewardTotals=rewards] - Summed reward per arm; defaults
 *   to the module-level `rewards` array.
 * @param {number[]} [pullCounts=pulls] - Number of pulls per arm; defaults to
 *   the module-level `pulls` array.
 * @returns {number} Index of the best arm, or -1 when no arm has a comparable
 *   estimate (for example when there are no arms at all).
 */
function chooseBest(rewardTotals = rewards, pullCounts = pulls) {
  let best = -1;
  // Start from -Infinity rather than a large negative constant so that any
  // finite average, however low, can be selected.
  let bestResult = -Infinity;

  rewardTotals.forEach((total, arm) => {
    const count = pullCounts[arm];
    // Avoid dividing by zero for arms that have not been tried yet.
    const estimate = count === 0 ? 0 : total / count;

    // Strict comparison keeps the earliest arm on ties.
    if (estimate > bestResult) {
      bestResult = estimate;
      best = arm;
    }
  });

  return best;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment