Skip to content

Instantly share code, notes, and snippets.

@MCarlomagno
Created October 17, 2020 20:14
Show Gist options
  • Save MCarlomagno/88029943e122038b3276824880acbc98 to your computer and use it in GitHub Desktop.
Save MCarlomagno/88029943e122038b3276824880acbc98 to your computer and use it in GitHub Desktop.
/**
 * Runs the training loop for the two agents (Subzero and Kano).
 *
 * Each episode is one round of the game. A round lasts until either the
 * step cap is reached or one of the fighters has no life left. On every
 * step both agents pick and execute an action, the resulting transition
 * is stored in each agent's memory, and - once enough transitions have
 * been collected - both agents are trained on a replay batch. After each
 * round `updateTargetModel()` is called.
 *
 * This function relies on names defined outside this block:
 * `setupHiperparams`, `startGame`, `getState`, `predictSubzeroAction`,
 * `predictKanoAction`, `executeSubzeroAction`, `executeKanoAction`,
 * `sleep`, `memorize`, `replay`, `updateTargetModel`, `subzeroMemory`,
 * `kanoMemory`, `subzeroIndex` and `kanoIndex`.
 * NOTE(review): `getState()` is assumed to return an object exposing
 * numeric `subzeroLife` and `kanoLife` fields - confirm against its
 * definition.
 *
 * @param {object} [options] - Optional overrides; defaults reproduce the
 *   previously hard-coded values.
 * @param {number} [options.episodes=100] - Number of rounds to play.
 * @param {number} [options.maxTimeSteps=1000] - Step cap per round.
 * @param {number} [options.batchSize=32] - Batch size passed to `replay`;
 *   also the memory size that must be exceeded before replaying.
 * @param {number} [options.stepDelayMs=200] - Value passed to `sleep`
 *   between executing the actions and reading the next state.
 * @returns {Promise<void>} Resolves when all episodes have finished.
 */
async function train({
  episodes = 100,
  maxTimeSteps = 1000,
  batchSize = 32,
  stepDelayMs = 200,
} = {}) {
  setupHiperparams();

  // Each episode is one round.
  for (let episode = 0; episode < episodes; episode++) {
    // Per-round bookkeeping. `done` must be reset here, otherwise a
    // finished round would prevent every following round from running.
    let subzeroLife = 100;
    let kanoLife = 100;
    let timeStep = 0;
    let done = false;

    startGame();
    let state = getState();

    // Fight until the step cap is hit OR one of the fighters has lost.
    while (timeStep < maxTimeSteps && !done) {
      // Predict an action for each agent from the current state.
      const subzeroAction = predictSubzeroAction(state);
      const kanoAction = predictKanoAction(state);

      // Execute the actions in the game.
      executeSubzeroAction(subzeroAction);
      executeKanoAction(kanoAction);

      // Wait before sampling the next state.
      await sleep(stepDelayMs);
      timeStep++;

      const nextState = getState();

      // Reward of the previous action = life the opponent lost during
      // this step (not since the beginning of the round).
      const rewardSubzero = kanoLife - nextState.kanoLife;
      const rewardKano = subzeroLife - nextState.subzeroLife;
      subzeroLife = nextState.subzeroLife;
      kanoLife = nextState.kanoLife;

      // `<= 0` so a hit that overshoots below zero still ends the round.
      done = nextState.kanoLife <= 0 || nextState.subzeroLife <= 0;

      // Add the new transition to the memory list of each agent.
      memorize(state, subzeroAction, rewardSubzero, nextState, done, subzeroMemory);
      memorize(state, kanoAction, rewardKano, nextState, done, kanoMemory);

      state = nextState;

      if (subzeroMemory.length > batchSize || kanoMemory.length > batchSize) {
        // Kept sequential on purpose: whether the two replays may safely
        // run concurrently cannot be determined from this block.
        await replay(batchSize, subzeroIndex);
        await replay(batchSize, kanoIndex);
      }
    }

    // After each round, update the target model.
    updateTargetModel();
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment