MCarlomagno · October 17, 2020 20:14
diff --git a/index.js b/index.js
 /**
  * Runs the training loop for both fighters (Subzero and Kano).
  *
  * Each episode is one round: the game is restarted, then both agents act
  * repeatedly until either the step limit is reached or one fighter's life
  * hits zero. Every transition is stored in each agent's memory and, once
  * enough transitions exist, both agents are trained via `replay`. The target
  * model is refreshed after every round.
  *
  * Relies on helpers and state defined elsewhere in the file
  * (`setupHiperparams`, `startGame`, `getState`, `predict*Action`,
  * `execute*Action`, `sleep`, `memorize`, `replay`, `updateTargetModel`,
  * `subzeroMemory`, `kanoMemory`, `subzeroIndex`, `kanoIndex`).
  *
  * @returns {Promise<void>} Resolves when all episodes have been played.
  */
 var train = async function () {
   const episodes = 100;
   const maxTimeSteps = 1000;
   const batchSize = 32;
   const initialLife = 100;
   // NOTE(review): presumably milliseconds; confirm against `sleep`.
   const stepDelay = 200;

   setupHiperparams();

   // Each episode is a round.
   for (let episode = 0; episode < episodes; episode++) {
     let subzeroLife = initialLife;
     let kanoLife = initialLife;
     let timeStep = 0;
     // Must be reset every round, otherwise every round after the first
     // finished one would be skipped.
     let done = false;

     startGame();
     let state = getState();

     // Fight until the step limit is reached or one of the fighters has won.
     while (timeStep < maxTimeSteps && !done) {
       // Predict the best action for each fighter from the current state.
       const subzeroAction = predictSubzeroAction(state);
       const kanoAction = predictKanoAction(state);

       // Execute the actions in the game.
       executeSubzeroAction(subzeroAction);
       executeKanoAction(kanoAction);

       // Let the game advance to the next step.
       await sleep(stepDelay);
       timeStep++;

       // Read the updated state.
       const nextState = getState();

       // Reward of each fighter is the life its opponent lost during this step.
       const rewardSubzero = kanoLife - nextState.kanoLife;
       const rewardKano = subzeroLife - nextState.subzeroLife;

       // Track the current life so the next reward only covers the next step.
       kanoLife = nextState.kanoLife;
       subzeroLife = nextState.subzeroLife;

       done = nextState.kanoLife === 0 || nextState.subzeroLife === 0;

       // Add the new sequence to the memory list of each agent.
       memorize(state, subzeroAction, rewardSubzero, nextState, done, subzeroMemory);
       memorize(state, kanoAction, rewardKano, nextState, done, kanoMemory);

       state = nextState;

       // Train both agents once enough transitions have been collected.
       if (subzeroMemory.length > batchSize || kanoMemory.length > batchSize) {
         await replay(batchSize, subzeroIndex);
         await replay(batchSize, kanoIndex);
       }
     }

     // After each round, update the target model.
     updateTargetModel();
   }
 };
	/**
	 * Runs the training loop for both fighters (Subzero and Kano).
	 *
	 * Each episode is one round: the game is restarted, then both agents act
	 * repeatedly until either the step limit is reached or one fighter's life
	 * hits zero. Every transition is stored in each agent's memory and, once
	 * enough transitions exist, both agents are trained via `replay`. The target
	 * model is refreshed after every round.
	 *
	 * Relies on helpers and state defined elsewhere in the file
	 * (`setupHiperparams`, `startGame`, `getState`, `predict*Action`,
	 * `execute*Action`, `sleep`, `memorize`, `replay`, `updateTargetModel`,
	 * `subzeroMemory`, `kanoMemory`, `subzeroIndex`, `kanoIndex`).
	 *
	 * @returns {Promise<void>} Resolves when all episodes have been played.
	 */
	var train = async function () {
		const episodes = 100;
		const maxTimeSteps = 1000;
		const batchSize = 32;
		const initialLife = 100;
		// NOTE(review): presumably milliseconds; confirm against `sleep`.
		const stepDelay = 200;

		setupHiperparams();

		// Each episode is a round.
		for (let episode = 0; episode < episodes; episode++) {
			let subzeroLife = initialLife;
			let kanoLife = initialLife;
			let timeStep = 0;
			// Must be reset every round, otherwise every round after the first
			// finished one would be skipped.
			let done = false;

			startGame();
			let state = getState();

			// Fight until the step limit is reached or one of the fighters has won.
			while (timeStep < maxTimeSteps && !done) {
				// Predict the best action for each fighter from the current state.
				const subzeroAction = predictSubzeroAction(state);
				const kanoAction = predictKanoAction(state);

				// Execute the actions in the game.
				executeSubzeroAction(subzeroAction);
				executeKanoAction(kanoAction);

				// Let the game advance to the next step.
				await sleep(stepDelay);
				timeStep++;

				// Read the updated state.
				const nextState = getState();

				// Reward of each fighter is the life its opponent lost during this step.
				const rewardSubzero = kanoLife - nextState.kanoLife;
				const rewardKano = subzeroLife - nextState.subzeroLife;

				// Track the current life so the next reward only covers the next step.
				kanoLife = nextState.kanoLife;
				subzeroLife = nextState.subzeroLife;

				done = nextState.kanoLife === 0 || nextState.subzeroLife === 0;

				// Add the new sequence to the memory list of each agent.
				memorize(state, subzeroAction, rewardSubzero, nextState, done, subzeroMemory);
				memorize(state, kanoAction, rewardKano, nextState, done, kanoMemory);

				state = nextState;

				// Train both agents once enough transitions have been collected.
				if (subzeroMemory.length > batchSize || kanoMemory.length > batchSize) {
					await replay(batchSize, subzeroIndex);
					await replay(batchSize, kanoIndex);
				}
			}

			// After each round, update the target model.
			updateTargetModel();
		}
	};