Marcos Carlomagno (MCarlomagno) · OpenZeppelin · Rosario, Argentina
var dispatchAction = function (keyCode) {
  var pressed = {},
      self = this,
      f1 = mk.game.fighters[0],
      f2 = mk.game.fighters[1];
  // pressed is a map of type <keyboardCode, boolean>
  pressed[keyCode] = true;
  var move = self._getMove(pressed, mk.controllers.keys.p1, 0);
  self._moveFighter(f1, move);
};
var playGame = async function () {
  var subzeroIndex = 0;
  var kanoIndex = 1;
  startGame();
  var state = getState(subzeroIndex);
  // loop while both fighters are still alive
  while (state.myLife !== 0 && state.opponentLife !== 0) {
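    // the loop body is not shown in the gist preview; presumably each iteration
    // lets both agents choose and dispatch an action, then refreshes the state:
    state = getState(subzeroIndex);
  }
};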
import gym
import numpy as np

# DQNAgent and EPISODES are assumed to be defined elsewhere in the gist
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-ddqn.h5")
done = False
batch_size = 32

for e in range(EPISODES):
    state = env.reset()
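    # hypothetical continuation, modeled on the standard CartPole DQN example
    # (agent.act / agent.memorize / agent.replay / agent.memory are assumptions
    # about the DQNAgent defined elsewhere; the classic gym step API is assumed)
    state = np.reshape(state, [1, state_size])
    for time in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.memorize(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)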
var subzeroMemory = [];
var kanoMemory = [];
var subzeroIndex = 0;
var kanoIndex = 1;

var gamma = 0.95;         // discount rate
var epsilon = 1.0;        // exploration rate
var epsilonMin = 0.01;
var epsilonDecay = 0.99;
var learningRate = 0.001;

var train = async function () {
  var done = false;
  var state = {};
  var time_step = 0;
  var batchSize = 32;
  var subzeroAction;
  var kanoAction;
  var subzeroLife = 100;
  var kanoLife = 100;
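  // the rest of train() is not shown here; presumably it runs the fight loop,
  // stores transitions with memorize(), and periodically calls replay() and
  // updateTargetModel() (both defined below)
};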
var buildModel = function () {
  var model = tf.sequential();
  model.add(tf.layers.dense({ units: 24, inputShape: [6], activation: 'relu' })); // input shape === state shape
  model.add(tf.layers.dense({ units: 24, activation: 'relu' }));
  model.add(tf.layers.dense({ units: 9, activation: 'linear' })); // units === action_size
  model.compile({ loss: tf.losses.huberLoss, optimizer: tf.train.adam(learningRate) });
  return model;
};
var updateTargetModel = function () {
  // copy weights from model to targetModel
  for (let i = 0; i < model.layers.length; i++) {
    targetModel.layers[i].setWeights(model.layers[i].getWeights());
  }
};
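For context, a minimal sketch of how the two networks in this double-DQN setup might be created. The instantiation itself is not shown in the gist, but updateTargetModel above assumes a model and a targetModel already exist:

var model = buildModel();       // online network, trained on every replay step
var targetModel = buildModel(); // target network, refreshed only occasionally
updateTargetModel();            // start both networks with identical weights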
var predictSubzeroAction = function (state) {
  // explore with probability epsilon, otherwise exploit the model's Q-values
  if (Math.random() <= epsilon) {
    return randomAction(subzeroActions);
  }
  var inputState = stateToTensor(state, subzeroIndex);
  var output = model.predict(inputState).dataSync();
  return output.indexOf(Math.max(...output));
};
var memorize = function (state, action, reward, nextState, done, memory) {
  memory.push({ state, action, reward, nextState, done });
};
var underscore = require('underscore');

var replay = async function (batchSize, index) {
  // sample a random minibatch of stored transitions for the given fighter
  var minibatch = [];
  if (index === subzeroIndex) {
    minibatch = underscore.sample(subzeroMemory, batchSize);
  } else {
    minibatch = underscore.sample(kanoMemory, batchSize);
  }
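  // hypothetical completion (the gist preview cuts off here): a standard DQN
  // replay step would fit each sampled transition toward the Bellman target
  // reward + gamma * max Q_target(nextState), using the functions defined above
  for (const sample of minibatch) {
    let target = sample.reward;
    if (!sample.done) {
      const nextQ = targetModel.predict(stateToTensor(sample.nextState, index)).dataSync();
      target = sample.reward + gamma * Math.max(...nextQ);
    }
    const input = stateToTensor(sample.state, index);
    const targetQ = model.predict(input).dataSync();
    targetQ[sample.action] = target; // only the chosen action's Q-value is corrected
    await model.fit(input, tf.tensor2d([Array.from(targetQ)]), { epochs: 1, verbose: 0 });
  }
  if (epsilon > epsilonMin) epsilon *= epsilonDecay; // decay exploration over time
};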