// Note: a few variables below are assigned without `var` on purpose - they
// overwrite globals that the game already defines and reads.
// The road is 7 lanes wide; the game's built-in safety system covers 5 slots
// behind the car and 6 ahead of it.
lanesSide = 2;           // lanes visible on each side of the agent's car
patchesAhead = 11;       // patches visible ahead of the car
patchesBehind = 6;       // patches visible behind the car
trainIterations = 10000; // number of iterations used when training is run
var num_inputs = (lanesSide * 2 + 1) * (patchesAhead + patchesBehind);
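// With the values above: (2 * 2 + 1) * (11 + 6) = 5 * 17 = 85 input patches.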
var num_actions = 5;
var temporal_window = 192;
var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;
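// The input layer has to hold the Brain's whole temporal window:
// temporal_window past states, temporal_window past actions, plus the current
// state, i.e. 85 * 192 + 5 * 192 + 85 = 17365 values here.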
var layer_defs = [];
layer_defs.push({type: 'input', out_sx: 1, out_sy: 1, out_depth: network_size});
layer_defs.push({type: 'fc', num_neurons: 170, activation: 'relu' });
layer_defs.push({type: 'fc', num_neurons: 75, activation: 'relu' });
layer_defs.push({type: 'fc', num_neurons: 30, activation: 'relu' });
layer_defs.push({type: 'regression', num_neurons: num_actions });
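// Net: three fully connected ReLU hidden layers (170 -> 75 -> 30 neurons) into
// a regression head with one output per action, used as the Q-value estimates.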
var tdtrainer_options = {
  learning_rate: 0.001, // SGD step size
  momentum: 0.0,        // plain SGD, no momentum
  batch_size: 64,       // experiences per training batch
  l2_decay: 0.01        // L2 weight regularization
};
var opt = {};
opt.temporal_window = temporal_window;
opt.experience_size = 6000;       // size of the experience replay memory
opt.start_learn_threshold = 500;  // experiences to collect before learning starts
opt.gamma = 0.7;                  // discount factor for future rewards
opt.learning_steps_burnin = 5000; // act fully at random for this many initial steps
opt.epsilon_min = 0.05;           // exploration rate floor after annealing
opt.epsilon_test_time = 0.01;     // exploration rate when learning is switched off
opt.layer_defs = layer_defs;
opt.random_action_distribution = [0.2, 0.25, 0.05, 0.25, 0.25]; // biased exploration; action index 2 is rarely picked
opt.tdtrainer_options = tdtrainer_options;
brain = new deepqlearn.Brain(num_inputs, num_actions, opt);
learn = function (state, lastReward) {
  brain.backward(lastReward);        // credit the previous action with the reward just received
  var action = brain.forward(state); // choose the next action from the current state
  draw_net();                        // update the on-page network visualisation
  draw_stats();                      // update the on-page training statistics
  return action;
};
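// Rough usage sketch (an assumption, not part of the original gist): the game
// is expected to call learn() once per simulation step, something like
//   var action = learn(currentState, lastReward);
// where currentState would be a flat array of num_inputs patch values and
// lastReward the reward earned since the previous call.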