Example stabilizer learning bot. Tries to center itself on a bottom-middle tile of the OFM map.
// ==UserScript==
// @name TagPro Stabilizer Bot Trainer
// @description Stabilize your ball perfectly on a square.
// @version 0.1
// @include http://tagpro-maptest.koalabeast.com:*
// @include http://tangent.jukejuice.com:*
// @include http://*.newcompte.fr:*
// @require https://raw.githubusercontent.com/karpathy/reinforcejs/master/lib/rl.js
// @require https://github.com/eligrey/FileSaver.js/raw/master/FileSaver.min.js
// @require https://gist.github.com/chrahunt/4843f0258c516882eea0/raw/loopback.user.js
// @author snaps_
// @namespace http://www.reddit.com/user/snaps_
// @license MIT
// ==/UserScript==
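// The required libraries provide, respectively: RL.DQNAgent (reinforcejs),
// saveAs() (FileSaver.js), and (presumably) a loopback shim that lets the
// "local:chat" messages emitted below show up in this client's own chat.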
(function(window, $) {
  // This script is not allowed on public servers. Changing the header
  // above to include them is pointless, as actually using the script on
  // a public server will get you caught and banned.
  // Settings.
  var HOTKEY_START = 69; // e
  var HOTKEY_STOP = 81; // q
  // End settings.
  // Override timer functions.
  var setTimeout = window.setTimeout,
      clearTimeout = window.clearTimeout,
      setInterval = window.setInterval,
      clearInterval = window.clearInterval;
  // Physics step-size.
  var STEP = (1 / 60);
  // Threshold for distances.
  var EPS = (1 / 1e4);
  var TILE_WIDTH = 40;
  // Test whether two values are within EPS of each other.
  function equals(a, b) {
    return Math.abs(a - b) < EPS;
  }
  // Randomly sample a number between a and b.
  function sample(a, b) {
    return a + (b - a) * Math.random();
  }
  // Get random integer between a and b, non-inclusive.
  function discreteSample(a, b) {
    var val = Math.floor(a + (b - a) * Math.random());
    if (val == b) {
      return discreteSample(a, b);
    }
    return val;
  }
  // Wait until the tagpro object exists, and add the function to tagpro.ready
  function addToTagproReady(fn) {
    // Make sure the tagpro object exists.
    if (typeof tagpro !== "undefined") {
      tagpro.ready(fn);
    } else {
      // If not ready, try again after a short delay.
      setTimeout(function() {
        addToTagproReady(fn);
      }, 0);
    }
  }
  function chat_message(s) {
    tagpro.socket.emit("local:chat", {
      to: "all",
      message: s
    });
  }
  // Given a position, get the target tile center.
  function getTarget(pos) {
    return Math.floor(pos.x / TILE_WIDTH) * TILE_WIDTH + 20;
  }
  // Learning agent.
  var Agent = function(env) {
    // Settings for DQNAgent.
    var spec = { alpha: 0.01 };
    this.brain = new RL.DQNAgent(env, spec);
    this.env = env;
  };
  // Get and take action.
  Agent.prototype.forward = function() {
    var state = this.env.normalize(this.env.getState());
    var action = this.brain.act(state);
    this.env.act(action);
  };
  // Learn from reward.
  Agent.prototype.backward = function(reward) {
    this.brain.learn(reward);
  };
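  // Note: forward()/backward() follow the reinforcejs calling convention:
  // act() returns an action index for the current (normalized) state, and
  // learn() consumes the reward earned by the previous action, so the two
  // are meant to alternate.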
  // Interface for interacting with world.
  var Env = function() {
    this.numStates = 2;
    this.numActions = 3;
    // Overrides to keep velocity up to date.
    Box2D.Dynamics.b2Body.prototype.GetLinearVelocity = function() {
      tagpro.players[this.player.id].vx = this.m_linearVelocity.x;
      return this.m_linearVelocity;
    };
    // Actions.
    this.actions = {
      "0": ["left"],
      "1": ["right"],
      "2": []
    };
    this.dirs = ["left", "right"];
    this.oppositeDir = {
      left: "right",
      right: "left"
    };
    this.viewport = $("#viewport");
    this.range = 2 * TILE_WIDTH;
  };
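  // The GetLinearVelocity override above copies the Box2D body's x velocity
  // onto the tagpro player object every time GetLinearVelocity is called, so
  // getVelocity() below always reads a freshly updated value.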
  Env.prototype.getKeyCode = function(dir) {
    return tagpro.keys[dir][0];
  };
  Env.prototype.setTarget = function(target) {
    this.target = target;
  };
  Env.prototype.getPosition = function() {
    return tagpro.players[tagpro.playerId].x + TILE_WIDTH / 2;
  };
  Env.prototype.getVelocity = function() {
    return tagpro.players[tagpro.playerId].vx;
  };
  // Return a normalized state for learning.
  Env.prototype.normalize = function(state) {
    return [
      (state[0] - 30) / this.range,
      (state[1] - 0.25) / 0.5
    ];
  };
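  // normalize() shifts and rescales the raw [distance, velocity] pair so the
  // network inputs stay near unit scale; the offsets (30 and 0.25) look
  // hand-tuned for this map and the velocity range used in training.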
  Env.prototype.isPressing = function(dir) {
    return tagpro.players[tagpro.playerId].pressing[dir];
  };
  // Act on environment.
  Env.prototype.act = function(n) {
    var action = this.actions[n];
    this.move(action);
  };
  // Release directional button.
  Env.prototype.release = function(dir) {
    var e = $.Event("keyup");
    e.keyCode = this.getKeyCode(dir);
    this.viewport.trigger(e);
  };
  // Release all directional buttons.
  Env.prototype.releaseAll = function() {
    this.move([]);
  };
  // Press directional button.
  Env.prototype.press = function(dir) {
    tagpro.sendKeyPress(dir);
  };
  // Takes array of directions to press.
  Env.prototype.move = function(presses) {
    presses.forEach(function(dir) {
      this.press(dir);
      var opposite = this.oppositeDir[dir];
      if (this.isPressing(opposite)) {
        this.release(opposite);
      }
    }, this);
    this.dirs.forEach(function(dir) {
      if (presses.indexOf(dir) == -1) {
        this.release(dir);
      }
    }, this);
  };
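  // move() is the single entry point for key handling: it presses each
  // requested direction, releases its opposite, and finally releases any
  // direction that was not requested, so at most one horizontal key is held
  // at any time.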
  // Get environment state.
  Env.prototype.getState = function() {
    var pos = this.getPosition();
    var v = this.getVelocity();
    var t = this.target - pos;
    var state = [t, v];
    return state;
  };
  Env.prototype.isTerminalState = function(state) {
    var onTarget = equals(state[0], 0) && equals(state[1], 0);
    var outOfRange = Math.abs(state[0]) > this.range;
    return onTarget || outOfRange;
  };
  Env.prototype.getNumStates = function() {
    return this.numStates;
  };
  Env.prototype.getMaxNumActions = function() {
    return this.numActions;
  };
  // Calculate reward.
  Env.prototype.getReward = function(state) {
    if (this.isTerminalState(state)) {
      if (equals(state[0], 0) && equals(state[1], 0)) {
        // Desired terminal state.
        return 10;
      } else if (Math.abs(state[0]) > this.range) {
        // Outside of range, bad terminal state.
        return -10;
      }
    } else if (equals(state[0], 0)) {
      // On top of center point even if it hasn't stopped.
      return 1;
    }
    // Calculate reward otherwise.
    var dist = Math.abs(state[0]);
    if (Math.abs(state[1]) > 0.2) {
      return -(dist / this.range) - Math.abs(state[1]) / 2.5;
    } else {
      return -(dist / this.range);
    }
  };
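  // Reward shaping summary: +10 for stopping on the target, -10 for drifting
  // outside the allowed range, +1 for sitting over the center while still
  // moving, and otherwise a negative reward proportional to distance, with an
  // extra velocity penalty whenever speed exceeds 0.2.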
  var Experiment = function(opts) {
    this.env = opts.env;
    this.agent = opts.agent;
    this.start = opts.start; // left/right
    this.target_loc = opts.target_loc; // Location to initiate the agent.
    this.target_v = opts.target_v;
    this.interval = STEP * 10; // step size.
    // Starting locations on OFM map.
    this.start_locs = {
      left: 59,
      right: 941
    };
  };
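  // Episode flow: run() holds a directional key until the ball reaches the
  // chosen start wall, _start() then drives it toward target_loc at roughly
  // target_v, and once within 5px transferToAgent() hands control to the
  // learner until a terminal state is reached.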
  // Run experiment.
  Experiment.prototype.run = function() {
    // Get into start position.
    this.env.press(this.start);
    var start = setInterval(function() {
      if (equals(this.env.getPosition(), this.start_locs[this.start])) {
        clearInterval(start);
        this.env.releaseAll();
        // Initiate experiment.
        this._start();
      }
    }.bind(this), 20);
  };
  // Get into state for experiment and hand off control to agent.
  Experiment.prototype._start = function() {
    //console.log("Starting experiment.");
    chat_message("Starting episode.");
    // Interval to ensure velocity and position constraints.
    var running = setInterval(function() {
      var pos = this.env.getPosition();
      var v = this.env.getVelocity();
      // Transfer control to agent when close.
      if (Math.abs(this.target_loc - pos) < 5) {
        clearInterval(running);
        this.transferToAgent();
        return;
      }
      if (Math.abs(v - this.target_v) < 0.05) {
        this.env.releaseAll();
      } else if (v < this.target_v) {
        if (pos < this.target_loc) {
          this.env.press("right");
        } else if (pos > this.target_loc) {
          this.env.press("left");
        }
      }
    }.bind(this), 20);
  };
  // Set function to be called on each learning step of the agent.
  Experiment.prototype.onStep = function(fn) {
    this.step_fn = fn;
  };
  // Set function to be called when actual experiment starts.
  Experiment.prototype.onStart = function(fn) {
    this.start_fn = fn;
  };
  // Set function to be called when experiment has ended.
  Experiment.prototype.onComplete = function(fn) {
    this.complete_fn = fn;
  };
  // Terminate experiment prematurely.
  Experiment.prototype.terminate = function() {
    clearTimeout(this.update);
    this.env.releaseAll();
    if (this.stdUpdate) {
      tagpro.world.update = this.stdUpdate;
    }
    chat_message("Episode terminated.");
    //console.log("Experiment terminated.");
  };
  // Reset necessary state.
  Experiment.prototype.complete = function(forced) {
    clearTimeout(this.update);
    this.env.releaseAll();
    //console.log("Experiment completed.");
    chat_message("Episode completed.");
    if (this.complete_fn) {
      this.complete_fn();
    }
  };
  // Transfer control to agent and start learning cycle.
  Experiment.prototype.transferToAgent = function() {
    //console.log("Transferring control to agent.");
    chat_message("Transferring control to agent.");
    // Whether the agent is anticipating a reward.
    var giveReward = false;
    // Overrides to reward agent immediately after actions taken into
    // account.
    this.stdUpdate = tagpro.world.update;
    var update = function rewardUpdate() {
      if (giveReward) {
        giveReward = false;
        //console.log("Updating.");
        var state = this.env.getState();
        var reward = this.env.getReward(state);
        this.agent.backward(reward);
        if (this.step_fn) {
          this.step_fn(reward, state, this.agent, this.env);
        }
        // Check if terminal state and quit if so.
        if (this.env.isTerminalState(state)) {
          this.complete();
          tagpro.world.update = this.stdUpdate;
          window.override = false;
        }
      }
    }.bind(this);
    var stdUpdate = this.stdUpdate;
    window.newUpdate = function testName() {
      stdUpdate.apply(tagpro.world, arguments);
      setTimeout(update);
    };
    tagpro.world.update = newUpdate;
    window.override = true;
    var update_fn = function actionInterval() {
      // Skip if reward hasn't been sent to agent yet.
      if (!giveReward) {
        giveReward = true;
        this.agent.forward();
        //console.log("Forward.");
      } else {
        console.log("Skipping.");
      }
      this.update = setTimeout(update_fn, this.interval);
    }.bind(this);
    // Call start callback.
    if (this.start_fn) {
      this.start_fn();
    }
    this.update = setTimeout(update_fn, this.interval);
  };
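  // The wrapped tagpro.world.update keeps actions and rewards aligned: the
  // timer lets the agent act at most once per physics step (the giveReward
  // flag gates it), the next world update computes the resulting reward and
  // feeds it back, and the episode ends when a terminal state is observed.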
  // Get the range that a tile spans given its x location.
  function tileRange(x) {
    return [x * TILE_WIDTH, (x + 1) * TILE_WIDTH];
  }
  // Get the value of the center of a tile.
  function tileCenter(x) {
    return x * TILE_WIDTH + TILE_WIDTH / 2;
  }
  // Interface for drawing on the game.
  var Draw = function() {
    this.graphics = new PIXI.Graphics();
    this._waitForBackground(function() {
      tagpro.renderer.layers.background.addChild(this.graphics);
    }.bind(this));
    this.tile_color = 0xdddddd;
    this.tile_opacity = 0.5;
  };
  // Wait until background is constructed to execute function.
  Draw.prototype._waitForBackground = function(fn) {
    if (tagpro.renderer.layers.background.children.length === 1) {
      fn();
    } else {
      setTimeout(function() {
        this._waitForBackground(fn);
      }.bind(this), 100);
    }
  };
  // Highlight tile at provided location.
  Draw.prototype.highlightTile = function(x, y) {
    this.graphics.beginFill(this.tile_color, this.tile_opacity);
    this.graphics.drawRect(x * TILE_WIDTH, y * TILE_WIDTH, TILE_WIDTH, TILE_WIDTH);
    this.graphics.endFill();
  };
  Draw.prototype.line = function(x1, y1, x2, y2) {
    this.graphics.lineStyle(2, 0xff1111, 1);
    this.graphics.moveTo(x1, y1);
    this.graphics.lineTo(x2, y2);
    this.graphics.lineStyle(0, 0, 0);
  };
  Draw.prototype.point = function(x, y) {
    this.circle(x, y, 2);
  };
  Draw.prototype.circle = function(x, y, r) {
    this.graphics.lineStyle(2, 0xff1111, 1);
    this.graphics.drawCircle(x, y, r);
    this.graphics.lineStyle(0, 0, 0);
  };
  // Reset drawings.
  Draw.prototype.reset = function() {
    this.graphics.clear();
  };
  // Takes and displays information about the state of the agent.
  var Info = function() {
    // Info about the agent, its velocity and position.
    var real_time = {
      velocity: '',
      position: ''
    };
    // Minimum number of ms between interface updates.
    this.update_interval = 200;
    // Start of text offset.
    this.x_offset = 20;
    this.y_offset = 100;
    // Holds line text.
    this.lines = {};
    // Hold pixi text objects.
    this.texts = {};
    // Hold actual values.
    this.vals = {};
    // Time last updated.
    this.last_update = {};
    this._inject();
  };
  Info.prototype._inject = function() {
    var stdUpdate = tagpro.ui.update;
    tagpro.ui.update = function() {
      stdUpdate.apply(tagpro.ui, arguments);
      for (var name in this.vals) {
        this.texts[name].setText(this.lines[name] + ": " + this.vals[name]);
      }
    }.bind(this);
  };
  // Add a line to the display.
  Info.prototype.addLine = function(name, label) {
    this.lines[name] = label;
    this.last_update[name] = 0;
    var text = tagpro.renderer.prettyText('');
    text.x = this.x_offset;
    text.y = this.y_offset;
    this.y_offset += 20;
    this.texts[name] = text;
    tagpro.renderer.layers.ui.addChild(text);
  };
  // Update information display.
  // vals is an object with keys corresponding to the lines.
  Info.prototype.update = function(vals) {
    for (var name in vals) {
      this.vals[name] = vals[name];
    }
  };
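  // Info piggybacks on tagpro.ui.update (see _inject above), so its text
  // lines are redrawn every UI frame; callers just push new values through
  // update() and the next frame picks them up.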
  // For tracking stats long-term.
  // Takes display object.
  var Stats = function(display) {
    // Number of experiments.
    this.experiments = 0;
    this.smooth_rewards = [];
    // Steps between saving reward values.
    this.steps = 200;
    // Counter for reward value steps.
    this.step = 0;
    // Maximum number of previous rewards to keep.
    this.max_rewards = 1000;
    this.time = 0;
    this.display = display;
    display.addLine('smooth_reward', 'Smooth Reward');
    display.addLine('experiment', 'Episodes');
    display.addLine('time_trained', 'Total Time Trained (ms)');
  };
  // Call when starting an experiment.
  Stats.prototype.startExp = function() {
    this.last_start = Date.now();
  };
  // Call when experiment ended.
  Stats.prototype.endExp = function() {
    this.time += Date.now() - this.last_start;
    this.experiments++;
    this.display.update({
      time_trained: this.time,
      experiment: this.experiments
    });
  };
  Stats.prototype.addReward = function(reward) {
    this.step++;
    if (!this.smooth_reward) {
      this.smooth_reward = reward;
    }
    this.smooth_reward = 0.999 * this.smooth_reward + 0.001 * reward;
    if (this.step % this.steps === 0) {
      this.step = 0;
      this.display.update({
        smooth_reward: this.smooth_reward.toFixed(3)
      });
      if (this.smooth_rewards.length > this.max_rewards) {
        this.smooth_rewards = this.smooth_rewards.slice(1);
      }
      this.smooth_rewards.push(this.smooth_reward);
    }
  };
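  // addReward() keeps an exponential moving average of the reward (0.999
  // decay) and, every `steps` calls, pushes the smoothed value both to the
  // display and into a history buffer capped at max_rewards entries.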
  addToTagproReady(function() {
    var drawing = new Draw();
    var env = new Env();
    var agent = new Agent(env);
    var info = new Info();
    // Add lines for experiment information display.
    info.addLine('error', 'TD Error');
    info.addLine('reward', 'Reward');
    info.addLine('exp', 'Saved Experiences');
    info.addLine('distance', 'Distance');
    info.addLine('velocity', 'Velocity');
    var stats = new Stats(info);
    // Range of possible ball velocity.
    var velocity_range = [0.5, 1.5];
    // Default y for bottom of map (tile index and pixel coordinate).
    var default_y = 17;
    var default_y_coord = 700;
    // Holds active experiment.
    var exp;
    window.override = false;
    // Show velocity.
    // Show center location.
    // Show target location.
    // Show release location.
    // Show bounds of area.
    // Make accessible outside script.
    window.myAgent = agent;
    function stepUpdate(reward, state, agent, env) {
      stats.addReward(reward);
      info.update({
        reward: reward.toFixed(3),
        error: agent.brain.tderror.toFixed(2),
        exp: agent.brain.expi,
        distance: state[0].toFixed(3),
        velocity: state[1].toFixed(3)
      });
    }
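    // tderror and expi are read straight off the reinforcejs DQNAgent
    // internals (the last TD error and the experience-replay write index);
    // they are not a formal API, so this may break if the library changes.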
    // Sets up and runs an experiment.
    function setupAndRun() {
      drawing.reset();
      // Get random tile in middle range.
      var tile = discreteSample(10, 16);
      // Highlight our tile.
      drawing.highlightTile(tile, default_y);
      var center = tileCenter(tile);
      env.setTarget(center);
      // Draw tile center.
      drawing.point(center, default_y_coord);
      // Draw bounds.
      drawing.circle(center, default_y_coord, env.range);
      var range = tileRange(tile);
      // Get transfer location.
      var stop = sample(range[0], range[1]);
      var v = sample(velocity_range[0], velocity_range[1]);
      exp = new Experiment({
        env: env,
        agent: agent,
        start: "left",
        target_loc: stop, // Location to initiate agent.
        target_v: v
      });
      // Update display and statistics.
      exp.onStart(function() {
        stats.startExp();
      });
      exp.onStep(stepUpdate);
      exp.onComplete(function() {
        stats.endExp();
        setupAndRun();
      });
      exp.run();
    }
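    // setupAndRun() chains episodes indefinitely through onComplete: each
    // episode picks a random bottom-row target tile in columns 10-15, a
    // random handoff point inside that tile, and a random handoff velocity
    // from velocity_range.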
    window.agentState = {};
    agentState.running = false;
    //var running = false;
    // Start experiment.
    function init() {
      if (agentState.running) return;
      // Pre-game.
      if (tagpro.state == 3) return;
      agentState.running = true;
      setupAndRun();
    }
    function reset() {
      if (!agentState.running) return;
      agentState.running = false;
      // Stop experiment.
      exp.terminate();
      // Release all buttons.
      env.releaseAll();
    }
    // Key listeners.
    document.addEventListener("keydown", function(e) {
      if (tagpro.disableControls) return;
      if (e.keyCode === HOTKEY_START) {
        init();
      }
    });
    document.addEventListener("keyup", function(e) {
      if (tagpro.disableControls) return;
      if (e.keyCode === HOTKEY_STOP) {
        reset();
      }
    });
    // Handle death.
    tagpro.socket.on("p", function(msg) {
      var updates = msg.u || msg;
      var dead = updates.some(function(update) {
        return update.id === tagpro.playerId && update.dead;
      });
      if (dead) {
        reset();
      }
    });
    // Handle end.
    tagpro.socket.on("end", function() {
      reset();
    });
    function downloadData() {
      var data = [JSON.stringify(agent.brain.toJSON())];
      var blob = new Blob(data, { type: "application/json" });
      saveAs(blob, "data-" + Date.now() + ".json");
    }
    //window.addEventListener('beforeunload', downloadData);
  });
})(unsafeWindow, $); // Using page jQuery.