Skip to content

Instantly share code, notes, and snippets.

@kybernetikos
Created June 29, 2014 17:37
Show Gist options
  • Save kybernetikos/a264fba207527d775032 to your computer and use it in GitHub Desktop.
Save kybernetikos/a264fba207527d775032 to your computer and use it in GitHub Desktop.
Asteroids: Machine Learning
try {
// Identifiers for the two actions the agent can take on a tick.
var WAIT = "wait";
var FIRE = "fire";
// Both actions as a list; nothing else in the visible script reads it.
var ACTIONS = [WAIT, FIRE];
if (!context.classesDefined) {
context.classesDefined = true;
/**
 * Snapshot of the world handed to the agent on one tick.
 *
 * @param {number} numberOfAsteroids - stored as `numberOfAsteroids`; the
 *     script's tick call passes `asteroids.length`.
 * @param {*} asteroidX - stored as `asteroidX`; the tick call passes
 *     `asteroids[0].x`.
 * @param {*} asteroidVx - stored as `asteroidVx`; the tick call passes
 *     `asteroids[0].speed`.
 * @param {boolean} fired - whether a shot is currently pending.
 */
context.WorldState = function WorldState(numberOfAsteroids, asteroidX, asteroidVx, fired) {
  // Assignment order is kept so the logged shape of a state stays the same.
  this.asteroidX = asteroidX;
  this.asteroidVx = asteroidVx;
  this.numberOfAsteroids = numberOfAsteroids;
  this.fired = fired;
};
/**
 * Linear value approximator: one weight per feature function.
 *
 * @param {Array<function(Object): number>} funcs - feature functions, each
 *     mapping a state to a number.
 */
var Approximator = context.Approximator = function Approximator(funcs) {
  var initialWeights = [];
  this.funcs = funcs;
  this.weights = initialWeights;
  // Every feature starts with a random weight drawn from Math.random().
  while (initialWeights.length < this.funcs.length) {
    initialWeights.push(Math.random());
  }
};
/**
 * Scores a state as the weighted sum of all feature functions.
 *
 * @param {Object} state - state passed unchanged to every feature function.
 * @returns {number} sum of `funcs[i](state) * weights[i]`; 0 when there are
 *     no feature functions.
 */
Approximator.prototype.evaluateState = function(state) {
  var weights = this.weights;
  // Seeding the fold with 0 keeps an empty feature list from throwing
  // (reduce on an empty array without an initial value is a TypeError).
  return this.funcs.reduce(function(total, func, i) {
    return total + func(state) * weights[i];
  }, 0);
};
/**
 * Nudges every weight toward making `evaluateState(state)` equal `reward`.
 *
 * Each weight moves by `(reward - currentScore) * learningRate * feature`,
 * where `feature` is that weight's feature function applied to `state`.
 *
 * @param {Object} state - the state in which the decision was made.
 * @param {number} reward - observed outcome, positive or negative.
 * @param {number} [learningRate=0.1] - step size; omit it to keep the
 *     original fixed rate of 0.1.
 */
Approximator.prototype.correct = function(state, reward, learningRate) {
  var rate = learningRate === undefined ? 0.1 : learningRate;
  var predicted = this.evaluateState(state);
  // Same multiplication order as before: (error * rate) * feature.
  var step = (reward - predicted) * rate;
  var funcs = this.funcs;
  console.log('correcting', state, reward);
  this.weights = this.weights.map(function(weight, i) {
    var feature = funcs[i];
    return weight + step * feature(state);
  });
};
/**
 * Record of one choice: the state it was made in and the action taken.
 *
 * @param {Object} state - state associated with the choice.
 * @param {string} action - the chosen action identifier.
 */
var Decision = context.Decision = function Decision(state, action) {
  this.state = state;
  this.action = action;
};
/**
 * Learning agent that records its decisions and later scores them.
 *
 * @param {Object} approximator - value estimator; the agent's methods call
 *     its `evaluateState` and `correct`.
 * @param {*} timeFn - accepted but never read by this constructor.
 */
var Agent = context.Agent = function Agent(approximator, timeFn) {
  // Decisions taken since the last scoring.
  this.decisions = [];
  this.approximator = approximator;
  // Time of the pending shot; null while no shot is pending.
  this.timeFired = null;
};
/**
 * Feeds the final outcome back into the approximator for every recorded
 * decision, then clears the episode.
 *
 * A decision's reward is `score` minus the number of decisions from it to
 * the end of the list (inclusive), so earlier decisions receive less.
 *
 * @param {number} score - final outcome of the episode.
 */
Agent.prototype.actualScoreFinalState = function(score) {
  var history = this.decisions;
  var count = history.length;
  for (var i = 0; i < count; i += 1) {
    this.approximator.correct(history[i].state, score - (count - i));
  }
  // Start the next episode with no history and no pending shot.
  this.decisions = [];
  this.timeFired = null;
};
/**
 * Advances the agent by one tick: acts if no shot is pending, then checks
 * whether the episode has ended.
 *
 * @param {number} time - current time value; compared against `timeFired`.
 * @param {Object} state - world snapshot with `fired` and
 *     `numberOfAsteroids` (the fields WorldState stores).
 */
Agent.prototype.tick = function(time, state) {
  var HIT_REWARD = 1000;
  var MISS_REWARD = -100;
  var MISS_TIMEOUT = 80;

  console.log(time, state);
  if (state.fired === false) {
    this.performAction(time, state, this.chooseAction(state));
  }
  // WorldState stores the count as `numberOfAsteroids`; the previous
  // `asteroidCount` lookup was always undefined, so this branch never ran.
  if (state.numberOfAsteroids === 0) {
    this.actualScoreFinalState(HIT_REWARD);
  } else if (this.timeFired != null && time >= this.timeFired + MISS_TIMEOUT) {
    // A shot has been pending for MISS_TIMEOUT ticks with asteroids left.
    this.actualScoreFinalState(MISS_REWARD);
  }
};
/**
 * Records the chosen action and, for FIRE, actually shoots.
 *
 * The recorded state is a new object whose prototype is `state`, so marking
 * it as fired does not modify the caller's object.
 *
 * @param {number} time - current time; stored as `timeFired` on FIRE.
 * @param {Object} state - state the action was chosen in.
 * @param {string} action - WAIT or FIRE.
 */
Agent.prototype.performAction = function(time, state, action) {
  var snapshot = Object.create(state);
  this.decisions.push(new Decision(snapshot, action));
  if (action !== FIRE) {
    return;
  }
  snapshot.fired = true;
  this.timeFired = time;
  fire();
};
/**
 * Picks FIRE when the approximator scores the "fired" variant of the state
 * strictly higher than the "not fired" variant and no shot is pending;
 * otherwise picks WAIT.
 *
 * @param {Object} currentState - state to decide in; it is not modified.
 * @returns {string} FIRE or WAIT.
 */
Agent.prototype.chooseAction = function(currentState) {
  // Both hypothetical states inherit every other field from currentState.
  var ifFired = Object.create(currentState, {fired: {value: true}});
  var ifHeld = Object.create(currentState, {fired: {value: false}});
  var approximator = this.approximator;
  var scoreIfFired = approximator.evaluateState(ifFired);
  var scoreIfHeld = approximator.evaluateState(ifHeld);
  var shouldFire = scoreIfFired > scoreIfHeld && currentState.fired === false;
  return shouldFire ? FIRE : WAIT;
};
// The shared agent, backed by a two-feature linear approximator.
context.agent = new context.Agent(new context.Approximator([
  // Feature 0: 1 when the state counts as fired, otherwise 0.
  function firedIndicator(state) {
    if (state.fired) {
      return 1;
    }
    return 0;
  },
  // Feature 1: the state's asteroidX value, used as-is.
  function asteroidPosition(state) {
    return state.asteroidX;
  }
]));
}
/////////////////////////////////////////////////
// Build this tick's snapshot from the first asteroid and hand it to the
// shared agent. Reading asteroids[0] throws when the list is empty; the
// surrounding catch then logs the error.
var activeAgent = context.agent;
var leadAsteroid = asteroids[0];
var shotPending = activeAgent.timeFired !== null;
activeAgent.tick(time, new context.WorldState(asteroids.length, leadAsteroid.x, leadAsteroid.speed, shotPending));
} catch (e) {
console.log(e)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment