kybernetikos · June 29, 2014 17:37
diff --git a/gistfile1.js b/gistfile1.js
 try {

    // Identifiers for the two moves the agent can make.
    // ACTIONS lists both; nothing in the visible script reads it.
    var ACTIONS = ["wait", "fire"]
    var WAIT = ACTIONS[0]
    var FIRE = ACTIONS[1]

    if (!context.classesDefined) {
        context.classesDefined = true;
        // Plain record of what the agent observes on one tick: how many
        // asteroids exist, the x and vx values passed for one asteroid, and
        // whether a shot has already been fired.
        context.WorldState = function WorldState(numberOfAsteroids, asteroidX, asteroidVx, fired) {
            var snapshot = this
            snapshot.asteroidX = asteroidX
            snapshot.asteroidVx = asteroidVx
            snapshot.numberOfAsteroids = numberOfAsteroids
            snapshot.fired = fired
        }

        // Linear value approximator: keeps the given feature functions and
        // one weight per function, each weight starting at Math.random().
        var Approximator = function Approximator(funcs) {
            this.funcs = funcs
            this.weights = []
            while (this.weights.length < this.funcs.length) {
                this.weights.push(Math.random())
            }
        }
        context.Approximator = Approximator

        // Scores a state as the weighted sum of the feature functions:
        // sum over i of funcs[i](state) * weights[i].
        // Returns 0 when there are no feature functions.
        Approximator.prototype.evaluateState = function(state) {
            var weights = this.weights
            // Fold from an explicit 0: reduce() without an initial value
            // throws a TypeError on an empty array, which made an
            // approximator with no features unusable.
            return this.funcs.reduce(function(total, func, i) {
                return total + func(state) * weights[i]
            }, 0)
        }

        // Nudges the weights toward an observed reward.
        //   state  - the state in which the decision was made
        //   reward - the observed outcome, positive or negative
        // Each weight moves by (reward - current estimate) * 0.1 * the value
        // of its own feature function on that state.
        Approximator.prototype.correct = function(state, reward) {
            var learningParam = 0.1
            var predicted = this.evaluateState(state)
            var correctionAmount = reward - predicted
            console.log('correcting', state, reward)
            var adjusted = []
            for (var i = 0; i < this.weights.length; i++) {
                var feature = this.funcs[i]
                var fnScore = feature(state)
                adjusted.push(this.weights[i] + correctionAmount * learningParam * fnScore)
            }
            this.weights = adjusted
        }

        // Pairs an action with the state it was recorded against.
        var Decision = function Decision(state, action) {
            var record = this
            record.state = state
            record.action = action
        }
        context.Decision = Decision

        // Learning agent: remembers the decisions of the current episode,
        // the approximator used to score states, and the time of the last
        // shot (null while no shot is pending). timeFn is accepted but not
        // used by the constructor.
        var Agent = function Agent(approximator, timeFn) {
            var agent = this
            agent.decisions = []
            agent.approximator = approximator
            agent.timeFired = null
        }
        context.Agent = Agent

        // Ends the current episode with the given final score.
        // Every remembered decision is fed to the approximator with a reward
        // of score minus its distance from the end of the list (the oldest
        // decision gets the largest deduction); the history and the
        // pending-shot time are then cleared.
        Agent.prototype.actualScoreFinalState = function(score) {
            var history = this.decisions
            var total = history.length
            for (var i = 0; i < total; i++) {
                this.approximator.correct(history[i].state, score - (total - i))
            }
            this.decisions = []
            this.timeFired = null
        }

        // Runs one step of the agent.
        //   time  - current time value; compared against the time of the shot
        //   state - observation for this tick (see WorldState)
        // While no shot is pending the agent chooses and performs an action.
        // The episode then ends with +1000 when no asteroids remain, or with
        // -100 once 80 time units have passed since the shot was fired.
        Agent.prototype.tick = function(time, state) {
            console.log(time, state)
            if (state.fired === false) {
                var chosenAction = this.chooseAction(state)
                this.performAction(time, state, chosenAction)
            }
            // WorldState stores the count as numberOfAsteroids. The old code
            // read state.asteroidCount, which WorldState never sets, so the
            // success branch could not run. asteroidCount is still honoured
            // for any caller that supplies it instead.
            var remaining = state.numberOfAsteroids !== undefined
                ? state.numberOfAsteroids
                : state.asteroidCount
            if (remaining === 0) {
                /// WHOOOO HOOOO!
                this.actualScoreFinalState(1000)
            } else if (this.timeFired != null && time >= this.timeFired + 80) {
                //// OH NOOES!
                this.actualScoreFinalState(-100)
            }
        }

        // Records the chosen action and carries it out.
        // The decision is stored against a new object whose prototype is
        // the given state, so marking it as fired does not touch the
        // caller's state. For FIRE, the time of the shot is remembered
        // and fire() is called.
        Agent.prototype.performAction = function(time, state, action) {
            var record = new Decision(Object.create(state), action)
            this.decisions.push(record)
            if (action !== FIRE) {
                return
            }
            record.state.fired = true
            this.timeFired = time
            fire()
        }

        // Picks FIRE or WAIT for the given state.
        // Two hypothetical states are derived from currentState, one with
        // fired forced to true and one with it forced to false, and both are
        // scored. FIRE is returned only when the fired variant scores
        // strictly higher and currentState.fired is exactly false.
        Agent.prototype.chooseAction = function(currentState) {
            var ifFired = Object.create(currentState, {fired: {value: true}})
            var ifHeld = Object.create(currentState, {fired: {value: false}})

            var fireValue = this.approximator.evaluateState(ifFired)
            var holdValue = this.approximator.evaluateState(ifHeld)

            if (!(fireValue > holdValue) || currentState.fired !== false) {
                return WAIT
            }
            return FIRE
        }

        // Build the agent stored on context, scoring states with two features.
        context.agent = new context.Agent(new context.Approximator([
            // Feature 0: 1 when the state is marked as fired, otherwise 0.
            function hasFired(state) {
                if (state.fired) {
                    return 1
                }
                return 0
            },
            // Feature 1: the asteroidX value stored on the state.
            function asteroidPosition(state) {
                return state.asteroidX
            }
        ]))

    }

 /////////////////////////////////////////////////

    // Feed this tick's observation to the agent. With no asteroids left
    // there is no first asteroid to read, so 0 is reported for its x and
    // speed rather than throwing on asteroids[0].x. The old throw was
    // swallowed by the surrounding catch, so tick() never saw a zero count.
    context.agent.tick(time, new context.WorldState(
        asteroids.length,
        asteroids.length > 0 ? asteroids[0].x : 0,
        asteroids.length > 0 ? asteroids[0].speed : 0,
        context.agent.timeFired !== null
    ))

 } catch (e) {
    console.log(e)
 }
	try {

	// Identifiers for the two moves the agent can make.
	// ACTIONS lists both; nothing in the visible script reads it.
	var ACTIONS = ["wait", "fire"]
	var WAIT = ACTIONS[0]
	var FIRE = ACTIONS[1]

	if (!context.classesDefined) {
	context.classesDefined = true;
	// Plain record of what the agent observes on one tick: how many
	// asteroids exist, the x and vx values passed for one asteroid, and
	// whether a shot has already been fired.
	context.WorldState = function WorldState(numberOfAsteroids, asteroidX, asteroidVx, fired) {
		var snapshot = this
		snapshot.asteroidX = asteroidX
		snapshot.asteroidVx = asteroidVx
		snapshot.numberOfAsteroids = numberOfAsteroids
		snapshot.fired = fired
	}

	// Linear value approximator: keeps the given feature functions and
	// one weight per function, each weight starting at Math.random().
	var Approximator = function Approximator(funcs) {
		this.funcs = funcs
		this.weights = []
		while (this.weights.length < this.funcs.length) {
			this.weights.push(Math.random())
		}
	}
	context.Approximator = Approximator

	// Scores a state as the weighted sum of the feature functions:
	// sum over i of funcs[i](state) * weights[i].
	// Returns 0 when there are no feature functions.
	Approximator.prototype.evaluateState = function(state) {
		var weights = this.weights
		// Fold from an explicit 0: reduce() without an initial value
		// throws a TypeError on an empty array, which made an
		// approximator with no features unusable.
		return this.funcs.reduce(function(total, func, i) {
			return total + func(state) * weights[i]
		}, 0)
	}

	// Nudges the weights toward an observed reward.
	//   state  - the state in which the decision was made
	//   reward - the observed outcome, positive or negative
	// Each weight moves by (reward - current estimate) * 0.1 * the value
	// of its own feature function on that state.
	Approximator.prototype.correct = function(state, reward) {
		var learningParam = 0.1
		var predicted = this.evaluateState(state)
		var correctionAmount = reward - predicted
		console.log('correcting', state, reward)
		var adjusted = []
		for (var i = 0; i < this.weights.length; i++) {
			var feature = this.funcs[i]
			var fnScore = feature(state)
			adjusted.push(this.weights[i] + correctionAmount * learningParam * fnScore)
		}
		this.weights = adjusted
	}

	// Pairs an action with the state it was recorded against.
	var Decision = function Decision(state, action) {
		var record = this
		record.state = state
		record.action = action
	}
	context.Decision = Decision

	// Learning agent: remembers the decisions of the current episode,
	// the approximator used to score states, and the time of the last
	// shot (null while no shot is pending). timeFn is accepted but not
	// used by the constructor.
	var Agent = function Agent(approximator, timeFn) {
		var agent = this
		agent.decisions = []
		agent.approximator = approximator
		agent.timeFired = null
	}
	context.Agent = Agent

	// Ends the current episode with the given final score.
	// Every remembered decision is fed to the approximator with a reward
	// of score minus its distance from the end of the list (the oldest
	// decision gets the largest deduction); the history and the
	// pending-shot time are then cleared.
	Agent.prototype.actualScoreFinalState = function(score) {
		var history = this.decisions
		var total = history.length
		for (var i = 0; i < total; i++) {
			this.approximator.correct(history[i].state, score - (total - i))
		}
		this.decisions = []
		this.timeFired = null
	}

	// Runs one step of the agent.
	//   time  - current time value; compared against the time of the shot
	//   state - observation for this tick (see WorldState)
	// While no shot is pending the agent chooses and performs an action.
	// The episode then ends with +1000 when no asteroids remain, or with
	// -100 once 80 time units have passed since the shot was fired.
	Agent.prototype.tick = function(time, state) {
		console.log(time, state)
		if (state.fired === false) {
			var chosenAction = this.chooseAction(state)
			this.performAction(time, state, chosenAction)
		}
		// WorldState stores the count as numberOfAsteroids. The old code
		// read state.asteroidCount, which WorldState never sets, so the
		// success branch could not run. asteroidCount is still honoured
		// for any caller that supplies it instead.
		var remaining = state.numberOfAsteroids !== undefined
			? state.numberOfAsteroids
			: state.asteroidCount
		if (remaining === 0) {
			/// WHOOOO HOOOO!
			this.actualScoreFinalState(1000)
		} else if (this.timeFired != null && time >= this.timeFired + 80) {
			//// OH NOOES!
			this.actualScoreFinalState(-100)
		}
	}

	// Records the chosen action and carries it out.
	// The decision is stored against a new object whose prototype is
	// the given state, so marking it as fired does not touch the
	// caller's state. For FIRE, the time of the shot is remembered
	// and fire() is called.
	Agent.prototype.performAction = function(time, state, action) {
		var record = new Decision(Object.create(state), action)
		this.decisions.push(record)
		if (action !== FIRE) {
			return
		}
		record.state.fired = true
		this.timeFired = time
		fire()
	}

	// Picks FIRE or WAIT for the given state.
	// Two hypothetical states are derived from currentState, one with
	// fired forced to true and one with it forced to false, and both are
	// scored. FIRE is returned only when the fired variant scores
	// strictly higher and currentState.fired is exactly false.
	Agent.prototype.chooseAction = function(currentState) {
		var ifFired = Object.create(currentState, {fired: {value: true}})
		var ifHeld = Object.create(currentState, {fired: {value: false}})

		var fireValue = this.approximator.evaluateState(ifFired)
		var holdValue = this.approximator.evaluateState(ifHeld)

		if (!(fireValue > holdValue) || currentState.fired !== false) {
			return WAIT
		}
		return FIRE
	}

	// Build the agent stored on context, scoring states with two features.
	context.agent = new context.Agent(new context.Approximator([
		// Feature 0: 1 when the state is marked as fired, otherwise 0.
		function hasFired(state) {
			if (state.fired) {
				return 1
			}
			return 0
		},
		// Feature 1: the asteroidX value stored on the state.
		function asteroidPosition(state) {
			return state.asteroidX
		}
	]))

	}

	/////////////////////////////////////////////////

	// Feed this tick's observation to the agent. With no asteroids left
	// there is no first asteroid to read, so 0 is reported for its x and
	// speed rather than throwing on asteroids[0].x. The old throw was
	// swallowed by the surrounding catch, so tick() never saw a zero count.
	context.agent.tick(time, new context.WorldState(
		asteroids.length,
		asteroids.length > 0 ? asteroids[0].x : 0,
		asteroids.length > 0 ? asteroids[0].speed : 0,
		context.agent.timeFired !== null
	))

	} catch (e) {
	console.log(e)
	}