Reinforcement learning is a mode of machine learning driven by feedback from the environment on how good a sequence of actions taken by the learning agent turns out to be.
We consider here a reinforcement learning mechanism for neural networks that is similar to policy gradients (see A. Karpathy's introduction) but with the following distinction: several agents collected in a "culture" interact with the environment independently, and rather than each updating its policy according to its own score, each agent learns from the experience of a peer with a better score.
Briefly, agents imitate their more successful peers.
This learning mechanism may also be seen as an evolutionary algorithm applied to behavioral memes. In particular:
- No structural compatibility between neural networks of individual agents is necessary beyond the input and output layers, which allows for coevolution of different designs.
- It is easy to parallelize the computation across the independent agents.
This "cultural diffusion" has been observed in bumblebees, for example.
The environment we look at is a very simplified version of pac-man. At each time step the agent (dark dot) performs one of five actions: stay/up/down/left/right. The aim is to collect as many tokens (brighter dots) as possible within a round of 100 time steps (frames). The score for the round is the number of collected tokens. Five tokens appear in random locations a) initially and b) as soon as all current tokens have been collected. The boundary of the playfield is periodic, and the playfield is presented to the agent shifted by the agent's own coordinates, so that the agent always sees itself at the origin.
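The environment itself fits in a few lines. The sketch below only illustrates the rules described above; the grid size, the starting position, and the direction conventions are assumptions and need not match the attached archive.

```python
import numpy as np

class TokenField:
    """Toy pac-man-like playfield on a periodic (toroidal) grid."""

    MOVES = [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]  # stay/up/down/left/right

    def __init__(self, size=11, n_tokens=5, rng=None):
        self.size, self.n_tokens = size, n_tokens
        self.rng = rng or np.random.RandomState()
        self.agent = np.array([0, 0])
        self.tokens = np.zeros((size, size), dtype=bool)
        self._spawn_tokens()

    def _spawn_tokens(self):
        # Place n_tokens tokens at distinct random cells.
        flat = self.rng.choice(self.size * self.size, self.n_tokens, replace=False)
        self.tokens[np.unravel_index(flat, self.tokens.shape)] = True

    def observe(self):
        # The playfield as the agent sees it: shifted so the agent sits at (0, 0).
        return np.roll(self.tokens, tuple(-self.agent), axis=(0, 1)).astype(float)

    def step(self, action):
        # Move on the torus, collect a token if present, respawn when all are gone.
        self.agent = (self.agent + self.MOVES[action]) % self.size
        collected = int(self.tokens[tuple(self.agent)])
        self.tokens[tuple(self.agent)] = False
        if not self.tokens.any():
            self._spawn_tokens()
        return self.observe(), collected
```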
Here is what the response of an agent typically looks like a) before training, b) with some training, c) with substantial training:
(See the attached archive peer_learning_plot1.zip for the code that produces these images.)
If in doubt, use python3.
Required libraries: numpy, keras (with tensorflow or theano), matplotlib.pyplot.
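As an illustration of how the listed libraries fit together, a plausible policy network flattens the shifted playfield and outputs a softmax over the five actions. The architecture below is an assumption for the sketch, not necessarily what the attached archive uses.

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten

def build_agent(field_size=11, n_actions=5):
    """A small policy network: shifted playfield in, action probabilities out."""
    model = Sequential([
        Flatten(input_shape=(field_size, field_size)),
        Dense(64, activation='relu'),
        Dense(n_actions, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

# Sampling an action from the policy for an observation obs of shape (11, 11):
#   probs = agent.predict(obs[np.newaxis])[0]
#   action = np.random.choice(len(probs), p=probs)
```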
License: CC BY 4.0.