josephmisiti · November 11, 2016 18:19 · AtomBG · Aug 26, 2018 · samuelrodgers · Dec 26, 2018
diff --git a/helloevolve.py b/helloevolve.py
 """
 helloevolve.py implements a genetic algorithm that starts with a base
 population of randomly generated strings, iterates over a certain number of
 generations while implementing 'natural selection', and prints out the most fit
 string.

 The parameters of the simulation can be changed by modifying one of the many
 global variables. To change the "most fit" string, modify OPTIMAL. POP_SIZE
 controls the size of each generation, and GENERATIONS is the amount of 
 generations that the simulation will loop through before returning the fittest
 string.

 This program subject to the terms of the BSD license listed below.

 ---

 Copyright (c) 2011 Colin Drake

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """

 import random

 #
 # Global variables
 # Setup optimal string and GA input variables.
 #

 OPTIMAL     = "Hello, World"
 DNA_SIZE    = len(OPTIMAL)
 POP_SIZE    = 20
 GENERATIONS = 5000

 #
 # Helper functions
 # These are used as support, but aren't direct GA-specific functions.
 #

 def weighted_choice(items):
  """
  Chooses a random element from items, where items is a list of tuples in
  the form (item, weight). weight determines the probability of choosing its
  respective item. Note: this function is borrowed from ActiveState Recipes.
  """
  weight_total = sum((item[1] for item in items))
  n = random.uniform(0, weight_total)
  for item, weight in items:
    if n < weight:
      return item
    n = n - weight
  return item

 def random_char():
  """
  Return a random character between ASCII 32 and 126 (i.e. spaces, symbols,
  letters, and digits). All characters returned will be nicely printable.
  """
  return chr(int(random.randrange(32, 126, 1)))

 def random_population():
  """
  Return a list of POP_SIZE individuals, each randomly generated via iterating
  DNA_SIZE times to generate a string of random characters with random_char().
  """
  pop = []
  for i in xrange(POP_SIZE):
    dna = ""
    for c in xrange(DNA_SIZE):
      dna += random_char()
    pop.append(dna)
  return pop

 #
 # GA functions
 # These make up the bulk of the actual GA algorithm.
 #

 def fitness(dna):
  """
  For each gene in the DNA, this function calculates the difference between
  it and the character in the same position in the OPTIMAL string. These values
  are summed and then returned.
  """
  fitness = 0
  for c in xrange(DNA_SIZE):
    fitness += abs(ord(dna[c]) - ord(OPTIMAL[c]))
  return fitness

 def mutate(dna):
  """
  For each gene in the DNA, there is a 1/mutation_chance chance that it will be
  switched out with a random character. This ensures diversity in the
  population, and ensures that is difficult to get stuck in local minima.
  """
  dna_out = ""
  mutation_chance = 100
  for c in xrange(DNA_SIZE):
    if int(random.random()*mutation_chance) == 1:
      dna_out += random_char()
    else:
      dna_out += dna[c]
  return dna_out

 def crossover(dna1, dna2):
  """
  Slices both dna1 and dna2 into two parts at a random index within their
  length and merges them. Both keep their initial sublist up to the crossover
  index, but their ends are swapped.
  """
  pos = int(random.random()*DNA_SIZE)
  return (dna1[:pos]+dna2[pos:], dna2[:pos]+dna1[pos:])

 #
 # Main driver
 # Generate a population and simulate GENERATIONS generations.
 #

 if __name__ == "__main__":
  # Generate initial population. This will create a list of POP_SIZE strings,
  # each initialized to a sequence of random characters.
  population = random_population()

  # Simulate all of the generations.
  for generation in xrange(GENERATIONS):
    print "Generation %s... Random sample: '%s'" % (generation, population[0])
    weighted_population = []

    # Add individuals and their respective fitness levels to the weighted
    # population list. This will be used to pull out individuals via certain
    # probabilities during the selection phase. Then, reset the population list
    # so we can repopulate it after selection.
    for individual in population:
      fitness_val = fitness(individual)

      # Generate the (individual,fitness) pair, taking in account whether or
      # not we will accidently divide by zero.
      if fitness_val == 0:
        pair = (individual, 1.0)
      else:
        pair = (individual, 1.0/fitness_val)

      weighted_population.append(pair)

    population = []

    # Select two random individuals, based on their fitness probabilites, cross
    # their genes over at a random point, mutate them, and add them back to the
    # population for the next iteration.
    for _ in xrange(POP_SIZE/2):
      # Selection
      ind1 = weighted_choice(weighted_population)
      ind2 = weighted_choice(weighted_population)

      # Crossover
      ind1, ind2 = crossover(ind1, ind2)

      # Mutate and add back into the population.
      population.append(mutate(ind1))
      population.append(mutate(ind2))

  # Display the highest-ranked string after all generations have been iterated
  # over. This will be the closest string to the OPTIMAL string, meaning it
  # will have the smallest fitness value. Finally, exit the program.
  fittest_string = population[0]
  minimum_fitness = fitness(population[0])

  for individual in population:
    ind_fitness = fitness(individual)
    if ind_fitness <= minimum_fitness:
      fittest_string = individual
      minimum_fitness = ind_fitness

  print "Fittest String: %s" % fittest_string
  exit(0)
	"""
	helloevolve.py implements a genetic algorithm that starts with a base
	population of randomly generated strings, iterates over a certain number of
	generations while implementing 'natural selection', and prints out the most fit
	string.

	The parameters of the simulation can be changed by modifying one of the many
	global variables. To change the "most fit" string, modify OPTIMAL. POP_SIZE
	controls the size of each generation, and GENERATIONS is the amount of
	generations that the simulation will loop through before returning the fittest
	string.

	This program subject to the terms of the BSD license listed below.

	---

	Copyright (c) 2011 Colin Drake

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights to
	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	of the Software, and to permit persons to whom the Software is furnished to do
	so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
	"""

	import random

	#
	# Global variables
	# Setup optimal string and GA input variables.
	#

	OPTIMAL = "Hello, World"
	DNA_SIZE = len(OPTIMAL)
	POP_SIZE = 20
	GENERATIONS = 5000

	#
	# Helper functions
	# These are used as support, but aren't direct GA-specific functions.
	#

	def weighted_choice(items):
	"""
	Chooses a random element from items, where items is a list of tuples in
	the form (item, weight). weight determines the probability of choosing its
	respective item. Note: this function is borrowed from ActiveState Recipes.
	"""
	weight_total = sum((item[1] for item in items))
	n = random.uniform(0, weight_total)
	for item, weight in items:
	if n < weight:
	return item
	n = n - weight
	return item

	def random_char():
	"""
	Return a random character between ASCII 32 and 126 (i.e. spaces, symbols,
	letters, and digits). All characters returned will be nicely printable.
	"""
	return chr(int(random.randrange(32, 126, 1)))

	def random_population():
	"""
	Return a list of POP_SIZE individuals, each randomly generated via iterating
	DNA_SIZE times to generate a string of random characters with random_char().
	"""
	pop = []
	for i in xrange(POP_SIZE):
	dna = ""
	for c in xrange(DNA_SIZE):
	dna += random_char()
	pop.append(dna)
	return pop

	#
	# GA functions
	# These make up the bulk of the actual GA algorithm.
	#

	def fitness(dna):
	"""
	For each gene in the DNA, this function calculates the difference between
	it and the character in the same position in the OPTIMAL string. These values
	are summed and then returned.
	"""
	fitness = 0
	for c in xrange(DNA_SIZE):
	fitness += abs(ord(dna[c]) - ord(OPTIMAL[c]))
	return fitness

	def mutate(dna):
	"""
	For each gene in the DNA, there is a 1/mutation_chance chance that it will be
	switched out with a random character. This ensures diversity in the
	population, and ensures that is difficult to get stuck in local minima.
	"""
	dna_out = ""
	mutation_chance = 100
	for c in xrange(DNA_SIZE):
	if int(random.random()*mutation_chance) == 1:
	dna_out += random_char()
	else:
	dna_out += dna[c]
	return dna_out

	def crossover(dna1, dna2):
	"""
	Slices both dna1 and dna2 into two parts at a random index within their
	length and merges them. Both keep their initial sublist up to the crossover
	index, but their ends are swapped.
	"""
	pos = int(random.random()*DNA_SIZE)
	return (dna1[:pos]+dna2[pos:], dna2[:pos]+dna1[pos:])

	#
	# Main driver
	# Generate a population and simulate GENERATIONS generations.
	#

	if __name__ == "__main__":
	# Generate initial population. This will create a list of POP_SIZE strings,
	# each initialized to a sequence of random characters.
	population = random_population()

	# Simulate all of the generations.
	for generation in xrange(GENERATIONS):
	print "Generation %s... Random sample: '%s'" % (generation, population[0])
	weighted_population = []

	# Add individuals and their respective fitness levels to the weighted
	# population list. This will be used to pull out individuals via certain
	# probabilities during the selection phase. Then, reset the population list
	# so we can repopulate it after selection.
	for individual in population:
	fitness_val = fitness(individual)

	# Generate the (individual,fitness) pair, taking in account whether or
	# not we will accidently divide by zero.
	if fitness_val == 0:
	pair = (individual, 1.0)
	else:
	pair = (individual, 1.0/fitness_val)

	weighted_population.append(pair)

	population = []

	# Select two random individuals, based on their fitness probabilites, cross
	# their genes over at a random point, mutate them, and add them back to the
	# population for the next iteration.
	for _ in xrange(POP_SIZE/2):
	# Selection
	ind1 = weighted_choice(weighted_population)
	ind2 = weighted_choice(weighted_population)

	# Crossover
	ind1, ind2 = crossover(ind1, ind2)

	# Mutate and add back into the population.
	population.append(mutate(ind1))
	population.append(mutate(ind2))

	# Display the highest-ranked string after all generations have been iterated
	# over. This will be the closest string to the OPTIMAL string, meaning it
	# will have the smallest fitness value. Finally, exit the program.
	fittest_string = population[0]
	minimum_fitness = fitness(population[0])

	for individual in population:
	ind_fitness = fitness(individual)
	if ind_fitness <= minimum_fitness:
	fittest_string = individual
	minimum_fitness = ind_fitness

	print "Fittest String: %s" % fittest_string
	exit(0)