# This file is part of EAP.
#
# EAP is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# EAP is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with EAP. If not, see <http://www.gnu.org/licenses/>.
import operator
import math
import random
import warnings  # suppress some warnings related to invalid values
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import multiprocessing
import timeit

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
def evalSymbReg(individual, pset, X_train, y_train):
    # Transform the tree expression into a callable function
    func = gp.compile(expr=individual, pset=pset)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_pred = np.array([func(*x) for x in X_train])
    min_ = np.nanmin(y_pred)
    max_ = np.nanmax(y_pred)
    if not (np.isfinite(min_) and np.isfinite(max_)):
        return 0,
    # replace NaN/inf predictions with the midpoint of the finite range
    mid_ = (min_ + max_) / 2
    np.nan_to_num(y_pred, copy=False, nan=mid_, posinf=mid_, neginf=mid_)
    fit = r2_score(y_train, y_pred)
    if not np.isfinite(fit):
        fit = 0
    return fit,
# load data
df = pd.read_csv('./data/Poly-10.csv', sep=',')
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1234)
_, cols = X_train.shape

# build the primitive set (one input argument per data column)
pset = gp.PrimitiveSet("MAIN", cols)
pset.addPrimitive(np.add, 2, name="vadd")
pset.addPrimitive(np.subtract, 2, name="vsub")
pset.addPrimitive(np.multiply, 2, name="vmul")
pset.addPrimitive(np.divide, 2, name="vdiv")
pset.addPrimitive(np.negative, 1, name="vneg")
pset.addPrimitive(np.cos, 1, name="vcos")
pset.addPrimitive(np.sin, 1, name="vsin")
pset.addPrimitive(np.exp, 1, name="vexp")
pset.addPrimitive(np.log, 1, name="vlog")
pset.addEphemeralConstant("rand101", lambda: np.random.uniform(-1.0, 1.0))
creator.create("FitnessMin", base.Fitness, weights=(1.0,)) | |
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) | |
maxHeight = 10
maxLength = 50

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=maxHeight)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evalSymbReg, pset=pset, X_train=X_train, y_train=y_train)
toolbox.register("select", tools.selTournament, tournsize=5)

# set static height and length limits for all generated trees
limitHeight = gp.staticLimit(operator.attrgetter('height'), maxHeight)
limitLength = gp.staticLimit(len, maxLength)

mutOperators = [gp.mutUniform]

def mutOperator(*args, **kwargs):
    mut = np.random.choice(mutOperators)
    return mut(*args, **kwargs)

toolbox.register("mate", gp.cxOnePoint)
toolbox.decorate("mate", limitHeight)
toolbox.decorate("mate", limitLength)

toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register('mutate', mutOperator, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mutate", limitHeight)
toolbox.decorate("mutate", limitLength)
def main():
    np.seterr(all='ignore')
    random.seed(318)

    pool = multiprocessing.Pool()
    toolbox.register("map", pool.map)

    pop = toolbox.population(n=1000)
    hof = tools.HallOfFame(1)

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.nanmean)
    stats.register("std", np.nanstd)
    stats.register("min", np.nanmin)
    stats.register("max", np.nanmax)

    algorithms.eaSimple(pop, toolbox, cxpb=1, mutpb=0.25, ngen=100, stats=stats, halloffame=hof)
    pool.close()
    return pop, stats, hof

if __name__ == "__main__":
    print(timeit.timeit(stmt=main, number=1))
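A side note on the script: train_test_split also produces X_test and y_test (and mean_squared_error is imported) but they are never used. A minimal sketch of scoring the best-of-run individual on the held-out half, e.g. run under the __main__ guard in place of the timeit call, might look like this:

# sketch: evaluate the hall-of-fame individual on the unused test split
pop, stats, hof = main()
best = hof[0]
func = gp.compile(expr=best, pset=pset)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    y_pred = np.array([func(*x) for x in X_test])
print(best)
print("test R2: ", r2_score(y_test, y_pred))
print("test MSE:", mean_squared_error(y_test, y_pred))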
Hi,
Can you perhaps share your csv file?
Thanks in advance.
You can find my data and latest code here: https://github.com/foolnotion/deap-symreg
Thanks again. A suggestion/question: before going from one generation to the next, wouldn't it help to "simplify" the trees (i.e., the individuals)? For example, if we have a "-" node whose two leaves are both "x", we can simply replace the "-" with a "0" (another example: x/x). This should also help with not reaching Python's limit on the tree depth. Any ideas on how to do it?
For DEAP expressions I would look at sympy, but from a genetic programming perspective simplification/pruning is not great. Like in nature, genotypes include a lot of redundancy (cryptic genetic variation, other mechanisms for robustness/buffering). This actually increases the potential to evolve, what GP calls evolvability. Simplification would cancel this effect, as it would offer fewer targets for mutation or crossover to create some adaptive change.
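For reference, a rough sketch of what the sympy route could look like for the primitive set in the gist above; simplify_individual is a hypothetical helper, and it only goes one way (turning the simplified sympy expression back into a DEAP tree is not covered):

import sympy

def simplify_individual(individual, pset):
    # map the gist's primitive names onto sympy operations
    locals_ = {
        "vadd": lambda a, b: a + b,
        "vsub": lambda a, b: a - b,
        "vmul": lambda a, b: a * b,
        "vdiv": lambda a, b: a / b,
        "vneg": lambda a: -a,
        "vcos": sympy.cos,
        "vsin": sympy.sin,
        "vexp": sympy.exp,
        "vlog": sympy.log,
    }
    # DEAP names the terminals ARG0, ARG1, ... by default
    locals_.update({arg: sympy.Symbol(arg) for arg in pset.arguments})
    # str(individual) gives e.g. "vsub(ARG0, ARG0)", which simplifies to 0
    return sympy.simplify(sympy.sympify(str(individual), locals=locals_))

Calling simplify_individual(hof[0], pset) would then return the algebraically simplified form of the best individual.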
DEAP's GP support is not quite mature IMHO. Recombination operators (crossover, mutation) should be aware of tree length and depth limits and prevent the generation of offspring individuals exceeding them.
I agree with foolnotion about evolvability and a desire to keep 'dead genes' around, as complexity can pay off down the line. The above code utilizes length and depth operator limits in order to manage this growth over many generations:
limitLength = 100
limitHeight = 15
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=limitHeight))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=limitHeight))
toolbox.decorate("mate", gp.staticLimit(key=len, max_value=limitLength))
toolbox.decorate("mutate", gp.staticLimit(key=len, max_value=limitLength))
Additionally, there are many other methods you can use on top of this to manage size (so many white papers to read), if individual size becomes an issue many generations into the evolution.
One example is using a double tournament:
toolbox.register("select", tools.selDoubleTournament,
fitness_size=7,
parsimony_size=1.4,
fitness_first=True)
https://deap.readthedocs.io/en/master/api/tools.html#deap.tools.selDoubleTournament
Another brute-force method may be messier: have your mutation operator randomly choose between the normal mutation and something like a shrink operator, with a probability that grows as the size approaches some defined limit that we don't necessarily want to converge to... this encourages evolvability up to a point that we happen to know already.
deap.gp.mutShrink
https://deap.readthedocs.io/en/master/api/tools.html#deap.gp.mutShrink
Here is an example of that... but this is sort of tricky to really tune, so be wary of slapping it into your code:
toolbox.register("mutShrink", gp.mutShrink)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutUniform", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
limitLength = 65
target_mean_length = 30
def mixed_mutation(ind):
    """
    The probability of shrinking grows with the individual's length:
    0.5 * (length - 30) / 65 for lengths [10, 20, 30, 40, 50, 60, 65]
    gives roughly [-0.154, -0.077, 0.0, 0.077, 0.154, 0.231, 0.269]
    (a negative value means shrinking never triggers).
    """
    proba_by_size = 0.5 * (float(len(ind) - target_mean_length) / float(limitLength))
    if random.random() < proba_by_size:
        ind, = toolbox.mutShrink(ind)
    else:
        ind, = toolbox.mutUniform(ind)
    return ind,
toolbox.register("mutate", mixed_mutation)
Here is a big update to the code; it was on Revision #3, I believe, when posted. More info with discussion here:
https://mail.google.com/mail/u/0/#inbox/FMfcgxwGCbBGsFnpFGwRTTTjnrVCHPTk