Last active
January 30, 2020 02:18
-
-
Save foolnotion/5a99ae8c76c27de614320ac272ebe010 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#    This file is part of EAP.
#
#    EAP is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as
#    published by the Free Software Foundation, either version 3 of
#    the License, or (at your option) any later version.
#
#    EAP is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with EAP. If not, see <http://www.gnu.org/licenses/>.
import operator | |
import math | |
import random | |
import warnings # suppress some warnings related to invalid values | |
import numpy as np | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import r2_score | |
from sklearn.metrics import mean_squared_error | |
import multiprocessing | |
import timeit | |
from deap import algorithms | |
from deap import base | |
from deap import creator | |
from deap import tools | |
from deap import gp | |
def evalSymbReg(individual, pset, X_train, y_train):
    """Return the R^2 score of a GP individual on the training data.

    Args:
        individual: Expression tree to score (anything ``gp.compile`` accepts).
        pset: Primitive set the tree was built from. Its primitives must
            accept NumPy arrays: the compiled tree is called once with whole
            feature columns instead of once per row.
        X_train: 2-D array of shape (n_samples, n_features); column ``i`` is
            passed as the tree's ``i``-th argument.
        y_train: Target values, one per row of ``X_train``.

    Returns:
        A 1-tuple ``(fitness,)``. The fitness is 0 when the predictions
        contain an infinity or are all NaN, or when the R^2 score itself is
        not finite.
    """
    # Transform the tree expression in a callable function
    func = gp.compile(expr=individual, pset=pset)

    with warnings.catch_warnings():
        # Invalid-value / overflow warnings are expected for random trees.
        warnings.simplefilter("ignore")

        features = np.asarray(X_train)
        # One vectorised call over the feature columns replaces the per-row
        # Python loop; the element-wise results are the same.
        raw = func(*features.T)
        # A tree built only from constants yields a scalar: expand it to one
        # prediction per row. np.array() copies, so the result is writable
        # for the in-place nan_to_num below.
        y_pred = np.array(
            np.broadcast_to(raw, (features.shape[0],)), dtype=float
        )

        min_ = np.nanmin(y_pred)
        max_ = np.nanmax(y_pred)
        # Non-finite extremes mean an infinity is present or everything is NaN.
        if not (np.isfinite(min_) and np.isfinite(max_)):
            return (0.0,)

        # Remaining NaNs are replaced by the midpoint of the valid range.
        mid_ = (min_ + max_) / 2
        np.nan_to_num(y_pred, copy=False, nan=mid_, posinf=mid_, neginf=mid_)

        fit = r2_score(y_train, y_pred)
        if not np.isfinite(fit):
            fit = 0.0
        return (fit,)
# Load the data set: every column but the last is a feature, the last one is
# the regression target.
df = pd.read_csv('./data/Poly-10.csv', sep=',')
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Fixed 50/50 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1234
)
_, cols = X_train.shape

# Primitive set: one input argument per feature column, NumPy functions as
# operators (registered in this exact order), plus a random constant in
# [-1, 1).
pset = gp.PrimitiveSet("MAIN", cols)
for _func, _arity, _name in (
    (np.add, 2, "vadd"),
    (np.subtract, 2, "vsub"),
    (np.multiply, 2, "vmul"),
    (np.divide, 2, "vdiv"),
    (np.negative, 1, "vneg"),
    (np.cos, 1, "vcos"),
    (np.sin, 1, "vsin"),
    (np.exp, 1, "vexp"),
    (np.log, 1, "vlog"),
):
    pset.addPrimitive(_func, _arity, name=_name)
pset.addEphemeralConstant("rand101", lambda: np.random.uniform(-1.0, 1.0))

# NOTE: despite the "FitnessMin" name, the positive weight makes DEAP
# maximise this objective (the fitness is an R^2 score).
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

# Size limits shared by tree generation and the variation-operator decorators.
maxHeight = 10
maxLength = 50

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=maxHeight)
toolbox.register(
    "individual", tools.initIterate, creator.Individual, toolbox.expr
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register(
    "evaluate", evalSymbReg, pset=pset, X_train=X_train, y_train=y_train
)
toolbox.register("select", tools.selTournament, tournsize=5)

# Static height and length limits, applied as decorators to the variation
# operators.
limitHeight = gp.staticLimit(operator.attrgetter('height'), maxHeight)
limitLength = gp.staticLimit(len, maxLength)

# Pool of mutation operators that mutOperator picks from.
mutOperators = [gp.mutUniform]
def mutOperator(*args, **kwargs):
    """Apply one mutation operator picked at random from ``mutOperators``.

    All positional and keyword arguments are forwarded unchanged to the
    chosen operator, and its result is returned as-is.
    """
    # Draw from the stdlib RNG, which main() seeds, instead of NumPy's RNG;
    # this also avoids np.random.choice converting the list of callables to
    # an array.
    mut = random.choice(mutOperators)
    return mut(*args, **kwargs)
# Variation operators: one-point crossover, and mutation that replaces a
# subtree with a freshly generated full tree of depth 0-2.
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register('mutate', mutOperator, expr=toolbox.expr_mut, pset=pset)

# Wrap both operators with the height limit first, then the length limit.
for _alias in ("mate", "mutate"):
    toolbox.decorate(_alias, limitHeight)
    toolbox.decorate(_alias, limitLength)
def main():
    """Run the evolutionary search and return ``(pop, stats, hof)``.

    Fitness evaluation is distributed over a ``multiprocessing.Pool`` that
    lives only for the duration of the run.

    Returns:
        pop: The initial population list that was handed to
            ``algorithms.eaSimple`` (its return value is not captured).
        stats: The ``tools.Statistics`` object used for logging.
        hof: The ``tools.HallOfFame`` holding the single best individual.
    """
    np.seterr(all='ignore')

    # Seed both generators: besides the stdlib `random` module, this file
    # also draws from `np.random` (e.g. for the ephemeral constants), which
    # would otherwise make runs non-reproducible.
    random.seed(318)
    np.random.seed(318)

    # The context manager guarantees the worker processes are released even
    # if the run raises.
    with multiprocessing.Pool() as pool:
        toolbox.register("map", pool.map)
        try:
            pop = toolbox.population(n=1000)
            hof = tools.HallOfFame(1)

            stats = tools.Statistics(lambda ind: ind.fitness.values)
            stats.register("avg", np.nanmean)
            stats.register("std", np.nanstd)
            stats.register("min", np.nanmin)
            stats.register("max", np.nanmax)

            algorithms.eaSimple(pop, toolbox, cxpb=1, mutpb=0.25, ngen=100,
                                stats=stats, halloffame=hof)
        finally:
            # The pool is terminated when the `with` block exits; do not
            # leave toolbox.map pointing at it.
            toolbox.register("map", map)

    return pop, stats, hof
if __name__ == "__main__":
    # Time a single complete run and print the elapsed seconds.
    elapsed = timeit.timeit(stmt=main, number=1)
    print(elapsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I agree with foolnotion about evolvability and the desire to keep 'dead genes' around, as complexity can pay off down the line. The above code uses length and depth limits on the operators to manage this growth over many generations:
Additionally, there are many other methods you can use alongside this to manage size (so many white papers to read), if individual size becomes an issue and problems start to arise many generations into the evolution.
One example is using a double tournament:
https://deap.readthedocs.io/en/master/api/tools.html#deap.tools.selDoubleTournament
Another brute-force method may be more... messy, like having your mutation operator randomly choose between the normal mutation and something like a shrink operator, with a growing probability as the size reaches some defined limit that we don't necessarily want to converge to... this encourages evolvability up to a point that we happen to know already.
deap.gp.mutShrink
https://deap.readthedocs.io/en/master/api/tools.html#deap.gp.mutShrink
Here is an example of that... but this is sort of tricky to really tune, so be wary of slapping it into your code: