This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summary statistics (min, max, std) of every column of the dataset.
# They may be used to set absolute boundaries on the generated values.
universe_constraints = df.describe().loc[["min", "max", "std"], :]

# Features that may be pinned to a specific value, indexed by feature name.
constraints = pd.DataFrame(
    {
        "constrained_feature": ["X1", "X3"],
        "constrained_feature_value": [-1, 4],
    }
).set_index("constrained_feature")

# Number of individuals at each generation, and number of individuals selected.
generation_size = 100
population_out_size = 10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model): | |
# The names, min-max and number of features are extracted from the DataFrame | |
features_names = population_in.columns | |
features_nb = population_in.shape[1] | |
replication_factor = generation_size // population_in.shape[0] | |
# We replicate the population_in according to the replication factor | |
new_generation = pd.concat([population_in]*replication_factor, ignore_index=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Features that may be pinned to a specific value, indexed by feature name.
constraints = pd.DataFrame(
    {
        "constrained_feature": ["X1", "X3"],
        "constrained_feature_value": [-1, 4],
    }
).set_index("constrained_feature")

# Number of individuals at each generation, and number of individuals selected.
generation_size = 100
population_out_size = 10

# Build the first population from the features of the original dataset
# (every column except "Y").
starting_population = generate_min_max_population(
    df.drop("Y", axis=1), constraints, generation_size
)
features_names = starting_population.columns
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def min_max_select(constraints, population_in, features_names, generation_size, population_out_size, target, model): | |
# We create a new generation, based on the input population characteristics | |
new_generation = generate_min_max_population(population_in, constraints, generation_size) | |
# We append the original population to the new generation to keep the best | |
# individuals of these two DataFrames | |
new_generation = new_generation.append(population_in, ignore_index=True) | |
# We calculate Y thanks to the model and the distance from target |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_min_max_population(df, constraints, generation_size): | |
# The names, min-max and number of features are extracted from the DataFrame | |
features_nb = df.shape[1] | |
features_names = df.columns | |
df_min_max = df.describe().loc[["min","max"],:] | |
# We initialize the new population DataFrame with zeros | |
new_population = pd.DataFrame(np.zeros((generation_size,features_nb)), columns=features_names) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Value the predictions are compared against.
target = 42

# Predict Y for every individual, then store the absolute gap between each
# prediction and the target. Subtracting the scalar broadcasts over the whole
# column, so the computation does not rely on `population_size` being equal
# to the actual number of rows in `population`.
population["Y"] = RFR.predict(population)
population["target_distance"] = (population["Y"] - target).abs()
population
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We store the describe() results inside a DataFrame and show them.
df_describe = df.describe()
display(df_describe)

# Parameters of the virtual population we generate.
population_size = 1000
# Every column except the last one is kept as a feature name
# (the last column is presumably the target -- confirm against the dataset).
features_names = df.columns[:-1]
n_features = len(features_names)
# As an example, we assign a constant value for the third most important characteristic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One row per name in `col_names`, holding the importance reported by the
# fitted model, sorted from the most to the least important.
df_feature_importances = pd.DataFrame(
    RFR.feature_importances_,
    columns=["Importance"],
    index=col_names,
).sort_values("Importance", ascending=False)
df_feature_importances
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest regressor, with its default settings, on X and y.
RFR = RandomForestRegressor()
RFR.fit(X, y)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
# Size parameters of the dataset: number of rows, number of features, and
# number of informative features (presumably passed to make_regression --
# the call itself is not visible in this snippet).
n_samples = 1000
n_features = 10
n_informative = 3
NewerOlder