Pierre-Louis BESCOND pierrelouisbescond

Head of Data & Advanced Analytics

pierrelouisbescond / std_dev_selection_process.py

Created May 19, 2020 04:12

	# Number of individuals produced at each generation, and how many are selected
	generation_size = 100
	population_out_size = 10

	# "min", "max" and "std" rows of the dataset summary; may be used as absolute boundaries
	universe_constraints = df.describe().loc[["min", "max", "std"], :]

	# Values that may be imposed on specific features, indexed by feature name
	constraints = pd.DataFrame(
		{
			"constrained_feature": ["X1", "X3"],
			"constrained_feature_value": [-1, 4],
		}
	).set_index("constrained_feature")

pierrelouisbescond / std_dev_select.py

Created May 19, 2020 04:11

	# std_dev_select: only the opening lines of this function are visible in this
	# listing; the comments below describe those lines only.
	def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):

	# The feature names, the number of features and the replication factor are
	# derived from population_in
	# NOTE(review): this rebinds the `features_names` parameter, so the value
	# passed by the caller is discarded — confirm this is intended.
	features_names = population_in.columns
	features_nb = population_in.shape[1]
	# Floor division: the factor is 0 when population_in has more rows than
	# generation_size, and the division fails when population_in has no rows.
	# NOTE(review): pd.concat raises ValueError on an empty list — confirm callers
	# always pass a population no larger than generation_size.
	replication_factor = generation_size // population_in.shape[0]

	# We replicate the population_in according to the replication factor
	# (ignore_index=True renumbers the rows of the result from 0)
	new_generation = pd.concat([population_in]*replication_factor, ignore_index=True)

pierrelouisbescond / min_max_selection_process.py

Created May 18, 2020 11:44

	# Values that may be imposed on specific features, indexed by feature name
	constraints = pd.DataFrame(
		{
			"constrained_feature": ["X1", "X3"],
			"constrained_feature_value": [-1, 4],
		}
	).set_index("constrained_feature")

	# Number of individuals produced at each generation, and how many are selected
	generation_size = 100
	population_out_size = 10

	# First population, generated from the dataset's columns without "Y"
	starting_population = generate_min_max_population(
		df.drop(columns="Y"),
		constraints,
		generation_size,
	)
	features_names = starting_population.columns

pierrelouisbescond / min_max_select.py

Created May 18, 2020 11:41

	# min_max_select: creates a new generation from population_in and merges the
	# input population back into it. Only the opening steps of the function are
	# visible in this listing; nothing beyond them is changed here.
	def min_max_select(constraints, population_in, features_names, generation_size, population_out_size, target, model):

		# We create a new generation, based on the input population characteristics
		new_generation = generate_min_max_population(population_in, constraints, generation_size)

		# We add the original population to the new generation to keep the best
		# individuals of these two DataFrames.
		# DataFrame.append was removed in pandas 2.0; pd.concat with
		# ignore_index=True produces the same stacked, renumbered DataFrame.
		new_generation = pd.concat([new_generation, population_in], ignore_index=True)

		# We calculate Y thanks to the model and the distance from target

pierrelouisbescond / generate_min_max_population.py

Created May 18, 2020 11:40

	# generate_min_max_population: only the opening lines of this function are
	# visible in this listing; the comments below describe those lines only.
	# NOTE(review): `constraints` is not referenced in the visible lines.
	def generate_min_max_population(df, constraints, generation_size):

	# The names, min-max and number of features are extracted from the DataFrame
	features_nb = df.shape[1]
	features_names = df.columns
	# Keep only the "min" and "max" rows of the summary statistics.
	# NOTE(review): describe() summarises numeric columns by default, so
	# df_min_max may have fewer columns than features_names if df holds
	# non-numeric data — confirm df is fully numeric.
	df_min_max = df.describe().loc[["min","max"],:]

	# We initialize the new population DataFrame with zeros
	# (generation_size rows, one column per column of df)
	new_population = pd.DataFrame(np.zeros((generation_size,features_nb)), columns=features_names)

pierrelouisbescond / target_distance_calculation.py

Last active May 9, 2020 15:16

	# Value the predictions are compared against
	target = 42

	# Model prediction for every row of the population, stored as column "Y"
	population["Y"] = RFR.predict(population)

	# Absolute gap between each prediction and the target. The scalar broadcasts
	# over the column, so the result no longer depends on a separate
	# population_size matching the number of rows.
	population["target_distance"] = (population["Y"] - target).abs()

	# Bare expression: displays the DataFrame when run as the last line of a notebook cell
	population

pierrelouisbescond / virtual_population_generation_1.py

Last active May 21, 2020 16:46

	# Summary statistics of the dataset, kept in a DataFrame and displayed
	df_describe = df.describe()
	display(df_describe)

	# Parameters of the virtual population we generate: its size, and the
	# feature columns (every column of df except the last one)
	population_size = 1000
	features_names = df.columns[:-1]
	n_features = features_names.shape[0]

	# As an example, we assign a constant value for the third most important characteristic

pierrelouisbescond / feature_importances.py

Last active May 9, 2020 15:15

	# One "Importance" value per entry of col_names, taken from the fitted model
	# and ordered from the largest importance to the smallest
	df_feature_importances = pd.DataFrame(
		RFR.feature_importances_,
		index=col_names,
		columns=["Importance"],
	).sort_values("Importance", ascending=False)
	df_feature_importances

pierrelouisbescond / create_non_linear_model.py

Last active May 9, 2020 15:15

	from sklearn.ensemble import RandomForestRegressor

	# Random forest with default settings, fitted on X / y (fit returns the estimator itself)
	RFR = RandomForestRegressor().fit(X, y)

pierrelouisbescond / make_regression.py

Last active May 18, 2020 11:37

	import pandas as pd
	import numpy as np
	import time

	from sklearn.datasets import make_regression

	# Dataset dimensions. The names match keyword parameters of the imported
	# make_regression — presumably they are passed to it further down; the call
	# is not visible in this listing.
	n_samples = 1000
	n_features = 10
	n_informative = 3