Created
October 23, 2019 01:40
-
-
Save kperry2215/5afea2976bc4aff114e21163d02ff802 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def run_tpot_automl(dataframe,
                    variable_to_predict,
                    number_generations,
                    file_to_export_pipeline_to='tpot_classifier_pipeline.py',
                    *,
                    population_size=20,
                    test_size=0.25,
                    random_state=None):
    """
    Run a TPOT classifier search on a dataset and export the best pipeline.

    The dataframe is split into a training set and a held-out test set, a
    TPOTClassifier is fit on the training portion, the fitted model's score
    on the test portion is printed, and the resulting pipeline is exported
    as a Python file. No resampling of the training set is performed here.

    Args:
        dataframe: pandas dataframe. Master dataframe containing the feature
            and target data.
        variable_to_predict: String. Name of the target column that we want
            to predict; every other column is used as a feature.
        number_generations: Int. Number of generations TPOT iterates through.
        file_to_export_pipeline_to: String. Path of the Python file the
            pipeline is exported to.
        population_size: Int (keyword-only). Value passed to TPOT as
            population_size. Defaults to 20.
        test_size: Float (keyword-only). Fraction of rows held out for the
            test set; the remainder is used for training. Defaults to 0.25.
        random_state: Int or None (keyword-only). Seed passed to both the
            train/test split and TPOT so a run can be repeated. The default
            (None) leaves both unseeded.
    Raises:
        KeyError: If variable_to_predict is not a column of dataframe.
    Outputs:
        File containing the machine learning pipeline for the best performing model.
    """
    # Fail early with a clear message instead of after the feature mask is built.
    if variable_to_predict not in dataframe.columns:
        raise KeyError(
            "Target column {!r} not found in dataframe".format(variable_to_predict)
        )
    # Remove the target column to get the features dataframe
    features_dataframe = dataframe.loc[:, dataframe.columns != variable_to_predict]
    target_series = dataframe[variable_to_predict]
    # Only test_size is passed; the training set is the complement of the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        features_dataframe,
        target_series,
        test_size=test_size,
        random_state=random_state,
    )
    # Run the TPOT pipeline
    tpot = TPOTClassifier(
        generations=number_generations,
        population_size=population_size,
        verbosity=2,
        random_state=random_state,
    )
    tpot.fit(X_train, y_train)
    # Report the score on the held-out test set
    print(tpot.score(X_test, y_test))
    tpot.export(file_to_export_pipeline_to)
#################################################################################################
# Script entry point: run the TPOT search on df_label_encoded (defined
# elsewhere), predicting the 'Class' column over 10 generations.
run_tpot_automl(
    dataframe=df_label_encoded,
    variable_to_predict='Class',
    number_generations=10,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment