kperry2215 · October 23, 2019 01:40
diff --git a/run_tpot_automl.py b/run_tpot_automl.py
 def run_tpot_automl(dataframe,
                     variable_to_predict,
                     number_generations,
                     file_to_export_pipeline_to='tpot_classifier_pipeline.py',
                     random_state=None):
     """
     Fit a TPOT classifier on a random 75/25 train/test split of the dataset,
     print its score on the test portion, and export the best pipeline found.

     Note: no resampling (e.g. oversampling) of the training set is performed
     here; if class balancing is required it must be done by the caller.

     Args:
         dataframe: pandas dataframe. Master dataframe containing the feature and
             target data.
         variable_to_predict: String. Name of the target column that we want to
             predict. Every other column is used as a feature.
         number_generations: Int. Number of generations for TPOT to iterate through.
         file_to_export_pipeline_to: String. Path of the Python file that the
             best pipeline is exported to.
         random_state: Int or None. Seed forwarded to both the train/test split
             and the TPOT search. The default None leaves both unseeded, so
             results vary from run to run.
     Outputs:
         File containing the machine learning pipeline for the best performing model.
         The test-set score is printed to stdout; nothing is returned.
     """
     # Remove the target column to get the features dataframe
     is_feature_column = dataframe.columns != variable_to_predict
     features_dataframe = dataframe.loc[:, is_feature_column]
     target_series = dataframe[variable_to_predict]
     X_train, X_test, y_train, y_test = train_test_split(
         features_dataframe,
         target_series,
         train_size=0.75,
         test_size=0.25,
         random_state=random_state,
     )
     # Run the TPOT pipeline search on the training portion only
     tpot = TPOTClassifier(
         generations=number_generations,
         population_size=20,
         verbosity=2,
         random_state=random_state,
     )
     tpot.fit(X_train, y_train)
     # Report performance on the held-out rows, then persist the winning pipeline
     print(tpot.score(X_test, y_test))
     tpot.export(file_to_export_pipeline_to)

 # -----------------------------------------------------------------------------------------------
 # Main block: run the TPOT search on the label-encoded dataframe, predicting 'Class'
 # over 10 generations.
 run_tpot_automl(
     dataframe=df_label_encoded,
     variable_to_predict='Class',
     number_generations=10,
 )
	def run_tpot_automl(dataframe,
	                    variable_to_predict,
	                    number_generations,
	                    file_to_export_pipeline_to='tpot_classifier_pipeline.py',
	                    random_state=None):
	    """
	    Fit a TPOT classifier on a random 75/25 train/test split of the dataset,
	    print its score on the test portion, and export the best pipeline found.

	    Note: no resampling (e.g. oversampling) of the training set is performed
	    here; if class balancing is required it must be done by the caller.

	    Args:
	        dataframe: pandas dataframe. Master dataframe containing the feature and
	            target data.
	        variable_to_predict: String. Name of the target column that we want to
	            predict. Every other column is used as a feature.
	        number_generations: Int. Number of generations for TPOT to iterate through.
	        file_to_export_pipeline_to: String. Path of the Python file that the
	            best pipeline is exported to.
	        random_state: Int or None. Seed forwarded to both the train/test split
	            and the TPOT search. The default None leaves both unseeded, so
	            results vary from run to run.
	    Outputs:
	        File containing the machine learning pipeline for the best performing model.
	        The test-set score is printed to stdout; nothing is returned.
	    """
	    # Remove the target column to get the features dataframe
	    is_feature_column = dataframe.columns != variable_to_predict
	    features_dataframe = dataframe.loc[:, is_feature_column]
	    target_series = dataframe[variable_to_predict]
	    X_train, X_test, y_train, y_test = train_test_split(
	        features_dataframe,
	        target_series,
	        train_size=0.75,
	        test_size=0.25,
	        random_state=random_state,
	    )
	    # Run the TPOT pipeline search on the training portion only
	    tpot = TPOTClassifier(
	        generations=number_generations,
	        population_size=20,
	        verbosity=2,
	        random_state=random_state,
	    )
	    tpot.fit(X_train, y_train)
	    # Report performance on the held-out rows, then persist the winning pipeline
	    print(tpot.score(X_test, y_test))
	    tpot.export(file_to_export_pipeline_to)

	# -----------------------------------------------------------------------------------------------
	# Main block: run the TPOT search on the label-encoded dataframe, predicting 'Class'
	# over 10 generations.
	run_tpot_automl(
	    dataframe=df_label_encoded,
	    variable_to_predict='Class',
	    number_generations=10,
	)