kperry2215 · October 23, 2019 01:41
diff --git a/tpot_automated_pipeline.py b/tpot_automated_pipeline.py
 import numpy as np
 import pandas as pd
 from sklearn.ensemble import ExtraTreesClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import BernoulliNB
 from sklearn.pipeline import make_pipeline, make_union
 from sklearn.preprocessing import RobustScaler
 from tpot.builtins import StackingEstimator

 # NOTE: the class/label column in the data file must be named 'target'.
 # Every column is parsed as float64.
 tpot_data = pd.read_csv(
     'PATH/TO/DATA/FILE',
     sep='COLUMN_SEPARATOR',
     dtype=np.float64,
 )
 features = tpot_data.drop(columns='target').values

 # Split into train/test partitions using train_test_split's default test
 # size. random_state=None means no fixed seed is supplied for the shuffle.
 (
     training_features,
     testing_features,
     training_target,
     testing_target,
 ) = train_test_split(
     features,
     tpot_data['target'].values,
     random_state=None,
 )

 # Average CV score on the training set was: 0.7615725359911407
 # Steps run in order; each StackingEstimator wraps a classifier, and the
 # final ExtraTreesClassifier produces the predictions.
 exported_pipeline = make_pipeline(
     StackingEstimator(
         estimator=BernoulliNB(alpha=0.001, fit_prior=False),
     ),
     RobustScaler(),
     RobustScaler(),
     StackingEstimator(
         estimator=ExtraTreesClassifier(
             bootstrap=False,
             criterion="gini",
             max_features=0.35000000000000003,
             min_samples_leaf=17,
             min_samples_split=6,
             n_estimators=100,
         ),
     ),
     StackingEstimator(
         estimator=BernoulliNB(alpha=1.0, fit_prior=False),
     ),
     RobustScaler(),
     ExtraTreesClassifier(
         bootstrap=False,
         criterion="entropy",
         max_features=0.55,
         min_samples_leaf=6,
         min_samples_split=19,
         n_estimators=100,
     ),
 )

 # Fit on the training partition (fit returns the pipeline itself), then
 # predict labels for the held-out features.
 results = exported_pipeline.fit(
     training_features, training_target
 ).predict(testing_features)
	import numpy as np
	import pandas as pd
	from sklearn.ensemble import ExtraTreesClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import BernoulliNB
	from sklearn.pipeline import make_pipeline, make_union
	from sklearn.preprocessing import RobustScaler
	from tpot.builtins import StackingEstimator

	# NOTE: the class/label column in the data file must be named 'target'.
	# Every column is parsed as float64.
	tpot_data = pd.read_csv(
		'PATH/TO/DATA/FILE',
		sep='COLUMN_SEPARATOR',
		dtype=np.float64,
	)
	features = tpot_data.drop(columns='target').values

	# Split into train/test partitions using train_test_split's default test
	# size. random_state=None means no fixed seed is supplied for the shuffle.
	(
		training_features,
		testing_features,
		training_target,
		testing_target,
	) = train_test_split(
		features,
		tpot_data['target'].values,
		random_state=None,
	)

	# Average CV score on the training set was: 0.7615725359911407
	# Steps run in order; each StackingEstimator wraps a classifier, and the
	# final ExtraTreesClassifier produces the predictions.
	exported_pipeline = make_pipeline(
		StackingEstimator(
			estimator=BernoulliNB(alpha=0.001, fit_prior=False),
		),
		RobustScaler(),
		RobustScaler(),
		StackingEstimator(
			estimator=ExtraTreesClassifier(
				bootstrap=False,
				criterion="gini",
				max_features=0.35000000000000003,
				min_samples_leaf=17,
				min_samples_split=6,
				n_estimators=100,
			),
		),
		StackingEstimator(
			estimator=BernoulliNB(alpha=1.0, fit_prior=False),
		),
		RobustScaler(),
		ExtraTreesClassifier(
			bootstrap=False,
			criterion="entropy",
			max_features=0.55,
			min_samples_leaf=6,
			min_samples_split=19,
			n_estimators=100,
		),
	)

	# Fit on the training partition (fit returns the pipeline itself), then
	# predict labels for the held-out features.
	results = exported_pipeline.fit(
		training_features, training_target
	).predict(testing_features)