Last active
October 30, 2017 00:28
-
-
Save X4/8d57caedae6197f9905f73b554ea449d to your computer and use it in GitHub Desktop.
Building a SciKit-Learn Pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Building a scikit-learn pipeline: fit three classifiers on the iris dataset
# and plot a learning curve for each of them.

# Make ML-pipeline runs reproducible: a single seed shared by the data split
# and by every estimator that accepts a `random_state`.
random_state = 7

# Testing our pipeline with an ML-ready dataset
from sklearn.datasets import load_iris
# train_test_split is used below; it was previously called without ever being
# imported, which raised a NameError.
from sklearn.model_selection import train_test_split

iris = load_iris()

# Assign ML-input and ML-target
Xi = iris.data    # Input
yi = iris.target  # Output

# Split dataset into train/test for input/output.
# The seed must be passed by keyword: every positional argument of
# train_test_split is treated as another array to split.
Xi_train, Xi_test, yi_train, yi_test = train_test_split(
    Xi, yi, random_state=random_state
)

# loading ml libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# loading visualisation libraries
# Equivalent of the `%matplotlib inline` notebook magic, spelled as valid
# Python so the file also parses (and runs) outside IPython/Jupyter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass  # not running under IPython; figures are shown by plt.show() below
import matplotlib.pyplot as plt
# NOTE(review): `scikitplot.plotters` is the deprecated location of
# plot_learning_curve; newer scikit-plot releases expose it as
# `scikitplot.estimators.plot_learning_curve` -- confirm the installed version
# before switching.
import scikitplot.plotters as skplt

# instantiate learning models
# KNeighborsClassifier takes no `random_state` parameter, so no seed is passed.
knn = KNeighborsClassifier(n_neighbors=2)
tree = DecisionTreeClassifier(max_leaf_nodes=3, random_state=random_state)
forest = RandomForestClassifier(
    n_estimators=500, n_jobs=-1, random_state=random_state
)

# train the models
knn.fit(Xi_train, yi_train)
tree.fit(Xi_train, yi_train)
forest.fit(Xi_train, yi_train)

# create predictions (class labels, not probabilities) on the held-out split;
# each model keeps its own result instead of overwriting a shared variable.
pred_knn = knn.predict(Xi_test)
pred_tree = tree.predict(Xi_test)
pred_forest = forest.predict(Xi_test)
# Backward compatibility: the original script left `pred1` bound to the
# forest's predictions (the last of three successive assignments).
pred1 = pred_forest

# visualize learning curve
# NOTE(review): the curves are computed on the test split, as in the original
# script; learning curves are normally drawn from the training (or full)
# data -- confirm this is intended.
skplt.plot_learning_curve(knn, Xi_test, yi_test)
skplt.plot_learning_curve(tree, Xi_test, yi_test)
skplt.plot_learning_curve(forest, Xi_test, yi_test)

# Render the figures when run as a plain script; harmless with the inline
# notebook backend.
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment