Last active
June 27, 2020 17:06
-
-
Save tobitech/411d6b63dde22b3932bdc92ea58c4581 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Load the data set: every column except 'genre' is an input feature,
# and 'genre' is the label the model learns to predict.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Train a decision tree on the whole data set.
model = DecisionTreeClassifier()
model.fit(X, y)

# Ask for two predictions at once (one row per sample). The rows are wrapped
# in a DataFrame that reuses the training column names: the model was fitted
# on a DataFrame, and recent scikit-learn releases warn when predict() is
# handed data without matching feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seed shared by the split and the tree so that repeated runs give the same
# score. Both steps are randomized when no seed is supplied.
SEED = 42

# read_csv returns a DataFrame, similar to an Excel spreadsheet.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']  # `[]` selects all the values in a given column

# Hold out 20% of the rows for testing. train_test_split returns a list of
# four pieces (train/test inputs, then train/test outputs), unpacked here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED)

# Train on the training portion only, so the test rows stay unseen.
model = DecisionTreeClassifier(random_state=SEED)
model.fit(X_train, y_train)

# Predict the label of every held-out row.
predictions = model.predict(X_test)

# accuracy_score takes the expected values first, then the predictions, and
# returns the fraction predicted correctly, between 0 and 1. The value depends
# on which rows land in the test set; change SEED to see how much it varies.
score = accuracy_score(y_test, predictions)
score
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# Read the data set and separate the 'genre' label from the input columns.
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop(columns=['genre'])

# fit() returns the estimator itself, so construction and training chain.
model = DecisionTreeClassifier().fit(X, y)

# Save the trained model to disk so it can be reloaded without retraining.
joblib.dump(model, 'music-recommender.joblib')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# Restore the already-trained model from disk; nothing is retrained here.
# NOTE(review): joblib.load unpickles the file, so only load files you trust.
model = joblib.load('music-recommender.joblib')

# predict() expects a two-dimensional input: one inner list per sample.
samples = [[21, 1]]
predictions = model.predict(samples)
predictions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualize the model's decision tree as a graph
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Load the data set and split it into input columns and the 'genre' label.
music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X, y)

# Write the fitted tree in DOT format, the Graphviz graph description language.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    # Take the names from the training frame so node labels always match the
    # column order the model was fitted on, instead of a hard-coded list.
    feature_names=list(X.columns),
    # export_graphviz expects class names in ascending order.
    class_names=sorted(y.unique()),
    label='all',
    rounded=True,
    filled=True,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment