tobitech · June 27, 2020 17:06
diff --git a/music1.txt b/music1.txt
 import pandas as pd
 from sklearn.tree import DecisionTreeClassifier

 # Load the music data set; every column except 'genre' is an input feature.
 music_data = pd.read_csv('music.csv')
 X = music_data.drop(columns=['genre'])
 y = music_data['genre']

 # Train a decision tree on the whole data set.
 model = DecisionTreeClassifier()
 model.fit(X, y)

 # Predict the genre for two samples in one call. The samples are wrapped in
 # a DataFrame that reuses X's column labels so they match the named features
 # the model was fitted on; a bare nested list makes recent scikit-learn
 # releases warn that "X does not have valid feature names".
 predictions = model.predict(
     pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
 )
diff --git a/music2.txt b/music2.txt
 import pandas as pd
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score

 # read_csv returns a DataFrame: a table of rows and columns, much like a spreadsheet.
 music_data = pd.read_csv('music.csv')
 y = music_data['genre']  # indexing with `[]` selects all the values of one column
 X = music_data.drop(columns=['genre'])  # every column except the target

 # Hold back 20% of the rows for testing. train_test_split returns the four
 # pieces as one list, which is unpacked straight into separate names.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

 # Train on the training portion only (not the full X, y) so the held-back
 # rows stay unseen until evaluation. fit() takes the input set and the
 # output set and returns the fitted estimator.
 model = DecisionTreeClassifier().fit(X_train, y_train)

 # predict() takes a two-dimensional input with one row per sample, so many
 # predictions are made in a single call. Ad-hoc samples can be passed the
 # same way, e.g. [[21, 1], [22, 0]] (per the original note: a 21-year-old
 # male and a 22-year-old female). Here the test inputs are used instead.
 predictions = model.predict(X_test)

 # accuracy_score takes the expected values first and the predictions second,
 # and returns the fraction predicted correctly, between 0 and 1.
 score = accuracy_score(y_test, predictions)
 # 1.0 means 100% of the test rows were predicted correctly; the split is
 # random, so the value can differ from run to run.
 score
diff --git a/music3.txt b/music3.txt
 import pandas as pd
 from sklearn.tree import DecisionTreeClassifier
 import joblib

 # Load the training data: 'genre' is the target, the other columns are inputs.
 music_data = pd.read_csv('music.csv')
 y = music_data['genre']
 X = music_data.drop(columns=['genre'])

 # Fit a decision tree on the full data set (fit() returns the estimator).
 model = DecisionTreeClassifier().fit(X, y)

 # Save the fitted model to disk so it can be reloaded later without retraining.
 joblib.dump(model, 'music-recommender.joblib')
diff --git a/music4.txt b/music4.txt
 import pandas as pd
 from sklearn.tree import DecisionTreeClassifier
 import joblib

 # Load the previously persisted model from disk instead of training again.
 # NOTE: joblib.load unpickles the file and can execute arbitrary code, so
 # only load model files that come from a trusted source.
 model = joblib.load('music-recommender.joblib')

 # Predict for a single sample. predict() expects a two-dimensional input
 # (one row per sample), hence the nested list.
 predictions = model.predict([[21, 1]])
 predictions
diff --git a/music5.txt b/music5.txt
 # Visualize model decision tree in a graph

 import pandas as pd
 from sklearn.tree import DecisionTreeClassifier
 from sklearn import tree

 # Load the data set and separate the target column from the inputs.
 music_data = pd.read_csv('music.csv')
 y = music_data['genre']
 X = music_data.drop(columns=['genre'])

 # Fit a decision tree on the full data set (fit() returns the estimator).
 model = DecisionTreeClassifier().fit(X, y)

 # Write the fitted tree to a file in Graphviz DOT format, a graph
 # description language.
 tree.export_graphviz(
     model,
     out_file='music-recommender.dot',
     feature_names=['age', 'gender'],  # labels for the input columns
     class_names=sorted(y.unique()),  # distinct genres, sorted ascending
     filled=True,  # colour the nodes
     rounded=True,  # draw node boxes with rounded corners
     label='all',  # put the informative labels on every node
 )
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier

	# Load the music data set; every column except 'genre' is an input feature.
	music_data = pd.read_csv('music.csv')
	X = music_data.drop(columns=['genre'])
	y = music_data['genre']

	# Train a decision tree on the whole data set.
	model = DecisionTreeClassifier()
	model.fit(X, y)

	# Predict the genre for two samples in one call. The samples are wrapped in
	# a DataFrame that reuses X's column labels so they match the named features
	# the model was fitted on; a bare nested list makes recent scikit-learn
	# releases warn that "X does not have valid feature names".
	predictions = model.predict(
		pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
	)
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score

	# read_csv returns a DataFrame: a table of rows and columns, much like a spreadsheet.
	music_data = pd.read_csv('music.csv')
	y = music_data['genre']  # indexing with `[]` selects all the values of one column
	X = music_data.drop(columns=['genre'])  # every column except the target

	# Hold back 20% of the rows for testing. train_test_split returns the four
	# pieces as one list, which is unpacked straight into separate names.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

	# Train on the training portion only (not the full X, y) so the held-back
	# rows stay unseen until evaluation. fit() takes the input set and the
	# output set and returns the fitted estimator.
	model = DecisionTreeClassifier().fit(X_train, y_train)

	# predict() takes a two-dimensional input with one row per sample, so many
	# predictions are made in a single call. Ad-hoc samples can be passed the
	# same way, e.g. [[21, 1], [22, 0]] (per the original note: a 21-year-old
	# male and a 22-year-old female). Here the test inputs are used instead.
	predictions = model.predict(X_test)

	# accuracy_score takes the expected values first and the predictions second,
	# and returns the fraction predicted correctly, between 0 and 1.
	score = accuracy_score(y_test, predictions)
	# 1.0 means 100% of the test rows were predicted correctly; the split is
	# random, so the value can differ from run to run.
	score
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	import joblib

	# Load the training data: 'genre' is the target, the other columns are inputs.
	music_data = pd.read_csv('music.csv')
	y = music_data['genre']
	X = music_data.drop(columns=['genre'])

	# Fit a decision tree on the full data set (fit() returns the estimator).
	model = DecisionTreeClassifier().fit(X, y)

	# Save the fitted model to disk so it can be reloaded later without retraining.
	joblib.dump(model, 'music-recommender.joblib')
	# Visualize model decision tree in a graph

	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn import tree

	# Load the data set and separate the target column from the inputs.
	music_data = pd.read_csv('music.csv')
	y = music_data['genre']
	X = music_data.drop(columns=['genre'])

	# Fit a decision tree on the full data set (fit() returns the estimator).
	model = DecisionTreeClassifier().fit(X, y)

	# Write the fitted tree to a file in Graphviz DOT format, a graph
	# description language.
	tree.export_graphviz(
		model,
		out_file='music-recommender.dot',
		feature_names=['age', 'gender'],  # labels for the input columns
		class_names=sorted(y.unique()),  # distinct genres, sorted ascending
		filled=True,  # colour the nodes
		rounded=True,  # draw node boxes with rounded corners
		label='all',  # put the informative labels on every node
	)