sithu · April 20, 2022 02:46
diff --git a/randomforest.py b/randomforest.py
 import pandas as pd
 import numpy as np
 from matplotlib import pyplot as plt
 import utils

 from sklearn.tree import DecisionTreeClassifier
 from sklearn import tree

 np.random.seed(0)

 # Toy spam e-mail data set. Every row supplies one value per DataFrame column,
 # in the order Lottery, Sale, Spam; the Spam column only ever holds 0 or 1.
 emails = np.array(
     [
         [7, 8, 1], [3, 2, 0], [8, 4, 1],
         [2, 6, 0], [6, 5, 1], [9, 6, 1],
         [8, 5, 0], [7, 1, 0], [1, 9, 1],
         [4, 7, 0], [1, 3, 0], [3, 10, 1],
         [2, 2, 1], [9, 3, 0], [5, 3, 0],
         [10, 1, 0], [5, 9, 1], [10, 8, 1],
     ]
 )
 spam_dataset = pd.DataFrame(emails, columns=["Lottery", "Sale", "Spam"])
 # Bare expression: presumably a notebook leftover that displays the frame;
 # it has no effect when the file is run as a plain script.
 spam_dataset

 # Data plot: separate the target column from the two predictor columns and
 # pass both to the project-local plot_points helper.
 labels = spam_dataset["Spam"]
 features = spam_dataset[["Lottery", "Sale"]]
 utils.plot_points(features, labels)

 # Decision Tree
 # Fit a single tree on the whole data set; random_state=0 keeps the fit repeatable.
 decision_tree_classifier = DecisionTreeClassifier(random_state=0)
 decision_tree_classifier.fit(features, labels)
 # score() only returns the mean accuracy, so capture and report it; as a bare
 # statement the value would be dropped when this runs as a script.
 decision_tree_accuracy = decision_tree_classifier.score(features, labels)
 print("Decision tree training accuracy:", decision_tree_accuracy)

 # Draw decision tree
 utils.display_tree(decision_tree_classifier)

 # Decision tree as map
 utils.plot_model(features, labels, decision_tree_classifier)

 # Training a Random Forest
 from sklearn.ensemble import RandomForestClassifier
 # Five trees, each limited to depth 1; random_state=0 keeps the fit repeatable.
 random_forest_classifier = RandomForestClassifier(random_state=0, n_estimators=5, max_depth=1)
 random_forest_classifier.fit(features, labels)
 # score() only returns the mean accuracy, so capture and report it; as a bare
 # statement the value would be dropped when this runs as a script.
 random_forest_accuracy = random_forest_classifier.score(features, labels)
 print("Random forest training accuracy:", random_forest_accuracy)

 # plot
 utils.plot_model(features, labels, random_forest_classifier)

 utils.plot_points(features, labels)

 # Walk through every tree of the fitted forest: print a banner line, draw the
 # tree diagram, then draw the same plot_model view used for the full models.
 banner = "*" * 30
 for dt in random_forest_classifier.estimators_:
     print(banner, "Estimator", banner)
     tree.plot_tree(dt, rounded=True)
     plt.show()
     utils.plot_model(features, labels, dt)
     plt.show()
    
 # Ada Boosting
 from sklearn.ensemble import AdaBoostClassifier
 # Set the random_state so that we always get the same results
 adaboost_classifier = AdaBoostClassifier(random_state=0, n_estimators=6)
 adaboost_classifier.fit(features, labels)
 # score() only returns the mean accuracy, so capture and report it; as a bare
 # statement the value would be dropped when this runs as a script.
 adaboost_accuracy = adaboost_classifier.score(features, labels)
 print("AdaBoost training accuracy:", adaboost_accuracy)

 utils.plot_model(features, labels, adaboost_classifier)
	import pandas as pd
	import numpy as np
	from matplotlib import pyplot as plt
	import utils

	from sklearn.tree import DecisionTreeClassifier
	from sklearn import tree

	np.random.seed(0)

	# Toy spam e-mail data set. Every row supplies one value per DataFrame column,
	# in the order Lottery, Sale, Spam; the Spam column only ever holds 0 or 1.
	emails = np.array(
		[
			[7, 8, 1], [3, 2, 0], [8, 4, 1],
			[2, 6, 0], [6, 5, 1], [9, 6, 1],
			[8, 5, 0], [7, 1, 0], [1, 9, 1],
			[4, 7, 0], [1, 3, 0], [3, 10, 1],
			[2, 2, 1], [9, 3, 0], [5, 3, 0],
			[10, 1, 0], [5, 9, 1], [10, 8, 1],
		]
	)
	spam_dataset = pd.DataFrame(emails, columns=["Lottery", "Sale", "Spam"])
	# Bare expression: presumably a notebook leftover that displays the frame;
	# it has no effect when the file is run as a plain script.
	spam_dataset

	# Data plot: separate the target column from the two predictor columns and
	# pass both to the project-local plot_points helper.
	labels = spam_dataset["Spam"]
	features = spam_dataset[["Lottery", "Sale"]]
	utils.plot_points(features, labels)

	# Decision Tree
	# Fit a single tree on the whole data set; random_state=0 keeps the fit repeatable.
	decision_tree_classifier = DecisionTreeClassifier(random_state=0)
	decision_tree_classifier.fit(features, labels)
	# score() only returns the mean accuracy, so capture and report it; as a bare
	# statement the value would be dropped when this runs as a script.
	decision_tree_accuracy = decision_tree_classifier.score(features, labels)
	print("Decision tree training accuracy:", decision_tree_accuracy)

	# Draw decision tree
	utils.display_tree(decision_tree_classifier)

	# Decision tree as map
	utils.plot_model(features, labels, decision_tree_classifier)

	# Training a Random Forest
	from sklearn.ensemble import RandomForestClassifier
	# Five trees, each limited to depth 1; random_state=0 keeps the fit repeatable.
	random_forest_classifier = RandomForestClassifier(random_state=0, n_estimators=5, max_depth=1)
	random_forest_classifier.fit(features, labels)
	# score() only returns the mean accuracy, so capture and report it; as a bare
	# statement the value would be dropped when this runs as a script.
	random_forest_accuracy = random_forest_classifier.score(features, labels)
	print("Random forest training accuracy:", random_forest_accuracy)

	# plot
	utils.plot_model(features, labels, random_forest_classifier)

	utils.plot_points(features, labels)

	# Walk through every tree of the fitted forest: print a banner line, draw the
	# tree diagram, then draw the same plot_model view used for the full models.
	for dt in random_forest_classifier.estimators_:
		# Banner is 30 asterisks on each side of the word "Estimator".
		print("*"*30, "Estimator", "*"*30)
		tree.plot_tree(dt, rounded=True)
		plt.show()
		utils.plot_model(features, labels, dt)
		plt.show()

	# Ada Boosting
	from sklearn.ensemble import AdaBoostClassifier
	# Set the random_state so that we always get the same results
	adaboost_classifier = AdaBoostClassifier(random_state=0, n_estimators=6)
	adaboost_classifier.fit(features, labels)
	# score() only returns the mean accuracy, so capture and report it; as a bare
	# statement the value would be dropped when this runs as a script.
	adaboost_accuracy = adaboost_classifier.score(features, labels)
	print("AdaBoost training accuracy:", adaboost_accuracy)

	utils.plot_model(features, labels, adaboost_classifier)