# Source: GitHub gist michelkana/2d9118bc93eb2bac06f79d5915cc1fab
# Author: @michelkana
# Created April 27, 2021 06:29
# (GitHub page text captured with the code: "Skip to content";
#  "Instantly share code, notes, and snippets."; "Show Gist options";
#  "Save michelkana/2d9118bc93eb2bac06f79d5915cc1fab to your computer and use it in GitHub Desktop.")
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
def run_single_tree(X_train, y_train, X_test, y_test, depth, random_state=None):
    """Fit one decision tree with a fixed maximum depth and report its accuracy.

    The tree is trained on the training split, scored on both splits, and the
    two scores are printed as percentages before being returned.

    Args:
        X_train: Training features, passed unchanged to ``fit`` and ``score``.
        y_train: Training labels.
        X_test: Held-out features used only for scoring.
        y_test: Held-out labels used only for scoring.
        depth: Forwarded to ``DecisionTreeClassifier`` as ``max_depth``.
        random_state: Optional seed forwarded to the classifier so repeated
            runs can be made reproducible. The default ``None`` leaves the
            classifier's own default in place.

    Returns:
        A tuple ``(accuracy_train, accuracy_test)`` holding the raw values
        returned by ``model.score`` (they are multiplied by 100 only for
        printing).
    """
    model = DecisionTreeClassifier(max_depth=depth, random_state=random_state)
    model.fit(X_train, y_train)

    accuracy_train = model.score(X_train, y_train)
    accuracy_test = model.score(X_test, y_test)

    print('Single tree depth: ', depth)
    print('Accuracy, Training Set: ', round(accuracy_train*100,5), '%')
    print('Accuracy, Test Set: ', round(accuracy_test*100,5), '%')

    return accuracy_train, accuracy_test
# Read the training and test splits from CSV into DataFrames.
data_train = pd.read_csv('data/Higgs_train.csv')
data_test = pd.read_csv('data/Higgs_test.csv')

# Convert each split to NumPy arrays: every column except 'class' becomes the
# feature matrix, and the 'class' column becomes the label vector.
X_train, y_train = (
    data_train.iloc[:, data_train.columns != 'class'].values,
    data_train['class'].values,
)
X_test, y_test = (
    data_test.iloc[:, data_test.columns != 'class'].values,
    data_test['class'].values,
)
# Experiment 1: a single deep tree (max depth 20); the variable names label
# this run as the "overfit" configuration.
sm_overfit_tree_depth = 20
sm_overfit_accuracy_train, sm_overfit_accuracy_test = run_single_tree(
    X_train, y_train, X_test, y_test, sm_overfit_tree_depth
)

# Experiment 2: a single tree at depth 5, described by the author as the depth
# chosen via cross-validation (the search itself is not part of this script).
sm_best_tree_depth = 5
sm_best_tree_accuracy_train, sm_best_tree_accuracy_test = run_single_tree(
    X_train, y_train, X_test, y_test, sm_best_tree_depth
)
# (GitHub page footer captured with the code: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")