Last active
July 13, 2018 16:55
-
-
Save psykzz/957b4b51b26c042622453a4805dc8c62 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spot-check several scikit-learn classifiers on merged match/participant data.
#
# Reads ./matches.csv and ./matches_participants.csv, joins them per match,
# label-encodes the categorical columns, holds out 20% of the rows for
# validation and prints the 10-fold cross-validated accuracy (mean and
# standard deviation) of each model on the remaining rows.

# Load libraries
import pandas
from pandas.plotting import scatter_matrix
# import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Auto convert
from sklearn.preprocessing import LabelEncoder

# Column layouts of the two CSV files. They are passed as `names=`, so pandas
# treats every line of the files as data (assumes the files have no header
# row -- TODO confirm against the actual exports).
column_names = [
    "id", "region", "winner", "queue", "map", "season", "patch", "creation",
    "duration", "rank",
]
p_column_names = [
    "id", "match_id", "region", "team_id", "summoner_id", "role",
    "champion_id", "kills", "deaths", "assists", "cs", "first_blood",
    "first_tower", "first_inhibitor", "largest_kill", "largest_spree",
    "tower_kills", "inhibitor_kills", "gold_earned", "last_season",
    "spell_d", "spell_f", "item_0", "item_1", "item_2", "item_3", "item_4",
    "item_5", "item_6", "gold_0_10", "gold_10_20", "xp_0_10", "xp_10_20",
    "double_kills", "triple_kills", "quadra_kills", "penta_kills", "level",
    "vision",
]

# One output row per participant: each participant row is paired with its
# match row on (match id, region). The inner join drops unmatched rows.
dataset = pandas.read_csv('./matches.csv', names=column_names).merge(
    pandas.read_csv('./matches_participants.csv', names=p_column_names),
    how='inner', left_on=['id', 'region'], right_on=['match_id', 'region']
)

# Both inputs have an `id` column, so the merge suffixes them as id_x / id_y.
# Those two and the match metadata below are not used as features.
# `patch` is intentionally kept.
dataset = dataset.drop(
    columns=['id_y', 'id_x', 'creation', 'map', 'season', 'queue']
)

# Convert the categorical columns to integer codes so the estimators can
# consume them.
for name in ['region', 'rank', 'role', 'last_season', 'first_blood',
             'first_tower', 'first_inhibitor']:
    dataset[name] = LabelEncoder().fit_transform(dataset[name].values)

print(dataset.head(20))

# start the things
# The label is `winner`, which is positional column 1 after the drops above
# (the original code selected it as array[:, 1]). The original comment noted
# that column 4 is `rank`; set target_column = 'rank' to predict that instead.
target_column = 'winner'

# The target column must NOT be part of the feature matrix. Previously X was
# the whole array, so the label itself was fed to the models as a feature and
# the reported accuracies were meaningless.
X = dataset.drop(columns=[target_column]).values
Y = dataset[target_column].values

validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
scoring = 'accuracy'

# Spot Check Algorithms
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
]
# SVC is imported but left out of the spot check: it took too long to train.

# `random_state` only has an effect when shuffling is enabled, and newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
# With a fixed integer seed every split() call yields the same folds, so a
# single splitter is shared by all models and each one is scored on
# identical folds.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

# evaluate each model in turn
results = []
names = []
for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
numpy==1.14.5
pandas==0.23.3
python-dateutil==2.7.3
pytz==2018.5
scikit-learn==0.19.1
scipy==1.1.0
six==1.11.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment