Last active
August 29, 2017 21:43
-
-
Save daxaxelrod/be0b1806f15d99fc05df4947898cd151 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from keras.models import Sequential | |
from keras.layers import Dense | |
from keras.wrappers.scikit_learn import KerasRegressor | |
from sklearn.model_selection import cross_val_score | |
from sklearn.model_selection import KFold | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.model_selection import train_test_split | |
# from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import LabelEncoder | |
import keras | |
# Load the 2015-2016 NBA game log. The path is relative, so the workbook must
# sit in the current working directory.
df = pd.read_excel("2015-2016_NBA.xlsx")

# Text-valued columns that must become integers before they can be fed to the
# network.
cols_categorical = ["MAIN REF", "CREW", "TEAMS", "VENUE"]

# Cast each categorical column to pandas' "category" dtype and keep only the
# integer code assigned to every distinct value.
# NOTE: pandas uses code -1 for missing values; they are left as -1 here.
df[cols_categorical] = (
    df[cols_categorical]
    .astype("category")
    .apply(lambda column: column.cat.codes)
)
# Regression target (kept as a one-element list so selections stay 2-D) and
# the feature columns used as network inputs.
pred_col = ["OPENING TOTAL"]
data_cols = [
    "MAIN REF",
    "CREW",
    "TEAMS",
    "VENUE",
    "3P",
]

# Seed NumPy's global RNG. This must be a call: assigning to `np.random.seed`
# would silently replace the seeding function with an int and seed nothing.
# NOTE(review): this seeds NumPy only; whether the Keras backend draws from it
# depends on the backend in use -- confirm if reproducibility matters.
np.random.seed(0)

# Position of the 80/20 split. `int()` keeps the value usable as an index even
# where `round()` returns a float.
mask = int(round(len(df) * .8))
print("mask value {}".format(mask))

# Split by position with `iloc` (end-exclusive). Label slicing with `loc` is
# end-inclusive, which would put row `mask` in both the training and test sets.
# The rows are not shuffled: the first 80% train, the last 20% test.
train = df.iloc[:mask, :]
test = df.iloc[mask:, :]

# Features/targets of the training portion, used by the (currently disabled)
# cross-validation code. Despite its name, `all_test` holds training targets.
all_train = train.loc[:, data_cols]
all_test = train.loc[:, pred_col]

print("Training set length: {}".format(len(train)))
print("Test set length: {}".format(len(test)))

x_train = train.loc[:, data_cols]
y_train = train.loc[:, pred_col]

# Sanity check: the model is built for a single regression target.
if len(y_train.columns) > 1:
    raise Exception("Something fucked up somewhere. Y has more than one pred_col")

x_test = test.loc[:, data_cols]
y_test = test.loc[:, pred_col]
def build_nn():
    """Build and compile the feed-forward regression network.

    The input width is taken from the module-level ``data_cols`` list. Three
    ReLU hidden layers (20, 144 and 64 units) feed a single linear output
    unit, so each sample yields one prediction matching the single target
    column.

    Returns:
        A compiled ``Sequential`` model using mean-squared-error loss and the
        Adam optimizer, reporting mean absolute error as an extra metric.
    """
    base_model = Sequential()
    base_model.add(Dense(20, input_shape=(len(data_cols),), activation="relu"))
    base_model.add(Dense(144, activation="relu"))
    base_model.add(Dense(64, activation="relu"))
    # Single linear unit: without it the network would emit 64 values per
    # sample, which cannot be matched against a one-column target.
    base_model.add(Dense(1))
    # Accuracy is a classification metric; MAE is meaningful for regression.
    base_model.compile(loss='mean_squared_error', optimizer='adam',
                       metrics=['mae'])
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print().
    base_model.summary()
    return base_model
# Wrap the network builder in the scikit-learn compatible regressor and fit it
# on the training split: 50 epochs, with Keras progress output switched off.
TL_nba_model = KerasRegressor(
    build_fn=build_nn,
    epochs=50,
    verbose=False,
)
TL_nba_model.fit(x_train, y_train)

# Disabled: 10-fold cross-validation over the training portion.
# kfold = KFold(n_splits=10, random_state=0)
# model_results = cross_val_score(TL_nba_model, all_train, all_test, cv=kfold)
# print("Lets see what we got. Basline accuracy %.2f (%.2f) MSE" % (model_results.mean(), model_results.std()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment