daxaxelrod · August 29, 2017 21:43
diff --git a/nba_nn_keras.py b/nba_nn_keras.py
 import pandas as pd
 import numpy as np
 from keras.models import Sequential
 from keras.layers import Dense
 from keras.wrappers.scikit_learn import KerasRegressor
 from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import KFold
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
 # from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import LabelEncoder

 import keras

 # Load the raw spreadsheet that holds both the feature columns and the target.
 df = pd.read_excel("2015-2016_NBA.xlsx")

 cols_categorical = ["MAIN REF", "CREW", "TEAMS", "VENUE"]

 # Replace every categorical column with its integer category codes so the
 # network receives numeric input. pandas uses the code -1 for missing values.
 for col in cols_categorical:
     df[col] = df[col].astype("category").cat.codes

 # Single regression target; kept as a list so selections stay 2-D DataFrames.
 pred_col = ["OPENING TOTAL"]

 # Feature columns fed to the network (the input width is len(data_cols)).
 data_cols = [
     "MAIN REF", "CREW", "TEAMS", "VENUE",
     "3P",
 ]

 # np.random.seed is a function and has to be called; assigning to it would
 # replace the function and leave the generator unseeded.
 np.random.seed(0)

 # Positional 80/20 split: the first `mask` rows train, the remainder test.
 # iloc slices are end-exclusive, so the two sets never share the boundary row
 # (label-based .loc slices include both endpoints).
 mask = int(round(len(df) * .8))
 print("mask value {}".format(mask))
 train = df.iloc[:mask, :]
 test = df.iloc[mask:, :]

 # NOTE: despite its name, all_test holds the *training* targets; the pair
 # (all_train, all_test) is the X/y input of the disabled cross-validation code.
 all_train = train.loc[:, data_cols]
 all_test = train.loc[:, pred_col]
 print("Training set length: {}".format(len(train)))
 print("Test set length: {}".format(len(test)))

 x_train = train.loc[:, data_cols]
 y_train = train.loc[:, pred_col]

 # Guard against pred_col accidentally growing to several target columns.
 if len(y_train.columns) > 1:
     raise Exception("Something fucked up somewhere. Y has more than one pred_col")

 x_test = test.loc[:, data_cols]
 y_test = test.loc[:, pred_col]

 def build_nn():
     """Build and compile the feed-forward regression network.

     The input width equals ``len(data_cols)``. Three ReLU hidden layers
     (20, 144 and 64 units) feed a single linear output unit, so the
     prediction has the same shape as the one-column target. The model is
     compiled with mean-squared-error loss and the Adam optimizer, and its
     layer summary is printed as a side effect.

     Returns:
         The compiled ``Sequential`` model.
     """
     base_model = Sequential()

     base_model.add(Dense(20, input_shape=(len(data_cols),), activation="relu"))
     base_model.add(Dense(144, activation="relu"))
     base_model.add(Dense(64, activation="relu"))
     # Regression head: one unit with the default linear activation. Without
     # it the network would emit 64 values per sample for a 1-column target.
     base_model.add(Dense(1))

     # Accuracy is a classification metric; mean absolute error is reported
     # instead because the target is continuous.
     base_model.compile(loss='mean_squared_error', optimizer='adam',
                        metrics=['mae'])
     # summary() prints the table itself and returns None, so it is not
     # wrapped in print().
     base_model.summary()
     return base_model

 # Wrap the network factory in the scikit-learn style regressor and train it
 # on the training split for 50 epochs with Keras progress output disabled.
 TL_nba_model = KerasRegressor(
     build_fn=build_nn,
     epochs=50,
     verbose=False,
 )
 TL_nba_model.fit(x_train, y_train)

 # Disabled 10-fold cross-validation of the same estimator, kept for reference:
 # kfold = KFold(n_splits=10, random_state=0)
 # model_results = cross_val_score(TL_nba_model, all_train, all_test, cv=kfold)
 # print("Lets see what we got. Baseline accuracy %.2f (%.2f) MSE" % (model_results.mean(), model_results.std()))
	import pandas as pd
	import numpy as np
	from keras.models import Sequential
	from keras.layers import Dense
	from keras.wrappers.scikit_learn import KerasRegressor
	from sklearn.model_selection import cross_val_score
	from sklearn.model_selection import KFold
	from sklearn.preprocessing import StandardScaler
	from sklearn.model_selection import train_test_split
	# from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import LabelEncoder

	import keras

	# Load the raw spreadsheet that holds both the feature columns and the target.
	df = pd.read_excel("2015-2016_NBA.xlsx")

	cols_categorical = ["MAIN REF", "CREW", "TEAMS", "VENUE"]

	# Replace every categorical column with its integer category codes so the
	# network receives numeric input. pandas uses the code -1 for missing values.
	for col in cols_categorical:
		df[col] = df[col].astype("category").cat.codes

	# Single regression target; kept as a list so selections stay 2-D DataFrames.
	pred_col = ["OPENING TOTAL"]

	# Feature columns fed to the network (the input width is len(data_cols)).
	data_cols = [
		"MAIN REF", "CREW", "TEAMS", "VENUE",
		"3P",
	]

	# np.random.seed is a function and has to be called; assigning to it would
	# replace the function and leave the generator unseeded.
	np.random.seed(0)

	# Positional 80/20 split: the first `mask` rows train, the remainder test.
	# iloc slices are end-exclusive, so the two sets never share the boundary row
	# (label-based .loc slices include both endpoints).
	mask = int(round(len(df) * .8))
	print("mask value {}".format(mask))
	train = df.iloc[:mask, :]
	test = df.iloc[mask:, :]

	# NOTE: despite its name, all_test holds the *training* targets; the pair
	# (all_train, all_test) is the X/y input of the disabled cross-validation code.
	all_train = train.loc[:, data_cols]
	all_test = train.loc[:, pred_col]
	print("Training set length: {}".format(len(train)))
	print("Test set length: {}".format(len(test)))

	x_train = train.loc[:, data_cols]
	y_train = train.loc[:, pred_col]

	# Guard against pred_col accidentally growing to several target columns.
	if len(y_train.columns) > 1:
		raise Exception("Something fucked up somewhere. Y has more than one pred_col")

	x_test = test.loc[:, data_cols]
	y_test = test.loc[:, pred_col]

	def build_nn():
		"""Build and compile the feed-forward regression network.

		The input width equals ``len(data_cols)``. Three ReLU hidden layers
		(20, 144 and 64 units) feed a single linear output unit, so the
		prediction has the same shape as the one-column target. The model is
		compiled with mean-squared-error loss and the Adam optimizer, and its
		layer summary is printed as a side effect.

		Returns:
			The compiled ``Sequential`` model.
		"""
		base_model = Sequential()

		base_model.add(Dense(20, input_shape=(len(data_cols),), activation="relu"))
		base_model.add(Dense(144, activation="relu"))
		base_model.add(Dense(64, activation="relu"))
		# Regression head: one unit with the default linear activation. Without
		# it the network would emit 64 values per sample for a 1-column target.
		base_model.add(Dense(1))

		# Accuracy is a classification metric; mean absolute error is reported
		# instead because the target is continuous.
		base_model.compile(loss='mean_squared_error', optimizer='adam',
			metrics=['mae'])
		# summary() prints the table itself and returns None, so it is not
		# wrapped in print().
		base_model.summary()
		return base_model

	# Wrap the network factory in the scikit-learn style regressor and train it
	# on the training split for 50 epochs with Keras progress output disabled.
	TL_nba_model = KerasRegressor(
		build_fn=build_nn,
		epochs=50,
		verbose=False,
	)
	TL_nba_model.fit(x_train, y_train)

	# Disabled 10-fold cross-validation of the same estimator, kept for reference:
	# kfold = KFold(n_splits=10, random_state=0)
	# model_results = cross_val_score(TL_nba_model, all_train, all_test, cv=kfold)
	# print("Lets see what we got. Baseline accuracy %.2f (%.2f) MSE" % (model_results.mean(), model_results.std()))