Skip to content

Instantly share code, notes, and snippets.

View erykml's full-sized avatar

Eryk Lewinson erykml

View GitHub Profile
# Flatten module: collapses every dimension after the batch axis so a conv
# feature map can feed a fully-connected layer inside nn.Sequential.
class Flatten(nn.Module):
    def forward(self, x):
        batch_size = x.size(0)
        # -1 lets view infer the flattened feature count from the remaining dims
        return x.view(batch_size, -1)
# use OrderedDict to give meaningful names for layers
# NOTE(review): this snippet is truncated by the page scrape — the OrderedDict
# list and the nn.Sequential(...) call are never closed in this fragment, so
# only the first three layers of the architecture are visible.
model = nn.Sequential(OrderedDict([
    ('conv_1', nn.Conv2d(3, 16, 3, padding=1)),
    ('relu_1', nn.ReLU()),
    ('max_pool_1', nn.MaxPool2d(2, 2)),
# NOTE(review): fragment truncated by the page scrape — the docstring is never
# closed and the function body is not visible; L28-29 below are example-usage
# text inside the open docstring, not executable code. No comments are added
# inside because they would become docstring content.
def train_cnn(model, train_loader, valid_loader,
              criterion, optimizer, n_epochs = 30, train_on_gpu = False,
              save_model_on_improvement = True, plot_loss = True):
    '''
    Function for training the CNN given input parameters. Can be run on CPU or GPU.
    The function automatically verifies whether the selected criterion is Binary cross-entropy and if so
    converts tensors to appropriate type.
    Inputs:
    model - architecture of the neural network defined using either Class approach or Sequential
    model = train_cnn(model, train_loader, valid_loader,
    criterion, optimizer, n_epochs = 30, train_on_gpu = True)
# sigmoid for mapping raw network outputs onto [0, 1] probabilities
f_sigmoid = nn.Sigmoid()

# running accumulator for the test loss
test_loss = 0.0

# buffers for misclassified samples: the image, its true label, the predicted label
incorrect_images, true_label, incorrect_label = [], [], []
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2; fetch the
# same Boston-housing dataset from OpenML instead. The returned Bunch keeps the
# .data / .target / .feature_names attributes the rest of this code relies on.
boston = fetch_openml(name="boston", version=1, as_frame=False)
y = boston.target
X = pd.DataFrame(boston.data, columns=boston.feature_names)
# add a pure-noise column as a sanity baseline for feature-importance comparisons:
# any real feature should rank above it
np.random.seed(seed=42)
X['random'] = np.random.random(size=len(X))
# fixed random_state keeps the train/validation split reproducible
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
@erykml
erykml / rf.py
Created February 11, 2019 21:49
from sklearn.ensemble import RandomForestRegressor

# forest configuration: out-of-bag scoring is enabled so we get a free
# validation estimate without touching the held-out set
forest_params = {
    'n_estimators': 100,
    'n_jobs': -1,
    'oob_score': True,
    'bootstrap': True,
    'random_state': 42,
}
rf = RandomForestRegressor(**forest_params)
rf.fit(X_train, y_train)
print('R^2 Training Score: {:.2f} \nOOB Score: {:.2f} \nR^2 Validation Score: {:.2f}'.format(rf.score(X_train, y_train),
from sklearn.metrics import r2_score
from rfpimp import permutation_importances

def r2(rf, X_train, y_train):
    """Return the R^2 score of *rf* on the training data.

    Used as the scoring callback for rfpimp's permutation importance.
    """
    predictions = rf.predict(X_train)
    return r2_score(y_train, predictions)

# importance = drop in R^2 when each column is shuffled in turn
perm_imp_rfpimp = permutation_importances(rf, X_train, y_train, r2)
import eli5
from eli5.sklearn import PermutationImportance

# refit=False / cv=None: score the already-fitted rf directly; each column is
# shuffled n_iter=50 times and the average score drop is its importance
base_perm = PermutationImportance(rf, cv=None, refit=False, n_iter=50)
perm = base_perm.fit(X_train, y_train)
perm_imp_eli5 = imp_df(X_train.columns, perm.feature_importances_)
from sklearn.base import clone

# NOTE(review): fragment truncated by the page scrape — the per-column drop
# loop, scoring, and return statement are not visible here; only the benchmark
# setup survives.
def drop_col_feat_imp(model, X_train, y_train, random_state = 42):
    # clone the model to have the exact same specification as the one initially trained
    model_clone = clone(model)
    # set random_state for comparability
    model_clone.random_state = random_state
    # training and scoring the benchmark model
    model_clone.fit(X_train, y_train)
from treeinterpreter import treeinterpreter as ti, utils
# decompose the forest's predictions for a couple of rows into
# bias (training-set mean) + per-feature contributions
selected_rows = [31, 85]
selected_df = X_train.iloc[selected_rows, :].values
prediction, bias, contributions = ti.predict(rf, selected_df)
# idiomatic enumerate instead of range(len(...)); `i` stays bound for any
# contribution-printing lines that may follow this truncated fragment
for i, row in enumerate(selected_rows):
    print("Row", row)
    print("Prediction:", prediction[i][0], 'Actual Value:', y_train[row])
    print("Bias (trainset mean)", bias[i])