Skip to content

Instantly share code, notes, and snippets.

View erykml's full-sized avatar

Eryk Lewinson erykml

View GitHub Profile
# Flatten module: collapses every dimension after the batch axis so a conv
# feature map can feed a fully-connected layer inside nn.Sequential.
class Flatten(nn.Module):
    def forward(self, x):
        batch_size = x.size(0)
        # -1 lets view infer the flattened feature count from the remaining dims
        return x.view(batch_size, -1)
# use OrderedDict to give meaningful names for layers
# NOTE(review): this snippet is truncated by the page scrape — the OrderedDict
# list and the nn.Sequential(...) call are never closed in this fragment, so
# only the first three layers of the architecture are visible.
model = nn.Sequential(OrderedDict([
    ('conv_1', nn.Conv2d(3, 16, 3, padding=1)),
    ('relu_1', nn.ReLU()),
    ('max_pool_1', nn.MaxPool2d(2, 2)),
# NOTE(review): fragment truncated by the page scrape — the docstring is never
# closed and the function body is not visible; L28-29 below are example-usage
# text inside the open docstring, not executable code. No comments are added
# inside because they would become docstring content.
def train_cnn(model, train_loader, valid_loader,
              criterion, optimizer, n_epochs = 30, train_on_gpu = False,
              save_model_on_improvement = True, plot_loss = True):
    '''
    Function for training the CNN given input parameters. Can be run on CPU or GPU.
    The function automatically verifies whether the selected criterion is Binary cross-entropy and if so
    converts tensors to appropriate type.
    Inputs:
    model - architecture of the neural network defined using either Class approach or Sequential
    model = train_cnn(model, train_loader, valid_loader,
    criterion, optimizer, n_epochs = 30, train_on_gpu = True)
# sigmoid for mapping raw network outputs onto [0, 1] probabilities
f_sigmoid = nn.Sigmoid()

# running accumulator for the test loss
test_loss = 0.0

# buffers for misclassified samples: the image, its true label, the predicted label
incorrect_images, true_label, incorrect_label = [], [], []
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2; fetch the
# same Boston-housing dataset from OpenML instead. The returned Bunch keeps the
# .data / .target / .feature_names attributes the rest of this code relies on.
boston = fetch_openml(name="boston", version=1, as_frame=False)
y = boston.target
X = pd.DataFrame(boston.data, columns=boston.feature_names)
# add a pure-noise column as a sanity baseline for feature-importance comparisons:
# any real feature should rank above it
np.random.seed(seed=42)
X['random'] = np.random.random(size=len(X))
# fixed random_state keeps the train/validation split reproducible
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
@erykml
erykml / rf.py
Created February 11, 2019 21:49
from sklearn.ensemble import RandomForestRegressor

# forest configuration: out-of-bag scoring is enabled so we get a free
# validation estimate without touching the held-out set
forest_params = {
    'n_estimators': 100,
    'n_jobs': -1,
    'oob_score': True,
    'bootstrap': True,
    'random_state': 42,
}
rf = RandomForestRegressor(**forest_params)
rf.fit(X_train, y_train)
print('R^2 Training Score: {:.2f} \nOOB Score: {:.2f} \nR^2 Validation Score: {:.2f}'.format(rf.score(X_train, y_train),
from sklearn.metrics import r2_score
from rfpimp import permutation_importances

def r2(rf, X_train, y_train):
    """Return the R^2 score of *rf* on the training data.

    Used as the scoring callback for rfpimp's permutation importance.
    """
    predictions = rf.predict(X_train)
    return r2_score(y_train, predictions)

# importance = drop in R^2 when each column is shuffled in turn
perm_imp_rfpimp = permutation_importances(rf, X_train, y_train, r2)
import eli5
from eli5.sklearn import PermutationImportance

# refit=False / cv=None: score the already-fitted rf directly; each column is
# shuffled n_iter=50 times and the average score drop is its importance
base_perm = PermutationImportance(rf, cv=None, refit=False, n_iter=50)
perm = base_perm.fit(X_train, y_train)
perm_imp_eli5 = imp_df(X_train.columns, perm.feature_importances_)
from sklearn.base import clone

# NOTE(review): fragment truncated by the page scrape — the per-column drop
# loop, scoring, and return statement are not visible here; only the benchmark
# setup survives.
def drop_col_feat_imp(model, X_train, y_train, random_state = 42):
    # clone the model to have the exact same specification as the one initially trained
    model_clone = clone(model)
    # set random_state for comparability
    model_clone.random_state = random_state
    # training and scoring the benchmark model
    model_clone.fit(X_train, y_train)
from treeinterpreter import treeinterpreter as ti, utils
# decompose the forest's predictions for a couple of rows into
# bias (training-set mean) + per-feature contributions
selected_rows = [31, 85]
selected_df = X_train.iloc[selected_rows, :].values
prediction, bias, contributions = ti.predict(rf, selected_df)
# idiomatic enumerate instead of range(len(...)); `i` stays bound for any
# contribution-printing lines that may follow this truncated fragment
for i, row in enumerate(selected_rows):
    print("Row", row)
    print("Prediction:", prediction[i][0], 'Actual Value:', y_train[row])
    print("Bias (trainset mean)", bias[i])