This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the ratings table from CSV.
ratings = pd.read_csv('ratings_small.csv')
# ratings_small.csv has 4 columns - userId, movieId, ratings, and timestamp.
# It is the most generic data format for CF-related data.
val_indx = get_cv_idxs(len(ratings))  # index for validation set
wd = 2e-4  # weight decay
n_factors = 50  # dimension of the embedding matrix (D)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from surprise import SVD, evaluate
from surprise import NMF

# NOTE(review): `Dataset.split` and `evaluate` belong to the early Surprise
# API; newer releases appear to replace them with
# `surprise.model_selection.cross_validate(algo, data, measures=['RMSE'], cv=5)`.
# Confirm against the installed Surprise version before running.

# Split data into 5 folds.
data.split(n_folds=5)

# SVD, scored with RMSE.
algo = SVD()
evaluate(algo, data, measures=['RMSE'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://surprise.readthedocs.io/en/stable/getting_started.html
# I prefer loading every dataset through a pandas DataFrame,
# but you can also load a dataset from CSV or whatever suits you.
ratings = pd.read_csv('ratings_small.csv')  # read data into a pandas df
from surprise import Reader, Dataset
# To load a dataset from a pandas df, use the `load_from_df` method of the surprise lib.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plotting after prediction: reorder inputs and predictions by ascending x.
xa = np.array(x.x)  # the column of x is named 'x'
order = np.argsort(xa)  # permutation that sorts xa in ascending order
xs = xa[order]  # xa is already an ndarray, so no second np.array() wrap is needed
ys = np.array(predf)[order]
# epreds = np.array(epred[:, None])[order]
# Two side-by-side axes sharing the y axis.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(13, 2.5))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predictions by the i-th decision tree.
# This excerpt relies on n, r, left_idx, right_idx, yi, y and predf, all of
# which are defined outside the lines shown here.
predi = np.zeros(n)
np.put(predi, left_idx, np.repeat(np.mean(yi[left_idx]), r))  # left side: fill with mean of y
np.put(predi, right_idx, np.repeat(np.mean(yi[right_idx]), n - r))  # right side: fill with mean of y
predi = predi[:, None]  # add a trailing axis (n x 1) to be compatible with y
predf = predf + predi  # final prediction = previous prediction + new prediction of the residual
ei = y - predf  # the original y is needed here: the residual is always taken from the original y
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xi = x  # initialization of input
yi = y  # initialization of target
# x, y --> use these where the original y must stay unchanged
ei = 0  # initialization of error
n = len(yi)  # number of rows
predf = 0  # initial prediction is 0
for i in range(30): # loop will make 30 trees (n_estimators). | |
tree = DecisionTree(xi,yi) # DecisionTree scratch code can be found in shared github/kaggle link. | |
# It just creates a single decision tree with the provided min. sample leaf.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Input: a one-column DataFrame named 'x' holding the integers 0..49.
x = np.arange(0, 50)
x = pd.DataFrame({'x': x})

# Just random uniform distributions in different ranges (10 samples each).
y1 = np.random.uniform(10, 15, 10)
y2 = np.random.uniform(20, 25, 10)
y3 = np.random.uniform(0, 5, 10)
y4 = np.random.uniform(30, 32, 10)
y5 = np.random.uniform(13, 17, 10)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from waterfallcharts import quick_charts as qc

# Labels and the value attached to each label, in matching order.
a = ['Bias', 'Age', 'Sex', 'Blood Pressure']
b = [0.3, 0.6, -0.1, -0.2]

# String literals use plain ASCII quotes; typographic quotes are not valid
# string delimiters in Python.
plot = qc.waterfall(
    a,
    b,
    Title='Patient A',
    y_lab='Predicted probability',
    x_lab='Contributing features (path)',
    net_label='Final Prediction',
)
plot.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Pseudo code: | |
def pred_ci(model, x_val, percentile = 95, n_pnt): | |
""" | |
x_val = validation input | |
percentile = required confidence level | |
model = random forest model | |
""" | |
allTree_preds = np.stack([t.predict(x_val) for t in model.estimators_], axis = 0) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Defining MSE as the scoring criterion (any other criterion can be used in a similar manner).
def score(x1, x2):
    """Return the mean squared error between *x1* and *x2*.

    The value is exactly what ``metrics.mean_squared_error(x1, x2)`` returns
    (``metrics`` is presumably ``sklearn.metrics`` - confirm against the
    file's imports).  Note that this is MSE, not RMSE: no square root is
    applied, so take the square root of the result if RMSE is required.
    """
    return metrics.mean_squared_error(x1, x2)
# defining feature importance function based on above logic | |
def feat_imp(m, x, y, small_good = True): | |
""" | |
m: random forest model | |
x: matrix of independent variables | |
y: output variable |
NewerOlder