This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ask the fitted model for SHAP values on the test pool.
test_pool = Pool(X_test, label=y_test, cat_features=categorical_features_indices)
shap_values = model.get_feature_importance(test_pool, type="ShapValues")

# The trailing column is split off: its value in the first row is used as the
# expected (base) value, and the remaining columns are kept as the SHAP values.
expected_value = shap_values[0, -1]
shap_values = shap_values[:, :-1]

# Draw the force plot for the test row at position 3.
shap.initjs()
shap.force_plot(expected_value, shap_values[3, :], X_test.iloc[3, :])
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template call: replace the "___" placeholder with one of the importance
# types listed below.
cb = CatBoostRegressor()
cb.get_feature_importance(type="___")
"type" possible values: | |
- PredictionValuesChange
- LossFunctionChange
- FeatureImportance:
    PredictionValuesChange for non-ranking metrics and LossFunctionChange for ranking metrics
- ShapValues:
    Calculate SHAP values for every object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import * | |
# Toy dataset: column 0 holds string values, columns 1 and 2 are numeric.
train_data = [["a", 1, 1], ["b", 3, 0], ["a", 3, 1]]
test_data = [["a", 1, 2]]
train_labels = [10, 20, 30]

model = CatBoostRegressor(iterations=10)
# Column 0 contains strings, so it is declared categorical here; without
# cat_features CatBoost tries to read every column as a number.
model.fit(train_data, train_labels, cat_features=[0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code. Only load this checkpoint from a trusted source.
infersent = torch.load(
    "InferSent/encoder/infersent.allnli.pickle",
    # The callable returns each storage unchanged instead of moving it.
    map_location=lambda storage, loc: storage,
)
infersent.set_glove_path("InferSent/dataset/GloVe/glove.840B.300d.txt")
infersent.build_vocab(sentences, tokenize=True)

# Starts empty; the code that fills it lies beyond this excerpt.
dict_embeddings = {}
for i in range(len(sentences)): | |
print(i) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import metrics | |
import numpy as np | |
# Imbalanced ground truth: 100 ones followed by 900 zeros.
y_true = np.concatenate((np.ones(100), np.zeros(900)))

# First score vector: 5 draws from [0.5, 1) followed by 995 draws from [0, 0.5).
a = np.random.uniform(0.5, 1, 5)
b = np.random.uniform(0, 0.5, 995)
y_pred1 = np.concatenate((a, b))

# `a` is redrawn with 90 values from [0.5, 1); its use lies beyond this excerpt.
a = np.random.uniform(0.5, 1, 90)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.translate.bleu_score import sentence_bleu | |
# Example 1: score one tokenized candidate against a single tokenized reference.
reference = [["the", "cat", "is", "sitting", "on", "the", "mat"]]
candidate = ["on", "the", "mat", "is", "a", "cat"]
score = sentence_bleu(reference, candidate)
print(score)

from nltk.translate.bleu_score import sentence_bleu

# Example 2: same reference with a different candidate; the scoring call lies
# beyond this excerpt.
reference = [["the", "cat", "is", "sitting", "on", "the", "mat"]]
candidate = ["there", "is", "cat", "sitting", "cat"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from sklearn import datasets, linear_model | |
def metrics(m,X,y): | |
yhat = m.predict(X) | |
print(yhat) | |
SS_Residual = sum((y-yhat)**2) | |
SS_Total = sum((y-np.mean(y))**2) | |
r_squared = 1 - (float(SS_Residual))/SS_Total |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import catboost as cb | |
# Column positions 0 through 6.
# NOTE(review): presumably the categorical feature columns handed to CatBoost;
# confirm against the code that uses this list.
cat_features_index = [0, 1, 2, 3, 4, 5, 6]
def auc(m, train, test):
    """Return the ROC AUC of a model on the train and test features.

    Labels are not passed in: ``y_train`` and ``y_test`` are read from module
    scope, as is the ``metrics`` module that provides ``roc_auc_score``.

    Args:
        m: Model exposing ``predict_proba``. Column 1 of its output is used as
            the score (presumably the positive-class probability; confirm).
        train: Feature matrix scored against ``y_train``.
        test: Feature matrix scored against ``y_test``.

    Returns:
        A ``(train_auc, test_auc)`` tuple.
    """
    train_auc = metrics.roc_auc_score(y_train, m.predict_proba(train)[:, 1])
    test_auc = metrics.roc_auc_score(y_test, m.predict_proba(test)[:, 1])
    return (train_auc, test_auc)
params = {'depth': [4, 7, 10], | |
'learning_rate' : [0.03, 0.1, 0.15], | |
'l2_leaf_reg': [1,4,9], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd, numpy as np, time | |
from sklearn.model_selection import train_test_split | |
# Load the flights table and keep a reproducible 10% sample (fixed seed).
data = pd.read_csv("flights.csv")
data = data.sample(frac=0.1, random_state=10)

# Keep only the listed columns and drop every row with a missing value.
# The result is reassigned rather than using dropna(inplace=True), which
# mutates a freshly selected frame and can raise SettingWithCopyWarning.
data = data[
    [
        "MONTH",
        "DAY",
        "DAY_OF_WEEK",
        "AIRLINE",
        "FLIGHT_NUMBER",
        "DESTINATION_AIRPORT",
        "ORIGIN_AIRPORT",
        "AIR_TIME",
        "DEPARTURE_TIME",
        "DISTANCE",
        "ARRIVAL_DELAY",
    ]
].dropna()
Newer Older