import xgboost as xgb
from sklearn import metrics

# Report train and test AUC for a fitted classifier; y_train and y_test are
# assumed to come from an earlier train/test split.
def auc(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict_proba(train)[:, 1]),
            metrics.roc_auc_score(y_test, m.predict_proba(test)[:, 1]))

# Parameter Tuning
model = xgb.XGBClassifier()
param_dist = {"max_depth": [10, 30, 50],
import lightgbm as lgb
from sklearn import metrics

# Same idea as auc() above, but LGBMClassifier.predict returns hard class
# labels, so the AUC here is computed on labels rather than probabilities.
def auc2(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict(train)),
            metrics.roc_auc_score(y_test, m.predict(test)))

lg = lgb.LGBMClassifier(silent=False)
param_dist = {"max_depth": [25, 50, 75],
              "learning_rate": [0.01, 0.05, 0.1],
import pandas as pd, numpy as np, time
from sklearn.model_selection import train_test_split

# Load the flights dataset, keep a 10% sample, and drop rows with missing values.
data = pd.read_csv("flights.csv")
data = data.sample(frac=0.1, random_state=10)
data = data[["MONTH", "DAY", "DAY_OF_WEEK", "AIRLINE", "FLIGHT_NUMBER", "DESTINATION_AIRPORT",
             "ORIGIN_AIRPORT", "AIR_TIME", "DEPARTURE_TIME", "DISTANCE", "ARRIVAL_DELAY"]]
data.dropna(inplace=True)
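# Hedged continuation (not in the original gist): binarize the delay target,
# integer-encode the categorical columns, and split, so that `train`, `test`,
# `y_train`, and `y_test` used by the tuning snippets exist. The 10-minute
# threshold and the encoding choice are assumptions.
data["ARRIVAL_DELAY"] = (data["ARRIVAL_DELAY"] > 10) * 1

from sklearn.preprocessing import LabelEncoder
for col in ["AIRLINE", "FLIGHT_NUMBER", "DESTINATION_AIRPORT", "ORIGIN_AIRPORT"]:
    data[col] = LabelEncoder().fit_transform(data[col].astype(str))

train, test, y_train, y_test = train_test_split(
    data.drop(["ARRIVAL_DELAY"], axis=1), data["ARRIVAL_DELAY"],
    test_size=0.25, random_state=10)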
import catboost as cb
from sklearn import metrics  # roc_auc_score is used in auc() below

# Column indices of the categorical features.
cat_features_index = [0, 1, 2, 3, 4, 5, 6]

def auc(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict_proba(train)[:, 1]),
            metrics.roc_auc_score(y_test, m.predict_proba(test)[:, 1]))

params = {'depth': [4, 7, 10],
          'learning_rate': [0.03, 0.1, 0.15],
          'l2_leaf_reg': [1, 4, 9],
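          # The rest of the grid is cut off in the original gist; closing it here
          # and fitting one assumed configuration directly for illustration.
          'iterations': [300]}

clf = cb.CatBoostClassifier(iterations=300, depth=10, learning_rate=0.15,
                            l2_leaf_reg=4, eval_metric="AUC", verbose=False)
clf.fit(train, y_train, cat_features=cat_features_index)
print(auc(clf, train, test))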
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model

# Goodness-of-fit metrics for a fitted regression model.
def metrics(m, X, y):
    yhat = m.predict(X)
    print(yhat)
    SS_Residual = sum((y - yhat) ** 2)
    SS_Total = sum((y - np.mean(y)) ** 2)
    r_squared = 1 - float(SS_Residual) / SS_Total
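    # Hedged completion: the gist stops here; adjusted R-squared with n samples
    # and k predictors is presumably what follows.
    n, k = X.shape
    adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - k - 1)
    return r_squared, adjusted_r_squared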
from nltk.translate.bleu_score import sentence_bleu

reference = [["the", "cat", "is", "sitting", "on", "the", "mat"]]
candidate = ["on", "the", "mat", "is", "a", "cat"]
score = sentence_bleu(reference, candidate)
print(score)

from nltk.translate.bleu_score import sentence_bleu

reference = [["the", "cat", "is", "sitting", "on", "the", "mat"]]
candidate = ["there", "is", "cat", "sitting", "cat"]
from sklearn import metrics
import numpy as np

# 1,000 examples with a 10% positive rate.
y_true = np.concatenate((np.ones(100), np.zeros(900)))

# First scorer: only 5 examples receive a confident (>0.5) score.
a = np.random.uniform(0.5, 1, 5)
b = np.random.uniform(0, 0.5, 995)
y_pred1 = np.concatenate((a, b))

# Second scorer: 90 examples receive a confident score.
a = np.random.uniform(0.5, 1, 90)
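# Hedged completion: the gist is cut off after the second draw; keeping both
# prediction vectors at length 1,000 and comparing their AUCs is an assumption
# about how the example ends.
b = np.random.uniform(0, 0.5, 910)
y_pred2 = np.concatenate((a, b))
print("AUC with 5 confident positives: ", metrics.roc_auc_score(y_true, y_pred1))
print("AUC with 90 confident positives:", metrics.roc_auc_score(y_true, y_pred2))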
import torch

# Load the pre-trained InferSent encoder on CPU and point it at the GloVe vectors.
infersent = torch.load('InferSent/encoder/infersent.allnli.pickle', map_location=lambda storage, loc: storage)
infersent.set_glove_path("InferSent/dataset/GloVe/glove.840B.300d.txt")
infersent.build_vocab(sentences, tokenize=True)  # `sentences` is assumed to be defined earlier

dict_embeddings = {}
for i in range(len(sentences)):
    print(i)
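    # Hedged sketch of the rest of the loop (not in the original gist): encode
    # each sentence with InferSent and keep the vector keyed by the sentence text.
    dict_embeddings[sentences[i]] = infersent.encode([sentences[i]], tokenize=True)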
from catboost import CatBoostRegressor

# Toy regression example; column 0 holds strings, so it has to be declared
# categorical or CatBoost will reject the values.
train_data = [["a", 1, 1], ["b", 3, 0], ["a", 3, 1]]
test_data = [["a", 1, 2]]
train_labels = [10, 20, 30]

model = CatBoostRegressor(iterations=10)
model.fit(train_data, train_labels, cat_features=[0])
print(model.predict(test_data))
cb = CatBoostRegressor()
cb.get_feature_importance(type="___")

Possible values for "type":
- PredictionValuesChange
- LossFunctionChange
- FeatureImportance: PredictionValuesChange for non-ranking metrics and LossFunctionChange for ranking metrics
- ShapValues: calculates SHAP values for every object
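A hedged usage sketch (the Pool construction and the type choices below are illustrative, not part of the original snippet), reusing the toy data from the regressor example above:

from catboost import Pool
pool = Pool(train_data, train_labels, cat_features=[0])
fitted = CatBoostRegressor(iterations=10).fit(pool)
print(fitted.get_feature_importance(type="PredictionValuesChange"))
print(fitted.get_feature_importance(data=pool, type="ShapValues"))  # SHAP needs the dataset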