This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import cross_val_predict | |
from sklearn.metrics import precision_recall_curve | |
# First get the decision scores for every training instance. With
# method="decision_function", cross_val_predict returns raw scores rather
# than class labels, using 3-fold cross-validation.
y_scores = cross_val_predict(
    classifier, X_train, y_train, cv=3, method="decision_function"
)

# Then derive the precision/recall pairs and the score thresholds that
# produce them.
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_precision_vs_recall(precisions, recalls, metric_name=None, metric_perc=None): | |
plt.figure(figsize=(15, 10)) | |
plt.plot(recalls, precisions, 'b-', linewidth=2) | |
plt.xlabel("Recall", fontsize=15) | |
plt.ylabel("Precision", fontsize=15) | |
plt.axis([0, 1, 0, 1]) | |
if metric_name=='precision': | |
# Recall and threshold traded off at the precision percentage we want.
recall_atperc_precision = recalls[np.argmax(precisions >= metric_perc)] | |
threshold_atperc_precision = thresholds[np.argmax(precisions >= metric_perc)] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In[1]  y_scores = clf.decision_function([instance_from_test_set])
       no_threshold = 0
       y_some_prediction = (y_scores > no_threshold)
       y_some_prediction
Out[1] array([True])
In[2]  # Threshold returned from plotting func
       y_some_prediction = (y_scores > threshold)
       y_some_prediction
Out[2] array([False])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def determine_outlier_thresholds_iqr(dataframe, col_name, th1=0.25, th3=0.75):
    """Return the (lower, upper) outlier limits of a column via the IQR rule.

    Args:
        dataframe: Table indexable by ``col_name``; the selected column must
            provide ``quantile`` (e.g. a pandas DataFrame).
        col_name: Name of the column to analyse.
        th1: Lower quantile used as the first "quartile" (default 0.25).
        th3: Upper quantile used as the third "quartile" (default 0.75).

    Returns:
        A ``(lower_limit, upper_limit)`` tuple. Each limit lies 1.5 times the
        inter-quantile range outside its respective quantile.
    """
    column = dataframe[col_name]
    quartile1 = column.quantile(th1)
    quartile3 = column.quantile(th3)
    iqr = quartile3 - quartile1
    lower_limit = quartile1 - 1.5 * iqr
    upper_limit = quartile3 + 1.5 * iqr
    return lower_limit, upper_limit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_outliers_iqr(dataframe, col_name):
    """Report whether a column has any value outside its IQR outlier limits.

    Args:
        dataframe: Table indexable by ``col_name`` (e.g. a pandas DataFrame).
        col_name: Name of the column to check.

    Returns:
        ``True`` if at least one value is strictly above the upper limit or
        strictly below the lower limit returned by
        ``determine_outlier_thresholds_iqr`` (called with its default
        quantiles), otherwise ``False``.
    """
    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name)
    column = dataframe[col_name]
    is_outlier = (column > upper_limit) | (column < lower_limit)
    # Test the mask itself. Calling .any() on the filtered rows would look at
    # the truthiness of their cell values, so outlier rows holding only
    # zeros/NaN would wrongly be reported as "no outliers".
    return bool(is_outlier.any())
def replace_with_thresholds_iqr(dataframe,cols, th1=0.05, th3=0.95, replace=False): | |
from tabulate import tabulate | |
data = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def determine_outlier_thresholds_std(dataframe, col_name):
    """Return the (lower, upper) outlier boundaries of a column via the 3-sigma rule.

    Args:
        dataframe: Table indexable by ``col_name``; the selected column must
            provide ``mean`` and ``std`` (e.g. a pandas DataFrame).
        col_name: Name of the column to analyse.

    Returns:
        A ``(lower_boundary, upper_boundary)`` tuple located three standard
        deviations below and above the column mean.
    """
    column = dataframe[col_name]
    # Compute each statistic once instead of once per boundary.
    center = column.mean()
    spread = 3 * column.std()
    lower_boundary = center - spread
    upper_boundary = center + spread
    return lower_boundary, upper_boundary
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_outliers_std(dataframe, col_name):
    """Report whether a column has any value outside its 3-sigma boundaries.

    Args:
        dataframe: Table indexable by ``col_name`` (e.g. a pandas DataFrame).
        col_name: Name of the column to check.

    Returns:
        ``True`` if at least one value is strictly above the upper boundary
        or strictly below the lower boundary returned by
        ``determine_outlier_thresholds_std``, otherwise ``False``.
    """
    lower_boundary, upper_boundary = determine_outlier_thresholds_std(dataframe, col_name)
    column = dataframe[col_name]
    is_outlier = (column > upper_boundary) | (column < lower_boundary)
    # Test the mask itself. Calling .any() on the filtered rows would look at
    # the truthiness of their cell values, so outlier rows holding only
    # zeros/NaN would wrongly be reported as "no outliers".
    return bool(is_outlier.any())
def replace_with_thresholds_std(dataframe, cols, replace=False): | |
from tabulate import tabulate | |
data = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from tensorflow import keras |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the network one layer at a time with Sequential.add().
model = keras.models.Sequential()
# Flatten each 28x28 input into a 1-D vector.
model.add(keras.layers.Flatten(input_shape=[28, 28]))
# Two hidden fully connected layers with ReLU activation.
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
# Output layer: 10 units with softmax activation.
model.add(keras.layers.Dense(10, activation="softmax"))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Same architecture, declared in one go by passing the layer list to the
# Sequential constructor.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),   # 28x28 input -> 1-D vector
    keras.layers.Dense(300, activation="relu"),   # hidden layer 1
    keras.layers.Dense(100, activation="relu"),   # hidden layer 2
    keras.layers.Dense(10, activation="softmax"),  # 10-unit softmax output
])